1
Mostly this is patches from me and RTH cleaning up and doing
1
I don't have anything else queued up at the moment, so this is just
2
more decodetree conversion for AArch32 Neon. The major new feature
2
Richard's SME patches.
3
is Dongjiu Geng's patchset to report host memory errors to KVM guests;
4
also a new aspeed board from Patrick Williams.
5
3
6
thanks
7
-- PMM
4
-- PMM
8
5
9
The following changes since commit 035b448b84f3557206abc44d786c5d3db2638f7d:
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
10
7
11
Merge remote-tracking branch 'remotes/gkurz/tags/9p-next-2020-05-14' into staging (2020-05-14 10:58:30 +0100)
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
12
9
13
are available in the Git repository at:
10
are available in the Git repository at:
14
11
15
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20200514
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
16
13
17
for you to fetch changes up to e95485f85657be21135c17a9226e297c21e73360:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
18
15
19
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree (2020-05-14 15:03:09 +0100)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
20
17
21
----------------------------------------------------------------
18
----------------------------------------------------------------
22
target-arm queue:
19
target-arm:
23
* target/arm: Use correct GDB XML for M-profile cores
20
* Implement SME emulation, for both system and linux-user
24
* target/arm: Code cleanup to use gvec APIs better
25
* aspeed: Add support for the sonorapass-bmc board
26
* target/arm: Support reporting KVM host memory errors
27
to the guest via ACPI notifications
28
* target/arm: Finish conversion of Neon 3-reg-same insns to decodetree
29
21
30
----------------------------------------------------------------
22
----------------------------------------------------------------
31
Dongjiu Geng (10):
23
Richard Henderson (45):
32
acpi: nvdimm: change NVDIMM_UUID_LE to a common macro
24
target/arm: Handle SME in aarch64_cpu_dump_state
33
hw/arm/virt: Introduce a RAS machine option
25
target/arm: Add infrastructure for disas_sme
34
docs: APEI GHES generation and CPER record description
26
target/arm: Trap non-streaming usage when Streaming SVE is active
35
ACPI: Build related register address fields via hardware error fw_cfg blob
27
target/arm: Mark ADR as non-streaming
36
ACPI: Build Hardware Error Source Table
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
37
ACPI: Record the Generic Error Status Block address
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
38
KVM: Move hwpoison page related functions into kvm-all.c
30
target/arm: Mark PMULL, FMMLA as non-streaming
39
ACPI: Record Generic Error Status Block(GESB) table
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
40
target-arm: kvm64: handle SIGBUS signal from kernel or KVM
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
41
MAINTAINERS: Add ACPI/HEST/GHES entries
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
42
69
43
Patrick Williams (1):
70
docs/system/arm/emulation.rst | 4 +
44
aspeed: Add support for the sonorapass-bmc board
71
linux-user/aarch64/target_cpu.h | 5 +-
45
72
linux-user/aarch64/target_prctl.h | 62 +-
46
Peter Maydell (18):
73
target/arm/cpu.h | 7 +
47
target/arm: Use correct GDB XML for M-profile cores
74
target/arm/helper-sme.h | 126 ++++
48
target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree
75
target/arm/helper-sve.h | 4 +
49
target/arm: Convert Neon 3-reg-same SHA to decodetree
76
target/arm/helper.h | 18 +
50
target/arm: Convert Neon 64-bit element 3-reg-same insns
77
target/arm/translate-a64.h | 45 ++
51
target/arm: Convert Neon VHADD 3-reg-same insns
78
target/arm/translate.h | 16 +
52
target/arm: Convert Neon VABA/VABD 3-reg-same to decodetree
79
target/arm/sme-fa64.decode | 60 ++
53
target/arm: Convert Neon VRHADD, VHSUB 3-reg-same insns to decodetree
80
target/arm/sme.decode | 88 +++
54
target/arm: Convert Neon VQSHL, VRSHL, VQRSHL 3-reg-same insns to decodetree
81
target/arm/sve.decode | 41 +-
55
target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns to decodetree
82
linux-user/aarch64/cpu_loop.c | 9 +
56
target/arm: Convert Neon VPADD 3-reg-same insns to decodetree
83
linux-user/aarch64/signal.c | 243 ++++++--
57
target/arm: Convert Neon VQDMULH/VQRDMULH 3-reg-same to decodetree
84
linux-user/elfload.c | 20 +
58
target/arm: Convert Neon VADD, VSUB, VABD 3-reg-same insns to decodetree
85
linux-user/syscall.c | 28 +-
59
target/arm: Convert Neon VPMIN/VPMAX/VPADD float 3-reg-same insns to decodetree
86
target/arm/cpu.c | 35 +-
60
target/arm: Convert Neon fp VMUL, VMLA, VMLS 3-reg-same insns to decodetree
87
target/arm/cpu64.c | 11 +
61
target/arm: Convert Neon 3-reg-same compare insns to decodetree
88
target/arm/helper.c | 56 +-
62
target/arm: Move 'env' argument of recps_f32 and rsqrts_f32 helpers to usual place
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
63
target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree
90
target/arm/sve_helper.c | 28 +
64
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree
91
target/arm/translate-a64.c | 103 +++-
65
92
target/arm/translate-sme.c | 373 ++++++++++++
66
Richard Henderson (16):
93
target/arm/translate-sve.c | 393 ++++++++++---
67
target/arm: Create gen_gvec_[us]sra
94
target/arm/translate-vfp.c | 12 +
68
target/arm: Create gen_gvec_{u,s}{rshr,rsra}
95
target/arm/translate.c | 2 +
69
target/arm: Create gen_gvec_{sri,sli}
96
target/arm/vec_helper.c | 24 +
70
target/arm: Remove unnecessary range check for VSHL
97
target/arm/meson.build | 3 +
71
target/arm: Tidy handle_vec_simd_shri
98
28 files changed, 2821 insertions(+), 135 deletions(-)
72
target/arm: Create gen_gvec_{ceq,clt,cle,cgt,cge}0
99
create mode 100644 target/arm/sme-fa64.decode
73
target/arm: Create gen_gvec_{mla,mls}
100
create mode 100644 target/arm/sme.decode
74
target/arm: Swap argument order for VSHL during decode
101
create mode 100644 target/arm/translate-sme.c
75
target/arm: Create gen_gvec_{cmtst,ushl,sshl}
76
target/arm: Create gen_gvec_{uqadd, sqadd, uqsub, sqsub}
77
target/arm: Remove fp_status from helper_{recpe, rsqrte}_u32
78
target/arm: Create gen_gvec_{qrdmla,qrdmls}
79
target/arm: Pass pointer to qc to qrdmla/qrdmls
80
target/arm: Clear tail in gvec_fmul_idx_*, gvec_fmla_idx_*
81
target/arm: Vectorize SABD/UABD
82
target/arm: Vectorize SABA/UABA
83
84
docs/specs/acpi_hest_ghes.rst | 110 ++
85
docs/specs/index.rst | 1 +
86
configure | 4 +-
87
default-configs/arm-softmmu.mak | 1 +
88
include/hw/acpi/aml-build.h | 1 +
89
include/hw/acpi/generic_event_device.h | 2 +
90
include/hw/acpi/ghes.h | 74 +
91
include/hw/arm/virt.h | 1 +
92
include/qemu/uuid.h | 27 +
93
include/sysemu/kvm.h | 3 +-
94
include/sysemu/kvm_int.h | 12 +
95
target/arm/cpu.h | 4 +
96
target/arm/helper.h | 78 +-
97
target/arm/internals.h | 5 +-
98
target/arm/translate.h | 84 +-
99
target/i386/cpu.h | 2 +
100
target/arm/neon-dp.decode | 119 +-
101
accel/kvm/kvm-all.c | 36 +
102
hw/acpi/aml-build.c | 2 +
103
hw/acpi/generic_event_device.c | 19 +
104
hw/acpi/ghes.c | 448 ++++++
105
hw/acpi/nvdimm.c | 10 +-
106
hw/arm/aspeed.c | 78 ++
107
hw/arm/virt-acpi-build.c | 15 +
108
hw/arm/virt.c | 23 +
109
target/arm/cpu_tcg.c | 1 +
110
target/arm/gdbstub.c | 22 +-
111
target/arm/helper.c | 2 +-
112
target/arm/kvm64.c | 77 ++
113
target/arm/neon_helper.c | 17 -
114
target/arm/tlb_helper.c | 2 +-
115
target/arm/translate-a64.c | 210 +--
116
target/arm/translate-neon.inc.c | 682 +++++++++-
117
target/arm/translate.c | 2349 +++++++++++++++++---------------
118
target/arm/vec_helper.c | 240 +++-
119
target/arm/vfp_helper.c | 9 +-
120
target/i386/kvm.c | 36 -
121
MAINTAINERS | 9 +
122
gdb-xml/arm-m-profile.xml | 27 +
123
hw/acpi/Kconfig | 4 +
124
hw/acpi/Makefile.objs | 1 +
125
41 files changed, 3402 insertions(+), 1445 deletions(-)
126
create mode 100644 docs/specs/acpi_hest_ghes.rst
127
create mode 100644 include/hw/acpi/ghes.h
128
create mode 100644 hw/acpi/ghes.c
129
create mode 100644 gdb-xml/arm-m-profile.xml
130
diff view generated by jsdifflib
1
Convert the Neon VQDMULH and VQRDMULH 3-reg-same insns to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
decodetree. These are the last integer operations in the
3
3-reg-same group.
4
2
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-11-peter.maydell@linaro.org
8
---
9
---
9
target/arm/neon-dp.decode | 3 +++
10
target/arm/cpu.c | 17 ++++++++++++++++-
10
target/arm/translate-neon.inc.c | 24 ++++++++++++++++++++++++
11
1 file changed, 16 insertions(+), 1 deletion(-)
11
target/arm/translate.c | 24 +-----------------------
12
3 files changed, 28 insertions(+), 23 deletions(-)
13
12
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
15
--- a/target/arm/cpu.c
17
+++ b/target/arm/neon-dp.decode
16
+++ b/target/arm/cpu.c
18
@@ -XXX,XX +XXX,XX @@ VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
19
VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
18
int i;
20
VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
19
int el = arm_current_el(env);
21
20
const char *ns_status;
22
+VQDMULH_3s 1111 001 0 0 . .. .... .... 1011 . . . 0 .... @3same
21
+ bool sve;
23
+VQRDMULH_3s 1111 001 1 0 . .. .... .... 1011 . . . 0 .... @3same
22
24
+
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
25
VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
24
for (i = 0; i < 32; i++) {
26
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
27
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
26
el,
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
27
psr & PSTATE_SP ? 'h' : 't');
29
index XXXXXXX..XXXXXXX 100644
28
30
--- a/target/arm/translate-neon.inc.c
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
31
+++ b/target/arm/translate-neon.inc.c
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
32
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPMIN_S, pmin_s)
31
+ env->svcr,
33
DO_3SAME_PAIR(VPMAX_U, pmax_u)
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
34
DO_3SAME_PAIR(VPMIN_U, pmin_u)
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
35
DO_3SAME_PAIR(VPADD, padd_u)
34
+ }
36
+
35
if (cpu_isar_feature(aa64_bti, cpu)) {
37
+#define DO_3SAME_VQDMULH(INSN, FUNC) \
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
38
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
37
}
39
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
40
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
41
+ uint32_t rn_ofs, uint32_t rm_ofs, \
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
42
+ uint32_t oprsz, uint32_t maxsz) \
41
43
+ { \
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
44
+ static const GVecGen3 ops[2] = { \
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ { .fni4 = gen_##INSN##_tramp16 }, \
44
+ sve = sme_exception_el(env, el) == 0;
46
+ { .fni4 = gen_##INSN##_tramp32 }, \
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
47
+ }; \
46
+ sve = sve_exception_el(env, el) == 0;
48
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
47
+ } else {
49
+ } \
48
+ sve = false;
50
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
51
+ { \
52
+ if (a->size != 1 && a->size != 2) { \
53
+ return false; \
54
+ } \
55
+ return do_3same(s, a, gen_##INSN##_3s); \
56
+ }
49
+ }
57
+
50
+
58
+DO_3SAME_VQDMULH(VQDMULH, qdmulh)
51
+ if (sve) {
59
+DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
52
int j, zcr_len = sve_vqm1_for_el(env, el);
60
diff --git a/target/arm/translate.c b/target/arm/translate.c
53
61
index XXXXXXX..XXXXXXX 100644
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
62
--- a/target/arm/translate.c
63
+++ b/target/arm/translate.c
64
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
65
case NEON_3R_VPMAX:
66
case NEON_3R_VPMIN:
67
case NEON_3R_VPADD_VQRDMLAH:
68
+ case NEON_3R_VQDMULH_VQRDMULH:
69
/* Already handled by decodetree */
70
return 1;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
73
tmp2 = neon_load_reg(rm, pass);
74
}
75
switch (op) {
76
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
77
- if (!u) { /* VQDMULH */
78
- switch (size) {
79
- case 1:
80
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
81
- break;
82
- case 2:
83
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
84
- break;
85
- default: abort();
86
- }
87
- } else { /* VQRDMULH */
88
- switch (size) {
89
- case 1:
90
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
91
- break;
92
- case 2:
93
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
94
- break;
95
- default: abort();
96
- }
97
- }
98
- break;
99
case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
100
{
101
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
102
--
55
--
103
2.20.1
56
2.25.1
104
105
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This patch builds error_block_address and read_ack_register fields
3
This includes the build rules for the decoder, and the
4
in hardware errors table , the error_block_address points to Generic
4
new file for translation, but excludes any instructions.
5
Error Status Block(GESB) via bios_linker. The max size for one GESB
6
is 1kb, For more detailed information, please refer to
7
document: docs/specs/acpi_hest_ghes.rst
8
5
9
Now we only support one Error source, if necessary, we can extend to
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
support more.
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
12
Suggested-by: Laszlo Ersek <lersek@redhat.com>
13
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
14
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
15
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
16
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
17
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
18
Message-id: 20200512030609.19593-5-gengdongjiu@huawei.com
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
---
10
---
21
default-configs/arm-softmmu.mak | 1 +
11
target/arm/translate-a64.h | 1 +
22
include/hw/acpi/aml-build.h | 1 +
12
target/arm/sme.decode | 20 ++++++++++++++++++++
23
include/hw/acpi/ghes.h | 28 +++++++++++
13
target/arm/translate-a64.c | 7 ++++++-
24
hw/acpi/aml-build.c | 2 +
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
25
hw/acpi/ghes.c | 89 +++++++++++++++++++++++++++++++++
15
target/arm/meson.build | 2 ++
26
hw/arm/virt-acpi-build.c | 5 ++
16
5 files changed, 64 insertions(+), 1 deletion(-)
27
hw/acpi/Kconfig | 4 ++
17
create mode 100644 target/arm/sme.decode
28
hw/acpi/Makefile.objs | 1 +
18
create mode 100644 target/arm/translate-sme.c
29
8 files changed, 131 insertions(+)
30
create mode 100644 include/hw/acpi/ghes.h
31
create mode 100644 hw/acpi/ghes.c
32
19
33
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
34
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
35
--- a/default-configs/arm-softmmu.mak
22
--- a/target/arm/translate-a64.h
36
+++ b/default-configs/arm-softmmu.mak
23
+++ b/target/arm/translate-a64.h
37
@@ -XXX,XX +XXX,XX @@ CONFIG_FSL_IMX7=y
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
38
CONFIG_FSL_IMX6UL=y
25
}
39
CONFIG_SEMIHOSTING=y
26
40
CONFIG_ALLWINNER_H3=y
27
bool disas_sve(DisasContext *, uint32_t);
41
+CONFIG_ACPI_APEI=y
28
+bool disas_sme(DisasContext *, uint32_t);
42
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
29
43
index XXXXXXX..XXXXXXX 100644
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
44
--- a/include/hw/acpi/aml-build.h
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
45
+++ b/include/hw/acpi/aml-build.h
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
46
@@ -XXX,XX +XXX,XX @@ struct AcpiBuildTables {
47
GArray *rsdp;
48
GArray *tcpalog;
49
GArray *vmgenid;
50
+ GArray *hardware_errors;
51
BIOSLinker *linker;
52
} AcpiBuildTables;
53
54
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
55
new file mode 100644
33
new file mode 100644
56
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
57
--- /dev/null
35
--- /dev/null
58
+++ b/include/hw/acpi/ghes.h
36
+++ b/target/arm/sme.decode
59
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
60
+/*
38
+# AArch64 SME instruction descriptions
61
+ * Support for generating APEI tables and recording CPER for Guests
39
+#
62
+ *
40
+# Copyright (c) 2022 Linaro, Ltd
63
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
41
+#
64
+ *
42
+# This library is free software; you can redistribute it and/or
65
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
43
+# modify it under the terms of the GNU Lesser General Public
66
+ *
44
+# License as published by the Free Software Foundation; either
67
+ * This program is free software; you can redistribute it and/or modify
45
+# version 2.1 of the License, or (at your option) any later version.
68
+ * it under the terms of the GNU General Public License as published by
46
+#
69
+ * the Free Software Foundation; either version 2 of the License, or
47
+# This library is distributed in the hope that it will be useful,
70
+ * (at your option) any later version.
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
71
+
54
+
72
+ * This program is distributed in the hope that it will be useful,
55
+#
73
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
56
+# This file is processed by scripts/decodetree.py
74
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
57
+#
75
+ * GNU General Public License for more details.
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
76
+
77
+ * You should have received a copy of the GNU General Public License along
78
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
79
+ */
80
+
81
+#ifndef ACPI_GHES_H
82
+#define ACPI_GHES_H
83
+
84
+#include "hw/acpi/bios-linker-loader.h"
85
+
86
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
87
+#endif
88
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
89
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/acpi/aml-build.c
60
--- a/target/arm/translate-a64.c
91
+++ b/hw/acpi/aml-build.c
61
+++ b/target/arm/translate-a64.c
92
@@ -XXX,XX +XXX,XX @@ void acpi_build_tables_init(AcpiBuildTables *tables)
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
93
tables->table_data = g_array_new(false, true /* clear */, 1);
63
}
94
tables->tcpalog = g_array_new(false, true /* clear */, 1);
64
95
tables->vmgenid = g_array_new(false, true /* clear */, 1);
65
switch (extract32(insn, 25, 4)) {
96
+ tables->hardware_errors = g_array_new(false, true /* clear */, 1);
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
97
tables->linker = bios_linker_loader_init();
67
+ case 0x0:
98
}
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
99
69
+ unallocated_encoding(s);
100
@@ -XXX,XX +XXX,XX @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
70
+ }
101
g_array_free(tables->table_data, true);
71
+ break;
102
g_array_free(tables->tcpalog, mfre);
72
+ case 0x1: case 0x3: /* UNALLOCATED */
103
g_array_free(tables->vmgenid, mfre);
73
unallocated_encoding(s);
104
+ g_array_free(tables->hardware_errors, mfre);
74
break;
105
}
75
case 0x2:
106
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
107
/*
108
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
109
new file mode 100644
77
new file mode 100644
110
index XXXXXXX..XXXXXXX
78
index XXXXXXX..XXXXXXX
111
--- /dev/null
79
--- /dev/null
112
+++ b/hw/acpi/ghes.c
80
+++ b/target/arm/translate-sme.c
113
@@ -XXX,XX +XXX,XX @@
81
@@ -XXX,XX +XXX,XX @@
114
+/*
82
+/*
115
+ * Support for generating APEI tables and recording CPER for Guests
83
+ * AArch64 SME translation
116
+ *
84
+ *
117
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
85
+ * Copyright (c) 2022 Linaro, Ltd
118
+ *
86
+ *
119
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
87
+ * This library is free software; you can redistribute it and/or
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
120
+ *
91
+ *
121
+ * This program is free software; you can redistribute it and/or modify
92
+ * This library is distributed in the hope that it will be useful,
122
+ * it under the terms of the GNU General Public License as published by
123
+ * the Free Software Foundation; either version 2 of the License, or
124
+ * (at your option) any later version.
125
+
126
+ * This program is distributed in the hope that it will be useful,
127
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
128
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
129
+ * GNU General Public License for more details.
95
+ * Lesser General Public License for more details.
130
+
96
+ *
131
+ * You should have received a copy of the GNU General Public License along
97
+ * You should have received a copy of the GNU Lesser General Public
132
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
133
+ */
99
+ */
134
+
100
+
135
+#include "qemu/osdep.h"
101
+#include "qemu/osdep.h"
136
+#include "qemu/units.h"
102
+#include "cpu.h"
137
+#include "hw/acpi/ghes.h"
103
+#include "tcg/tcg-op.h"
138
+#include "hw/acpi/aml-build.h"
104
+#include "tcg/tcg-op-gvec.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
139
+
110
+
140
+#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
141
+#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
142
+
143
+/* The max size in bytes for one error block */
144
+#define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB)
145
+
146
+/* Now only support ARMv8 SEA notification type error source */
147
+#define ACPI_GHES_ERROR_SOURCE_COUNT 1
148
+
111
+
149
+/*
112
+/*
150
+ * Build table for the hardware error fw_cfg blob.
113
+ * Include the generated decoder.
151
+ * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
152
+ * See docs/specs/acpi_hest_ghes.rst for blobs format.
153
+ */
114
+ */
154
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
155
+{
156
+ int i, error_status_block_offset;
157
+
115
+
158
+ /* Build error_block_address */
116
+#include "decode-sme.c.inc"
159
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
160
+ build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
161
+ }
162
+
163
+ /* Build read_ack_register */
164
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
165
+ /*
166
+ * Initialize the value of read_ack_register to 1, so GHES can be
167
+ * writeable after (re)boot.
168
+ * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
169
+ * (GHESv2 - Type 10)
170
+ */
171
+ build_append_int_noprefix(hardware_errors, 1, sizeof(uint64_t));
172
+ }
173
+
174
+ /* Generic Error Status Block offset in the hardware error fw_cfg blob */
175
+ error_status_block_offset = hardware_errors->len;
176
+
177
+ /* Reserve space for Error Status Data Block */
178
+ acpi_data_push(hardware_errors,
179
+ ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
180
+
181
+ /* Tell guest firmware to place hardware_errors blob into RAM */
182
+ bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE,
183
+ hardware_errors, sizeof(uint64_t), false);
184
+
185
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
186
+ /*
187
+ * Tell firmware to patch error_block_address entries to point to
188
+ * corresponding "Generic Error Status Block"
189
+ */
190
+ bios_linker_loader_add_pointer(linker,
191
+ ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i,
192
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
193
+ error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
194
+ }
195
+
196
+ /*
197
+ * tell firmware to write hardware_errors GPA into
198
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
199
+ */
200
+ bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
201
+ 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
202
+}
203
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
204
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
205
--- a/hw/arm/virt-acpi-build.c
119
--- a/target/arm/meson.build
206
+++ b/hw/arm/virt-acpi-build.c
120
+++ b/target/arm/meson.build
207
@@ -XXX,XX +XXX,XX @@
121
@@ -XXX,XX +XXX,XX @@
208
#include "sysemu/reset.h"
122
gen = [
209
#include "kvm_arm.h"
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
210
#include "migration/vmstate.h"
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
211
+#include "hw/acpi/ghes.h"
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
212
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
213
#define ARM_SPI_BASE 32
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
214
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
215
@@ -XXX,XX +XXX,XX @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
129
'sme_helper.c',
216
acpi_add_table(table_offsets, tables_blob);
130
'translate-a64.c',
217
build_spcr(tables_blob, tables->linker, vms);
131
'translate-sve.c',
218
132
+ 'translate-sme.c',
219
+ if (vms->ras) {
133
))
220
+ build_ghes_error_table(tables->hardware_errors, tables->linker);
134
221
+ }
135
arm_softmmu_ss = ss.source_set()
222
+
223
if (ms->numa_state->num_nodes > 0) {
224
acpi_add_table(table_offsets, tables_blob);
225
build_srat(tables_blob, tables->linker, vms);
226
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
227
index XXXXXXX..XXXXXXX 100644
228
--- a/hw/acpi/Kconfig
229
+++ b/hw/acpi/Kconfig
230
@@ -XXX,XX +XXX,XX @@ config ACPI_HMAT
231
bool
232
depends on ACPI
233
234
+config ACPI_APEI
235
+ bool
236
+ depends on ACPI
237
+
238
config ACPI_PCI
239
bool
240
depends on ACPI && PCI
241
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
242
index XXXXXXX..XXXXXXX 100644
243
--- a/hw/acpi/Makefile.objs
244
+++ b/hw/acpi/Makefile.objs
245
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
246
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
247
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
248
common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
249
+common-obj-$(CONFIG_ACPI_APEI) += ghes.o
250
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
251
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o
252
253
--
136
--
254
2.20.1
137
2.25.1
255
256
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The functions eliminate duplication of the special cases for
3
This new behaviour is in the ARM pseudocode function
4
this operation. They match up with the GVecGen2iFn typedef.
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
Add out-of-line helpers. We got away with only having inline
6
the trap would be delivered is in AArch64 mode.
7
expanders because the neon vector size is only 16 bytes, and
7
8
we know that the inline expansion will always succeed.
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
When we reuse this for SVE, tcg-gvec-op may decide to use an
9
detection ought to be trivially true, but the pseudocode still contains
10
out-of-line helper due to longer vector lengths.
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
11
19
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20200513163245.17915-4-richard.henderson@linaro.org
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
24
---
17
target/arm/helper.h | 10 ++
25
target/arm/cpu.h | 7 +++
18
target/arm/translate.h | 7 +-
26
target/arm/translate.h | 4 ++
19
target/arm/translate-a64.c | 20 +---
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
20
target/arm/translate.c | 186 +++++++++++++++++++++----------------
28
target/arm/helper.c | 41 +++++++++++++++++
21
target/arm/vec_helper.c | 38 ++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
22
5 files changed, 160 insertions(+), 101 deletions(-)
30
target/arm/translate-vfp.c | 12 +++++
23
31
target/arm/translate.c | 2 +
24
diff --git a/target/arm/helper.h b/target/arm/helper.h
32
target/arm/meson.build | 1 +
25
index XXXXXXX..XXXXXXX 100644
33
8 files changed, 195 insertions(+), 2 deletions(-)
26
--- a/target/arm/helper.h
34
create mode 100644 target/arm/sme-fa64.decode
27
+++ b/target/arm/helper.h
35
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
29
DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
37
index XXXXXXX..XXXXXXX 100644
30
DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
38
--- a/target/arm/cpu.h
31
39
+++ b/target/arm/cpu.h
32
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
33
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
41
* the same thing as the current security state of the processor!
34
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
42
*/
35
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
43
FIELD(TBFLAG_A32, NS, 10, 1)
36
+
44
+/*
37
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
38
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
46
+ * This requires an SME trap from AArch32 mode when using NEON.
39
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
47
+ */
40
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
41
+
49
42
#ifdef TARGET_AARCH64
50
/*
43
#include "helper-a64.h"
51
* Bit usage when in AArch32 state, for M-profile only.
44
#include "helper-sve.h"
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
55
FIELD(TBFLAG_A64, SVL, 24, 4)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
58
59
/*
60
* Helpers for using the above.
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
46
index XXXXXXX..XXXXXXX 100644
62
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/translate.h
63
--- a/target/arm/translate.h
48
+++ b/target/arm/translate.h
64
+++ b/target/arm/translate.h
49
@@ -XXX,XX +XXX,XX @@ extern const GVecGen3 mls_op[4];
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
50
extern const GVecGen3 cmtst_op[4];
66
bool pstate_sm;
51
extern const GVecGen3 sshl_op[4];
67
/* True if PSTATE.ZA is set. */
52
extern const GVecGen3 ushl_op[4];
68
bool pstate_za;
53
-extern const GVecGen2i sri_op[4];
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
54
-extern const GVecGen2i sli_op[4];
70
+ bool sme_trap_nonstreaming;
55
extern const GVecGen4 uqadd_op[4];
71
+ /* True if the current instruction is non-streaming. */
56
extern const GVecGen4 sqadd_op[4];
72
+ bool is_nonstreaming;
57
extern const GVecGen4 uqsub_op[4];
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
58
@@ -XXX,XX +XXX,XX @@ void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
74
bool mve_no_pred;
59
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
75
/*
60
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
61
77
new file mode 100644
62
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
78
index XXXXXXX..XXXXXXX
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
79
--- /dev/null
64
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
80
+++ b/target/arm/sme-fa64.decode
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/target/arm/helper.c
175
+++ b/target/arm/helper.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
178
}
179
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
181
+static bool sme_fa64(CPUARMState *env, int el)
182
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
184
+ return false;
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
66
+
205
+
67
/*
206
/*
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
207
* Given that SVE is enabled, return the vector length for EL.
69
*/
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
212
213
+ /*
214
+ * The SME exception we are testing for is raised via
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
217
+ */
218
+ if (el == 0
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
220
+ && (!arm_is_el2_enabled(env)
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
225
+ }
226
+
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
228
}
229
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
231
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
235
}
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
237
}
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
71
index XXXXXXX..XXXXXXX 100644
239
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-a64.c
240
--- a/target/arm/translate-a64.c
73
+++ b/target/arm/translate-a64.c
241
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
75
is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
243
* unallocated-encoding checks (otherwise the syndrome information
244
* for the resulting exception will be incorrect).
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
248
{
249
if (s->fp_excp_el) {
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
76
}
253
}
77
254
78
-/* Expand a 2-operand + immediate AdvSIMD vector operation using
255
+static bool fp_access_check(DisasContext *s)
79
- * an op descriptor.
256
+{
80
- */
257
+ if (!fp_access_check_only(s)) {
81
-static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
258
+ return false;
82
- int rn, int64_t imm, const GVecGen2i *gvec_op)
259
+ }
83
-{
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
84
- tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
85
- is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
262
+ syn_smetrap(SME_ET_Streaming, false));
86
-}
263
+ return false;
87
-
264
+ }
88
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
265
+ return true;
89
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
266
+}
90
int rn, int rm, const GVecGen3 *gvec_op)
267
+
91
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
268
/* Check that SVE access is enabled. If it is, return true.
92
gen_gvec_fn2i(s, is_q, rd, rn, shift,
269
* If not, emit code to generate an appropriate exception and return false.
93
is_u ? gen_gvec_usra : gen_gvec_ssra, size);
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
94
return;
277
return;
95
+
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
96
case 0x08: /* SRI */
97
- /* Shift count same as element size is valid but does nothing. */
98
- if (shift == 8 << size) {
99
- goto done;
100
- }
101
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
102
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sri, size);
103
return;
279
return;
104
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
105
case 0x00: /* SSHR / USHR */
281
}
106
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
107
}
108
tcg_temp_free_i64(tcg_round);
109
110
- done:
111
clear_vec_high(s, is_q, rd);
112
}
282
}
113
283
114
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
284
+/*
115
}
285
+ * Include the generated SME FA64 decoder.
116
286
+ */
117
if (insert) {
287
+
118
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
288
+#include "decode-sme-fa64.c.inc"
119
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
289
+
120
} else {
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
121
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
291
+{
122
}
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
123
diff --git a/target/arm/translate.c b/target/arm/translate.c
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
124
index XXXXXXX..XXXXXXX 100644
348
index XXXXXXX..XXXXXXX 100644
125
--- a/target/arm/translate.c
349
--- a/target/arm/translate.c
126
+++ b/target/arm/translate.c
350
+++ b/target/arm/translate.c
127
@@ -XXX,XX +XXX,XX @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
128
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
129
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
130
{
354
}
131
- if (sh == 0) {
355
+ dc->sme_trap_nonstreaming =
132
- tcg_gen_mov_vec(d, a);
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
133
- } else {
357
}
134
- TCGv_vec t = tcg_temp_new_vec_matching(d);
358
dc->cp_regs = cpu->cp_regs;
135
- TCGv_vec m = tcg_temp_new_vec_matching(d);
359
dc->features = env->features;
136
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
137
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
361
index XXXXXXX..XXXXXXX 100644
138
362
--- a/target/arm/meson.build
139
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
363
+++ b/target/arm/meson.build
140
- tcg_gen_shri_vec(vece, t, a, sh);
364
@@ -XXX,XX +XXX,XX @@
141
- tcg_gen_and_vec(vece, d, d, m);
365
gen = [
142
- tcg_gen_or_vec(vece, d, d, t);
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
143
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
144
+ tcg_gen_shri_vec(vece, t, a, sh);
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
145
+ tcg_gen_and_vec(vece, d, d, m);
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
146
+ tcg_gen_or_vec(vece, d, d, t);
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
147
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
148
- tcg_temp_free_vec(t);
149
- tcg_temp_free_vec(m);
150
- }
151
+ tcg_temp_free_vec(t);
152
+ tcg_temp_free_vec(m);
153
}
154
155
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
156
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
157
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
158
+{
159
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
160
+ const GVecGen2i ops[4] = {
161
+ { .fni8 = gen_shr8_ins_i64,
162
+ .fniv = gen_shr_ins_vec,
163
+ .fno = gen_helper_gvec_sri_b,
164
+ .load_dest = true,
165
+ .opt_opc = vecop_list,
166
+ .vece = MO_8 },
167
+ { .fni8 = gen_shr16_ins_i64,
168
+ .fniv = gen_shr_ins_vec,
169
+ .fno = gen_helper_gvec_sri_h,
170
+ .load_dest = true,
171
+ .opt_opc = vecop_list,
172
+ .vece = MO_16 },
173
+ { .fni4 = gen_shr32_ins_i32,
174
+ .fniv = gen_shr_ins_vec,
175
+ .fno = gen_helper_gvec_sri_s,
176
+ .load_dest = true,
177
+ .opt_opc = vecop_list,
178
+ .vece = MO_32 },
179
+ { .fni8 = gen_shr64_ins_i64,
180
+ .fniv = gen_shr_ins_vec,
181
+ .fno = gen_helper_gvec_sri_d,
182
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
183
+ .load_dest = true,
184
+ .opt_opc = vecop_list,
185
+ .vece = MO_64 },
186
+ };
187
188
-const GVecGen2i sri_op[4] = {
189
- { .fni8 = gen_shr8_ins_i64,
190
- .fniv = gen_shr_ins_vec,
191
- .load_dest = true,
192
- .opt_opc = vecop_list_sri,
193
- .vece = MO_8 },
194
- { .fni8 = gen_shr16_ins_i64,
195
- .fniv = gen_shr_ins_vec,
196
- .load_dest = true,
197
- .opt_opc = vecop_list_sri,
198
- .vece = MO_16 },
199
- { .fni4 = gen_shr32_ins_i32,
200
- .fniv = gen_shr_ins_vec,
201
- .load_dest = true,
202
- .opt_opc = vecop_list_sri,
203
- .vece = MO_32 },
204
- { .fni8 = gen_shr64_ins_i64,
205
- .fniv = gen_shr_ins_vec,
206
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
207
- .load_dest = true,
208
- .opt_opc = vecop_list_sri,
209
- .vece = MO_64 },
210
-};
211
+ /* tszimm encoding produces immediates in the range [1..esize]. */
212
+ tcg_debug_assert(shift > 0);
213
+ tcg_debug_assert(shift <= (8 << vece));
214
+
215
+ /* Shift of esize leaves destination unchanged. */
216
+ if (shift < (8 << vece)) {
217
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
218
+ } else {
219
+ /* Nop, but we do need to clear the tail. */
220
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
221
+ }
222
+}
223
224
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
225
{
226
@@ -XXX,XX +XXX,XX @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
227
228
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
229
{
230
- if (sh == 0) {
231
- tcg_gen_mov_vec(d, a);
232
- } else {
233
- TCGv_vec t = tcg_temp_new_vec_matching(d);
234
- TCGv_vec m = tcg_temp_new_vec_matching(d);
235
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
236
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
237
238
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
239
- tcg_gen_shli_vec(vece, t, a, sh);
240
- tcg_gen_and_vec(vece, d, d, m);
241
- tcg_gen_or_vec(vece, d, d, t);
242
+ tcg_gen_shli_vec(vece, t, a, sh);
243
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
244
+ tcg_gen_and_vec(vece, d, d, m);
245
+ tcg_gen_or_vec(vece, d, d, t);
246
247
- tcg_temp_free_vec(t);
248
- tcg_temp_free_vec(m);
249
- }
250
+ tcg_temp_free_vec(t);
251
+ tcg_temp_free_vec(m);
252
}
253
254
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
255
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
256
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
257
+{
258
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
259
+ const GVecGen2i ops[4] = {
260
+ { .fni8 = gen_shl8_ins_i64,
261
+ .fniv = gen_shl_ins_vec,
262
+ .fno = gen_helper_gvec_sli_b,
263
+ .load_dest = true,
264
+ .opt_opc = vecop_list,
265
+ .vece = MO_8 },
266
+ { .fni8 = gen_shl16_ins_i64,
267
+ .fniv = gen_shl_ins_vec,
268
+ .fno = gen_helper_gvec_sli_h,
269
+ .load_dest = true,
270
+ .opt_opc = vecop_list,
271
+ .vece = MO_16 },
272
+ { .fni4 = gen_shl32_ins_i32,
273
+ .fniv = gen_shl_ins_vec,
274
+ .fno = gen_helper_gvec_sli_s,
275
+ .load_dest = true,
276
+ .opt_opc = vecop_list,
277
+ .vece = MO_32 },
278
+ { .fni8 = gen_shl64_ins_i64,
279
+ .fniv = gen_shl_ins_vec,
280
+ .fno = gen_helper_gvec_sli_d,
281
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
282
+ .load_dest = true,
283
+ .opt_opc = vecop_list,
284
+ .vece = MO_64 },
285
+ };
286
287
-const GVecGen2i sli_op[4] = {
288
- { .fni8 = gen_shl8_ins_i64,
289
- .fniv = gen_shl_ins_vec,
290
- .load_dest = true,
291
- .opt_opc = vecop_list_sli,
292
- .vece = MO_8 },
293
- { .fni8 = gen_shl16_ins_i64,
294
- .fniv = gen_shl_ins_vec,
295
- .load_dest = true,
296
- .opt_opc = vecop_list_sli,
297
- .vece = MO_16 },
298
- { .fni4 = gen_shl32_ins_i32,
299
- .fniv = gen_shl_ins_vec,
300
- .load_dest = true,
301
- .opt_opc = vecop_list_sli,
302
- .vece = MO_32 },
303
- { .fni8 = gen_shl64_ins_i64,
304
- .fniv = gen_shl_ins_vec,
305
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
306
- .load_dest = true,
307
- .opt_opc = vecop_list_sli,
308
- .vece = MO_64 },
309
-};
310
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
311
+ tcg_debug_assert(shift >= 0);
312
+ tcg_debug_assert(shift < (8 << vece));
313
+
314
+ if (shift == 0) {
315
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
316
+ } else {
317
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
318
+ }
319
+}
320
321
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
322
{
323
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
324
}
325
/* Right shift comes here negative. */
326
shift = -shift;
327
- /* Shift out of range leaves destination unchanged. */
328
- if (shift < 8 << size) {
329
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
330
- shift, &sri_op[size]);
331
- }
332
+ gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
333
+ vec_size, vec_size);
334
return 0;
335
336
case 5: /* VSHL, VSLI */
337
if (u) { /* VSLI */
338
- /* Shift out of range leaves destination unchanged. */
339
- if (shift < 8 << size) {
340
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
341
- vec_size, shift, &sli_op[size]);
342
- }
343
+ gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
344
+ vec_size, vec_size);
345
} else { /* VSHL */
346
/* Shifts larger than the element size are
347
* architecturally valid and results in zero.
348
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
349
index XXXXXXX..XXXXXXX 100644
350
--- a/target/arm/vec_helper.c
351
+++ b/target/arm/vec_helper.c
352
@@ -XXX,XX +XXX,XX @@ DO_RSRA(gvec_ursra_d, uint64_t)
353
354
#undef DO_RSRA
355
356
+#define DO_SRI(NAME, TYPE) \
357
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
358
+{ \
359
+ intptr_t i, oprsz = simd_oprsz(desc); \
360
+ int shift = simd_data(desc); \
361
+ TYPE *d = vd, *n = vn; \
362
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
363
+ d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
364
+ } \
365
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
366
+}
367
+
368
+DO_SRI(gvec_sri_b, uint8_t)
369
+DO_SRI(gvec_sri_h, uint16_t)
370
+DO_SRI(gvec_sri_s, uint32_t)
371
+DO_SRI(gvec_sri_d, uint64_t)
372
+
373
+#undef DO_SRI
374
+
375
+#define DO_SLI(NAME, TYPE) \
376
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
377
+{ \
378
+ intptr_t i, oprsz = simd_oprsz(desc); \
379
+ int shift = simd_data(desc); \
380
+ TYPE *d = vd, *n = vn; \
381
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
382
+ d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
383
+ } \
384
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
385
+}
386
+
387
+DO_SLI(gvec_sli_b, uint8_t)
388
+DO_SLI(gvec_sli_h, uint16_t)
389
+DO_SLI(gvec_sli_s, uint32_t)
390
+DO_SLI(gvec_sli_d, uint64_t)
391
+
392
+#undef DO_SLI
393
+
394
/*
395
* Convert float16 to float32, raising no exceptions and
396
* preserving exceptional values, including SNaN.
397
--
372
--
398
2.20.1
373
2.25.1
399
400
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Provide a functional interface for the vector expansion.
3
Mark ADR as a non-streaming instruction, which should trap
4
This fits better with the existing set of helpers that
4
if full a64 support is not enabled in streaming mode.
5
we provide for other operations.
5
6
Removing entries from sme-fa64.decode is an easy way to see
7
what remains to be done.
6
8
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-11-richard.henderson@linaro.org
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
13
---
12
target/arm/translate.h | 13 +-
14
target/arm/translate.h | 7 +++++++
13
target/arm/translate-a64.c | 22 ++-
15
target/arm/sme-fa64.decode | 1 -
14
target/arm/translate-neon.inc.c | 19 +--
16
target/arm/translate-sve.c | 8 ++++----
15
target/arm/translate.c | 228 +++++++++++++++++---------------
17
3 files changed, 11 insertions(+), 5 deletions(-)
16
4 files changed, 147 insertions(+), 135 deletions(-)
17
18
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
21
--- a/target/arm/translate.h
21
+++ b/target/arm/translate.h
22
+++ b/target/arm/translate.h
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
23
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
24
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
25
26
26
-extern const GVecGen4 uqadd_op[4];
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
27
-extern const GVecGen4 sqadd_op[4];
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
28
-extern const GVecGen4 uqsub_op[4];
29
+ { \
29
-extern const GVecGen4 sqsub_op[4];
30
+ s->is_nonstreaming = true; \
30
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
31
void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
32
+ }
32
void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
33
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
34
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
35
36
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
37
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
38
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
39
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
40
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
41
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
42
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
43
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
44
+
33
+
45
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
34
#endif /* TARGET_ARM_TRANSLATE_H */
46
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
47
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
48
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
49
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/translate-a64.c
37
--- a/target/arm/sme-fa64.decode
51
+++ b/target/arm/translate-a64.c
38
+++ b/target/arm/sme-fa64.decode
52
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
53
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
54
switch (opcode) {
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
55
case 0x01: /* SQADD, UQADD */
42
56
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
57
- offsetof(CPUARMState, vfp.qc),
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
58
- vec_full_reg_offset(s, rn),
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
59
- vec_full_reg_offset(s, rm),
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
60
- is_q ? 16 : 8, vec_full_reg_size(s),
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
61
- (u ? uqadd_op : sqadd_op) + size);
62
+ if (u) {
63
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
64
+ } else {
65
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
66
+ }
67
return;
68
case 0x05: /* SQSUB, UQSUB */
69
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
70
- offsetof(CPUARMState, vfp.qc),
71
- vec_full_reg_offset(s, rn),
72
- vec_full_reg_offset(s, rm),
73
- is_q ? 16 : 8, vec_full_reg_size(s),
74
- (u ? uqsub_op : sqsub_op) + size);
75
+ if (u) {
76
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
77
+ } else {
78
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
79
+ }
80
return;
81
case 0x08: /* SSHL, USHL */
82
if (u) {
83
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
84
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
85
--- a/target/arm/translate-neon.inc.c
49
--- a/target/arm/translate-sve.c
86
+++ b/target/arm/translate-neon.inc.c
50
+++ b/target/arm/translate-sve.c
87
@@ -XXX,XX +XXX,XX @@ DO_3SAME(VORN, tcg_gen_gvec_orc)
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
88
DO_3SAME(VEOR, tcg_gen_gvec_xor)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
89
DO_3SAME(VSHL_S, gen_gvec_sshl)
90
DO_3SAME(VSHL_U, gen_gvec_ushl)
91
+DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
92
+DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
93
+DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
94
+DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
95
96
/* These insns are all gvec_bitsel but with the inputs in various orders. */
97
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
98
@@ -XXX,XX +XXX,XX @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
99
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
100
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
101
102
-#define DO_3SAME_GVEC4(INSN, OPARRAY) \
103
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
104
- uint32_t rn_ofs, uint32_t rm_ofs, \
105
- uint32_t oprsz, uint32_t maxsz) \
106
- { \
107
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \
108
- rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \
109
- } \
110
- DO_3SAME(INSN, gen_##INSN##_3s)
111
-
112
-DO_3SAME_GVEC4(VQADD_S, sqadd_op)
113
-DO_3SAME_GVEC4(VQADD_U, uqadd_op)
114
-DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
115
-DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
116
-
117
static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
118
uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
119
{
120
diff --git a/target/arm/translate.c b/target/arm/translate.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/translate.c
123
+++ b/target/arm/translate.c
124
@@ -XXX,XX +XXX,XX @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
125
tcg_temp_free_vec(x);
126
}
53
}
127
54
128
-static const TCGOpcode vecop_list_uqadd[] = {
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
129
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
130
-};
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
131
-
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
132
-const GVecGen4 uqadd_op[4] = {
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
133
- { .fniv = gen_uqadd_vec,
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
134
- .fno = gen_helper_gvec_uqadd_b,
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
135
- .write_aofs = true,
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
136
- .opt_opc = vecop_list_uqadd,
63
137
- .vece = MO_8 },
64
/*
138
- { .fniv = gen_uqadd_vec,
65
*** SVE Integer Misc - Unpredicated Group
139
- .fno = gen_helper_gvec_uqadd_h,
140
- .write_aofs = true,
141
- .opt_opc = vecop_list_uqadd,
142
- .vece = MO_16 },
143
- { .fniv = gen_uqadd_vec,
144
- .fno = gen_helper_gvec_uqadd_s,
145
- .write_aofs = true,
146
- .opt_opc = vecop_list_uqadd,
147
- .vece = MO_32 },
148
- { .fniv = gen_uqadd_vec,
149
- .fno = gen_helper_gvec_uqadd_d,
150
- .write_aofs = true,
151
- .opt_opc = vecop_list_uqadd,
152
- .vece = MO_64 },
153
-};
154
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
155
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
156
+{
157
+ static const TCGOpcode vecop_list[] = {
158
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
159
+ };
160
+ static const GVecGen4 ops[4] = {
161
+ { .fniv = gen_uqadd_vec,
162
+ .fno = gen_helper_gvec_uqadd_b,
163
+ .write_aofs = true,
164
+ .opt_opc = vecop_list,
165
+ .vece = MO_8 },
166
+ { .fniv = gen_uqadd_vec,
167
+ .fno = gen_helper_gvec_uqadd_h,
168
+ .write_aofs = true,
169
+ .opt_opc = vecop_list,
170
+ .vece = MO_16 },
171
+ { .fniv = gen_uqadd_vec,
172
+ .fno = gen_helper_gvec_uqadd_s,
173
+ .write_aofs = true,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_32 },
176
+ { .fniv = gen_uqadd_vec,
177
+ .fno = gen_helper_gvec_uqadd_d,
178
+ .write_aofs = true,
179
+ .opt_opc = vecop_list,
180
+ .vece = MO_64 },
181
+ };
182
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
183
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
184
+}
185
186
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
187
TCGv_vec a, TCGv_vec b)
188
@@ -XXX,XX +XXX,XX @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
189
tcg_temp_free_vec(x);
190
}
191
192
-static const TCGOpcode vecop_list_sqadd[] = {
193
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
194
-};
195
-
196
-const GVecGen4 sqadd_op[4] = {
197
- { .fniv = gen_sqadd_vec,
198
- .fno = gen_helper_gvec_sqadd_b,
199
- .opt_opc = vecop_list_sqadd,
200
- .write_aofs = true,
201
- .vece = MO_8 },
202
- { .fniv = gen_sqadd_vec,
203
- .fno = gen_helper_gvec_sqadd_h,
204
- .opt_opc = vecop_list_sqadd,
205
- .write_aofs = true,
206
- .vece = MO_16 },
207
- { .fniv = gen_sqadd_vec,
208
- .fno = gen_helper_gvec_sqadd_s,
209
- .opt_opc = vecop_list_sqadd,
210
- .write_aofs = true,
211
- .vece = MO_32 },
212
- { .fniv = gen_sqadd_vec,
213
- .fno = gen_helper_gvec_sqadd_d,
214
- .opt_opc = vecop_list_sqadd,
215
- .write_aofs = true,
216
- .vece = MO_64 },
217
-};
218
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
219
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
220
+{
221
+ static const TCGOpcode vecop_list[] = {
222
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
223
+ };
224
+ static const GVecGen4 ops[4] = {
225
+ { .fniv = gen_sqadd_vec,
226
+ .fno = gen_helper_gvec_sqadd_b,
227
+ .opt_opc = vecop_list,
228
+ .write_aofs = true,
229
+ .vece = MO_8 },
230
+ { .fniv = gen_sqadd_vec,
231
+ .fno = gen_helper_gvec_sqadd_h,
232
+ .opt_opc = vecop_list,
233
+ .write_aofs = true,
234
+ .vece = MO_16 },
235
+ { .fniv = gen_sqadd_vec,
236
+ .fno = gen_helper_gvec_sqadd_s,
237
+ .opt_opc = vecop_list,
238
+ .write_aofs = true,
239
+ .vece = MO_32 },
240
+ { .fniv = gen_sqadd_vec,
241
+ .fno = gen_helper_gvec_sqadd_d,
242
+ .opt_opc = vecop_list,
243
+ .write_aofs = true,
244
+ .vece = MO_64 },
245
+ };
246
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
247
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
248
+}
249
250
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
251
TCGv_vec a, TCGv_vec b)
252
@@ -XXX,XX +XXX,XX @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
253
tcg_temp_free_vec(x);
254
}
255
256
-static const TCGOpcode vecop_list_uqsub[] = {
257
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
258
-};
259
-
260
-const GVecGen4 uqsub_op[4] = {
261
- { .fniv = gen_uqsub_vec,
262
- .fno = gen_helper_gvec_uqsub_b,
263
- .opt_opc = vecop_list_uqsub,
264
- .write_aofs = true,
265
- .vece = MO_8 },
266
- { .fniv = gen_uqsub_vec,
267
- .fno = gen_helper_gvec_uqsub_h,
268
- .opt_opc = vecop_list_uqsub,
269
- .write_aofs = true,
270
- .vece = MO_16 },
271
- { .fniv = gen_uqsub_vec,
272
- .fno = gen_helper_gvec_uqsub_s,
273
- .opt_opc = vecop_list_uqsub,
274
- .write_aofs = true,
275
- .vece = MO_32 },
276
- { .fniv = gen_uqsub_vec,
277
- .fno = gen_helper_gvec_uqsub_d,
278
- .opt_opc = vecop_list_uqsub,
279
- .write_aofs = true,
280
- .vece = MO_64 },
281
-};
282
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
283
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
284
+{
285
+ static const TCGOpcode vecop_list[] = {
286
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
287
+ };
288
+ static const GVecGen4 ops[4] = {
289
+ { .fniv = gen_uqsub_vec,
290
+ .fno = gen_helper_gvec_uqsub_b,
291
+ .opt_opc = vecop_list,
292
+ .write_aofs = true,
293
+ .vece = MO_8 },
294
+ { .fniv = gen_uqsub_vec,
295
+ .fno = gen_helper_gvec_uqsub_h,
296
+ .opt_opc = vecop_list,
297
+ .write_aofs = true,
298
+ .vece = MO_16 },
299
+ { .fniv = gen_uqsub_vec,
300
+ .fno = gen_helper_gvec_uqsub_s,
301
+ .opt_opc = vecop_list,
302
+ .write_aofs = true,
303
+ .vece = MO_32 },
304
+ { .fniv = gen_uqsub_vec,
305
+ .fno = gen_helper_gvec_uqsub_d,
306
+ .opt_opc = vecop_list,
307
+ .write_aofs = true,
308
+ .vece = MO_64 },
309
+ };
310
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
311
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
312
+}
313
314
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
315
TCGv_vec a, TCGv_vec b)
316
@@ -XXX,XX +XXX,XX @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
317
tcg_temp_free_vec(x);
318
}
319
320
-static const TCGOpcode vecop_list_sqsub[] = {
321
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
322
-};
323
-
324
-const GVecGen4 sqsub_op[4] = {
325
- { .fniv = gen_sqsub_vec,
326
- .fno = gen_helper_gvec_sqsub_b,
327
- .opt_opc = vecop_list_sqsub,
328
- .write_aofs = true,
329
- .vece = MO_8 },
330
- { .fniv = gen_sqsub_vec,
331
- .fno = gen_helper_gvec_sqsub_h,
332
- .opt_opc = vecop_list_sqsub,
333
- .write_aofs = true,
334
- .vece = MO_16 },
335
- { .fniv = gen_sqsub_vec,
336
- .fno = gen_helper_gvec_sqsub_s,
337
- .opt_opc = vecop_list_sqsub,
338
- .write_aofs = true,
339
- .vece = MO_32 },
340
- { .fniv = gen_sqsub_vec,
341
- .fno = gen_helper_gvec_sqsub_d,
342
- .opt_opc = vecop_list_sqsub,
343
- .write_aofs = true,
344
- .vece = MO_64 },
345
-};
346
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
347
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
348
+{
349
+ static const TCGOpcode vecop_list[] = {
350
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
351
+ };
352
+ static const GVecGen4 ops[4] = {
353
+ { .fniv = gen_sqsub_vec,
354
+ .fno = gen_helper_gvec_sqsub_b,
355
+ .opt_opc = vecop_list,
356
+ .write_aofs = true,
357
+ .vece = MO_8 },
358
+ { .fniv = gen_sqsub_vec,
359
+ .fno = gen_helper_gvec_sqsub_h,
360
+ .opt_opc = vecop_list,
361
+ .write_aofs = true,
362
+ .vece = MO_16 },
363
+ { .fniv = gen_sqsub_vec,
364
+ .fno = gen_helper_gvec_sqsub_s,
365
+ .opt_opc = vecop_list,
366
+ .write_aofs = true,
367
+ .vece = MO_32 },
368
+ { .fniv = gen_sqsub_vec,
369
+ .fno = gen_helper_gvec_sqsub_d,
370
+ .opt_opc = vecop_list,
371
+ .write_aofs = true,
372
+ .vece = MO_64 },
373
+ };
374
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
375
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
376
+}
377
378
/* Translate a NEON data processing instruction. Return nonzero if the
379
instruction is invalid.
380
--
66
--
381
2.20.1
67
2.25.1
382
383
diff view generated by jsdifflib
1
The usual location for the env argument in the argument list of a TCG helper
1
From: Richard Henderson <richard.henderson@linaro.org>
2
is immediately after the return-value argument. recps_f32 and rsqrts_f32
3
differ in that they put it at the end.
4
2
5
Move the env argument to its usual place; this will allow us to
3
Mark these as non-streaming instructions, which should trap
6
more easily use these helper functions with the gvec APIs.
4
if full a64 support is not enabled in streaming mode.
7
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20200512163904.10918-16-peter.maydell@linaro.org
11
---
10
---
12
target/arm/helper.h | 4 ++--
11
target/arm/sme-fa64.decode | 2 --
13
target/arm/translate.c | 4 ++--
12
target/arm/translate-sve.c | 9 ++++++---
14
target/arm/vfp_helper.c | 4 ++--
13
2 files changed, 6 insertions(+), 5 deletions(-)
15
3 files changed, 6 insertions(+), 6 deletions(-)
16
14
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
17
--- a/target/arm/sme-fa64.decode
20
+++ b/target/arm/helper.h
18
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
20
23
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-DEF_HELPER_3(recps_f32, f32, f32, f32, env)
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
26
-DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
27
+DEF_HELPER_3(recps_f32, f32, env, f32, f32)
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
28
+DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
29
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
30
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
32
diff --git a/target/arm/translate.c b/target/arm/translate.c
33
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate.c
30
--- a/target/arm/translate-sve.c
35
+++ b/target/arm/translate.c
31
+++ b/target/arm/translate-sve.c
36
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
37
tcg_temp_free_ptr(fpstatus);
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
38
} else {
34
39
if (size == 0) {
35
/* Note pat == 31 is #all, to set all elements. */
40
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
41
+ gen_helper_recps_f32(tmp, cpu_env, tmp, tmp2);
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
42
} else {
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
43
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
39
44
+ gen_helper_rsqrts_f32(tmp, cpu_env, tmp, tmp2);
40
/* Note pat == 32 is #unimp, to set no elements. */
45
}
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
46
}
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
47
break;
43
.rd = a->rd, .pg = a->pg, .s = a->s,
48
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
49
index XXXXXXX..XXXXXXX 100644
45
};
50
--- a/target/arm/vfp_helper.c
46
+
51
+++ b/target/arm/vfp_helper.c
47
+ s->is_nonstreaming = true;
52
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
48
return trans_AND_pppp(s, &alt_a);
53
#define float32_three make_float32(0x40400000)
54
#define float32_one_point_five make_float32(0x3fc00000)
55
56
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
57
+float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b)
58
{
59
float_status *s = &env->vfp.standard_fp_status;
60
if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
61
@@ -XXX,XX +XXX,XX @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
62
return float32_sub(float32_two, float32_mul(a, b, s), s);
63
}
49
}
64
50
65
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
66
+float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
67
{
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
68
float_status *s = &env->vfp.standard_fp_status;
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
69
float32 product;
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
70
--
58
--
71
2.20.1
59
2.25.1
72
73
diff view generated by jsdifflib
1
Convert the Neon integer 3-reg-same compare insns VCGE, VCGT,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
VCEQ, VACGE and VACGT to decodetree.
3
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-15-peter.maydell@linaro.org
7
---
10
---
8
target/arm/neon-dp.decode | 5 +++++
11
target/arm/sme-fa64.decode | 3 ---
9
target/arm/translate-neon.inc.c | 6 +++++
12
target/arm/translate-sve.c | 22 ++++++++++++----------
10
target/arm/translate.c | 39 ++-------------------------------
13
2 files changed, 12 insertions(+), 13 deletions(-)
11
3 files changed, 13 insertions(+), 37 deletions(-)
12
14
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
16
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
17
@@ -XXX,XX +XXX,XX @@ VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
18
VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
19
VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
20
VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
22
21
+VCEQ_fp_3s 1111 001 0 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
+VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
+VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
24
+VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
25
+VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
26
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
27
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
29
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-neon.inc.c
31
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-neon.inc.c
32
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
33
return do_3same_fp(s, a, FUNC, READS_VD); \
34
NULL, gen_helper_sve_fexpa_h,
34
}
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
35
36
};
36
+DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
37
+DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
38
+DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
39
+DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
40
+DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
41
41
+
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
42
static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
43
NULL, gen_helper_sve_ftssel_h,
43
TCGv_ptr fpstatus)
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
44
{
45
};
45
diff --git a/target/arm/translate.c b/target/arm/translate.c
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
46
index XXXXXXX..XXXXXXX 100644
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
47
--- a/target/arm/translate.c
48
+ ftssel_fns[a->esz], a, 0)
48
+++ b/target/arm/translate.c
49
49
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
50
/*
50
case NEON_3R_VQDMULH_VQRDMULH:
51
*** SVE Predicate Logical Operations Group
51
case NEON_3R_FLOAT_ARITH:
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
52
case NEON_3R_FLOAT_MULTIPLY:
53
static gen_helper_gvec_3 * const compact_fns[4] = {
53
+ case NEON_3R_FLOAT_CMP:
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
54
+ case NEON_3R_FLOAT_ACMP:
55
};
55
/* Already handled by decodetree */
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
56
return 1;
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
57
}
58
+ compact_fns[a->esz], a, 0)
58
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
59
59
return 1; /* VPMIN/VPMAX handled by decodetree */
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
60
}
61
* function, scaled by the element size. This includes the not found
61
break;
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
62
- case NEON_3R_FLOAT_CMP:
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
63
- if (!u && size) {
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
64
- /* no encoding for U=0 C=1x */
65
};
65
- return 1;
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
66
- }
67
- bext_fns[a->esz], a, 0)
67
- break;
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
68
- case NEON_3R_FLOAT_ACMP:
69
+ bext_fns[a->esz], a, 0)
69
- if (!u) {
70
70
- return 1;
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
71
- }
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
72
- break;
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
73
case NEON_3R_FLOAT_MISC:
74
};
74
/* VMAXNM/VMINNM in ARMv8 */
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
75
if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
76
- bdep_fns[a->esz], a, 0)
76
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
77
tmp = neon_load_reg(rn, pass);
78
+ bdep_fns[a->esz], a, 0)
78
tmp2 = neon_load_reg(rm, pass);
79
79
switch (op) {
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
80
- case NEON_3R_FLOAT_CMP:
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
81
- {
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
82
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
83
};
83
- if (!u) {
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
84
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
85
- bgrp_fns[a->esz], a, 0)
85
- } else {
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
86
- if (size == 0) {
87
+ bgrp_fns[a->esz], a, 0)
87
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
88
88
- } else {
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
89
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
90
- }
91
- }
92
- tcg_temp_free_ptr(fpstatus);
93
- break;
94
- }
95
- case NEON_3R_FLOAT_ACMP:
96
- {
97
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
98
- if (size == 0) {
99
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
100
- } else {
101
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
102
- }
103
- tcg_temp_free_ptr(fpstatus);
104
- break;
105
- }
106
case NEON_3R_FLOAT_MINMAX:
107
{
108
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
109
--
91
--
110
2.20.1
92
2.25.1
111
112
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The little-endian UUID is used in many places, so make
3
Mark these as non-streaming instructions, which should trap
4
NVDIMM_UUID_LE into a common macro to convert the UUID
4
if full a64 support is not enabled in streaming mode.
5
to a little-endian array.
6
5
7
Reviewed-by: Xiang Zheng <zhengxiang9@huawei.com>
8
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
9
Message-id: 20200512030609.19593-2-gengdongjiu@huawei.com
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
include/qemu/uuid.h | 27 +++++++++++++++++++++++++++
11
target/arm/sme-fa64.decode | 2 --
14
hw/acpi/nvdimm.c | 10 +++-------
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
15
2 files changed, 30 insertions(+), 7 deletions(-)
13
2 files changed, 15 insertions(+), 11 deletions(-)
16
14
17
diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/include/qemu/uuid.h
17
--- a/target/arm/sme-fa64.decode
20
+++ b/include/qemu/uuid.h
18
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ typedef struct {
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
34
NULL, gen_helper_sve2_pmull_d,
22
};
35
};
23
} QemuUUID;
36
- if (a->esz == 0
24
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
25
+/**
38
- : !dc_isar_feature(aa64_sve, s)) {
26
+ * UUID_LE - converts the fields of UUID to little-endian array,
27
+ * each parameter is a field of the UUID.
28
+ *
29
+ * @time_low: The low field of the timestamp
30
+ * @time_mid: The middle field of the timestamp
31
+ * @time_hi_and_version: The high field of the timestamp
32
+ * multiplexed with the version number
33
+ * @clock_seq_hi_and_reserved: The high field of the clock
34
+ * sequence multiplexed with the variant
35
+ * @clock_seq_low: The low field of the clock sequence
36
+ * @node0: The spatially unique node0 identifier
37
+ * @node1: The spatially unique node1 identifier
38
+ * @node2: The spatially unique node2 identifier
39
+ * @node3: The spatially unique node3 identifier
40
+ * @node4: The spatially unique node4 identifier
41
+ * @node5: The spatially unique node5 identifier
42
+ */
43
+#define UUID_LE(time_low, time_mid, time_hi_and_version, \
44
+ clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2, \
45
+ node3, node4, node5) \
46
+ { (time_low) & 0xff, ((time_low) >> 8) & 0xff, ((time_low) >> 16) & 0xff, \
47
+ ((time_low) >> 24) & 0xff, (time_mid) & 0xff, ((time_mid) >> 8) & 0xff, \
48
+ (time_hi_and_version) & 0xff, ((time_hi_and_version) >> 8) & 0xff, \
49
+ (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\
50
+ (node3), (node4), (node5) }
51
+
39
+
52
#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \
40
+ if (a->esz == 0) {
53
"%02hhx%02hhx-%02hhx%02hhx-" \
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
54
"%02hhx%02hhx-" \
42
+ return false;
55
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
43
+ }
56
index XXXXXXX..XXXXXXX 100644
44
+ s->is_nonstreaming = true;
57
--- a/hw/acpi/nvdimm.c
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
58
+++ b/hw/acpi/nvdimm.c
46
return false;
59
@@ -XXX,XX +XXX,XX @@
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
60
*/
51
*/
61
52
62
#include "qemu/osdep.h"
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
63
+#include "qemu/uuid.h"
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
64
#include "hw/acpi/acpi.h"
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
65
#include "hw/acpi/aml-build.h"
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
66
#include "hw/acpi/bios-linker-loader.h"
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
67
@@ -XXX,XX +XXX,XX @@
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
68
#include "hw/mem/nvdimm.h"
59
+ 0, FPST_FPCR)
69
#include "qemu/nvdimm-utils.h"
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
70
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
71
-#define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
62
+ 0, FPST_FPCR)
72
- { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
63
73
- (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
74
- (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
75
-
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
76
/*
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
77
* define Byte Addressable Persistent Memory (PM) Region according to
68
gen_helper_gvec_bfdot_idx, a)
78
* ACPI 6.0: 5.2.25.1 System Physical Address Range Structure.
69
79
*/
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
80
static const uint8_t nvdimm_nfit_spa_uuid[] =
71
- gen_helper_gvec_bfmmla, a, 0)
81
- NVDIMM_UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
82
- 0x18, 0xb7, 0x8c, 0xdb);
73
+ gen_helper_gvec_bfmmla, a, 0)
83
+ UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
74
84
+ 0x18, 0xb7, 0x8c, 0xdb);
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
85
76
{
86
/*
87
* NVDIMM Firmware Interface Table
88
--
77
--
89
2.20.1
78
2.25.1
90
91
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This patch builds Hardware Error Source Table(HEST) via fw_cfg blobs.
3
Mark these as non-streaming instructions, which should trap
4
Now it only supports ARMv8 SEA, a type of Generic Hardware Error
4
if full a64 support is not enabled in streaming mode.
5
Source version 2(GHESv2) error source. Afterwards, we can extend
6
the supported types if needed. For the CPER section, currently it
7
is the memory section because the kernel mainly wants userspace to handle
8
the memory errors.
9
5
10
This patch follows the spec ACPI 6.2 to build the Hardware Error
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Source table. For more detailed information, please refer to
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
document: docs/specs/acpi_hest_ghes.rst
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
13
14
build_ghes_hw_error_notification() helper will help to add Hardware
15
Error Notification to ACPI tables without using packed C structures
16
and avoids endianness issues, as the API doesn't need explicit conversion.
17
18
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
19
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
20
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
21
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
22
Message-id: 20200512030609.19593-6-gengdongjiu@huawei.com
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
---
10
---
25
include/hw/acpi/ghes.h | 39 ++++++++++++
11
target/arm/sme-fa64.decode | 3 ---
26
hw/acpi/ghes.c | 126 +++++++++++++++++++++++++++++++++++++++
12
target/arm/translate-sve.c | 15 +++++++++++----
27
hw/arm/virt-acpi-build.c | 2 +
13
2 files changed, 11 insertions(+), 7 deletions(-)
28
3 files changed, 167 insertions(+)
29
14
30
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
31
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
32
--- a/include/hw/acpi/ghes.h
17
--- a/target/arm/sme-fa64.decode
33
+++ b/include/hw/acpi/ghes.h
18
+++ b/target/arm/sme-fa64.decode
34
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
35
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
36
#include "hw/acpi/bios-linker-loader.h"
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
37
22
38
+/*
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
39
+ * Values for Hardware Error Notification Type field
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
40
+ */
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
41
+enum AcpiGhesNotifyType {
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
42
+ /* Polled */
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
43
+ ACPI_GHES_NOTIFY_POLLED = 0,
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
44
+ /* External Interrupt */
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
45
+ ACPI_GHES_NOTIFY_EXTERNAL = 1,
30
index XXXXXXX..XXXXXXX 100644
46
+ /* Local Interrupt */
31
--- a/target/arm/translate-sve.c
47
+ ACPI_GHES_NOTIFY_LOCAL = 2,
32
+++ b/target/arm/translate-sve.c
48
+ /* SCI */
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
49
+ ACPI_GHES_NOTIFY_SCI = 3,
34
NULL, gen_helper_sve_ftmad_h,
50
+ /* NMI */
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
51
+ ACPI_GHES_NOTIFY_NMI = 4,
36
};
52
+ /* CMCI, ACPI 5.0: 18.3.2.7, Table 18-290 */
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
53
+ ACPI_GHES_NOTIFY_CMCI = 5,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
54
+ /* MCE, ACPI 5.0: 18.3.2.7, Table 18-290 */
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
55
+ ACPI_GHES_NOTIFY_MCE = 6,
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
56
+ /* GPIO-Signal, ACPI 6.0: 18.3.2.7, Table 18-332 */
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
57
+ ACPI_GHES_NOTIFY_GPIO = 7,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
58
+ /* ARMv8 SEA, ACPI 6.1: 18.3.2.9, Table 18-345 */
43
59
+ ACPI_GHES_NOTIFY_SEA = 8,
44
/*
60
+ /* ARMv8 SEI, ACPI 6.1: 18.3.2.9, Table 18-345 */
45
*** SVE Floating Point Accumulating Reduction Group
61
+ ACPI_GHES_NOTIFY_SEI = 9,
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
62
+ /* External Interrupt - GSIV, ACPI 6.1: 18.3.2.9, Table 18-345 */
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
63
+ ACPI_GHES_NOTIFY_GSIV = 10,
48
return false;
64
+ /* Software Delegated Exception, ACPI 6.2: 18.3.2.9, Table 18-383 */
49
}
65
+ ACPI_GHES_NOTIFY_SDEI = 11,
50
+ s->is_nonstreaming = true;
66
+ /* 12 and greater are reserved */
51
if (!sve_access_check(s)) {
67
+ ACPI_GHES_NOTIFY_RESERVED = 12
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
55
DO_FP3(FADD_zzz, fadd)
56
DO_FP3(FSUB_zzz, fsub)
57
DO_FP3(FMUL_zzz, fmul)
58
-DO_FP3(FTSMUL, ftsmul)
59
DO_FP3(FRECPS, recps)
60
DO_FP3(FRSQRTS, rsqrts)
61
62
#undef DO_FP3
63
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
65
+ NULL, gen_helper_gvec_ftsmul_h,
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
68
+};
67
+};
69
+
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
70
+enum {
69
+ ftsmul_fns[a->esz], a, 0)
71
+ ACPI_HEST_SRC_ID_SEA = 0,
72
+ /* future ids go here */
73
+ ACPI_HEST_SRC_ID_RESERVED,
74
+};
75
+
76
void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
77
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
78
#endif
79
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/hw/acpi/ghes.c
82
+++ b/hw/acpi/ghes.c
83
@@ -XXX,XX +XXX,XX @@
84
#include "qemu/units.h"
85
#include "hw/acpi/ghes.h"
86
#include "hw/acpi/aml-build.h"
87
+#include "qemu/error-report.h"
88
89
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
90
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
91
@@ -XXX,XX +XXX,XX @@
92
/* Now only support ARMv8 SEA notification type error source */
93
#define ACPI_GHES_ERROR_SOURCE_COUNT 1
94
95
+/* Generic Hardware Error Source version 2 */
96
+#define ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10
97
+
98
+/* Address offset in Generic Address Structure(GAS) */
99
+#define GAS_ADDR_OFFSET 4
100
+
101
+/*
102
+ * Hardware Error Notification
103
+ * ACPI 4.0: 17.3.2.7 Hardware Error Notification
104
+ * Composes dummy Hardware Error Notification descriptor of specified type
105
+ */
106
+static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
107
+{
108
+ /* Type */
109
+ build_append_int_noprefix(table, type, 1);
110
+ /*
111
+ * Length:
112
+ * Total length of the structure in bytes
113
+ */
114
+ build_append_int_noprefix(table, 28, 1);
115
+ /* Configuration Write Enable */
116
+ build_append_int_noprefix(table, 0, 2);
117
+ /* Poll Interval */
118
+ build_append_int_noprefix(table, 0, 4);
119
+ /* Vector */
120
+ build_append_int_noprefix(table, 0, 4);
121
+ /* Switch To Polling Threshold Value */
122
+ build_append_int_noprefix(table, 0, 4);
123
+ /* Switch To Polling Threshold Window */
124
+ build_append_int_noprefix(table, 0, 4);
125
+ /* Error Threshold Value */
126
+ build_append_int_noprefix(table, 0, 4);
127
+ /* Error Threshold Window */
128
+ build_append_int_noprefix(table, 0, 4);
129
+}
130
+
70
+
131
/*
71
/*
132
* Build table for the hardware error fw_cfg blob.
72
*** SVE Floating Point Arithmetic - Predicated Group
133
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
73
*/
134
@@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
135
bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
136
0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
137
}
138
+
139
+/* Build Generic Hardware Error Source version 2 (GHESv2) */
140
+static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker)
141
+{
142
+ uint64_t address_offset;
143
+ /*
144
+ * Type:
145
+ * Generic Hardware Error Source version 2(GHESv2 - Type 10)
146
+ */
147
+ build_append_int_noprefix(table_data, ACPI_GHES_SOURCE_GENERIC_ERROR_V2, 2);
148
+ /* Source Id */
149
+ build_append_int_noprefix(table_data, source_id, 2);
150
+ /* Related Source Id */
151
+ build_append_int_noprefix(table_data, 0xffff, 2);
152
+ /* Flags */
153
+ build_append_int_noprefix(table_data, 0, 1);
154
+ /* Enabled */
155
+ build_append_int_noprefix(table_data, 1, 1);
156
+
157
+ /* Number of Records To Pre-allocate */
158
+ build_append_int_noprefix(table_data, 1, 4);
159
+ /* Max Sections Per Record */
160
+ build_append_int_noprefix(table_data, 1, 4);
161
+ /* Max Raw Data Length */
162
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
163
+
164
+ address_offset = table_data->len;
165
+ /* Error Status Address */
166
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
167
+ 4 /* QWord access */, 0);
168
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
169
+ address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t),
170
+ ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t));
171
+
172
+ switch (source_id) {
173
+ case ACPI_HEST_SRC_ID_SEA:
174
+ /*
175
+ * Notification Structure
176
+ * Now only enable ARMv8 SEA notification type
177
+ */
178
+ build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA);
179
+ break;
180
+ default:
181
+ error_report("Not support this error source");
182
+ abort();
183
+ }
184
+
185
+ /* Error Status Block Length */
186
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
187
+
188
+ /*
189
+ * Read Ack Register
190
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source
191
+ * version 2 (GHESv2 - Type 10)
192
+ */
193
+ address_offset = table_data->len;
194
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
195
+ 4 /* QWord access */, 0);
196
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
197
+ address_offset + GAS_ADDR_OFFSET,
198
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
199
+ (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t));
200
+
201
+ /*
202
+ * Read Ack Preserve field
203
+ * We only provide the first bit in Read Ack Register to OSPM to write
204
+ * while the other bits are preserved.
205
+ */
206
+ build_append_int_noprefix(table_data, ~0x1ULL, 8);
207
+ /* Read Ack Write */
208
+ build_append_int_noprefix(table_data, 0x1, 8);
209
+}
210
+
211
+/* Build Hardware Error Source Table */
212
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker)
213
+{
214
+ uint64_t hest_start = table_data->len;
215
+
216
+ /* Hardware Error Source Table header*/
217
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
218
+
219
+ /* Error Source Count */
220
+ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
221
+
222
+ build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
223
+
224
+ build_header(linker, table_data, (void *)(table_data->data + hest_start),
225
+ "HEST", table_data->len - hest_start, 1, NULL, NULL);
226
+}
227
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
228
index XXXXXXX..XXXXXXX 100644
229
--- a/hw/arm/virt-acpi-build.c
230
+++ b/hw/arm/virt-acpi-build.c
231
@@ -XXX,XX +XXX,XX @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
232
233
if (vms->ras) {
234
build_ghes_error_table(tables->hardware_errors, tables->linker);
235
+ acpi_add_table(table_offsets, tables_blob);
236
+ acpi_build_hest(tables_blob, tables->linker);
237
}
238
239
if (ms->numa_state->num_nodes > 0) {
240
--
74
--
241
2.20.1
75
2.25.1
242
243
diff view generated by jsdifflib
1
Convert the Neon integer VPADD 3-reg-same insns to decodetree. These
1
From: Richard Henderson <richard.henderson@linaro.org>
2
are 'pairwise' operations. (Note that VQRDMLAH, which shares the
3
same primary opcode but has U=1, has already been converted.)
4
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-10-peter.maydell@linaro.org
8
---
10
---
9
target/arm/neon-dp.decode | 2 ++
11
target/arm/sme-fa64.decode | 1 -
10
target/arm/translate-neon.inc.c | 2 ++
12
target/arm/translate-sve.c | 12 ++++++------
11
target/arm/translate.c | 19 +------------------
13
2 files changed, 6 insertions(+), 7 deletions(-)
12
3 files changed, 5 insertions(+), 18 deletions(-)
13
14
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
17
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
18
@@ -XXX,XX +XXX,XX @@ VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
19
VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
20
VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
21
22
22
+VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
23
+
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
28
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-neon.inc.c
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-neon.inc.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
34
35
+#define gen_helper_neon_padd_u32 tcg_gen_add_i32
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
36
- gen_helper_gvec_smmla_b, a, 0)
37
DO_3SAME_PAIR(VPMAX_S, pmax_s)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
DO_3SAME_PAIR(VPMIN_S, pmin_s)
38
- gen_helper_gvec_usmmla_b, a, 0)
39
DO_3SAME_PAIR(VPMAX_U, pmax_u)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
DO_3SAME_PAIR(VPMIN_U, pmin_u)
40
- gen_helper_gvec_ummla_b, a, 0)
41
+DO_3SAME_PAIR(VPADD, padd_u)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
diff --git a/target/arm/translate.c b/target/arm/translate.c
42
+ gen_helper_gvec_smmla_b, a, 0)
43
index XXXXXXX..XXXXXXX 100644
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
--- a/target/arm/translate.c
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+++ b/target/arm/translate.c
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
46
+ gen_helper_gvec_ummla_b, a, 0)
47
return 1;
47
48
}
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
switch (op) {
49
gen_helper_gvec_bfdot, a, 0)
50
- case NEON_3R_VPADD_VQRDMLAH:
51
- if (!u) {
52
- break; /* VPADD */
53
- }
54
- /* VQRDMLAH : handled by decodetree */
55
- return 1;
56
-
57
case NEON_3R_VFM_VQRDMLSH:
58
if (!u) {
59
/* VFM, VFMS */
60
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
61
case NEON_3R_VQRSHL:
62
case NEON_3R_VPMAX:
63
case NEON_3R_VPMIN:
64
+ case NEON_3R_VPADD_VQRDMLAH:
65
/* Already handled by decodetree */
66
return 1;
67
}
68
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
69
}
70
pairwise = 0;
71
switch (op) {
72
- case NEON_3R_VPADD_VQRDMLAH:
73
- pairwise = 1;
74
- break;
75
case NEON_3R_FLOAT_ARITH:
76
pairwise = (u && size < 2); /* if VPADD (float) */
77
break;
78
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
79
}
80
}
81
break;
82
- case NEON_3R_VPADD_VQRDMLAH:
83
- switch (size) {
84
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
85
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
86
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
87
- default: abort();
88
- }
89
- break;
90
case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
91
{
92
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
93
--
50
--
94
2.20.1
51
2.25.1
95
96
diff view generated by jsdifflib
1
Convert the VQSHL, VRSHL and VQRSHL insns in the 3-reg-same
1
From: Richard Henderson <richard.henderson@linaro.org>
2
group to decodetree. We have already implemented the size==0b11
3
case of these insns; this commit handles the remaining sizes.
4
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-8-peter.maydell@linaro.org
8
---
10
---
9
target/arm/neon-dp.decode | 30 ++++++++++++++++++-----
11
target/arm/sme-fa64.decode | 1 -
10
target/arm/translate-neon.inc.c | 43 +++++++++++++++++++++++++++++++++
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
11
target/arm/translate.c | 22 +++--------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
12
3 files changed, 70 insertions(+), 25 deletions(-)
13
14
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
17
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
18
@@ -XXX,XX +XXX,XX @@ VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
19
@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
20
&3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
21
22
22
-VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
23
-VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
24
-VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
25
-VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
26
-VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
-VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
28
+{
29
+ VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
30
+ VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
31
+}
32
+{
33
+ VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
34
+ VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
35
+}
36
+{
37
+ VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
38
+ VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
39
+}
40
+{
41
+ VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
42
+ VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
43
+}
44
+{
45
+ VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
46
+ VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
47
+}
48
+{
49
+ VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
50
+ VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
51
+}
52
53
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
54
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
55
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
56
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-neon.inc.c
29
--- a/target/arm/translate-sve.c
58
+++ b/target/arm/translate-neon.inc.c
30
+++ b/target/arm/translate-sve.c
59
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
60
return do_3same(s, a, gen_##INSN##_3s); \
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
61
}
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
62
34
};
63
+/*
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
64
+ * Some helper functions need to be passed the cpu_env. In order
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
65
+ * to use those with the gvec APIs like tcg_gen_gvec_3() we need
37
66
+ * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
67
+ * and which call a NeonGenTwoOpEnvFn().
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
68
+ */
40
};
69
+#define WRAP_ENV_FN(WRAPNAME, FUNC) \
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
70
+ static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
71
+ { \
43
72
+ FUNC(d, cpu_env, n, m); \
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
73
+ }
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
74
+
46
};
75
+#define DO_3SAME_32_ENV(INSN, FUNC) \
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
76
+ WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
48
- histcnt_fns[a->esz], a, 0)
77
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
78
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
50
+ histcnt_fns[a->esz], a, 0)
79
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
51
80
+ uint32_t rn_ofs, uint32_t rm_ofs, \
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
81
+ uint32_t oprsz, uint32_t maxsz) \
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
82
+ { \
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
83
+ static const GVecGen3 ops[4] = { \
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
84
+ { .fni4 = gen_##INSN##_tramp8 }, \
56
85
+ { .fni4 = gen_##INSN##_tramp16 }, \
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
86
+ { .fni4 = gen_##INSN##_tramp32 }, \
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
87
+ { 0 }, \
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
88
+ }; \
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
89
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
90
+ } \
62
91
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
92
+ { \
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
93
+ if (a->size > 2) { \
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
94
+ return false; \
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
95
+ } \
67
96
+ return do_3same(s, a, gen_##INSN##_3s); \
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
97
+ }
69
- gen_helper_crypto_aese, a, false)
98
+
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
99
DO_3SAME_32(VHADD_S, hadd_s)
71
- gen_helper_crypto_aese, a, true)
100
DO_3SAME_32(VHADD_U, hadd_u)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
101
DO_3SAME_32(VHSUB_S, hsub_s)
73
+ gen_helper_crypto_aese, a, false)
102
DO_3SAME_32(VHSUB_U, hsub_u)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
103
DO_3SAME_32(VRHADD_S, rhadd_s)
75
+ gen_helper_crypto_aese, a, true)
104
DO_3SAME_32(VRHADD_U, rhadd_u)
76
105
+DO_3SAME_32(VRSHL_S, rshl_s)
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
106
+DO_3SAME_32(VRSHL_U, rshl_u)
78
- gen_helper_crypto_sm4e, a, 0)
107
+
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
108
+DO_3SAME_32_ENV(VQSHL_S, qshl_s)
80
- gen_helper_crypto_sm4ekey, a, 0)
109
+DO_3SAME_32_ENV(VQSHL_U, qshl_u)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
110
+DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
82
+ gen_helper_crypto_sm4e, a, 0)
111
+DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
112
diff --git a/target/arm/translate.c b/target/arm/translate.c
84
+ gen_helper_crypto_sm4ekey, a, 0)
113
index XXXXXXX..XXXXXXX 100644
85
114
--- a/target/arm/translate.c
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
115
+++ b/target/arm/translate.c
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
116
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
88
+ gen_gvec_rax1, a)
117
case NEON_3R_VHSUB:
89
118
case NEON_3R_VABD:
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
119
case NEON_3R_VABA:
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
120
+ case NEON_3R_VQSHL:
121
+ case NEON_3R_VRSHL:
122
+ case NEON_3R_VQRSHL:
123
/* Already handled by decodetree */
124
return 1;
125
}
126
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
127
}
128
pairwise = 0;
129
switch (op) {
130
- case NEON_3R_VQSHL:
131
- case NEON_3R_VRSHL:
132
- case NEON_3R_VQRSHL:
133
- {
134
- int rtmp;
135
- /* Shift instruction operands are reversed. */
136
- rtmp = rn;
137
- rn = rm;
138
- rm = rtmp;
139
- }
140
- break;
141
case NEON_3R_VPADD_VQRDMLAH:
142
case NEON_3R_VPMAX:
143
case NEON_3R_VPMIN:
144
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
145
tmp2 = neon_load_reg(rm, pass);
146
}
147
switch (op) {
148
- case NEON_3R_VQSHL:
149
- GEN_NEON_INTEGER_OP_ENV(qshl);
150
- break;
151
- case NEON_3R_VRSHL:
152
- GEN_NEON_INTEGER_OP(rshl);
153
- break;
154
- case NEON_3R_VQRSHL:
155
- GEN_NEON_INTEGER_OP_ENV(qrshl);
156
break;
157
case NEON_3R_VPMAX:
158
GEN_NEON_INTEGER_OP(pmax);
159
--
92
--
160
2.20.1
93
2.25.1
161
162
diff view generated by jsdifflib
1
Convert the Neon VRHADD and VHSUB 3-reg-same insns to decodetree.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
(These are all the other insns in 3-reg-same which were using
3
GEN_NEON_INTEGER_OP() and which are not pairwise or
4
reversed-operands.)
5
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20200512163904.10918-7-peter.maydell@linaro.org
9
---
10
---
10
target/arm/neon-dp.decode | 6 ++++++
11
target/arm/sme-fa64.decode | 9 ---------
11
target/arm/translate-neon.inc.c | 4 ++++
12
target/arm/translate-sve.c | 6 ++++++
12
target/arm/translate.c | 8 ++------
13
2 files changed, 6 insertions(+), 9 deletions(-)
13
3 files changed, 12 insertions(+), 6 deletions(-)
14
14
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
22
23
+VRHADD_S_3s 1111 001 0 0 . .. .... .... 0001 . . . 0 .... @3same
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
24
+VRHADD_U_3s 1111 001 1 0 . .. .... .... 0001 . . . 0 .... @3same
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
25
+
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
26
@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
27
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
28
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
29
@@ -XXX,XX +XXX,XX @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
30
VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
+VHSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 0 .... @3same
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
+VHSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 0 .... @3same
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
+
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
36
VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-neon.inc.c
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-neon.inc.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
44
if (!dc_isar_feature(aa64_sve, s)) {
45
DO_3SAME_32(VHADD_S, hadd_s)
45
return false;
46
DO_3SAME_32(VHADD_U, hadd_u)
46
}
47
+DO_3SAME_32(VHSUB_S, hsub_s)
47
+ s->is_nonstreaming = true;
48
+DO_3SAME_32(VHSUB_U, hsub_u)
48
if (!sve_access_check(s)) {
49
+DO_3SAME_32(VRHADD_S, rhadd_s)
49
return true;
50
+DO_3SAME_32(VRHADD_U, rhadd_u)
50
}
51
diff --git a/target/arm/translate.c b/target/arm/translate.c
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
52
index XXXXXXX..XXXXXXX 100644
52
if (!dc_isar_feature(aa64_sve, s)) {
53
--- a/target/arm/translate.c
53
return false;
54
+++ b/target/arm/translate.c
54
}
55
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
55
+ s->is_nonstreaming = true;
56
case NEON_3R_VSHL:
56
if (!sve_access_check(s)) {
57
case NEON_3R_SHA:
57
return true;
58
case NEON_3R_VHADD:
58
}
59
+ case NEON_3R_VRHADD:
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
60
+ case NEON_3R_VHSUB:
60
if (!dc_isar_feature(aa64_sve2, s)) {
61
case NEON_3R_VABD:
61
return false;
62
case NEON_3R_VABA:
62
}
63
/* Already handled by decodetree */
63
+ s->is_nonstreaming = true;
64
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
64
if (!sve_access_check(s)) {
65
tmp2 = neon_load_reg(rm, pass);
65
return true;
66
}
66
}
67
switch (op) {
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
- case NEON_3R_VRHADD:
68
if (!dc_isar_feature(aa64_sve, s)) {
69
- GEN_NEON_INTEGER_OP(rhadd);
69
return false;
70
- break;
70
}
71
- case NEON_3R_VHSUB:
71
+ s->is_nonstreaming = true;
72
- GEN_NEON_INTEGER_OP(hsub);
72
if (!sve_access_check(s)) {
73
- break;
73
return true;
74
case NEON_3R_VQSHL:
74
}
75
GEN_NEON_INTEGER_OP_ENV(qshl);
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
break;
76
if (!dc_isar_feature(aa64_sve, s)) {
77
return false;
78
}
79
+ s->is_nonstreaming = true;
80
if (!sve_access_check(s)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
90
}
77
--
91
--
78
2.20.1
92
2.25.1
79
80
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
RAS Virtualization feature is not supported now, so
3
Mark these as non-streaming instructions, which should trap if full
4
add a RAS machine option and disable it by default.
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
5
6
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
9
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
10
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
11
Message-id: 20200512030609.19593-3-gengdongjiu@huawei.com
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
11
---
14
include/hw/arm/virt.h | 1 +
12
target/arm/sme-fa64.decode | 3 ---
15
hw/arm/virt.c | 23 +++++++++++++++++++++++
13
target/arm/sve.decode | 10 +++++-----
16
2 files changed, 24 insertions(+)
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
17
16
18
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
19
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/arm/virt.h
19
--- a/target/arm/sme-fa64.decode
21
+++ b/include/hw/arm/virt.h
20
+++ b/target/arm/sme-fa64.decode
22
@@ -XXX,XX +XXX,XX @@ typedef struct {
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
23
bool highmem_ecam;
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
24
bool its;
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
25
bool virt;
24
26
+ bool ras;
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
OnOffAuto acpi;
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
28
VirtGICType gic_version;
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
29
VirtIOMMUType iommu;
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
30
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
31
index XXXXXXX..XXXXXXX 100644
33
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/arm/virt.c
34
--- a/target/arm/sve.decode
33
+++ b/hw/arm/virt.c
35
+++ b/target/arm/sve.decode
34
@@ -XXX,XX +XXX,XX @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name,
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
35
visit_type_OnOffAuto(v, name, &vms->acpi, errp);
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
36
}
72
}
37
73
38
+static bool virt_get_ras(Object *obj, Error **errp)
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
39
+{
75
+{
40
+ VirtMachineState *vms = VIRT_MACHINE(obj);
76
+ if (!dc_isar_feature(aa64_sve, s)) {
41
+
77
+ return false;
42
+ return vms->ras;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
43
+}
83
+}
44
+
84
+
45
+static void virt_set_ras(Object *obj, bool value, Error **errp)
85
/*
46
+{
86
* Move Prefix
47
+ VirtMachineState *vms = VIRT_MACHINE(obj);
87
*
48
+
49
+ vms->ras = value;
50
+}
51
+
52
static char *virt_get_gic_version(Object *obj, Error **errp)
53
{
54
VirtMachineState *vms = VIRT_MACHINE(obj);
55
@@ -XXX,XX +XXX,XX @@ static void virt_instance_init(Object *obj)
56
"Valid values are none and smmuv3",
57
NULL);
58
59
+ /* Default disallows RAS instantiation */
60
+ vms->ras = false;
61
+ object_property_add_bool(obj, "ras", virt_get_ras,
62
+ virt_set_ras, NULL);
63
+ object_property_set_description(obj, "ras",
64
+ "Set on/off to enable/disable reporting host memory errors "
65
+ "to a KVM guest using ACPI and guest external abort exceptions",
66
+ NULL);
67
+
68
vms->irqmap = a15irqmap;
69
70
virt_flash_create(vms);
71
--
88
--
72
2.20.1
89
2.25.1
73
74
diff view generated by jsdifflib
1
Convert the Neon VABA and VABD insns in the 3-reg-same group to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
decodetree.
3
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-6-peter.maydell@linaro.org
7
---
10
---
8
target/arm/neon-dp.decode | 6 ++++++
11
target/arm/sme-fa64.decode | 2 --
9
target/arm/translate-neon.inc.c | 4 ++++
12
target/arm/translate-sve.c | 2 ++
10
target/arm/translate.c | 22 ++--------------------
13
2 files changed, 2 insertions(+), 2 deletions(-)
11
3 files changed, 12 insertions(+), 20 deletions(-)
12
14
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
16
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
17
@@ -XXX,XX +XXX,XX @@ VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
18
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
19
VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
20
22
21
+VABD_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 0 .... @3same
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
22
+VABD_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 0 .... @3same
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
23
+
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
24
+VABA_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 1 .... @3same
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
25
+VABA_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 1 .... @3same
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
26
+
27
VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
28
VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
29
30
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
31
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-neon.inc.c
29
--- a/target/arm/translate-sve.c
33
+++ b/target/arm/translate-neon.inc.c
30
+++ b/target/arm/translate-sve.c
34
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
35
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
32
if (!dc_isar_feature(aa64_sve, s)) {
36
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
33
return false;
37
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
34
}
38
+DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
35
+ s->is_nonstreaming = true;
39
+DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
36
if (sve_access_check(s)) {
40
+DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
37
TCGv_i64 addr = new_tmp_a64(s);
41
+DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
42
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
43
#define DO_3SAME_CMP(INSN, COND) \
40
if (!dc_isar_feature(aa64_sve, s)) {
44
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
41
return false;
45
diff --git a/target/arm/translate.c b/target/arm/translate.c
42
}
46
index XXXXXXX..XXXXXXX 100644
43
+ s->is_nonstreaming = true;
47
--- a/target/arm/translate.c
44
if (sve_access_check(s)) {
48
+++ b/target/arm/translate.c
45
int vsz = vec_full_reg_size(s);
49
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
46
int elements = vsz >> dtype_esz[a->dtype];
50
/* VQRDMLSH : handled by decodetree */
51
return 1;
52
53
- case NEON_3R_VABD:
54
- if (u) {
55
- gen_gvec_uabd(size, rd_ofs, rn_ofs, rm_ofs,
56
- vec_size, vec_size);
57
- } else {
58
- gen_gvec_sabd(size, rd_ofs, rn_ofs, rm_ofs,
59
- vec_size, vec_size);
60
- }
61
- return 0;
62
-
63
- case NEON_3R_VABA:
64
- if (u) {
65
- gen_gvec_uaba(size, rd_ofs, rn_ofs, rm_ofs,
66
- vec_size, vec_size);
67
- } else {
68
- gen_gvec_saba(size, rd_ofs, rn_ofs, rm_ofs,
69
- vec_size, vec_size);
70
- }
71
- return 0;
72
-
73
case NEON_3R_VADD_VSUB:
74
case NEON_3R_LOGIC:
75
case NEON_3R_VMAX:
76
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
77
case NEON_3R_VSHL:
78
case NEON_3R_SHA:
79
case NEON_3R_VHADD:
80
+ case NEON_3R_VABD:
81
+ case NEON_3R_VABA:
82
/* Already handled by decodetree */
83
return 1;
84
}
85
--
47
--
86
2.20.1
48
2.25.1
87
88
diff view generated by jsdifflib
1
Convert the Neon VHADD insns in the 3-reg-same group to decodetree.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20200512163904.10918-5-peter.maydell@linaro.org
6
---
10
---
7
target/arm/neon-dp.decode | 2 ++
11
target/arm/sme-fa64.decode | 3 ---
8
target/arm/translate-neon.inc.c | 24 ++++++++++++++++++++++++
12
target/arm/translate-sve.c | 2 ++
9
target/arm/translate.c | 4 +---
13
2 files changed, 2 insertions(+), 3 deletions(-)
10
3 files changed, 27 insertions(+), 3 deletions(-)
11
14
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme-fa64.decode
15
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme-fa64.decode
16
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
17
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
18
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
19
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
20
+VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
23
-
21
+VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
22
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
23
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
24
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-neon.inc.c
28
--- a/target/arm/translate-sve.c
28
+++ b/target/arm/translate-neon.inc.c
29
+++ b/target/arm/translate-sve.c
29
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
30
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
31
if (a->rm == 31) {
31
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
32
return false;
32
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
33
}
33
+
34
+ s->is_nonstreaming = true;
34
+#define DO_3SAME_32(INSN, FUNC) \
35
if (sve_access_check(s)) {
35
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
36
TCGv_i64 addr = new_tmp_a64(s);
36
+ uint32_t rn_ofs, uint32_t rm_ofs, \
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
37
+ uint32_t oprsz, uint32_t maxsz) \
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
38
+ { \
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
39
+ static const GVecGen3 ops[4] = { \
40
return false;
40
+ { .fni4 = gen_helper_neon_##FUNC##8 }, \
41
}
41
+ { .fni4 = gen_helper_neon_##FUNC##16 }, \
42
+ s->is_nonstreaming = true;
42
+ { .fni4 = gen_helper_neon_##FUNC##32 }, \
43
if (sve_access_check(s)) {
43
+ { 0 }, \
44
TCGv_i64 addr = new_tmp_a64(s);
44
+ }; \
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
45
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
46
+ } \
47
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
48
+ { \
49
+ if (a->size > 2) { \
50
+ return false; \
51
+ } \
52
+ return do_3same(s, a, gen_##INSN##_3s); \
53
+ }
54
+
55
+DO_3SAME_32(VHADD_S, hadd_s)
56
+DO_3SAME_32(VHADD_U, hadd_u)
57
diff --git a/target/arm/translate.c b/target/arm/translate.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate.c
60
+++ b/target/arm/translate.c
61
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
62
case NEON_3R_VML:
63
case NEON_3R_VSHL:
64
case NEON_3R_SHA:
65
+ case NEON_3R_VHADD:
66
/* Already handled by decodetree */
67
return 1;
68
}
69
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
70
tmp2 = neon_load_reg(rm, pass);
71
}
72
switch (op) {
73
- case NEON_3R_VHADD:
74
- GEN_NEON_INTEGER_OP(hadd);
75
- break;
76
case NEON_3R_VRHADD:
77
GEN_NEON_INTEGER_OP(rhadd);
78
break;
79
--
46
--
80
2.20.1
47
2.25.1
81
82
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The functions eliminate duplication of the special cases for
3
These functions will be used to verify that the cpu
4
this operation. They match up with the GVecGen2iFn typedef.
4
is in the correct state for a given instruction.
5
6
Add out-of-line helpers. We got away with only having inline
7
expanders because the neon vector size is only 16 bytes, and
8
we know that the inline expansion will always succeed.
9
When we reuse this for SVE, tcg-gvec-op may decide to use an
10
out-of-line helper due to longer vector lengths.
11
5
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20200513163245.17915-2-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
10
---
17
target/arm/helper.h | 10 +++
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
18
target/arm/translate.h | 7 +-
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
19
target/arm/translate-a64.c | 15 +---
13
2 files changed, 55 insertions(+)
20
target/arm/translate.c | 161 ++++++++++++++++++++++---------------
21
target/arm/vec_helper.c | 25 ++++++
22
5 files changed, 139 insertions(+), 79 deletions(-)
23
14
24
diff --git a/target/arm/helper.h b/target/arm/helper.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
25
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/helper.h
17
--- a/target/arm/translate-a64.h
27
+++ b/target/arm/helper.h
18
+++ b/target/arm/translate-a64.h
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
29
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
30
DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
unsigned int imms, unsigned int immr);
31
22
bool sve_access_check(DisasContext *s);
32
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
23
+bool sme_enabled_check(DisasContext *s);
33
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
34
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
+
25
+
37
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
38
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
39
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
28
+{
40
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
30
+}
41
+
31
+
42
#ifdef TARGET_AARCH64
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
43
#include "helper-a64.h"
33
+static inline bool sme_za_enabled_check(DisasContext *s)
44
#include "helper-sve.h"
34
+{
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
46
index XXXXXXX..XXXXXXX 100644
36
+}
47
--- a/target/arm/translate.h
48
+++ b/target/arm/translate.h
49
@@ -XXX,XX +XXX,XX @@ extern const GVecGen3 mls_op[4];
50
extern const GVecGen3 cmtst_op[4];
51
extern const GVecGen3 sshl_op[4];
52
extern const GVecGen3 ushl_op[4];
53
-extern const GVecGen2i ssra_op[4];
54
-extern const GVecGen2i usra_op[4];
55
extern const GVecGen2i sri_op[4];
56
extern const GVecGen2i sli_op[4];
57
extern const GVecGen4 uqadd_op[4];
58
@@ -XXX,XX +XXX,XX @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
59
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
60
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
61
62
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
64
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
66
+
37
+
67
/*
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
69
*/
40
+{
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
42
+}
43
+
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
71
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-a64.c
49
--- a/target/arm/translate-a64.c
73
+++ b/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
75
52
return true;
76
switch (opcode) {
77
case 0x02: /* SSRA / USRA (accumulate) */
78
- if (is_u) {
79
- /* Shift count same as element size produces zero to add. */
80
- if (shift == 8 << size) {
81
- goto done;
82
- }
83
- gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
84
- } else {
85
- /* Shift count same as element size produces all sign to add. */
86
- if (shift == 8 << size) {
87
- shift -= 1;
88
- }
89
- gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
90
- }
91
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
92
+ is_u ? gen_gvec_usra : gen_gvec_ssra, size);
93
return;
94
case 0x08: /* SRI */
95
/* Shift count same as element size is valid but does nothing. */
96
diff --git a/target/arm/translate.c b/target/arm/translate.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/arm/translate.c
99
+++ b/target/arm/translate.c
100
@@ -XXX,XX +XXX,XX @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
101
tcg_gen_add_vec(vece, d, d, a);
102
}
53
}
103
54
104
-static const TCGOpcode vecop_list_ssra[] = {
55
+/* This function corresponds to CheckSMEEnabled. */
105
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
56
+bool sme_enabled_check(DisasContext *s)
106
-};
107
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
108
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
109
+{
57
+{
110
+ static const TCGOpcode vecop_list[] = {
111
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
112
+ };
113
+ static const GVecGen2i ops[4] = {
114
+ { .fni8 = gen_ssra8_i64,
115
+ .fniv = gen_ssra_vec,
116
+ .fno = gen_helper_gvec_ssra_b,
117
+ .load_dest = true,
118
+ .opt_opc = vecop_list,
119
+ .vece = MO_8 },
120
+ { .fni8 = gen_ssra16_i64,
121
+ .fniv = gen_ssra_vec,
122
+ .fno = gen_helper_gvec_ssra_h,
123
+ .load_dest = true,
124
+ .opt_opc = vecop_list,
125
+ .vece = MO_16 },
126
+ { .fni4 = gen_ssra32_i32,
127
+ .fniv = gen_ssra_vec,
128
+ .fno = gen_helper_gvec_ssra_s,
129
+ .load_dest = true,
130
+ .opt_opc = vecop_list,
131
+ .vece = MO_32 },
132
+ { .fni8 = gen_ssra64_i64,
133
+ .fniv = gen_ssra_vec,
134
+ .fno = gen_helper_gvec_ssra_b,
135
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
136
+ .opt_opc = vecop_list,
137
+ .load_dest = true,
138
+ .vece = MO_64 },
139
+ };
140
141
-const GVecGen2i ssra_op[4] = {
142
- { .fni8 = gen_ssra8_i64,
143
- .fniv = gen_ssra_vec,
144
- .load_dest = true,
145
- .opt_opc = vecop_list_ssra,
146
- .vece = MO_8 },
147
- { .fni8 = gen_ssra16_i64,
148
- .fniv = gen_ssra_vec,
149
- .load_dest = true,
150
- .opt_opc = vecop_list_ssra,
151
- .vece = MO_16 },
152
- { .fni4 = gen_ssra32_i32,
153
- .fniv = gen_ssra_vec,
154
- .load_dest = true,
155
- .opt_opc = vecop_list_ssra,
156
- .vece = MO_32 },
157
- { .fni8 = gen_ssra64_i64,
158
- .fniv = gen_ssra_vec,
159
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
160
- .opt_opc = vecop_list_ssra,
161
- .load_dest = true,
162
- .vece = MO_64 },
163
-};
164
+ /* tszimm encoding produces immediates in the range [1..esize]. */
165
+ tcg_debug_assert(shift > 0);
166
+ tcg_debug_assert(shift <= (8 << vece));
167
+
168
+ /*
58
+ /*
169
+ * Shifts larger than the element size are architecturally valid.
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
170
+ * Signed results in all sign bits.
60
+ * to be zero when fp_excp_el has priority. This is because we need
61
+ * sme_excp_el by itself for cpregs access checks.
171
+ */
62
+ */
172
+ shift = MIN(shift, (8 << vece) - 1);
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
173
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
64
+ s->fp_access_checked = true;
174
+}
65
+ return sme_access_check(s);
175
176
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
177
{
178
@@ -XXX,XX +XXX,XX @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
179
tcg_gen_add_vec(vece, d, d, a);
180
}
181
182
-static const TCGOpcode vecop_list_usra[] = {
183
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
184
-};
185
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
186
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
187
+{
188
+ static const TCGOpcode vecop_list[] = {
189
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
190
+ };
191
+ static const GVecGen2i ops[4] = {
192
+ { .fni8 = gen_usra8_i64,
193
+ .fniv = gen_usra_vec,
194
+ .fno = gen_helper_gvec_usra_b,
195
+ .load_dest = true,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_8, },
198
+ { .fni8 = gen_usra16_i64,
199
+ .fniv = gen_usra_vec,
200
+ .fno = gen_helper_gvec_usra_h,
201
+ .load_dest = true,
202
+ .opt_opc = vecop_list,
203
+ .vece = MO_16, },
204
+ { .fni4 = gen_usra32_i32,
205
+ .fniv = gen_usra_vec,
206
+ .fno = gen_helper_gvec_usra_s,
207
+ .load_dest = true,
208
+ .opt_opc = vecop_list,
209
+ .vece = MO_32, },
210
+ { .fni8 = gen_usra64_i64,
211
+ .fniv = gen_usra_vec,
212
+ .fno = gen_helper_gvec_usra_d,
213
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
214
+ .load_dest = true,
215
+ .opt_opc = vecop_list,
216
+ .vece = MO_64, },
217
+ };
218
219
-const GVecGen2i usra_op[4] = {
220
- { .fni8 = gen_usra8_i64,
221
- .fniv = gen_usra_vec,
222
- .load_dest = true,
223
- .opt_opc = vecop_list_usra,
224
- .vece = MO_8, },
225
- { .fni8 = gen_usra16_i64,
226
- .fniv = gen_usra_vec,
227
- .load_dest = true,
228
- .opt_opc = vecop_list_usra,
229
- .vece = MO_16, },
230
- { .fni4 = gen_usra32_i32,
231
- .fniv = gen_usra_vec,
232
- .load_dest = true,
233
- .opt_opc = vecop_list_usra,
234
- .vece = MO_32, },
235
- { .fni8 = gen_usra64_i64,
236
- .fniv = gen_usra_vec,
237
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
238
- .load_dest = true,
239
- .opt_opc = vecop_list_usra,
240
- .vece = MO_64, },
241
-};
242
+ /* tszimm encoding produces immediates in the range [1..esize]. */
243
+ tcg_debug_assert(shift > 0);
244
+ tcg_debug_assert(shift <= (8 << vece));
245
+
246
+ /*
247
+ * Shifts larger than the element size are architecturally valid.
248
+ * Unsigned results in all zeros as input to accumulate: nop.
249
+ */
250
+ if (shift < (8 << vece)) {
251
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
252
+ } else {
253
+ /* Nop, but we do need to clear the tail. */
254
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
255
+ }
66
+ }
256
+}
67
+ return fp_access_check_only(s);
257
258
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
259
{
260
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
261
case 1: /* VSRA */
262
/* Right shift comes here negative. */
263
shift = -shift;
264
- /* Shifts larger than the element size are architecturally
265
- * valid. Unsigned results in all zeros; signed results
266
- * in all sign bits.
267
- */
268
- if (!u) {
269
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
270
- MIN(shift, (8 << size) - 1),
271
- &ssra_op[size]);
272
- } else if (shift >= 8 << size) {
273
- /* rd += 0 */
274
+ if (u) {
275
+ gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
276
+ vec_size, vec_size);
277
} else {
278
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
279
- shift, &usra_op[size]);
280
+ gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
281
+ vec_size, vec_size);
282
}
283
return 0;
284
285
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
286
index XXXXXXX..XXXXXXX 100644
287
--- a/target/arm/vec_helper.c
288
+++ b/target/arm/vec_helper.c
289
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
290
clear_tail(d, oprsz, simd_maxsz(desc));
291
}
292
293
+
294
+#define DO_SRA(NAME, TYPE) \
295
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
296
+{ \
297
+ intptr_t i, oprsz = simd_oprsz(desc); \
298
+ int shift = simd_data(desc); \
299
+ TYPE *d = vd, *n = vn; \
300
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
301
+ d[i] += n[i] >> shift; \
302
+ } \
303
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
304
+}
68
+}
305
+
69
+
306
+DO_SRA(gvec_ssra_b, int8_t)
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
307
+DO_SRA(gvec_ssra_h, int16_t)
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
308
+DO_SRA(gvec_ssra_s, int32_t)
72
+{
309
+DO_SRA(gvec_ssra_d, int64_t)
73
+ if (!sme_enabled_check(s)) {
310
+
74
+ return false;
311
+DO_SRA(gvec_usra_b, uint8_t)
75
+ }
312
+DO_SRA(gvec_usra_h, uint16_t)
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
313
+DO_SRA(gvec_usra_s, uint32_t)
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
314
+DO_SRA(gvec_usra_d, uint64_t)
78
+ syn_smetrap(SME_ET_NotStreaming, false));
315
+
79
+ return false;
316
+#undef DO_SRA
80
+ }
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
85
+ }
86
+ return true;
87
+}
317
+
88
+
318
/*
89
/*
319
* Convert float16 to float32, raising no exceptions and
90
* This utility function is for doing register extension with an
320
* preserving exceptional values, including SNaN.
91
* optional shift. You will likely want to pass a temporary for the
321
--
92
--
322
2.20.1
93
2.25.1
323
324
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Now that we've converted all cases to gvec, there is quite a bit
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
of dead code at the end of the function. Remove it.
4
SVE mode, and for SME present but SVE absent.
5
6
Sink the call to gen_gvec_fn2i to the end, loading a function
7
pointer within the switch statement.
8
5
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-6-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
target/arm/translate-a64.c | 56 ++++++++++----------------------------
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
15
1 file changed, 14 insertions(+), 42 deletions(-)
12
1 file changed, 16 insertions(+), 6 deletions(-)
16
13
17
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate-a64.c
16
--- a/target/arm/translate-a64.c
20
+++ b/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
21
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
22
int size = 32 - clz32(immh) - 1;
19
return true;
23
int immhb = immh << 3 | immb;
20
}
24
int shift = 2 * (8 << size) - immhb;
21
25
- bool accumulate = false;
22
-/* Check that SVE access is enabled. If it is, return true.
26
- int dsize = is_q ? 128 : 64;
23
+/*
27
- int esize = 8 << size;
24
+ * Check that SVE access is enabled. If it is, return true.
28
- int elements = dsize/esize;
25
* If not, emit code to generate an appropriate exception and return false.
29
- MemOp memop = size | (is_u ? 0 : MO_SIGN);
26
+ * This function corresponds to CheckSVEEnabled().
30
- TCGv_i64 tcg_rn = new_tmp_a64(s);
27
*/
31
- TCGv_i64 tcg_rd = new_tmp_a64(s);
28
bool sve_access_check(DisasContext *s)
32
- TCGv_i64 tcg_round;
29
{
33
- uint64_t round_const;
30
- if (s->sve_excp_el) {
34
- int i;
31
- assert(!s->sve_access_checked);
35
+ GVecGen2iFn *gvec_fn;
32
- s->sve_access_checked = true;
36
33
-
37
if (extract32(immh, 3, 1) && !is_q) {
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
38
unallocated_encoding(s);
35
+ assert(dc_isar_feature(aa64_sme, s));
39
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
36
+ if (!sme_sm_enabled_check(s)) {
40
37
+ goto fail_exit;
41
switch (opcode) {
38
+ }
42
case 0x02: /* SSRA / USRA (accumulate) */
39
+ } else if (s->sve_excp_el) {
43
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
44
- is_u ? gen_gvec_usra : gen_gvec_ssra, size);
41
syn_sve_access_trap(), s->sve_excp_el);
45
- return;
42
- return false;
46
+ gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
43
+ goto fail_exit;
47
+ break;
48
49
case 0x08: /* SRI */
50
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sri, size);
51
- return;
52
+ gvec_fn = gen_gvec_sri;
53
+ break;
54
55
case 0x00: /* SSHR / USHR */
56
if (is_u) {
57
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
58
/* Shift count the same size as element size produces zero. */
59
tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
60
is_q ? 16 : 8, vec_full_reg_size(s), 0);
61
- } else {
62
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
63
+ return;
64
}
65
+ gvec_fn = tcg_gen_gvec_shri;
66
} else {
67
/* Shift count the same size as element size produces all sign. */
68
if (shift == 8 << size) {
69
shift -= 1;
70
}
71
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
72
+ gvec_fn = tcg_gen_gvec_sari;
73
}
74
- return;
75
+ break;
76
77
case 0x04: /* SRSHR / URSHR (rounding) */
78
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
79
- is_u ? gen_gvec_urshr : gen_gvec_srshr, size);
80
- return;
81
+ gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
82
+ break;
83
84
case 0x06: /* SRSRA / URSRA (accum + rounding) */
85
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
86
- is_u ? gen_gvec_ursra : gen_gvec_srsra, size);
87
- return;
88
+ gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
89
+ break;
90
91
default:
92
g_assert_not_reached();
93
}
44
}
94
45
s->sve_access_checked = true;
95
- round_const = 1ULL << (shift - 1);
46
return fp_access_check(s);
96
- tcg_round = tcg_const_i64(round_const);
47
+
97
-
48
+ fail_exit:
98
- for (i = 0; i < elements; i++) {
49
+ /* Assert that we only raise one exception per instruction. */
99
- read_vec_element(s, tcg_rn, rn, i, memop);
50
+ assert(!s->sve_access_checked);
100
- if (accumulate) {
51
+ s->sve_access_checked = true;
101
- read_vec_element(s, tcg_rd, rd, i, memop);
52
+ return false;
102
- }
103
-
104
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
105
- accumulate, is_u, size, shift);
106
-
107
- write_vec_element(s, tcg_rd, rd, i, size);
108
- }
109
- tcg_temp_free_i64(tcg_round);
110
-
111
- clear_vec_high(s, is_q, rd);
112
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
113
}
53
}
114
54
115
/* SHL/SLI - Vector shift left */
55
/*
116
--
56
--
117
2.20.1
57
2.25.1
118
119
diff view generated by jsdifflib
1
Convert the Neon SHA instructions in the 3-reg-same group
1
From: Richard Henderson <richard.henderson@linaro.org>
2
to decodetree.
3
2
3
These SME instructions are nominally within the SVE decode space,
4
so we add them to sve.decode and translate-sve.c.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-3-peter.maydell@linaro.org
7
---
10
---
8
target/arm/neon-dp.decode | 10 +++
11
target/arm/translate-a64.h | 12 ++++++++++++
9
target/arm/translate-neon.inc.c | 139 ++++++++++++++++++++++++++++++++
12
target/arm/sve.decode | 5 ++++-
10
target/arm/translate.c | 46 +----------
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
11
3 files changed, 151 insertions(+), 44 deletions(-)
14
3 files changed, 54 insertions(+), 1 deletion(-)
12
15
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
14
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
18
--- a/target/arm/translate-a64.h
16
+++ b/target/arm/neon-dp.decode
19
+++ b/target/arm/translate-a64.h
17
@@ -XXX,XX +XXX,XX @@ VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
18
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
21
return s->vl;
19
22
}
20
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
23
24
+/* Return the byte size of the vector register, SVL / 8. */
25
+static inline int streaming_vec_reg_size(DisasContext *s)
26
+{
27
+ return s->svl;
28
+}
21
+
29
+
22
+SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
30
/*
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
31
* Return the offset into CPUARMState of the predicate vector register Pn.
24
+SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \
32
* Note for this purpose, FFR is P16.
25
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
26
+SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
34
return s->vl >> 3;
27
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
35
}
28
+SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
36
29
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
37
+/* Return the byte size of the predicate register, SVL / 64. */
38
+static inline int streaming_pred_reg_size(DisasContext *s)
39
+{
40
+ return s->svl >> 3;
41
+}
30
+
42
+
31
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
43
/*
32
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate-neon.inc.c
48
--- a/target/arm/sve.decode
35
+++ b/target/arm/translate-neon.inc.c
49
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
37
51
# SVE index generation (register start, register increment)
38
DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
39
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
53
40
+
54
-### SVE Stack Allocation Group
41
+static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a)
55
+### SVE / Streaming SVE Stack Allocation Group
56
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
74
return true;
75
}
76
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
42
+{
78
+{
43
+ TCGv_ptr ptr1, ptr2, ptr3;
79
+ if (!dc_isar_feature(aa64_sme, s)) {
44
+ TCGv_i32 tmp;
45
+
46
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
47
+ !dc_isar_feature(aa32_sha1, s)) {
48
+ return false;
80
+ return false;
49
+ }
81
+ }
50
+
82
+ if (sme_enabled_check(s)) {
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
52
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
53
+ ((a->vd | a->vn | a->vm) & 0x10)) {
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
54
+ return false;
55
+ }
86
+ }
56
+
57
+ if ((a->vn | a->vm | a->vd) & 1) {
58
+ return false;
59
+ }
60
+
61
+ if (!vfp_access_check(s)) {
62
+ return true;
63
+ }
64
+
65
+ ptr1 = vfp_reg_ptr(true, a->vd);
66
+ ptr2 = vfp_reg_ptr(true, a->vn);
67
+ ptr3 = vfp_reg_ptr(true, a->vm);
68
+ tmp = tcg_const_i32(a->optype);
69
+ gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp);
70
+ tcg_temp_free_i32(tmp);
71
+ tcg_temp_free_ptr(ptr1);
72
+ tcg_temp_free_ptr(ptr2);
73
+ tcg_temp_free_ptr(ptr3);
74
+
75
+ return true;
87
+ return true;
76
+}
88
+}
77
+
89
+
78
+static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a)
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
91
{
92
if (!dc_isar_feature(aa64_sve, s)) {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
94
return true;
95
}
96
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
79
+{
98
+{
80
+ TCGv_ptr ptr1, ptr2, ptr3;
99
+ if (!dc_isar_feature(aa64_sme, s)) {
81
+
82
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
83
+ !dc_isar_feature(aa32_sha2, s)) {
84
+ return false;
100
+ return false;
85
+ }
101
+ }
86
+
102
+ if (sme_enabled_check(s)) {
87
+ /* UNDEF accesses to D16-D31 if they don't exist. */
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
88
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
89
+ ((a->vd | a->vn | a->vm) & 0x10)) {
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
90
+ return false;
91
+ }
106
+ }
92
+
93
+ if ((a->vn | a->vm | a->vd) & 1) {
94
+ return false;
95
+ }
96
+
97
+ if (!vfp_access_check(s)) {
98
+ return true;
99
+ }
100
+
101
+ ptr1 = vfp_reg_ptr(true, a->vd);
102
+ ptr2 = vfp_reg_ptr(true, a->vn);
103
+ ptr3 = vfp_reg_ptr(true, a->vm);
104
+ gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
105
+ tcg_temp_free_ptr(ptr1);
106
+ tcg_temp_free_ptr(ptr2);
107
+ tcg_temp_free_ptr(ptr3);
108
+
109
+ return true;
107
+ return true;
110
+}
108
+}
111
+
109
+
112
+static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a)
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
113
+{
118
+{
114
+ TCGv_ptr ptr1, ptr2, ptr3;
119
+ if (!dc_isar_feature(aa64_sme, s)) {
115
+
116
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
117
+ !dc_isar_feature(aa32_sha2, s)) {
118
+ return false;
120
+ return false;
119
+ }
121
+ }
120
+
122
+ if (sme_enabled_check(s)) {
121
+ /* UNDEF accesses to D16-D31 if they don't exist. */
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
122
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
123
+ ((a->vd | a->vn | a->vm) & 0x10)) {
124
+ return false;
125
+ }
125
+ }
126
+
127
+ if ((a->vn | a->vm | a->vd) & 1) {
128
+ return false;
129
+ }
130
+
131
+ if (!vfp_access_check(s)) {
132
+ return true;
133
+ }
134
+
135
+ ptr1 = vfp_reg_ptr(true, a->vd);
136
+ ptr2 = vfp_reg_ptr(true, a->vn);
137
+ ptr3 = vfp_reg_ptr(true, a->vm);
138
+ gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
139
+ tcg_temp_free_ptr(ptr1);
140
+ tcg_temp_free_ptr(ptr2);
141
+ tcg_temp_free_ptr(ptr3);
142
+
143
+ return true;
126
+ return true;
144
+}
127
+}
145
+
128
+
146
+static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
129
/*
147
+{
130
*** SVE Compute Vector Address Group
148
+ TCGv_ptr ptr1, ptr2, ptr3;
131
*/
149
+
150
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
151
+ !dc_isar_feature(aa32_sha2, s)) {
152
+ return false;
153
+ }
154
+
155
+ /* UNDEF accesses to D16-D31 if they don't exist. */
156
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
157
+ ((a->vd | a->vn | a->vm) & 0x10)) {
158
+ return false;
159
+ }
160
+
161
+ if ((a->vn | a->vm | a->vd) & 1) {
162
+ return false;
163
+ }
164
+
165
+ if (!vfp_access_check(s)) {
166
+ return true;
167
+ }
168
+
169
+ ptr1 = vfp_reg_ptr(true, a->vd);
170
+ ptr2 = vfp_reg_ptr(true, a->vn);
171
+ ptr3 = vfp_reg_ptr(true, a->vm);
172
+ gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
173
+ tcg_temp_free_ptr(ptr1);
174
+ tcg_temp_free_ptr(ptr2);
175
+ tcg_temp_free_ptr(ptr3);
176
+
177
+ return true;
178
+}
179
diff --git a/target/arm/translate.c b/target/arm/translate.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/target/arm/translate.c
182
+++ b/target/arm/translate.c
183
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
184
int vec_size;
185
uint32_t imm;
186
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
187
- TCGv_ptr ptr1, ptr2, ptr3;
188
+ TCGv_ptr ptr1, ptr2;
189
TCGv_i64 tmp64;
190
191
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
192
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
193
return 1;
194
}
195
switch (op) {
196
- case NEON_3R_SHA:
197
- /* The SHA-1/SHA-256 3-register instructions require special
198
- * treatment here, as their size field is overloaded as an
199
- * op type selector, and they all consume their input in a
200
- * single pass.
201
- */
202
- if (!q) {
203
- return 1;
204
- }
205
- if (!u) { /* SHA-1 */
206
- if (!dc_isar_feature(aa32_sha1, s)) {
207
- return 1;
208
- }
209
- ptr1 = vfp_reg_ptr(true, rd);
210
- ptr2 = vfp_reg_ptr(true, rn);
211
- ptr3 = vfp_reg_ptr(true, rm);
212
- tmp4 = tcg_const_i32(size);
213
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
214
- tcg_temp_free_i32(tmp4);
215
- } else { /* SHA-256 */
216
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
217
- return 1;
218
- }
219
- ptr1 = vfp_reg_ptr(true, rd);
220
- ptr2 = vfp_reg_ptr(true, rn);
221
- ptr3 = vfp_reg_ptr(true, rm);
222
- switch (size) {
223
- case 0:
224
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
225
- break;
226
- case 1:
227
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
228
- break;
229
- case 2:
230
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
231
- break;
232
- }
233
- }
234
- tcg_temp_free_ptr(ptr1);
235
- tcg_temp_free_ptr(ptr2);
236
- tcg_temp_free_ptr(ptr3);
237
- return 0;
238
-
239
case NEON_3R_VPADD_VQRDMLAH:
240
if (!u) {
241
break; /* VPADD */
242
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
243
case NEON_3R_VMUL:
244
case NEON_3R_VML:
245
case NEON_3R_VSHL:
246
+ case NEON_3R_SHA:
247
/* Already handled by decodetree */
248
return 1;
249
}
250
--
132
--
251
2.20.1
133
2.25.1
252
253
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Record the GHEB address via fw_cfg file, when recording
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
an error to CPER, it will use this address to find out
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Generic Error Data Entries and write the error.
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
7
In order to avoid migration failure, make hardware
8
error table address a part of the GED device instead
9
of global variable, then this address will be migrated
10
to target QEMU.
11
12
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
13
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
14
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
15
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
16
Message-id: 20200512030609.19593-7-gengdongjiu@huawei.com
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
7
---
19
include/hw/acpi/generic_event_device.h | 2 ++
8
target/arm/helper-sme.h | 2 ++
20
include/hw/acpi/ghes.h | 6 ++++++
9
target/arm/sme.decode | 4 ++++
21
hw/acpi/generic_event_device.c | 19 +++++++++++++++++++
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
22
hw/acpi/ghes.c | 14 ++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
23
hw/arm/virt-acpi-build.c | 8 ++++++++
12
4 files changed, 44 insertions(+)
24
5 files changed, 49 insertions(+)
25
13
26
diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
27
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/acpi/generic_event_device.h
16
--- a/target/arm/helper-sme.h
29
+++ b/include/hw/acpi/generic_event_device.h
17
+++ b/target/arm/helper-sme.h
30
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
31
19
32
#include "hw/sysbus.h"
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
33
#include "hw/acpi/memory_hotplug.h"
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
34
+#include "hw/acpi/ghes.h"
22
+
35
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
36
#define ACPI_POWER_BUTTON_DEVICE "PWRB"
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
37
38
@@ -XXX,XX +XXX,XX @@ typedef struct AcpiGedState {
39
GEDState ged_state;
40
uint32_t ged_event_bitmap;
41
qemu_irq irq;
42
+ AcpiGhesState ghes_state;
43
} AcpiGedState;
44
45
void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev,
46
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
47
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
48
--- a/include/hw/acpi/ghes.h
26
--- a/target/arm/sme.decode
49
+++ b/include/hw/acpi/ghes.h
27
+++ b/target/arm/sme.decode
50
@@ -XXX,XX +XXX,XX @@ enum {
28
@@ -XXX,XX +XXX,XX @@
51
ACPI_HEST_SRC_ID_RESERVED,
29
#
52
};
30
# This file is processed by scripts/decodetree.py
53
31
#
54
+typedef struct AcpiGhesState {
55
+ uint64_t ghes_addr_le;
56
+} AcpiGhesState;
57
+
32
+
58
void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
33
+### SME Misc
59
void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
34
+
60
+void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
35
+ZERO 11000000 00 001 00000000000 imm:8
61
+ GArray *hardware_errors);
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
62
#endif
63
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
64
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
65
--- a/hw/acpi/generic_event_device.c
38
--- a/target/arm/sme_helper.c
66
+++ b/hw/acpi/generic_event_device.c
39
+++ b/target/arm/sme_helper.c
67
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_ged_state = {
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
68
}
42
}
69
};
70
71
+static bool ghes_needed(void *opaque)
72
+{
73
+ AcpiGedState *s = opaque;
74
+ return s->ghes_state.ghes_addr_le;
75
+}
76
+
77
+static const VMStateDescription vmstate_ghes_state = {
78
+ .name = "acpi-ged/ghes",
79
+ .version_id = 1,
80
+ .minimum_version_id = 1,
81
+ .needed = ghes_needed,
82
+ .fields = (VMStateField[]) {
83
+ VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
84
+ vmstate_ghes_state, AcpiGhesState),
85
+ VMSTATE_END_OF_LIST()
86
+ }
87
+};
88
+
89
static const VMStateDescription vmstate_acpi_ged = {
90
.name = "acpi-ged",
91
.version_id = 1,
92
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_acpi_ged = {
93
},
94
.subsections = (const VMStateDescription * []) {
95
&vmstate_memhp_state,
96
+ &vmstate_ghes_state,
97
NULL
98
}
99
};
100
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/hw/acpi/ghes.c
103
+++ b/hw/acpi/ghes.c
104
@@ -XXX,XX +XXX,XX @@
105
#include "hw/acpi/ghes.h"
106
#include "hw/acpi/aml-build.h"
107
#include "qemu/error-report.h"
108
+#include "hw/acpi/generic_event_device.h"
109
+#include "hw/nvram/fw_cfg.h"
110
111
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
112
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
113
@@ -XXX,XX +XXX,XX @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker)
114
build_header(linker, table_data, (void *)(table_data->data + hest_start),
115
"HEST", table_data->len - hest_start, 1, NULL, NULL);
116
}
43
}
117
+
44
+
118
+void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
119
+ GArray *hardware_error)
120
+{
46
+{
121
+ /* Create a read-only fw_cfg file for GHES */
47
+ uint32_t i;
122
+ fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data,
123
+ hardware_error->len);
124
+
48
+
125
+ /* Create a read-write fw_cfg file for Address */
49
+ /*
126
+ fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
50
+ * Special case clearing the entire ZA space.
127
+ NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
128
+}
52
+ * parts of the ZA storage outside of SVL.
129
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
53
+ */
130
index XXXXXXX..XXXXXXX 100644
54
+ if (imm == 0xff) {
131
--- a/hw/arm/virt-acpi-build.c
55
+ memset(env->zarray, 0, sizeof(env->zarray));
132
+++ b/hw/arm/virt-acpi-build.c
56
+ return;
133
@@ -XXX,XX +XXX,XX @@ void virt_acpi_setup(VirtMachineState *vms)
134
{
135
AcpiBuildTables tables;
136
AcpiBuildState *build_state;
137
+ AcpiGedState *acpi_ged_state;
138
139
if (!vms->fw_cfg) {
140
trace_virt_acpi_setup();
141
@@ -XXX,XX +XXX,XX @@ void virt_acpi_setup(VirtMachineState *vms)
142
fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
143
acpi_data_len(tables.tcpalog));
144
145
+ if (vms->ras) {
146
+ assert(vms->acpi_dev);
147
+ acpi_ged_state = ACPI_GED(vms->acpi_dev);
148
+ acpi_ghes_add_fw_cfg(&acpi_ged_state->ghes_state,
149
+ vms->fw_cfg, tables.hardware_errors);
150
+ }
57
+ }
151
+
58
+
152
build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update,
59
+ /*
153
build_state, tables.rsdp,
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
154
ACPI_BUILD_RSDP_FILE, 0);
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sme.c
72
+++ b/target/arm/translate-sme.c
73
@@ -XXX,XX +XXX,XX @@
74
*/
75
76
#include "decode-sme.c.inc"
77
+
78
+
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
80
+{
81
+ if (!dc_isar_feature(aa64_sme, s)) {
82
+ return false;
83
+ }
84
+ if (sme_za_enabled_check(s)) {
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
87
+ }
88
+ return true;
89
+}
155
--
90
--
156
2.20.1
91
2.25.1
157
158
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Provide a functional interface for the vector expansion.
3
We can reuse the SVE functions for implementing moves to/from
4
This fits better with the existing set of helpers that
4
horizontal tile slices, but we need new ones for moves to/from
5
we provide for other operations.
5
vertical tile slices.
6
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-10-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
target/arm/translate.h | 10 ++-
12
target/arm/helper-sme.h | 12 +++
13
target/arm/translate-a64.c | 18 ++--
13
target/arm/helper-sve.h | 2 +
14
target/arm/translate-neon.inc.c | 23 +----
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.c | 146 +++++++++++++++++---------------
15
target/arm/translate.h | 5 ++
16
4 files changed, 95 insertions(+), 102 deletions(-)
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
17
21
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper-sme.h
25
+++ b/target/arm/helper-sme.h
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
28
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
30
+
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
62
63
+/* Return a newly allocated pointer to the predicate register. */
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
65
+{
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
68
+ return ret;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
76
--- a/target/arm/translate.h
21
+++ b/target/arm/translate.h
77
+++ b/target/arm/translate.h
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
23
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
79
return x + 2;
24
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
25
26
-extern const GVecGen3 cmtst_op[4];
27
-extern const GVecGen3 sshl_op[4];
28
-extern const GVecGen3 ushl_op[4];
29
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
30
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
31
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
32
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
33
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
34
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
35
+
36
extern const GVecGen4 uqadd_op[4];
37
extern const GVecGen4 sqadd_op[4];
38
extern const GVecGen4 uqsub_op[4];
39
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-a64.c
42
+++ b/target/arm/translate-a64.c
43
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
44
is_q ? 16 : 8, vec_full_reg_size(s));
45
}
80
}
46
81
47
-/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
82
+static inline int plus_12(DisasContext *s, int x)
48
-static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
83
+{
49
- int rn, int rm, const GVecGen3 *gvec_op)
84
+ return x + 12;
50
-{
85
+}
51
- tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
86
+
52
- vec_full_reg_offset(s, rm), is_q ? 16 : 8,
87
static inline int times_2(DisasContext *s, int x)
53
- vec_full_reg_size(s), gvec_op);
88
{
54
-}
89
return x * 2;
55
-
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
56
/* Expand a 3-operand operation using an out-of-line helper. */
91
index XXXXXXX..XXXXXXX 100644
57
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
92
--- a/target/arm/sme.decode
58
int rn, int rm, int data, gen_helper_gvec_3 *fn)
93
+++ b/target/arm/sme.decode
59
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
94
@@ -XXX,XX +XXX,XX @@
60
(u ? uqsub_op : sqsub_op) + size);
95
### SME Misc
61
return;
96
62
case 0x08: /* SSHL, USHL */
97
ZERO 11000000 00 001 00000000000 imm:8
63
- gen_gvec_op3(s, is_q, rd, rn, rm,
98
+
64
- u ? &ushl_op[size] : &sshl_op[size]);
99
+### SME Move into/from Array
65
+ if (u) {
100
+
66
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
213
+ * the address arithmetic.
214
+ */
215
+ for (i = 0; i < oprsz; i++) {
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
286
}
287
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
67
+ } else {
427
+ } else {
68
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
69
+ }
429
+ }
70
return;
430
+ } else {
71
case 0x0c: /* SMAX, UMAX */
431
+ /* Horizontal slice -- reuse sve sel helpers. */
72
if (u) {
432
+ if (a->to_vec) {
73
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
74
return;
434
+ } else {
75
case 0x11:
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
76
if (!u) { /* CMTST */
436
+ }
77
- gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
437
+ }
78
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
438
+
79
return;
439
+ tcg_temp_free_ptr(t_za);
80
}
440
+ tcg_temp_free_ptr(t_zr);
81
/* else CMEQ */
441
+ tcg_temp_free_ptr(t_pg);
82
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
442
+
83
index XXXXXXX..XXXXXXX 100644
443
+ return true;
84
--- a/target/arm/translate-neon.inc.c
444
+}
85
+++ b/target/arm/translate-neon.inc.c
86
@@ -XXX,XX +XXX,XX @@ DO_3SAME(VBIC, tcg_gen_gvec_andc)
87
DO_3SAME(VORR, tcg_gen_gvec_or)
88
DO_3SAME(VORN, tcg_gen_gvec_orc)
89
DO_3SAME(VEOR, tcg_gen_gvec_xor)
90
+DO_3SAME(VSHL_S, gen_gvec_sshl)
91
+DO_3SAME(VSHL_U, gen_gvec_ushl)
92
93
/* These insns are all gvec_bitsel but with the inputs in various orders. */
94
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
95
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
96
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
97
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
98
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
99
+DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
100
101
#define DO_3SAME_CMP(INSN, COND) \
102
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
103
@@ -XXX,XX +XXX,XX @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
104
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
105
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
106
107
-static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
108
- uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
109
-{
110
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
111
-}
112
-DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
113
-
114
#define DO_3SAME_GVEC4(INSN, OPARRAY) \
115
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
116
uint32_t rn_ofs, uint32_t rm_ofs, \
117
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
118
}
119
return do_3same(s, a, gen_VMUL_p_3s);
120
}
121
-
122
-#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
123
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
124
- uint32_t rn_ofs, uint32_t rm_ofs, \
125
- uint32_t oprsz, uint32_t maxsz) \
126
- { \
127
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
128
- oprsz, maxsz, &OPARRAY[vece]); \
129
- } \
130
- DO_3SAME(INSN, gen_##INSN##_3s)
131
-
132
-DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
133
-DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
134
diff --git a/target/arm/translate.c b/target/arm/translate.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/translate.c
137
+++ b/target/arm/translate.c
138
@@ -XXX,XX +XXX,XX @@ static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
139
tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
140
}
141
142
-static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
143
-
144
-const GVecGen3 cmtst_op[4] = {
145
- { .fni4 = gen_helper_neon_tst_u8,
146
- .fniv = gen_cmtst_vec,
147
- .opt_opc = vecop_list_cmtst,
148
- .vece = MO_8 },
149
- { .fni4 = gen_helper_neon_tst_u16,
150
- .fniv = gen_cmtst_vec,
151
- .opt_opc = vecop_list_cmtst,
152
- .vece = MO_16 },
153
- { .fni4 = gen_cmtst_i32,
154
- .fniv = gen_cmtst_vec,
155
- .opt_opc = vecop_list_cmtst,
156
- .vece = MO_32 },
157
- { .fni8 = gen_cmtst_i64,
158
- .fniv = gen_cmtst_vec,
159
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
160
- .opt_opc = vecop_list_cmtst,
161
- .vece = MO_64 },
162
-};
163
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
164
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
165
+{
166
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
167
+ static const GVecGen3 ops[4] = {
168
+ { .fni4 = gen_helper_neon_tst_u8,
169
+ .fniv = gen_cmtst_vec,
170
+ .opt_opc = vecop_list,
171
+ .vece = MO_8 },
172
+ { .fni4 = gen_helper_neon_tst_u16,
173
+ .fniv = gen_cmtst_vec,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_16 },
176
+ { .fni4 = gen_cmtst_i32,
177
+ .fniv = gen_cmtst_vec,
178
+ .opt_opc = vecop_list,
179
+ .vece = MO_32 },
180
+ { .fni8 = gen_cmtst_i64,
181
+ .fniv = gen_cmtst_vec,
182
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
183
+ .opt_opc = vecop_list,
184
+ .vece = MO_64 },
185
+ };
186
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
187
+}
188
189
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
190
{
191
@@ -XXX,XX +XXX,XX @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
192
tcg_temp_free_vec(rsh);
193
}
194
195
-static const TCGOpcode ushl_list[] = {
196
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
197
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
198
-};
199
-
200
-const GVecGen3 ushl_op[4] = {
201
- { .fniv = gen_ushl_vec,
202
- .fno = gen_helper_gvec_ushl_b,
203
- .opt_opc = ushl_list,
204
- .vece = MO_8 },
205
- { .fniv = gen_ushl_vec,
206
- .fno = gen_helper_gvec_ushl_h,
207
- .opt_opc = ushl_list,
208
- .vece = MO_16 },
209
- { .fni4 = gen_ushl_i32,
210
- .fniv = gen_ushl_vec,
211
- .opt_opc = ushl_list,
212
- .vece = MO_32 },
213
- { .fni8 = gen_ushl_i64,
214
- .fniv = gen_ushl_vec,
215
- .opt_opc = ushl_list,
216
- .vece = MO_64 },
217
-};
218
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
219
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
220
+{
221
+ static const TCGOpcode vecop_list[] = {
222
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
223
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
224
+ };
225
+ static const GVecGen3 ops[4] = {
226
+ { .fniv = gen_ushl_vec,
227
+ .fno = gen_helper_gvec_ushl_b,
228
+ .opt_opc = vecop_list,
229
+ .vece = MO_8 },
230
+ { .fniv = gen_ushl_vec,
231
+ .fno = gen_helper_gvec_ushl_h,
232
+ .opt_opc = vecop_list,
233
+ .vece = MO_16 },
234
+ { .fni4 = gen_ushl_i32,
235
+ .fniv = gen_ushl_vec,
236
+ .opt_opc = vecop_list,
237
+ .vece = MO_32 },
238
+ { .fni8 = gen_ushl_i64,
239
+ .fniv = gen_ushl_vec,
240
+ .opt_opc = vecop_list,
241
+ .vece = MO_64 },
242
+ };
243
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
244
+}
245
246
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
247
{
248
@@ -XXX,XX +XXX,XX @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
249
tcg_temp_free_vec(tmp);
250
}
251
252
-static const TCGOpcode sshl_list[] = {
253
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
254
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
255
-};
256
-
257
-const GVecGen3 sshl_op[4] = {
258
- { .fniv = gen_sshl_vec,
259
- .fno = gen_helper_gvec_sshl_b,
260
- .opt_opc = sshl_list,
261
- .vece = MO_8 },
262
- { .fniv = gen_sshl_vec,
263
- .fno = gen_helper_gvec_sshl_h,
264
- .opt_opc = sshl_list,
265
- .vece = MO_16 },
266
- { .fni4 = gen_sshl_i32,
267
- .fniv = gen_sshl_vec,
268
- .opt_opc = sshl_list,
269
- .vece = MO_32 },
270
- { .fni8 = gen_sshl_i64,
271
- .fniv = gen_sshl_vec,
272
- .opt_opc = sshl_list,
273
- .vece = MO_64 },
274
-};
275
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
276
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
277
+{
278
+ static const TCGOpcode vecop_list[] = {
279
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
280
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
281
+ };
282
+ static const GVecGen3 ops[4] = {
283
+ { .fniv = gen_sshl_vec,
284
+ .fno = gen_helper_gvec_sshl_b,
285
+ .opt_opc = vecop_list,
286
+ .vece = MO_8 },
287
+ { .fniv = gen_sshl_vec,
288
+ .fno = gen_helper_gvec_sshl_h,
289
+ .opt_opc = vecop_list,
290
+ .vece = MO_16 },
291
+ { .fni4 = gen_sshl_i32,
292
+ .fniv = gen_sshl_vec,
293
+ .opt_opc = vecop_list,
294
+ .vece = MO_32 },
295
+ { .fni8 = gen_sshl_i64,
296
+ .fniv = gen_sshl_vec,
297
+ .opt_opc = vecop_list,
298
+ .vece = MO_64 },
299
+ };
300
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
301
+}
302
303
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
304
TCGv_vec a, TCGv_vec b)
305
--
445
--
306
2.20.1
446
2.25.1
307
308
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
kvm_arch_on_sigbus_vcpu() error injection uses source_id as
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
4
index in etc/hardware_errors to find out Error Status Data
4
because those functions accept only a Zreg register number.
5
Block entry corresponding to error source. So supported source_id
5
For SME, we want to pass a pointer into ZA storage.
6
values should be assigned here and not be changed afterwards to
7
make sure that guest will write error into expected Error Status
8
Data Block.
9
6
10
Before QEMU writes a new error to the ACPI table, it will check whether the
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
previous error has been acknowledged. If not acknowledged, the new
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
errors will be ignored and not be recorded. For the errors section
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
13
type, QEMU simulates it as a memory section error.
14
15
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
16
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
17
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
18
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
19
Message-id: 20200512030609.19593-9-gengdongjiu@huawei.com
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
---
11
---
22
include/hw/acpi/ghes.h | 1 +
12
target/arm/helper-sme.h | 82 +++++
23
hw/acpi/ghes.c | 219 +++++++++++++++++++++++++++++++++++++++++
13
target/arm/sme.decode | 9 +
24
2 files changed, 220 insertions(+)
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
25
17
26
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
27
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/acpi/ghes.h
20
--- a/target/arm/helper-sme.h
29
+++ b/include/hw/acpi/ghes.h
21
+++ b/target/arm/helper-sme.h
30
@@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
GArray *hardware_errors);
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
26
+
35
#endif
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
37
index XXXXXXX..XXXXXXX 100644
109
index XXXXXXX..XXXXXXX 100644
38
--- a/hw/acpi/ghes.c
110
--- a/target/arm/sme.decode
39
+++ b/hw/acpi/ghes.c
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@
41
#include "qemu/error-report.h"
130
42
#include "hw/acpi/generic_event_device.h"
131
#include "qemu/osdep.h"
43
#include "hw/nvram/fw_cfg.h"
132
#include "cpu.h"
44
+#include "qemu/uuid.h"
133
+#include "internals.h"
45
134
#include "tcg/tcg-gvec-desc.h"
46
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
135
#include "exec/helper-proto.h"
47
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
136
+#include "exec/cpu_ldst.h"
48
@@ -XXX,XX +XXX,XX @@
137
+#include "exec/exec-all.h"
49
/* Address offset in Generic Address Structure(GAS) */
138
#include "qemu/int128.h"
50
#define GAS_ADDR_OFFSET 4
139
#include "vec_internal.h"
51
140
+#include "sve_ldst_internal.h"
141
142
/* ResetSVEState */
143
void arm_reset_sve_state(CPUARMState *env)
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
145
}
146
147
#undef DO_MOVA_Z
148
+
52
+/*
149
+/*
53
+ * The total size of Generic Error Data Entry
150
+ * Clear elements in a tile slice comprising len bytes.
54
+ * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
55
+ * Table 18-343 Generic Error Data Entry
56
+ */
151
+ */
57
+#define ACPI_GHES_DATA_LENGTH 72
152
+
58
+
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
59
+/* The memory section CPER size, UEFI 2.6: N.2.5 Memory Error Section */
154
+
60
+#define ACPI_GHES_MEM_CPER_LENGTH 80
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
61
+
156
+{
62
+/* Masks for block_status flags */
157
+ memset(ptr + off, 0, len);
63
+#define ACPI_GEBS_UNCORRECTABLE 1
158
+}
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
64
+
194
+
65
+/*
195
+/*
66
+ * Total size for Generic Error Status Block except Generic Error Data Entries
196
+ * Copy elements from an array into a tile slice comprising len bytes.
67
+ * ACPI 6.2: 18.3.2.7.1 Generic Error Data,
68
+ * Table 18-380 Generic Error Status Block
69
+ */
197
+ */
70
+#define ACPI_GHES_GESB_SIZE 20
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
71
+
256
+
72
+/*
257
+/*
73
+ * Values for error_severity field
258
+ * Host and TLB primitives for vertical tile slice addressing.
74
+ */
259
+ */
75
+enum AcpiGenericErrorSeverity {
260
+
76
+ ACPI_CPER_SEV_RECOVERABLE = 0,
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
77
+ ACPI_CPER_SEV_FATAL = 1,
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
78
+ ACPI_CPER_SEV_CORRECTED = 2,
263
+{ \
79
+ ACPI_CPER_SEV_NONE = 3,
264
+ TYPE val = HOST(host); \
80
+};
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
81
+
266
+} \
82
/*
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
83
* Hardware Error Notification
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
84
* ACPI 4.0: 17.3.2.7 Hardware Error Notification
269
+{ \
85
@@ -XXX,XX +XXX,XX @@ static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
86
build_append_int_noprefix(table, 0, 4);
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+/*
+ * 128-bit store primitives, the counterpart of DO_LDQ: each element is
+ * written as two 64-bit halves at byte offsets 0 and 8 from the
+ * destination, with BE selecting which half of the pair is written first.
+ * The host variants must use the same +8 byte step as the TLB variants.
+ */
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 8, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
87
}
746
}
88
747
+
89
+/*
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
90
+ * Generic Error Data Entry
749
+{
91
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
92
+ */
751
+
93
+static void acpi_ghes_generic_error_data(GArray *table,
94
+ const uint8_t *section_type, uint32_t error_severity,
95
+ uint8_t validation_bits, uint8_t flags,
96
+ uint32_t error_data_length, QemuUUID fru_id,
97
+ uint64_t time_stamp)
98
+{
99
+ const uint8_t fru_text[20] = {0};
100
+
101
+ /* Section Type */
102
+ g_array_append_vals(table, section_type, 16);
103
+
104
+ /* Error Severity */
105
+ build_append_int_noprefix(table, error_severity, 4);
106
+ /* Revision */
107
+ build_append_int_noprefix(table, 0x300, 2);
108
+ /* Validation Bits */
109
+ build_append_int_noprefix(table, validation_bits, 1);
110
+ /* Flags */
111
+ build_append_int_noprefix(table, flags, 1);
112
+ /* Error Data Length */
113
+ build_append_int_noprefix(table, error_data_length, 4);
114
+
115
+ /* FRU Id */
116
+ g_array_append_vals(table, fru_id.data, ARRAY_SIZE(fru_id.data));
117
+
118
+ /* FRU Text */
119
+ g_array_append_vals(table, fru_text, sizeof(fru_text));
120
+
121
+ /* Timestamp */
122
+ build_append_int_noprefix(table, time_stamp, 8);
123
+}
124
+
125
+/*
126
+ * Generic Error Status Block
127
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
128
+ */
129
+static void acpi_ghes_generic_error_status(GArray *table, uint32_t block_status,
130
+ uint32_t raw_data_offset, uint32_t raw_data_length,
131
+ uint32_t data_length, uint32_t error_severity)
132
+{
133
+ /* Block Status */
134
+ build_append_int_noprefix(table, block_status, 4);
135
+ /* Raw Data Offset */
136
+ build_append_int_noprefix(table, raw_data_offset, 4);
137
+ /* Raw Data Length */
138
+ build_append_int_noprefix(table, raw_data_length, 4);
139
+ /* Data Length */
140
+ build_append_int_noprefix(table, data_length, 4);
141
+ /* Error Severity */
142
+ build_append_int_noprefix(table, error_severity, 4);
143
+}
144
+
145
+/* UEFI 2.6: N.2.5 Memory Error Section */
146
+static void acpi_ghes_build_append_mem_cper(GArray *table,
147
+ uint64_t error_physical_addr)
148
+{
149
+ /*
752
+ /*
150
+ * Memory Error Record
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
151
+ */
756
+ */
152
+
757
+
153
+ /* Validation Bits */
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
154
+ build_append_int_noprefix(table,
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
155
+ (1ULL << 14) | /* Type Valid */
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
156
+ (1ULL << 1) /* Physical Address Valid */,
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
157
+ 8);
762
+
158
+ /* Error Status */
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
159
+ build_append_int_noprefix(table, 0, 8);
764
+ FN_END(b, b),
160
+ /* Physical Address */
765
+ FN_END(h_le, h_be),
161
+ build_append_int_noprefix(table, error_physical_addr, 8);
766
+ FN_END(s_le, s_be),
162
+ /* Skip all the detailed information normally found in such a record */
767
+ FN_END(d_le, d_be),
163
+ build_append_int_noprefix(table, 0, 48);
768
+ FN_END(q_le, q_be),
164
+ /* Memory Error Type */
769
+ };
165
+ build_append_int_noprefix(table, 0 /* Unknown error */, 1);
770
+
166
+ /* Skip all the detailed information normally found in such a record */
771
+#undef FN_LS
167
+ build_append_int_noprefix(table, 0, 7);
772
+#undef FN_MTE
168
+}
773
+#undef FN_HV
169
+
774
+#undef FN_END
170
+static int acpi_ghes_record_mem_error(uint64_t error_block_address,
775
+
171
+ uint64_t error_physical_addr)
776
+ TCGv_ptr t_za, t_pg;
172
+{
777
+ TCGv_i64 addr;
173
+ GArray *block;
778
+ int svl, desc = 0;
174
+
779
+ bool be = s->be_data == MO_BE;
175
+ /* Memory Error Section Type */
780
+ bool mte = s->mte_active[0];
176
+ const uint8_t uefi_cper_mem_sec[] =
781
+
177
+ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
782
+ if (!dc_isar_feature(aa64_sme, s)) {
178
+ 0xED, 0x7C, 0x83, 0xB1);
783
+ return false;
179
+
784
+ }
180
+ /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
785
+ if (!sme_smza_enabled_check(s)) {
181
+ * Table 17-13 Generic Error Data Entry
786
+ return true;
182
+ */
787
+ }
183
+ QemuUUID fru_id = {};
788
+
184
+ uint32_t data_length;
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
185
+
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
186
+ block = g_array_new(false, true /* clear */, 1);
791
+ addr = tcg_temp_new_i64();
187
+
792
+
188
+ /* This is the length if adding a new generic error data entry*/
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
189
+ data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
190
+
795
+
191
+ /*
796
+ if (mte) {
192
+ * Check whether it will run out of the preallocated memory if adding a new
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
193
+ * generic error data entry
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
194
+ */
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
195
+ if ((data_length + ACPI_GHES_GESB_SIZE) > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
196
+ error_report("Not enough memory to record new CPER!!!");
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
197
+ g_array_free(block, true);
802
+ desc <<= SVE_MTEDESC_SHIFT;
198
+ return -1;
803
+ } else {
199
+ }
804
+ addr = clean_data_tbi(s, addr);
200
+
805
+ }
201
+ /* Build the new generic error status block header */
806
+ svl = streaming_vec_reg_size(s);
202
+ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
807
+ desc = simd_desc(svl, svl, desc);
203
+ 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
808
+
204
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
205
+ /* Build this new generic error data entry header */
810
+ tcg_constant_i32(desc));
206
+ acpi_ghes_generic_error_data(block, uefi_cper_mem_sec,
811
+
207
+ ACPI_CPER_SEV_RECOVERABLE, 0, 0,
812
+ tcg_temp_free_ptr(t_za);
208
+ ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0);
813
+ tcg_temp_free_ptr(t_pg);
209
+
814
+ tcg_temp_free_i64(addr);
210
+ /* Build the memory section CPER for above new generic error data entry */
815
+ return true;
211
+ acpi_ghes_build_append_mem_cper(block, error_physical_addr);
212
+
213
+ /* Write the generic error data entry into guest memory */
214
+ cpu_physical_memory_write(error_block_address, block->data, block->len);
215
+
216
+ g_array_free(block, true);
217
+
218
+ return 0;
219
+}
220
+
221
/*
222
* Build table for the hardware error fw_cfg blob.
223
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
224
@@ -XXX,XX +XXX,XX @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
225
fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
226
NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
227
}
228
+
229
+/*
+ * Record a memory error at @physical_address for error source @source_id.
+ *
+ * Looks up the error status block address and the Read Ack Register from
+ * the table located at ghes_addr_le, clears the Read Ack Register and
+ * writes a new CPER via acpi_ghes_record_mem_error().
+ *
+ * Returns the result of acpi_ghes_record_mem_error() when a record was
+ * attempted; -1 if @physical_address is zero, if the Read Ack Register
+ * reads as zero (previous error not yet acknowledged), or if the error
+ * status block address is zero.
+ */
+int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
230
+{
231
+ uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0;
232
+ uint64_t start_addr;
233
+ int ret = -1;
234
+ AcpiGedState *acpi_ged_state;
235
+ AcpiGhesState *ags;
236
+
237
+ assert(source_id < ACPI_HEST_SRC_ID_RESERVED);
238
+
239
+ acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
240
+ NULL));
241
+ g_assert(acpi_ged_state);
242
+ ags = &acpi_ged_state->ghes_state;
243
+
244
+ start_addr = le64_to_cpu(ags->ghes_addr_le);
245
+
246
+ if (physical_address) {
247
+
248
+ if (source_id < ACPI_HEST_SRC_ID_RESERVED) {
249
+ start_addr += source_id * sizeof(uint64_t);
250
+ }
251
+
252
+ cpu_physical_memory_read(start_addr, &error_block_addr,
253
+ sizeof(error_block_addr));
254
+
255
+ error_block_addr = le64_to_cpu(error_block_addr);
256
+
257
+ read_ack_register_addr = start_addr +
258
+ ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t);
259
+
260
+ cpu_physical_memory_read(read_ack_register_addr,
261
+ &read_ack_register, sizeof(read_ack_register));
262
+
263
+ /* zero means OSPM does not acknowledge the error */
264
+ if (!read_ack_register) {
265
+ error_report("OSPM does not acknowledge previous error,"
266
+ " so can not record CPER for current error anymore");
267
+ } else if (error_block_addr) {
268
+ read_ack_register = cpu_to_le64(0);
269
+ /*
270
+ * Clear the Read Ack Register, OSPM will write it to 1 when
271
+ * it acknowledges this error.
272
+ */
273
+ cpu_physical_memory_write(read_ack_register_addr,
274
+ &read_ack_register, sizeof(uint64_t));
275
+
276
+ ret = acpi_ghes_record_mem_error(error_block_addr,
277
+ physical_address);
278
+ } else {
279
+ error_report("can not find Generic Error Status Block");
+ }
280
+ }
281
+
282
+ return ret;
283
+}
816
+}
284
--
817
--
285
2.20.1
818
2.25.1
286
287
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
kvm_hwpoison_page_add() and kvm_unpoison_all() will both
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
be used by X86 and ARM platforms, so moving them into
4
We will reuse this for SME save and restore array insns.
5
"accel/kvm/kvm-all.c" to avoid duplicate code.
6
7
For architectures that don't use the poison-list functionality
8
the reset handler will harmlessly do nothing, so let's register
9
the kvm_unpoison_all() function in the generic kvm_init() function.
10
5
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
14
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
15
Message-id: 20200512030609.19593-8-gengdongjiu@huawei.com
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
10
---
18
include/sysemu/kvm_int.h | 12 ++++++++++++
11
target/arm/translate-a64.h | 3 +++
19
accel/kvm/kvm-all.c | 36 ++++++++++++++++++++++++++++++++++++
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
20
target/i386/kvm.c | 36 ------------------------------------
13
2 files changed, 39 insertions(+), 12 deletions(-)
21
3 files changed, 48 insertions(+), 36 deletions(-)
22
14
23
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
24
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
25
--- a/include/sysemu/kvm_int.h
17
--- a/target/arm/translate-a64.h
26
+++ b/include/sysemu/kvm_int.h
18
+++ b/target/arm/translate-a64.h
27
@@ -XXX,XX +XXX,XX @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
28
AddressSpace *as, int as_id);
20
uint32_t rm_ofs, int64_t shift,
29
21
uint32_t opr_sz, uint32_t max_sz);
30
void kvm_set_max_memslot_size(hwaddr max_slot_size);
22
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
31
+
25
+
32
+/**
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
33
+ * kvm_hwpoison_page_add:
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
34
+ *
35
+ * Parameters:
36
+ * @ram_addr: the address in the RAM for the poisoned page
37
+ *
38
+ * Add a poisoned page to the list
39
+ *
40
+ * Return: None.
41
+ */
42
+void kvm_hwpoison_page_add(ram_addr_t ram_addr);
43
#endif
44
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
45
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
46
--- a/accel/kvm/kvm-all.c
29
--- a/target/arm/translate-sve.c
47
+++ b/accel/kvm/kvm-all.c
30
+++ b/target/arm/translate-sve.c
48
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
49
#include "qapi/visitor.h"
32
* The load should begin at the address Rn + IMM.
50
#include "qapi/qapi-types-common.h"
33
*/
51
#include "qapi/qapi-visit-common.h"
34
52
+#include "sysemu/reset.h"
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
53
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
54
#include "hw/boards.h"
37
+ int len, int rn, int imm)
55
38
{
56
@@ -XXX,XX +XXX,XX @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
57
return ret;
40
int len_remain = len % 8;
58
}
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
59
42
t0 = tcg_temp_new_i64();
60
+typedef struct HWPoisonPage {
43
for (i = 0; i < len_align; i += 8) {
61
+ ram_addr_t ram_addr;
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
62
+ QLIST_ENTRY(HWPoisonPage) list;
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
63
+} HWPoisonPage;
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
64
+
59
+
65
+static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
60
gen_set_label(loop);
66
+ QLIST_HEAD_INITIALIZER(hwpoison_page_list);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
67
+
76
+
68
+static void kvm_unpoison_all(void *param)
77
+ if (base != cpu_env) {
69
+{
78
+ tcg_temp_free_ptr(base);
70
+ HWPoisonPage *page, *next_page;
79
+ assert(len_remain == 0);
71
+
72
+ QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
73
+ QLIST_REMOVE(page, list);
74
+ qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
75
+ g_free(page);
76
+ }
77
+}
78
+
79
+void kvm_hwpoison_page_add(ram_addr_t ram_addr)
80
+{
81
+ HWPoisonPage *page;
82
+
83
+ QLIST_FOREACH(page, &hwpoison_page_list, list) {
84
+ if (page->ram_addr == ram_addr) {
85
+ return;
86
+ }
80
+ }
87
+ }
88
+ page = g_new(HWPoisonPage, 1);
89
+ page->ram_addr = ram_addr;
90
+ QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
91
+}
92
+
93
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
94
{
95
#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
96
@@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms)
97
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
98
}
81
}
99
82
100
+ qemu_register_reset(kvm_unpoison_all, NULL);
83
/*
101
+
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
102
if (s->kernel_irqchip_allowed) {
85
default:
103
kvm_irqchip_create(s);
86
g_assert_not_reached();
104
}
87
}
105
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
106
index XXXXXXX..XXXXXXX 100644
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
107
--- a/target/i386/kvm.c
90
tcg_temp_free_i64(t0);
108
+++ b/target/i386/kvm.c
109
@@ -XXX,XX +XXX,XX @@
110
#include "sysemu/sysemu.h"
111
#include "sysemu/hw_accel.h"
112
#include "sysemu/kvm_int.h"
113
-#include "sysemu/reset.h"
114
#include "sysemu/runstate.h"
115
#include "kvm_i386.h"
116
#include "hyperv.h"
117
@@ -XXX,XX +XXX,XX @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
118
}
91
}
119
}
92
}
120
93
121
-
94
/* Similarly for stores. */
122
-typedef struct HWPoisonPage {
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
123
- ram_addr_t ram_addr;
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
124
- QLIST_ENTRY(HWPoisonPage) list;
97
+ int len, int rn, int imm)
125
-} HWPoisonPage;
126
-
127
-static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
128
- QLIST_HEAD_INITIALIZER(hwpoison_page_list);
129
-
130
-static void kvm_unpoison_all(void *param)
131
-{
132
- HWPoisonPage *page, *next_page;
133
-
134
- QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
135
- QLIST_REMOVE(page, list);
136
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
137
- g_free(page);
138
- }
139
-}
140
-
141
-static void kvm_hwpoison_page_add(ram_addr_t ram_addr)
142
-{
143
- HWPoisonPage *page;
144
-
145
- QLIST_FOREACH(page, &hwpoison_page_list, list) {
146
- if (page->ram_addr == ram_addr) {
147
- return;
148
- }
149
- }
150
- page = g_new(HWPoisonPage, 1);
151
- page->ram_addr = ram_addr;
152
- QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
153
-}
154
-
155
static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
156
int *max_banks)
157
{
98
{
158
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s)
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
159
fprintf(stderr, "e820_add_entry() table is full\n");
100
int len_remain = len % 8;
160
return ret;
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
102
103
t0 = tcg_temp_new_i64();
104
for (i = 0; i < len_align; i += 8) {
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
109
}
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
111
clean_addr = new_tmp_a64_local(s);
112
tcg_gen_mov_i64(clean_addr, t0);
113
114
+ if (base != cpu_env) {
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
118
+ }
119
+
120
gen_set_label(loop);
121
122
t0 = tcg_temp_new_i64();
123
tp = tcg_temp_new_ptr();
124
- tcg_gen_add_ptr(tp, cpu_env, i);
125
+ tcg_gen_add_ptr(tp, base, i);
126
tcg_gen_ld_i64(t0, tp, vofs);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
161
}
138
}
162
- qemu_register_reset(kvm_unpoison_all, NULL);
139
163
140
/* Predicate register stores can be any multiple of 2. */
164
shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort);
141
if (len_remain) {
165
if (shadow_mem != -1) {
142
t0 = tcg_temp_new_i64();
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
145
146
switch (len_remain) {
147
case 2:
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
149
if (sve_access_check(s)) {
150
int size = vec_full_reg_size(s);
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
154
}
155
return true;
156
}
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
158
if (sve_access_check(s)) {
159
int size = pred_full_reg_size(s);
160
int off = pred_full_reg_offset(s, a->rd);
161
- do_ldr(s, off, size, a->rn, a->imm * size);
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
166
--
184
--
167
2.20.1
185
2.25.1
168
169
diff view generated by jsdifflib
1
Convert the 64-bit element insns in the 3-reg-same group
1
From: Richard Henderson <richard.henderson@linaro.org>
2
to decodetree. This covers VQSHL, VRSHL and VQRSHL where
3
size==0b11.
4
2
3
We can reuse the SVE functions for LDR and STR, passing in the
4
base of the ZA vector and a zero offset.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-4-peter.maydell@linaro.org
8
---
10
---
9
target/arm/neon-dp.decode | 13 +++++++++++
11
target/arm/sme.decode | 7 +++++++
10
target/arm/translate-neon.inc.c | 24 +++++++++++++++++++++
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
11
target/arm/translate.c | 38 ++-------------------------------
13
2 files changed, 31 insertions(+)
12
3 files changed, 39 insertions(+), 36 deletions(-)
13
14
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sme.decode
17
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sme.decode
18
@@ -XXX,XX +XXX,XX @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
19
VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
20
&ldst rs=%mova_rs
20
VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
21
22
&ldst esz=4 rs=%mova_rs
22
+# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
23
+# The _rev suffix indicates that Vn and Vm are reversed (as explained
24
+# by the comment for the @3same_rev format).
25
+@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
26
+ &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
27
+
23
+
28
+VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
24
+&ldstr rv rn imm
29
+VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
30
+VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
26
+ &ldstr rv=%mova_rs
31
+VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
32
+VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
33
+VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
34
+
27
+
35
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
36
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
37
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
38
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
39
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate-neon.inc.c
32
--- a/target/arm/translate-sme.c
41
+++ b/target/arm/translate-neon.inc.c
33
+++ b/target/arm/translate-sme.c
42
@@ -XXX,XX +XXX,XX @@ static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
43
35
tcg_temp_free_i64(addr);
44
return true;
36
return true;
45
}
37
}
46
+
38
+
47
+#define DO_3SAME_64(INSN, FUNC) \
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
48
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
49
+ uint32_t rn_ofs, uint32_t rm_ofs, \
50
+ uint32_t oprsz, uint32_t maxsz) \
51
+ { \
52
+ static const GVecGen3 op = { .fni8 = FUNC }; \
53
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
54
+ } \
55
+ DO_3SAME(INSN, gen_##INSN##_3s)
56
+
40
+
57
+#define DO_3SAME_64_ENV(INSN, FUNC) \
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
58
+ static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
42
+{
59
+ { \
43
+ int svl = streaming_vec_reg_size(s);
60
+ FUNC(d, cpu_env, n, m); \
44
+ int imm = a->imm;
61
+ } \
45
+ TCGv_ptr base;
62
+ DO_3SAME_64(INSN, gen_##INSN##_elt)
63
+
46
+
64
+DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
47
+ if (!sme_za_enabled_check(s)) {
65
+DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
48
+ return true;
66
+DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
49
+ }
67
+DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
50
+
68
+DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
51
+ /* ZA[n] equates to ZA0H.B[n]. */
69
+DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
70
diff --git a/target/arm/translate.c b/target/arm/translate.c
53
+
71
index XXXXXXX..XXXXXXX 100644
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
72
--- a/target/arm/translate.c
55
+
73
+++ b/target/arm/translate.c
56
+ tcg_temp_free_ptr(base);
74
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
57
+ return true;
75
}
58
+}
76
59
+
77
if (size == 3) {
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
78
- /* 64-bit element instructions. */
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
79
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
80
- neon_load_reg64(cpu_V0, rn + pass);
81
- neon_load_reg64(cpu_V1, rm + pass);
82
- switch (op) {
83
- case NEON_3R_VQSHL:
84
- if (u) {
85
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
86
- cpu_V1, cpu_V0);
87
- } else {
88
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
89
- cpu_V1, cpu_V0);
90
- }
91
- break;
92
- case NEON_3R_VRSHL:
93
- if (u) {
94
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
95
- } else {
96
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
97
- }
98
- break;
99
- case NEON_3R_VQRSHL:
100
- if (u) {
101
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
102
- cpu_V1, cpu_V0);
103
- } else {
104
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
105
- cpu_V1, cpu_V0);
106
- }
107
- break;
108
- default:
109
- abort();
110
- }
111
- neon_store_reg64(cpu_V0, rd + pass);
112
- }
113
- return 0;
114
+ /* 64-bit element instructions: handled by decodetree */
115
+ return 1;
116
}
117
pairwise = 0;
118
switch (op) {
119
--
62
--
120
2.20.1
63
2.25.1
121
122
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
3
Create vectorized versions of handle_shri_with_rndacc
4
for shift+round and shift+round+accumulate. Add out-of-line
5
helpers in preparation for longer vector lengths from SVE.
6
2
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-3-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
7
---
12
target/arm/helper.h | 20 ++
8
target/arm/helper-sme.h | 5 +++
13
target/arm/translate.h | 9 +
9
target/arm/sme.decode | 11 +++++
14
target/arm/translate-a64.c | 11 +-
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
15
target/arm/translate.c | 463 +++++++++++++++++++++++++++++++++++--
11
target/arm/translate-sme.c | 31 +++++++++++++
16
target/arm/vec_helper.c | 50 ++++
12
4 files changed, 137 insertions(+)
17
5 files changed, 527 insertions(+), 26 deletions(-)
18
13
19
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/helper.h
16
--- a/target/arm/helper-sme.h
22
+++ b/target/arm/helper.h
17
+++ b/target/arm/helper-sme.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
24
DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
25
DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
26
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
27
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
31
+
22
+
32
+DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
32
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
36
+
35
+
37
+DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
+### SME Add Vector to Array
38
+DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
41
+
37
+
42
+DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
38
+&adda zad zn pm pn
43
+DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
44
+DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
45
+DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
46
+
41
+
47
#ifdef TARGET_AARCH64
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
48
#include "helper-a64.h"
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
49
#include "helper-sve.h"
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
50
diff --git a/target/arm/translate.h b/target/arm/translate.h
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
51
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
52
--- a/target/arm/translate.h
48
--- a/target/arm/sme_helper.c
53
+++ b/target/arm/translate.h
49
+++ b/target/arm/sme_helper.c
54
@@ -XXX,XX +XXX,XX @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
55
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
51
DO_ST(q, _le, MO_128)
56
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
52
57
53
#undef DO_ST
58
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
59
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
60
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
61
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
62
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
64
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
66
+
54
+
67
/*
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
56
+ void *vpm, uint32_t desc)
69
*/
57
+{
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
71
index XXXXXXX..XXXXXXX 100644
59
+ uint64_t *pn = vpn, *pm = vpm;
72
--- a/target/arm/translate-a64.c
60
+ uint32_t *zda = vzda, *zn = vzn;
73
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
75
return;
76
77
case 0x04: /* SRSHR / URSHR (rounding) */
78
- break;
79
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
80
+ is_u ? gen_gvec_urshr : gen_gvec_srshr, size);
81
+ return;
82
+
61
+
83
case 0x06: /* SRSRA / URSRA (accum + rounding) */
62
+ for (row = 0; row < oprsz; ) {
84
- accumulate = true;
63
+ uint64_t pa = pn[row >> 4];
85
- break;
64
+ do {
86
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
65
+ if (pa & 1) {
87
+ is_u ? gen_gvec_ursra : gen_gvec_srsra, size);
66
+ for (col = 0; col < oprsz; ) {
88
+ return;
67
+ uint64_t pb = pm[col >> 4];
89
+
68
+ do {
90
default:
69
+ if (pb & 1) {
91
g_assert_not_reached();
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
92
}
71
+ }
93
diff --git a/target/arm/translate.c b/target/arm/translate.c
72
+ pb >>= 4;
94
index XXXXXXX..XXXXXXX 100644
73
+ } while (++col & 15);
95
--- a/target/arm/translate.c
74
+ }
96
+++ b/target/arm/translate.c
75
+ }
97
@@ -XXX,XX +XXX,XX @@ void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
76
+ pa >>= 4;
98
}
77
+ } while (++row & 15);
99
}
100
101
+/*
102
+ * Shift one less than the requested amount, and the low bit is
103
+ * the rounding bit. For the 8 and 16-bit operations, because we
104
+ * mask the low bit, we can perform a normal integer shift instead
105
+ * of a vector shift.
106
+ */
107
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
108
+{
109
+ TCGv_i64 t = tcg_temp_new_i64();
110
+
111
+ tcg_gen_shri_i64(t, a, sh - 1);
112
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
113
+ tcg_gen_vec_sar8i_i64(d, a, sh);
114
+ tcg_gen_vec_add8_i64(d, d, t);
115
+ tcg_temp_free_i64(t);
116
+}
117
+
118
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
119
+{
120
+ TCGv_i64 t = tcg_temp_new_i64();
121
+
122
+ tcg_gen_shri_i64(t, a, sh - 1);
123
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
124
+ tcg_gen_vec_sar16i_i64(d, a, sh);
125
+ tcg_gen_vec_add16_i64(d, d, t);
126
+ tcg_temp_free_i64(t);
127
+}
128
+
129
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
130
+{
131
+ TCGv_i32 t = tcg_temp_new_i32();
132
+
133
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
134
+ tcg_gen_sari_i32(d, a, sh);
135
+ tcg_gen_add_i32(d, d, t);
136
+ tcg_temp_free_i32(t);
137
+}
138
+
139
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
140
+{
141
+ TCGv_i64 t = tcg_temp_new_i64();
142
+
143
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
144
+ tcg_gen_sari_i64(d, a, sh);
145
+ tcg_gen_add_i64(d, d, t);
146
+ tcg_temp_free_i64(t);
147
+}
148
+
149
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
150
+{
151
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
152
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
153
+
154
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
155
+ tcg_gen_dupi_vec(vece, ones, 1);
156
+ tcg_gen_and_vec(vece, t, t, ones);
157
+ tcg_gen_sari_vec(vece, d, a, sh);
158
+ tcg_gen_add_vec(vece, d, d, t);
159
+
160
+ tcg_temp_free_vec(t);
161
+ tcg_temp_free_vec(ones);
162
+}
163
+
164
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
165
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
166
+{
167
+ static const TCGOpcode vecop_list[] = {
168
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
169
+ };
170
+ static const GVecGen2i ops[4] = {
171
+ { .fni8 = gen_srshr8_i64,
172
+ .fniv = gen_srshr_vec,
173
+ .fno = gen_helper_gvec_srshr_b,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_8 },
176
+ { .fni8 = gen_srshr16_i64,
177
+ .fniv = gen_srshr_vec,
178
+ .fno = gen_helper_gvec_srshr_h,
179
+ .opt_opc = vecop_list,
180
+ .vece = MO_16 },
181
+ { .fni4 = gen_srshr32_i32,
182
+ .fniv = gen_srshr_vec,
183
+ .fno = gen_helper_gvec_srshr_s,
184
+ .opt_opc = vecop_list,
185
+ .vece = MO_32 },
186
+ { .fni8 = gen_srshr64_i64,
187
+ .fniv = gen_srshr_vec,
188
+ .fno = gen_helper_gvec_srshr_d,
189
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
190
+ .opt_opc = vecop_list,
191
+ .vece = MO_64 },
192
+ };
193
+
194
+ /* tszimm encoding produces immediates in the range [1..esize] */
195
+ tcg_debug_assert(shift > 0);
196
+ tcg_debug_assert(shift <= (8 << vece));
197
+
198
+ if (shift == (8 << vece)) {
199
+ /*
200
+ * Shifts larger than the element size are architecturally valid.
201
+ * Signed results in all sign bits. With rounding, this produces
202
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
203
+ * I.e. always zero.
204
+ */
205
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
206
+ } else {
207
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
208
+ }
78
+ }
209
+}
79
+}
210
+
80
+
211
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
212
+{
83
+{
213
+ TCGv_i64 t = tcg_temp_new_i64();
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
214
+
87
+
215
+ gen_srshr8_i64(t, a, sh);
88
+ for (row = 0; row < oprsz; ++row) {
216
+ tcg_gen_vec_add8_i64(d, d, t);
89
+ if (pn[H1(row)] & 1) {
217
+ tcg_temp_free_i64(t);
90
+ for (col = 0; col < oprsz; ++col) {
218
+}
91
+ if (pm[H1(col)] & 1) {
219
+
92
+ zda[tile_vslice_index(row) + col] += zn[col];
220
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
93
+ }
221
+{
94
+ }
222
+ TCGv_i64 t = tcg_temp_new_i64();
95
+ }
223
+
224
+ gen_srshr16_i64(t, a, sh);
225
+ tcg_gen_vec_add16_i64(d, d, t);
226
+ tcg_temp_free_i64(t);
227
+}
228
+
229
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
230
+{
231
+ TCGv_i32 t = tcg_temp_new_i32();
232
+
233
+ gen_srshr32_i32(t, a, sh);
234
+ tcg_gen_add_i32(d, d, t);
235
+ tcg_temp_free_i32(t);
236
+}
237
+
238
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
239
+{
240
+ TCGv_i64 t = tcg_temp_new_i64();
241
+
242
+ gen_srshr64_i64(t, a, sh);
243
+ tcg_gen_add_i64(d, d, t);
244
+ tcg_temp_free_i64(t);
245
+}
246
+
247
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
248
+{
249
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
250
+
251
+ gen_srshr_vec(vece, t, a, sh);
252
+ tcg_gen_add_vec(vece, d, d, t);
253
+ tcg_temp_free_vec(t);
254
+}
255
+
256
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
257
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
258
+{
259
+ static const TCGOpcode vecop_list[] = {
260
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
261
+ };
262
+ static const GVecGen2i ops[4] = {
263
+ { .fni8 = gen_srsra8_i64,
264
+ .fniv = gen_srsra_vec,
265
+ .fno = gen_helper_gvec_srsra_b,
266
+ .opt_opc = vecop_list,
267
+ .load_dest = true,
268
+ .vece = MO_8 },
269
+ { .fni8 = gen_srsra16_i64,
270
+ .fniv = gen_srsra_vec,
271
+ .fno = gen_helper_gvec_srsra_h,
272
+ .opt_opc = vecop_list,
273
+ .load_dest = true,
274
+ .vece = MO_16 },
275
+ { .fni4 = gen_srsra32_i32,
276
+ .fniv = gen_srsra_vec,
277
+ .fno = gen_helper_gvec_srsra_s,
278
+ .opt_opc = vecop_list,
279
+ .load_dest = true,
280
+ .vece = MO_32 },
281
+ { .fni8 = gen_srsra64_i64,
282
+ .fniv = gen_srsra_vec,
283
+ .fno = gen_helper_gvec_srsra_d,
284
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
285
+ .opt_opc = vecop_list,
286
+ .load_dest = true,
287
+ .vece = MO_64 },
288
+ };
289
+
290
+ /* tszimm encoding produces immediates in the range [1..esize] */
291
+ tcg_debug_assert(shift > 0);
292
+ tcg_debug_assert(shift <= (8 << vece));
293
+
294
+ /*
295
+ * Shifts larger than the element size are architecturally valid.
296
+ * Signed results in all sign bits. With rounding, this produces
297
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
298
+ * I.e. always zero. With accumulation, this leaves D unchanged.
299
+ */
300
+ if (shift == (8 << vece)) {
301
+ /* Nop, but we do need to clear the tail. */
302
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
303
+ } else {
304
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
305
+ }
96
+ }
306
+}
97
+}
307
+
98
+
308
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
309
+{
101
+{
310
+ TCGv_i64 t = tcg_temp_new_i64();
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
311
+
105
+
312
+ tcg_gen_shri_i64(t, a, sh - 1);
106
+ for (row = 0; row < oprsz; ) {
313
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
107
+ uint64_t pa = pn[row >> 4];
314
+ tcg_gen_vec_shr8i_i64(d, a, sh);
108
+ do {
315
+ tcg_gen_vec_add8_i64(d, d, t);
109
+ if (pa & 1) {
316
+ tcg_temp_free_i64(t);
110
+ uint32_t zn_row = zn[H4(row)];
317
+}
111
+ for (col = 0; col < oprsz; ) {
318
+
112
+ uint64_t pb = pm[col >> 4];
319
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
113
+ do {
320
+{
114
+ if (pb & 1) {
321
+ TCGv_i64 t = tcg_temp_new_i64();
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
322
+
116
+ }
323
+ tcg_gen_shri_i64(t, a, sh - 1);
117
+ pb >>= 4;
324
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
118
+ } while (++col & 15);
325
+ tcg_gen_vec_shr16i_i64(d, a, sh);
119
+ }
326
+ tcg_gen_vec_add16_i64(d, d, t);
120
+ }
327
+ tcg_temp_free_i64(t);
121
+ pa >>= 4;
328
+}
122
+ } while (++row & 15);
329
+
330
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
331
+{
332
+ TCGv_i32 t = tcg_temp_new_i32();
333
+
334
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
335
+ tcg_gen_shri_i32(d, a, sh);
336
+ tcg_gen_add_i32(d, d, t);
337
+ tcg_temp_free_i32(t);
338
+}
339
+
340
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
341
+{
342
+ TCGv_i64 t = tcg_temp_new_i64();
343
+
344
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
345
+ tcg_gen_shri_i64(d, a, sh);
346
+ tcg_gen_add_i64(d, d, t);
347
+ tcg_temp_free_i64(t);
348
+}
349
+
350
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
351
+{
352
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
353
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
354
+
355
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
356
+ tcg_gen_dupi_vec(vece, ones, 1);
357
+ tcg_gen_and_vec(vece, t, t, ones);
358
+ tcg_gen_shri_vec(vece, d, a, shift);
359
+ tcg_gen_add_vec(vece, d, d, t);
360
+
361
+ tcg_temp_free_vec(t);
362
+ tcg_temp_free_vec(ones);
363
+}
364
+
365
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
366
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
367
+{
368
+ static const TCGOpcode vecop_list[] = {
369
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
370
+ };
371
+ static const GVecGen2i ops[4] = {
372
+ { .fni8 = gen_urshr8_i64,
373
+ .fniv = gen_urshr_vec,
374
+ .fno = gen_helper_gvec_urshr_b,
375
+ .opt_opc = vecop_list,
376
+ .vece = MO_8 },
377
+ { .fni8 = gen_urshr16_i64,
378
+ .fniv = gen_urshr_vec,
379
+ .fno = gen_helper_gvec_urshr_h,
380
+ .opt_opc = vecop_list,
381
+ .vece = MO_16 },
382
+ { .fni4 = gen_urshr32_i32,
383
+ .fniv = gen_urshr_vec,
384
+ .fno = gen_helper_gvec_urshr_s,
385
+ .opt_opc = vecop_list,
386
+ .vece = MO_32 },
387
+ { .fni8 = gen_urshr64_i64,
388
+ .fniv = gen_urshr_vec,
389
+ .fno = gen_helper_gvec_urshr_d,
390
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
391
+ .opt_opc = vecop_list,
392
+ .vece = MO_64 },
393
+ };
394
+
395
+ /* tszimm encoding produces immediates in the range [1..esize] */
396
+ tcg_debug_assert(shift > 0);
397
+ tcg_debug_assert(shift <= (8 << vece));
398
+
399
+ if (shift == (8 << vece)) {
400
+ /*
401
+ * Shifts larger than the element size are architecturally valid.
402
+ * Unsigned results in zero. With rounding, this produces a
403
+ * copy of the most significant bit.
404
+ */
405
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
406
+ } else {
407
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
408
+ }
123
+ }
409
+}
124
+}
410
+
125
+
411
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
127
+ void *vpm, uint32_t desc)
412
+{
128
+{
413
+ TCGv_i64 t = tcg_temp_new_i64();
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
414
+
132
+
415
+ if (sh == 8) {
133
+ for (row = 0; row < oprsz; ++row) {
416
+ tcg_gen_vec_shr8i_i64(t, a, 7);
134
+ if (pn[H1(row)] & 1) {
417
+ } else {
135
+ uint64_t zn_row = zn[row];
418
+ gen_urshr8_i64(t, a, sh);
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
419
+ }
142
+ }
420
+ tcg_gen_vec_add8_i64(d, d, t);
143
+}
421
+ tcg_temp_free_i64(t);
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
422
+}
177
+}
423
+
178
+
424
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
425
+{
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
426
+ TCGv_i64 t = tcg_temp_new_i64();
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
427
+
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
428
+ if (sh == 16) {
429
+ tcg_gen_vec_shr16i_i64(t, a, 15);
430
+ } else {
431
+ gen_urshr16_i64(t, a, sh);
432
+ }
433
+ tcg_gen_vec_add16_i64(d, d, t);
434
+ tcg_temp_free_i64(t);
435
+}
436
+
437
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
438
+{
439
+ TCGv_i32 t = tcg_temp_new_i32();
440
+
441
+ if (sh == 32) {
442
+ tcg_gen_shri_i32(t, a, 31);
443
+ } else {
444
+ gen_urshr32_i32(t, a, sh);
445
+ }
446
+ tcg_gen_add_i32(d, d, t);
447
+ tcg_temp_free_i32(t);
448
+}
449
+
450
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
451
+{
452
+ TCGv_i64 t = tcg_temp_new_i64();
453
+
454
+ if (sh == 64) {
455
+ tcg_gen_shri_i64(t, a, 63);
456
+ } else {
457
+ gen_urshr64_i64(t, a, sh);
458
+ }
459
+ tcg_gen_add_i64(d, d, t);
460
+ tcg_temp_free_i64(t);
461
+}
462
+
463
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
464
+{
465
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
466
+
467
+ if (sh == (8 << vece)) {
468
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
469
+ } else {
470
+ gen_urshr_vec(vece, t, a, sh);
471
+ }
472
+ tcg_gen_add_vec(vece, d, d, t);
473
+ tcg_temp_free_vec(t);
474
+}
475
+
476
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
477
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
478
+{
479
+ static const TCGOpcode vecop_list[] = {
480
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
481
+ };
482
+ static const GVecGen2i ops[4] = {
483
+ { .fni8 = gen_ursra8_i64,
484
+ .fniv = gen_ursra_vec,
485
+ .fno = gen_helper_gvec_ursra_b,
486
+ .opt_opc = vecop_list,
487
+ .load_dest = true,
488
+ .vece = MO_8 },
489
+ { .fni8 = gen_ursra16_i64,
490
+ .fniv = gen_ursra_vec,
491
+ .fno = gen_helper_gvec_ursra_h,
492
+ .opt_opc = vecop_list,
493
+ .load_dest = true,
494
+ .vece = MO_16 },
495
+ { .fni4 = gen_ursra32_i32,
496
+ .fniv = gen_ursra_vec,
497
+ .fno = gen_helper_gvec_ursra_s,
498
+ .opt_opc = vecop_list,
499
+ .load_dest = true,
500
+ .vece = MO_32 },
501
+ { .fni8 = gen_ursra64_i64,
502
+ .fniv = gen_ursra_vec,
503
+ .fno = gen_helper_gvec_ursra_d,
504
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
505
+ .opt_opc = vecop_list,
506
+ .load_dest = true,
507
+ .vece = MO_64 },
508
+ };
509
+
510
+ /* tszimm encoding produces immediates in the range [1..esize] */
511
+ tcg_debug_assert(shift > 0);
512
+ tcg_debug_assert(shift <= (8 << vece));
513
+
514
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
515
+}
516
+
517
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
518
{
519
uint64_t mask = dup_const(MO_8, 0xff >> shift);
520
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
521
}
522
return 0;
523
524
+ case 2: /* VRSHR */
525
+ /* Right shift comes here negative. */
526
+ shift = -shift;
527
+ if (u) {
528
+ gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
529
+ vec_size, vec_size);
530
+ } else {
531
+ gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
532
+ vec_size, vec_size);
533
+ }
534
+ return 0;
535
+
536
+ case 3: /* VRSRA */
537
+ /* Right shift comes here negative. */
538
+ shift = -shift;
539
+ if (u) {
540
+ gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
541
+ vec_size, vec_size);
542
+ } else {
543
+ gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
544
+ vec_size, vec_size);
545
+ }
546
+ return 0;
547
+
548
case 4: /* VSRI */
549
if (!u) {
550
return 1;
551
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
552
neon_load_reg64(cpu_V0, rm + pass);
553
tcg_gen_movi_i64(cpu_V1, imm);
554
switch (op) {
555
- case 2: /* VRSHR */
556
- case 3: /* VRSRA */
557
- if (u)
558
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
559
- else
560
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
561
- break;
562
case 6: /* VQSHLU */
563
gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
564
cpu_V0, cpu_V1);
565
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
566
default:
567
g_assert_not_reached();
568
}
569
- if (op == 3) {
570
- /* Accumulate. */
571
- neon_load_reg64(cpu_V1, rd + pass);
572
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
573
- }
574
neon_store_reg64(cpu_V0, rd + pass);
575
} else { /* size < 3 */
576
/* Operands in T0 and T1. */
577
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
578
tmp2 = tcg_temp_new_i32();
579
tcg_gen_movi_i32(tmp2, imm);
580
switch (op) {
581
- case 2: /* VRSHR */
582
- case 3: /* VRSRA */
583
- GEN_NEON_INTEGER_OP(rshl);
584
- break;
585
case 6: /* VQSHLU */
586
switch (size) {
587
case 0:
588
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
589
g_assert_not_reached();
590
}
591
tcg_temp_free_i32(tmp2);
592
-
593
- if (op == 3) {
594
- /* Accumulate. */
595
- tmp2 = neon_load_reg(rd, pass);
596
- gen_neon_add(size, tmp, tmp2);
597
- tcg_temp_free_i32(tmp2);
598
- }
599
neon_store_reg(rd, pass, tmp);
600
}
601
} /* for pass */
602
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
603
index XXXXXXX..XXXXXXX 100644
604
--- a/target/arm/vec_helper.c
605
+++ b/target/arm/vec_helper.c
606
@@ -XXX,XX +XXX,XX @@ DO_SRA(gvec_usra_d, uint64_t)
607
608
#undef DO_SRA
609
610
+#define DO_RSHR(NAME, TYPE) \
611
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
612
+{ \
613
+ intptr_t i, oprsz = simd_oprsz(desc); \
614
+ int shift = simd_data(desc); \
615
+ TYPE *d = vd, *n = vn; \
616
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
617
+ TYPE tmp = n[i] >> (shift - 1); \
618
+ d[i] = (tmp >> 1) + (tmp & 1); \
619
+ } \
620
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
621
+}
622
+
623
+DO_RSHR(gvec_srshr_b, int8_t)
624
+DO_RSHR(gvec_srshr_h, int16_t)
625
+DO_RSHR(gvec_srshr_s, int32_t)
626
+DO_RSHR(gvec_srshr_d, int64_t)
627
+
628
+DO_RSHR(gvec_urshr_b, uint8_t)
629
+DO_RSHR(gvec_urshr_h, uint16_t)
630
+DO_RSHR(gvec_urshr_s, uint32_t)
631
+DO_RSHR(gvec_urshr_d, uint64_t)
632
+
633
+#undef DO_RSHR
634
+
635
+#define DO_RSRA(NAME, TYPE) \
636
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
637
+{ \
638
+ intptr_t i, oprsz = simd_oprsz(desc); \
639
+ int shift = simd_data(desc); \
640
+ TYPE *d = vd, *n = vn; \
641
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
642
+ TYPE tmp = n[i] >> (shift - 1); \
643
+ d[i] += (tmp >> 1) + (tmp & 1); \
644
+ } \
645
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
646
+}
647
+
648
+DO_RSRA(gvec_srsra_b, int8_t)
649
+DO_RSRA(gvec_srsra_h, int16_t)
650
+DO_RSRA(gvec_srsra_s, int32_t)
651
+DO_RSRA(gvec_srsra_d, int64_t)
652
+
653
+DO_RSRA(gvec_ursra_b, uint8_t)
654
+DO_RSRA(gvec_ursra_h, uint16_t)
655
+DO_RSRA(gvec_ursra_s, uint32_t)
656
+DO_RSRA(gvec_ursra_d, uint64_t)
657
+
658
+#undef DO_RSRA
659
+
660
/*
661
* Convert float16 to float32, raising no exceptions and
662
* preserving exceptional values, including SNaN.
663
--
183
--
664
2.20.1
184
2.25.1
665
666
diff view generated by jsdifflib
1
Convert the Neon float VPMIN, VPMAX and VPADD 3-reg-same insns to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
decodetree. These are the only remaining 'pairwise' operations,
3
so we can delete the pairwise-specific bits of the old decoder's
4
for-each-element loop now.
5
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Message-id: 20200512163904.10918-13-peter.maydell@linaro.org
9
---
7
---
10
target/arm/neon-dp.decode | 5 +++
8
target/arm/helper-sme.h | 5 +++
11
target/arm/translate-neon.inc.c | 63 +++++++++++++++++++++++++++++++++
9
target/arm/sme.decode | 9 +++++
12
target/arm/translate.c | 63 +++++----------------------------
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
13
3 files changed, 76 insertions(+), 55 deletions(-)
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
14
13
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
16
--- a/target/arm/helper-sme.h
18
+++ b/target/arm/neon-dp.decode
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
+
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
19
@@ -XXX,XX +XXX,XX @@
48
@@ -XXX,XX +XXX,XX @@
20
# For FP insns the high bit of 'size' is used as part of opcode decode
49
#include "exec/cpu_ldst.h"
21
@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
50
#include "exec/exec-all.h"
22
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
51
#include "qemu/int128.h"
23
+@3same_fp_q0 .... ... . . . . size:1 .... .... .... . 0 . . .... \
52
+#include "fpu/softfloat.h"
24
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
53
#include "vec_internal.h"
25
54
#include "sve_ldst_internal.h"
26
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
55
27
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
28
@@ -XXX,XX +XXX,XX @@ VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
57
}
29
58
}
30
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
59
}
31
VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
60
+
32
+VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
33
VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
62
+ void *vpm, void *vst, uint32_t desc)
34
+VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
63
+{
35
+VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
36
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
65
+ uint32_t neg = simd_data(desc) << 31;
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+ float_status fpst;
68
+
69
+ /*
70
+ * Make a copy of float_status because this operation does not
71
+ * update the cumulative fp exception status. It also produces
72
+ * default nans.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
37
index XXXXXXX..XXXXXXX 100644
129
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-neon.inc.c
130
--- a/target/arm/translate-sme.c
39
+++ b/target/arm/translate-neon.inc.c
131
+++ b/target/arm/translate-sme.c
40
@@ -XXX,XX +XXX,XX @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
41
DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
42
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
43
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
44
+
136
+
45
+static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
46
+{
139
+{
47
+ /* FP operations handled pairwise 32 bits at a time */
140
+ int svl = streaming_vec_reg_size(s);
48
+ TCGv_i32 tmp, tmp2, tmp3;
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
49
+ TCGv_ptr fpstatus;
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
50
+
143
+
51
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
144
+ if (!sme_smza_enabled_check(s)) {
52
+ return false;
53
+ }
54
+
55
+ /* UNDEF accesses to D16-D31 if they don't exist. */
56
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
57
+ ((a->vd | a->vn | a->vm) & 0x10)) {
58
+ return false;
59
+ }
60
+
61
+ if (!vfp_access_check(s)) {
62
+ return true;
145
+ return true;
63
+ }
146
+ }
64
+
147
+
65
+ assert(a->q == 0); /* enforced by decode patterns */
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
66
+
155
+
67
+ /*
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
68
+ * Note that we have to be careful not to clobber the source operands
69
+ * in the "vm == vd" case by storing the result of the first pass too
70
+ * early. Since Q is 0 there are always just two passes, so instead
71
+ * of a complicated loop over each pass we just unroll.
72
+ */
73
+ fpstatus = get_fpstatus_ptr(1);
74
+ tmp = neon_load_reg(a->vn, 0);
75
+ tmp2 = neon_load_reg(a->vn, 1);
76
+ fn(tmp, tmp, tmp2, fpstatus);
77
+ tcg_temp_free_i32(tmp2);
78
+
157
+
79
+ tmp3 = neon_load_reg(a->vm, 0);
158
+ tcg_temp_free_ptr(za);
80
+ tmp2 = neon_load_reg(a->vm, 1);
159
+ tcg_temp_free_ptr(zn);
81
+ fn(tmp3, tmp3, tmp2, fpstatus);
160
+ tcg_temp_free_ptr(pn);
82
+ tcg_temp_free_i32(tmp2);
161
+ tcg_temp_free_ptr(pm);
83
+ tcg_temp_free_ptr(fpstatus);
162
+ tcg_temp_free_ptr(fpst);
84
+
85
+ neon_store_reg(a->vd, 0, tmp);
86
+ neon_store_reg(a->vd, 1, tmp3);
87
+ return true;
163
+ return true;
88
+}
164
+}
89
+
165
+
90
+/*
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
91
+ * For all the functions using this macro, size == 1 means fp16,
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
92
+ * which is an architecture extension we don't implement yet.
93
+ */
94
+#define DO_3S_FP_PAIR(INSN,FUNC) \
95
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
96
+ { \
97
+ if (a->size != 0) { \
98
+ /* TODO fp16 support */ \
99
+ return false; \
100
+ } \
101
+ return do_3same_fp_pair(s, a, FUNC); \
102
+ }
103
+
104
+DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
105
+DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
106
+DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
int shift;
113
int pass;
114
int count;
115
- int pairwise;
116
int u;
117
int vec_size;
118
uint32_t imm;
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
case NEON_3R_VPMIN:
121
case NEON_3R_VPADD_VQRDMLAH:
122
case NEON_3R_VQDMULH_VQRDMULH:
123
+ case NEON_3R_FLOAT_ARITH:
124
/* Already handled by decodetree */
125
return 1;
126
}
127
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
128
/* 64-bit element instructions: handled by decodetree */
129
return 1;
130
}
131
- pairwise = 0;
132
switch (op) {
133
- case NEON_3R_FLOAT_ARITH:
134
- pairwise = (u && size < 2); /* if VPADD (float) */
135
- if (!pairwise) {
136
- return 1; /* handled by decodetree */
137
- }
138
- break;
139
case NEON_3R_FLOAT_MINMAX:
140
- pairwise = u; /* if VPMIN/VPMAX (float) */
141
+ if (u) {
142
+ return 1; /* VPMIN/VPMAX handled by decodetree */
143
+ }
144
break;
145
case NEON_3R_FLOAT_CMP:
146
if (!u && size) {
147
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
148
break;
149
}
150
151
- if (pairwise && q) {
152
- /* All the pairwise insns UNDEF if Q is set */
153
- return 1;
154
- }
155
-
156
for (pass = 0; pass < (q ? 4 : 2); pass++) {
157
158
- if (pairwise) {
159
- /* Pairwise. */
160
- if (pass < 1) {
161
- tmp = neon_load_reg(rn, 0);
162
- tmp2 = neon_load_reg(rn, 1);
163
- } else {
164
- tmp = neon_load_reg(rm, 0);
165
- tmp2 = neon_load_reg(rm, 1);
166
- }
167
- } else {
168
- /* Elementwise. */
169
- tmp = neon_load_reg(rn, pass);
170
- tmp2 = neon_load_reg(rm, pass);
171
- }
172
+ /* Elementwise. */
173
+ tmp = neon_load_reg(rn, pass);
174
+ tmp2 = neon_load_reg(rm, pass);
175
switch (op) {
176
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
177
- {
178
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
179
- switch ((u << 2) | size) {
180
- case 4: /* VPADD */
181
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
182
- break;
183
- default:
184
- abort();
185
- }
186
- tcg_temp_free_ptr(fpstatus);
187
- break;
188
- }
189
case NEON_3R_FLOAT_MULTIPLY:
190
{
191
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
192
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
193
}
194
tcg_temp_free_i32(tmp2);
195
196
- /* Save the result. For elementwise operations we can put it
197
- straight into the destination register. For pairwise operations
198
- we have to be careful to avoid clobbering the source operands. */
199
- if (pairwise && rd == rm) {
200
- neon_store_scratch(pass, tmp);
201
- } else {
202
- neon_store_reg(rd, pass, tmp);
203
- }
204
+ neon_store_reg(rd, pass, tmp);
205
206
} /* for pass */
207
- if (pairwise && rd == rm) {
208
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
209
- tmp = neon_load_scratch(pass);
210
- neon_store_reg(rd, pass, tmp);
211
- }
212
- }
213
/* End of 3 register same size operations. */
214
} else if (insn & (1 << 4)) {
215
if ((insn & 0x00380080) != 0) {
216
--
168
--
217
2.20.1
169
2.25.1
218
219
diff view generated by jsdifflib
1
Convert the Neon fp VMUL, VMLA, and VMLS 3-reg-same insns to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
decodetree.
3
2
4
We don't have a gvec helper for multiply-accumulate, so VMLA and VMLS
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
need a loop function do_3same_fp(). This takes a reads_vd parameter
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
6
which tells it to load the old value into vd before
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
calling the callback function, in the same way that the do_vfp_3op_sp()
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
and do_vfp_3op_dp() functions in translate-vfp.inc.c work. (The
7
---
9
only uses in this patch pass reads_vd == true, but later commits
8
target/arm/helper-sme.h | 2 ++
10
will use reads_vd == false.)
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
11
13
12
This conversion fixes in passing an underdecoding for VMUL
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
13
(originally reported by Fredrik Strupe <fredrik@strupe.net>): bit 1
14
of the 'size' field must be 0. The old decoder didn't enforce this,
15
but the decodetree pattern does.
16
17
The gen_VMLA_fp_reg() function performs the addition operation
18
with the operands in the opposite order to the old decoder:
19
since Neon sets 'default NaN mode', float32_add operations are
20
commutative so there is no behaviour difference, but putting
21
them this way around matches the Arm ARM pseudocode and the
22
required operation order for the subtraction in gen_VMLS_fp_reg().
23
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
26
Message-id: 20200512163904.10918-14-peter.maydell@linaro.org
27
---
28
target/arm/neon-dp.decode | 3 ++
29
target/arm/translate-neon.inc.c | 81 +++++++++++++++++++++++++++++++++
30
target/arm/translate.c | 17 +------
31
3 files changed, 85 insertions(+), 16 deletions(-)
32
33
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
34
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/neon-dp.decode
16
--- a/target/arm/helper-sme.h
36
+++ b/target/arm/neon-dp.decode
17
+++ b/target/arm/helper-sme.h
37
@@ -XXX,XX +XXX,XX @@ VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
38
VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
39
VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
40
VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
41
+VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
42
+VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
43
+VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
44
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
45
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
26
--- a/target/arm/sme.decode
49
+++ b/target/arm/translate-neon.inc.c
27
+++ b/target/arm/sme.decode
50
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPADD, padd_u)
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
51
DO_3SAME_VQDMULH(VQDMULH, qdmulh)
29
52
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
53
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
54
+static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
32
+
55
+ bool reads_vd)
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
40
}
41
}
42
+
43
+/*
44
+ * Alter PAIR as needed for controlling predicates being false,
45
+ * and for NEG on an enabled row element.
46
+ */
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
56
+{
48
+{
57
+ /*
49
+ /*
58
+ * FP operations handled elementwise 32 bits at a time.
50
+ * The pseudocode uses a conditional negate after the conditional zero.
59
+ * If reads_vd is true then the old value of Vd will be
51
+ * It is simpler here to unconditionally negate before conditional zero.
60
+ * loaded before calling the callback function. This is
61
+ * used for multiply-accumulate type operations.
62
+ */
52
+ */
63
+ TCGv_i32 tmp, tmp2;
53
+ pair ^= neg;
64
+ int pass;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
61
+}
65
+
62
+
66
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
67
+ return false;
64
+ void *vpm, uint32_t desc)
65
+{
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
68
+ uint16_t *pn = vpn, *pm = vpm;
69
+
70
+ for (row = 0; row < oprsz; ) {
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
68
+ }
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
69
+
112
+
70
+ /* UNDEF accesses to D16-D31 if they don't exist. */
113
+ if (!sme_smza_enabled_check(s)) {
71
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
72
+ ((a->vd | a->vn | a->vm) & 0x10)) {
73
+ return false;
74
+ }
75
+
76
+ if ((a->vn | a->vm | a->vd) & a->q) {
77
+ return false;
78
+ }
79
+
80
+ if (!vfp_access_check(s)) {
81
+ return true;
114
+ return true;
82
+ }
115
+ }
83
+
116
+
84
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
117
+ /* Sum XZR+zad to find ZAd. */
85
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
86
+ tmp = neon_load_reg(a->vn, pass);
119
+ zn = vec_full_reg_ptr(s, a->zn);
87
+ tmp2 = neon_load_reg(a->vm, pass);
120
+ zm = vec_full_reg_ptr(s, a->zm);
88
+ if (reads_vd) {
121
+ pn = pred_full_reg_ptr(s, a->pn);
89
+ TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
122
+ pm = pred_full_reg_ptr(s, a->pm);
90
+ fn(tmp_rd, tmp, tmp2, fpstatus);
123
+
91
+ neon_store_reg(a->vd, pass, tmp_rd);
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
92
+ tcg_temp_free_i32(tmp);
125
+
93
+ } else {
126
+ tcg_temp_free_ptr(za);
94
+ fn(tmp, tmp, tmp2, fpstatus);
127
+ tcg_temp_free_ptr(zn);
95
+ neon_store_reg(a->vd, pass, tmp);
128
+ tcg_temp_free_ptr(pn);
96
+ }
129
+ tcg_temp_free_ptr(pm);
97
+ tcg_temp_free_i32(tmp2);
98
+ }
99
+ tcg_temp_free_ptr(fpstatus);
100
+ return true;
130
+ return true;
101
+}
131
+}
102
+
132
+
103
/*
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
104
* For all the functions using this macro, size == 1 means fp16,
134
gen_helper_gvec_5_ptr *fn)
105
* which is an architecture extension we don't implement yet.
135
{
106
@@ -XXX,XX +XXX,XX @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
107
DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
137
108
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
109
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
110
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
111
+
140
+
112
+/*
141
+/* TODO: FEAT_EBF16 */
113
+ * For all the functions using this macro, size == 1 means fp16,
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
114
+ * which is an architecture extension we don't implement yet.
115
+ */
116
+#define DO_3S_FP(INSN,FUNC,READS_VD) \
117
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
118
+ { \
119
+ if (a->size != 0) { \
120
+ /* TODO fp16 support */ \
121
+ return false; \
122
+ } \
123
+ return do_3same_fp(s, a, FUNC, READS_VD); \
124
+ }
125
+
126
+static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
127
+ TCGv_ptr fpstatus)
128
+{
129
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
130
+ gen_helper_vfp_adds(vd, vd, vn, fpstatus);
131
+}
132
+
133
+static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
134
+ TCGv_ptr fpstatus)
135
+{
136
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
137
+ gen_helper_vfp_subs(vd, vd, vn, fpstatus);
138
+}
139
+
140
+DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
141
+DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
142
143
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
144
{
145
diff --git a/target/arm/translate.c b/target/arm/translate.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/target/arm/translate.c
148
+++ b/target/arm/translate.c
149
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
150
case NEON_3R_VPADD_VQRDMLAH:
151
case NEON_3R_VQDMULH_VQRDMULH:
152
case NEON_3R_FLOAT_ARITH:
153
+ case NEON_3R_FLOAT_MULTIPLY:
154
/* Already handled by decodetree */
155
return 1;
156
}
157
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
158
tmp = neon_load_reg(rn, pass);
159
tmp2 = neon_load_reg(rm, pass);
160
switch (op) {
161
- case NEON_3R_FLOAT_MULTIPLY:
162
- {
163
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
164
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
165
- if (!u) {
166
- tcg_temp_free_i32(tmp2);
167
- tmp2 = neon_load_reg(rd, pass);
168
- if (size == 0) {
169
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
170
- } else {
171
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
172
- }
173
- }
174
- tcg_temp_free_ptr(fpstatus);
175
- break;
176
- }
177
case NEON_3R_FLOAT_CMP:
178
{
179
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
180
--
143
--
181
2.20.1
144
2.25.1
182
183
diff view generated by jsdifflib
1
From: Patrick Williams <patrick@stwcx.xyz>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Sonora Pass is a 2 socket x86 motherboard designed by Facebook
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
and supported by OpenBMC. Strapping configuration was obtained
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
5
from hardware and i2c configuration is based on dts found at:
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
6
13
7
https://github.com/facebook/openbmc-linux/blob/1633c87b8ba7c162095787c988979b748ba65dc8/arch/arm/boot/dts/aspeed-bmc-facebook-sonorapass.dts
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
8
9
Booted a test image of http://github.com/facebook/openbmc to login
10
prompt.
11
12
Signed-off-by: Patrick Williams <patrick@stwcx.xyz>
13
Reviewed-by: Amithash Prasad <amithash@fb.com>
14
Reviewed-by: Cédric Le Goater <clg@kaod.org>
15
[PMM: fixed block comment style nit]
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
hw/arm/aspeed.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++
19
1 file changed, 78 insertions(+)
20
21
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
22
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/arm/aspeed.c
16
--- a/target/arm/helper-sme.h
24
+++ b/hw/arm/aspeed.c
17
+++ b/target/arm/helper-sme.h
25
@@ -XXX,XX +XXX,XX @@ struct AspeedBoardState {
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
SCU_AST2500_HW_STRAP_ACPI_ENABLE | \
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER))
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
28
21
29
+/* Sonorapass hardware value: 0xF100D216 */
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
30
+#define SONORAPASS_BMC_HW_STRAP1 ( \
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
31
+ SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
32
+ SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
33
+ SCU_AST2500_HW_STRAP_UART_DEBUG | \
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
34
+ SCU_AST2500_HW_STRAP_RESERVED28 | \
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
35
+ SCU_AST2500_HW_STRAP_DDR4_ENABLE | \
28
index XXXXXXX..XXXXXXX 100644
36
+ SCU_HW_STRAP_VGA_CLASS_CODE | \
29
--- a/target/arm/sme.decode
37
+ SCU_HW_STRAP_LPC_RESET_PIN | \
30
+++ b/target/arm/sme.decode
38
+ SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER) | \
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
39
+ SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
40
+ SCU_HW_STRAP_VGA_BIOS_ROM | \
33
41
+ SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
42
+ SCU_AST2500_HW_STRAP_RESERVED1)
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
43
+
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
44
/* Swift hardware value: 0xF11AD206 */
37
index XXXXXXX..XXXXXXX 100644
45
#define SWIFT_BMC_HW_STRAP1 ( \
38
--- a/target/arm/sme_helper.c
46
AST2500_HW_STRAP1_DEFAULTS | \
39
+++ b/target/arm/sme_helper.c
47
@@ -XXX,XX +XXX,XX @@ static void swift_bmc_i2c_init(AspeedBoardState *bmc)
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
48
i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 12), "tmp105", 0x4a);
41
return pair;
49
}
42
}
50
43
51
+static void sonorapass_bmc_i2c_init(AspeedBoardState *bmc)
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
45
+ float_status *s_std, float_status *s_odd)
52
+{
46
+{
53
+ AspeedSoCState *soc = &bmc->soc;
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
54
+
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
55
+ /* bus 2 : */
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
56
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x48);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
57
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x49);
51
+ float64 t64;
58
+ /* bus 2 : pca9546 @ 0x73 */
52
+ float32 t32;
59
+
60
+ /* bus 3 : pca9548 @ 0x70 */
61
+
62
+ /* bus 4 : */
63
+ uint8_t *eeprom4_54 = g_malloc0(8 * 1024);
64
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), 0x54,
65
+ eeprom4_54);
66
+ /* PCA9539 @ 0x76, but PCA9552 is compatible */
67
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x76);
68
+ /* PCA9539 @ 0x77, but PCA9552 is compatible */
69
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x77);
70
+
71
+ /* bus 6 : */
72
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x48);
73
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x49);
74
+ /* bus 6 : pca9546 @ 0x73 */
75
+
76
+ /* bus 8 : */
77
+ uint8_t *eeprom8_56 = g_malloc0(8 * 1024);
78
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), 0x56,
79
+ eeprom8_56);
80
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x60);
81
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x61);
82
+ /* bus 8 : adc128d818 @ 0x1d */
83
+ /* bus 8 : adc128d818 @ 0x1f */
84
+
53
+
85
+ /*
54
+ /*
86
+ * bus 13 : pca9548 @ 0x71
55
+ * The ARM pseudocode function FPDot performs both multiplies
87
+ * - channel 3:
56
+ * and the add with a single rounding operation. Emulate this
88
+ * - tmp421 @ 0x4c
57
+ * by performing the first multiply in round-to-odd, then doing
89
+ * - tmp421 @ 0x4e
58
+ * the second multiply as fused multiply-add, and rounding to
90
+ * - tmp421 @ 0x4f
59
+ * float32 all in one step.
91
+ */
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
92
+
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
93
+}
69
+}
94
+
70
+
95
static void witherspoon_bmc_i2c_init(AspeedBoardState *bmc)
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
96
{
120
{
97
AspeedSoCState *soc = &bmc->soc;
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
98
@@ -XXX,XX +XXX,XX @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data)
122
index XXXXXXX..XXXXXXX 100644
99
mc->default_ram_size = 512 * MiB;
123
--- a/target/arm/translate-sme.c
100
};
124
+++ b/target/arm/translate-sme.c
101
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
102
+static void aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data)
126
return true;
103
+{
127
}
104
+ MachineClass *mc = MACHINE_CLASS(oc);
128
105
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
106
+
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
107
+ mc->desc = "OCP SonoraPass BMC (ARM1176)";
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
108
+ amc->soc_name = "ast2500-a1";
132
109
+ amc->hw_strap1 = SONORAPASS_BMC_HW_STRAP1;
110
+ amc->fmc_model = "mx66l1g45g";
111
+ amc->spi_model = "mx66l1g45g";
112
+ amc->num_cs = 2;
113
+ amc->i2c_init = sonorapass_bmc_i2c_init;
114
+ mc->default_ram_size = 512 * MiB;
115
+};
116
+
117
static void aspeed_machine_swift_class_init(ObjectClass *oc, void *data)
118
{
119
MachineClass *mc = MACHINE_CLASS(oc);
120
@@ -XXX,XX +XXX,XX @@ static const TypeInfo aspeed_machine_types[] = {
121
.name = MACHINE_TYPE_NAME("swift-bmc"),
122
.parent = TYPE_ASPEED_MACHINE,
123
.class_init = aspeed_machine_swift_class_init,
124
+ }, {
125
+ .name = MACHINE_TYPE_NAME("sonorapass-bmc"),
126
+ .parent = TYPE_ASPEED_MACHINE,
127
+ .class_init = aspeed_machine_sonorapass_class_init,
128
}, {
129
.name = MACHINE_TYPE_NAME("witherspoon-bmc"),
130
.parent = TYPE_ASPEED_MACHINE,
131
--
133
--
132
2.20.1
134
2.25.1
133
134
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Include 64-bit element size in preparation for SVE2.
3
This is SMOPA, SUMOPA, USMOPA, UMOPA, for both Int8 and Int16.
4
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200513163245.17915-16-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
9
---
10
target/arm/helper.h | 10 +++
10
target/arm/helper-sme.h | 16 ++++++++
11
target/arm/translate.h | 5 ++
11
target/arm/sme.decode | 10 +++++
12
target/arm/translate-a64.c | 8 ++-
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate.c | 133 ++++++++++++++++++++++++++++++++++++-
13
target/arm/translate-sme.c | 10 +++++
14
target/arm/vec_helper.c | 24 +++++++
14
4 files changed, 118 insertions(+)
15
5 files changed, 176 insertions(+), 4 deletions(-)
16
15
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
18
--- a/target/arm/helper-sme.h
20
+++ b/target/arm/helper.h
19
+++ b/target/arm/helper-sme.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
23
DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
24
23
void, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
26
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
28
+DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
29
+
48
+
30
+DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
31
+DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
32
+DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
33
+DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
34
+
53
+
35
#ifdef TARGET_AARCH64
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
36
#include "helper-a64.h"
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
37
#include "helper-sve.h"
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
38
diff --git a/target/arm/translate.h b/target/arm/translate.h
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
39
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate.h
60
--- a/target/arm/sme_helper.c
41
+++ b/target/arm/translate.h
61
+++ b/target/arm/sme_helper.c
42
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
43
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
63
} while (row & 15);
44
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
64
}
45
65
}
46
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
47
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
48
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
49
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
50
+
66
+
51
/*
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
52
* Forward to the isar_feature_* tests given a DisasContext pointer.
68
+
53
*/
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
54
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
70
+ uint8_t *pn, uint8_t *pm,
55
index XXXXXXX..XXXXXXX 100644
71
+ uint32_t desc, IMOPFn *fn)
56
--- a/target/arm/translate-a64.c
72
+{
57
+++ b/target/arm/translate-a64.c
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
58
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
74
+ bool neg = simd_data(desc);
59
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
75
+
60
}
76
+ for (row = 0; row < oprsz; ++row) {
61
return;
77
+ uint8_t pa = pn[H1(row)];
62
+ case 0xe: /* SABD, UABD */
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
63
+ if (u) {
79
+ uint64_t n = zn[row];
64
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
80
+
65
+ } else {
81
+ for (col = 0; col < oprsz; ++col) {
66
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
67
+ }
86
+ }
68
+ return;
87
+ }
69
case 0x10: /* ADD, SUB */
70
if (u) {
71
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
72
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
73
genenvfn = fns[size][u];
74
break;
75
}
76
- case 0xe: /* SABD, UABD */
77
case 0xf: /* SABA, UABA */
78
{
79
static NeonGenTwoOpFn * const fns[3][2] = {
80
diff --git a/target/arm/translate.c b/target/arm/translate.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/arm/translate.c
83
+++ b/target/arm/translate.c
84
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
85
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
86
}
87
88
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
89
+{
90
+ TCGv_i32 t = tcg_temp_new_i32();
91
+
92
+ tcg_gen_sub_i32(t, a, b);
93
+ tcg_gen_sub_i32(d, b, a);
94
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
95
+ tcg_temp_free_i32(t);
96
+}
88
+}
97
+
89
+
98
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
99
+{
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
100
+ TCGv_i64 t = tcg_temp_new_i64();
92
+{ \
101
+
93
+ uint32_t sum0 = 0, sum1 = 0; \
102
+ tcg_gen_sub_i64(t, a, b);
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
103
+ tcg_gen_sub_i64(d, b, a);
95
+ n &= expand_pred_b(p); \
104
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
105
+ tcg_temp_free_i64(t);
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
106
+}
110
+}
107
+
111
+
108
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
109
+{
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
110
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
114
+{ \
111
+
115
+ uint64_t sum = 0; \
112
+ tcg_gen_smin_vec(vece, t, a, b);
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
113
+ tcg_gen_smax_vec(vece, d, a, b);
117
+ n &= expand_pred_h(p); \
114
+ tcg_gen_sub_vec(vece, d, d, t);
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
115
+ tcg_temp_free_vec(t);
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
116
+}
123
+}
117
+
124
+
118
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
119
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
120
+{
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
121
+ static const TCGOpcode vecop_list[] = {
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
122
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
123
+ };
124
+ static const GVecGen3 ops[4] = {
125
+ { .fniv = gen_sabd_vec,
126
+ .fno = gen_helper_gvec_sabd_b,
127
+ .opt_opc = vecop_list,
128
+ .vece = MO_8 },
129
+ { .fniv = gen_sabd_vec,
130
+ .fno = gen_helper_gvec_sabd_h,
131
+ .opt_opc = vecop_list,
132
+ .vece = MO_16 },
133
+ { .fni4 = gen_sabd_i32,
134
+ .fniv = gen_sabd_vec,
135
+ .fno = gen_helper_gvec_sabd_s,
136
+ .opt_opc = vecop_list,
137
+ .vece = MO_32 },
138
+ { .fni8 = gen_sabd_i64,
139
+ .fniv = gen_sabd_vec,
140
+ .fno = gen_helper_gvec_sabd_d,
141
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
142
+ .opt_opc = vecop_list,
143
+ .vece = MO_64 },
144
+ };
145
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
146
+}
147
+
129
+
148
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
149
+{
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
150
+ TCGv_i32 t = tcg_temp_new_i32();
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
151
+
134
+
152
+ tcg_gen_sub_i32(t, a, b);
135
+#define DEF_IMOPH(NAME) \
153
+ tcg_gen_sub_i32(d, b, a);
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
154
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
137
+ void *vpm, uint32_t desc) \
155
+ tcg_temp_free_i32(t);
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
156
+}
157
+
139
+
158
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
140
+DEF_IMOPH(smopa_s)
159
+{
141
+DEF_IMOPH(umopa_s)
160
+ TCGv_i64 t = tcg_temp_new_i64();
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
161
+
156
+
162
+ tcg_gen_sub_i64(t, a, b);
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
163
+ tcg_gen_sub_i64(d, b, a);
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
164
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
165
+ tcg_temp_free_i64(t);
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
166
+}
167
+
161
+
168
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
169
+{
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
170
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
171
+
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
172
+ tcg_gen_umin_vec(vece, t, a, b);
173
+ tcg_gen_umax_vec(vece, d, a, b);
174
+ tcg_gen_sub_vec(vece, d, d, t);
175
+ tcg_temp_free_vec(t);
176
+}
177
+
178
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
179
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
180
+{
181
+ static const TCGOpcode vecop_list[] = {
182
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
183
+ };
184
+ static const GVecGen3 ops[4] = {
185
+ { .fniv = gen_uabd_vec,
186
+ .fno = gen_helper_gvec_uabd_b,
187
+ .opt_opc = vecop_list,
188
+ .vece = MO_8 },
189
+ { .fniv = gen_uabd_vec,
190
+ .fno = gen_helper_gvec_uabd_h,
191
+ .opt_opc = vecop_list,
192
+ .vece = MO_16 },
193
+ { .fni4 = gen_uabd_i32,
194
+ .fniv = gen_uabd_vec,
195
+ .fno = gen_helper_gvec_uabd_s,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_32 },
198
+ { .fni8 = gen_uabd_i64,
199
+ .fniv = gen_uabd_vec,
200
+ .fno = gen_helper_gvec_uabd_d,
201
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
202
+ .opt_opc = vecop_list,
203
+ .vece = MO_64 },
204
+ };
205
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
206
+}
207
+
208
/* Translate a NEON data processing instruction. Return nonzero if the
209
instruction is invalid.
210
We process data in a mixture of 32-bit and 64-bit chunks.
211
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
212
}
213
return 1;
214
215
+ case NEON_3R_VABD:
216
+ if (u) {
217
+ gen_gvec_uabd(size, rd_ofs, rn_ofs, rm_ofs,
218
+ vec_size, vec_size);
219
+ } else {
220
+ gen_gvec_sabd(size, rd_ofs, rn_ofs, rm_ofs,
221
+ vec_size, vec_size);
222
+ }
223
+ return 0;
224
+
225
case NEON_3R_VADD_VSUB:
226
case NEON_3R_LOGIC:
227
case NEON_3R_VMAX:
228
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
229
case NEON_3R_VQRSHL:
230
GEN_NEON_INTEGER_OP_ENV(qrshl);
231
break;
232
- case NEON_3R_VABD:
233
- GEN_NEON_INTEGER_OP(abd);
234
- break;
235
case NEON_3R_VABA:
236
GEN_NEON_INTEGER_OP(abd);
237
tcg_temp_free_i32(tmp2);
238
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/arm/vec_helper.c
241
+++ b/target/arm/vec_helper.c
242
@@ -XXX,XX +XXX,XX @@ DO_CMP0(gvec_cgt0_h, int16_t, >)
243
DO_CMP0(gvec_cge0_h, int16_t, >=)
244
245
#undef DO_CMP0
246
+
247
+#define DO_ABD(NAME, TYPE) \
248
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
249
+{ \
250
+ intptr_t i, opr_sz = simd_oprsz(desc); \
251
+ TYPE *d = vd, *n = vn, *m = vm; \
252
+ \
253
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
254
+ d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
255
+ } \
256
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
257
+}
258
+
259
+DO_ABD(gvec_sabd_b, int8_t)
260
+DO_ABD(gvec_sabd_h, int16_t)
261
+DO_ABD(gvec_sabd_s, int32_t)
262
+DO_ABD(gvec_sabd_d, int64_t)
263
+
264
+DO_ABD(gvec_uabd_b, uint8_t)
265
+DO_ABD(gvec_uabd_h, uint16_t)
266
+DO_ABD(gvec_uabd_s, uint32_t)
267
+DO_ABD(gvec_uabd_d, uint64_t)
268
+
269
+#undef DO_ABD
270
--
166
--
271
2.20.1
167
2.25.1
272
273
diff view generated by jsdifflib
1
Convert the Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS 3-reg-same
1
From: Richard Henderson <richard.henderson@linaro.org>
2
insns to decodetree. (These are all the remaining non-accumulation
3
instructions in this group.)
4
2
3
This is an SVE instruction that operates using the SVE vector
4
length, but it is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-17-peter.maydell@linaro.org
8
---
10
---
9
target/arm/neon-dp.decode | 6 +++
11
target/arm/sve.decode | 20 +++++++++++++
10
target/arm/translate-neon.inc.c | 70 +++++++++++++++++++++++++++++++++
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate.c | 42 +-------------------
13
2 files changed, 77 insertions(+)
12
3 files changed, 78 insertions(+), 40 deletions(-)
13
14
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/sve.decode
17
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/sve.decode
18
@@ -XXX,XX +XXX,XX @@ VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
19
VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
20
20
VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
21
### SVE2 floating-point bfloat16 dot-product (indexed)
21
VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
22
+VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
23
+
23
+VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
24
+### SVE broadcast predicate element
24
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
25
+
25
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
26
+&psel esz pd pn pm rv imm
26
+VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
27
+%psel_rv 16:2 !function=plus_12
27
+VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
28
+%psel_imm_b 22:2 19:2
28
+VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
29
+%psel_imm_h 22:2 20:1
29
+VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
30
+%psel_imm_s 22:2
30
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
31
+%psel_imm_d 23:1
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-neon.inc.c
45
--- a/target/arm/translate-sve.c
33
+++ b/target/arm/translate-neon.inc.c
46
+++ b/target/arm/translate-sve.c
34
@@ -XXX,XX +XXX,XX @@ DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
35
DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
48
36
DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
37
DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
38
+DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
51
+
39
+DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
40
41
static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
42
TCGv_ptr fpstatus)
43
@@ -XXX,XX +XXX,XX @@ static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
44
DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
45
DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
46
47
+static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
48
+{
53
+{
49
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
50
+ return false;
61
+ return false;
51
+ }
62
+ }
52
+
63
+ if (!sve_access_check(s)) {
53
+ if (a->size != 0) {
64
+ return true;
54
+ /* TODO fp16 support */
55
+ return false;
56
+ }
65
+ }
57
+
66
+
58
+ return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
67
+ tmp = tcg_temp_new_i64();
59
+}
68
+ dbit = tcg_temp_new_i64();
69
+ didx = tcg_temp_new_i64();
70
+ ptr = tcg_temp_new_ptr();
60
+
71
+
61
+static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
72
+ /* Compute the predicate element. */
62
+{
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
63
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
74
+ if (is_power_of_2(elements)) {
64
+ return false;
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
65
+ }
78
+ }
66
+
79
+
67
+ if (a->size != 0) {
80
+ /* Extract the predicate byte and bit indices. */
68
+ /* TODO fp16 support */
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
69
+ return false;
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
70
+ }
86
+ }
71
+
87
+
72
+ return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
106
+ return true;
73
+}
107
+}
74
+
75
+WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)
76
+
77
+static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
78
+ uint32_t rn_ofs, uint32_t rm_ofs,
79
+ uint32_t oprsz, uint32_t maxsz)
80
+{
81
+ static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
82
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
83
+}
84
+
85
+static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
86
+{
87
+ if (a->size != 0) {
88
+ /* TODO fp16 support */
89
+ return false;
90
+ }
91
+
92
+ return do_3same(s, a, gen_VRECPS_fp_3s);
93
+}
94
+
95
+WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
96
+
97
+static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
98
+ uint32_t rn_ofs, uint32_t rm_ofs,
99
+ uint32_t oprsz, uint32_t maxsz)
100
+{
101
+ static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
102
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
103
+}
104
+
105
+static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
106
+{
107
+ if (a->size != 0) {
108
+ /* TODO fp16 support */
109
+ return false;
110
+ }
111
+
112
+ return do_3same(s, a, gen_VRSQRTS_fp_3s);
113
+}
114
+
115
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
116
{
117
/* FP operations handled pairwise 32 bits at a time */
118
diff --git a/target/arm/translate.c b/target/arm/translate.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/target/arm/translate.c
121
+++ b/target/arm/translate.c
122
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
123
case NEON_3R_FLOAT_MULTIPLY:
124
case NEON_3R_FLOAT_CMP:
125
case NEON_3R_FLOAT_ACMP:
126
+ case NEON_3R_FLOAT_MINMAX:
127
+ case NEON_3R_FLOAT_MISC:
128
/* Already handled by decodetree */
129
return 1;
130
}
131
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
132
return 1;
133
}
134
switch (op) {
135
- case NEON_3R_FLOAT_MINMAX:
136
- if (u) {
137
- return 1; /* VPMIN/VPMAX handled by decodetree */
138
- }
139
- break;
140
- case NEON_3R_FLOAT_MISC:
141
- /* VMAXNM/VMINNM in ARMv8 */
142
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
143
- return 1;
144
- }
145
- break;
146
case NEON_3R_VFM_VQRDMLSH:
147
if (!dc_isar_feature(aa32_simdfmac, s)) {
148
return 1;
149
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
150
tmp = neon_load_reg(rn, pass);
151
tmp2 = neon_load_reg(rm, pass);
152
switch (op) {
153
- case NEON_3R_FLOAT_MINMAX:
154
- {
155
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
156
- if (size == 0) {
157
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
158
- } else {
159
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
160
- }
161
- tcg_temp_free_ptr(fpstatus);
162
- break;
163
- }
164
- case NEON_3R_FLOAT_MISC:
165
- if (u) {
166
- /* VMAXNM/VMINNM */
167
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
168
- if (size == 0) {
169
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
170
- } else {
171
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
172
- }
173
- tcg_temp_free_ptr(fpstatus);
174
- } else {
175
- if (size == 0) {
176
- gen_helper_recps_f32(tmp, cpu_env, tmp, tmp2);
177
- } else {
178
- gen_helper_rsqrts_f32(tmp, cpu_env, tmp, tmp2);
179
- }
180
- }
181
- break;
182
case NEON_3R_VFM_VQRDMLSH:
183
{
184
/* VFMA, VFMS: fused multiply-add */
185
--
108
--
186
2.20.1
109
2.25.1
187
188
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Provide a functional interface for the vector expansion.
3
This is an SVE instruction that operates using the SVE vector
4
This fits better with the existing set of helpers that
4
length, but it is present only if SME is implemented.
5
we provide for other operations.
6
5
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-13-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/translate.h | 5 ++++
11
target/arm/helper-sve.h | 2 ++
13
target/arm/translate-a64.c | 34 ++----------------------
12
target/arm/sve.decode | 1 +
14
target/arm/translate.c | 54 +++++++++++++++++++-------------------
13
target/arm/sve_helper.c | 16 ++++++++++++++++
15
3 files changed, 34 insertions(+), 59 deletions(-)
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
16
16
17
diff --git a/target/arm/translate.h b/target/arm/translate.h
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate.h
19
--- a/target/arm/helper-sve.h
20
+++ b/target/arm/translate.h
20
+++ b/target/arm/helper-sve.h
21
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
22
23
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
24
25
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
27
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
28
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
29
+
26
+
30
/*
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
* Forward to the isar_feature_* tests given a DisasContext pointer.
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
*/
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
34
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/translate-a64.c
32
--- a/target/arm/sve.decode
36
+++ b/target/arm/translate-a64.c
33
+++ b/target/arm/sve.decode
37
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
38
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
39
}
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
40
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
41
-/* Expand a 3-operand + env pointer operation using
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
42
- * an out-of-line helper.
39
43
- */
40
# SVE vector splice (predicated, destructive)
44
-static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
45
- int rn, int rm, gen_helper_gvec_3_ptr *fn)
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
46
-{
47
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
48
- vec_full_reg_offset(s, rn),
49
- vec_full_reg_offset(s, rm), cpu_env,
50
- is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
51
-}
52
-
53
/* Expand a 3-operand + fpstatus pointer + simd data value operation using
54
* an out-of-line helper.
55
*/
56
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
57
58
switch (opcode) {
59
case 0x0: /* SQRDMLAH (vector) */
60
- switch (size) {
61
- case 1:
62
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
63
- break;
64
- case 2:
65
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
66
- break;
67
- default:
68
- g_assert_not_reached();
69
- }
70
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
71
return;
72
73
case 0x1: /* SQRDMLSH (vector) */
74
- switch (size) {
75
- case 1:
76
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
77
- break;
78
- case 2:
79
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
80
- break;
81
- default:
82
- g_assert_not_reached();
83
- }
84
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
85
return;
86
87
case 0x2: /* SDOT / UDOT */
88
diff --git a/target/arm/translate.c b/target/arm/translate.c
89
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
90
--- a/target/arm/translate.c
44
--- a/target/arm/sve_helper.c
91
+++ b/target/arm/translate.c
45
+++ b/target/arm/sve_helper.c
92
@@ -XXX,XX +XXX,XX @@ static const uint8_t neon_2rm_sizes[] = {
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
93
[NEON_2RM_VCVT_UF] = 0x4,
47
94
};
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
95
49
96
-
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
97
-/* Expand v8.1 simd helper. */
51
+{
98
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
99
- int q, int rd, int rn, int rm)
53
+ uint64_t *d = vd, *n = vn;
100
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
54
+ uint8_t *pg = vg;
101
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
55
+
102
{
56
+ for (i = 0; i < opr_sz; i += 2) {
103
- if (dc_isar_feature(aa32_rdm, s)) {
57
+ if (pg[H1(i)] & 1) {
104
- int opr_sz = (1 + q) * 8;
58
+ uint64_t n0 = n[i + 0];
105
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
59
+ uint64_t n1 = n[i + 1];
106
- vfp_reg_offset(1, rn),
60
+ d[i + 0] = n1;
107
- vfp_reg_offset(1, rm), cpu_env,
61
+ d[i + 1] = n0;
108
- opr_sz, opr_sz, 0, fn);
62
+ }
109
- return 0;
63
+ }
110
- }
111
- return 1;
112
+ static gen_helper_gvec_3_ptr * const fns[2] = {
113
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
114
+ };
115
+ tcg_debug_assert(vece >= 1 && vece <= 2);
116
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
117
+ opr_sz, max_sz, 0, fns[vece - 1]);
118
+}
64
+}
119
+
65
+
120
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
121
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
122
+{
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
123
+ static gen_helper_gvec_3_ptr * const fns[2] = {
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
124
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
70
index XXXXXXX..XXXXXXX 100644
125
+ };
71
--- a/target/arm/translate-sve.c
126
+ tcg_debug_assert(vece >= 1 && vece <= 2);
72
+++ b/target/arm/translate-sve.c
127
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
128
+ opr_sz, max_sz, 0, fns[vece - 1]);
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
129
}
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
130
76
131
#define GEN_CMP0(NAME, COND) \
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
132
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
78
+
133
break; /* VPADD */
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
134
}
80
gen_helper_sve_splice, a, a->esz)
135
/* VQRDMLAH */
136
- switch (size) {
137
- case 1:
138
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
139
- q, rd, rn, rm);
140
- case 2:
141
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
142
- q, rd, rn, rm);
143
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
144
+ gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
145
+ vec_size, vec_size);
146
+ return 0;
147
}
148
return 1;
149
150
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
151
break;
152
}
153
/* VQRDMLSH */
154
- switch (size) {
155
- case 1:
156
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
157
- q, rd, rn, rm);
158
- case 2:
159
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
160
- q, rd, rn, rm);
161
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
162
+ gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
163
+ vec_size, vec_size);
164
+ return 0;
165
}
166
return 1;
167
81
168
--
82
--
169
2.20.1
83
2.25.1
170
171
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Include 64-bit element size in preparation for SVE2.
3
This is an SVE instruction that operates using the SVE vector
4
length, but it is present only if SME is implemented.
4
5
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200513163245.17915-17-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
---
10
target/arm/helper.h | 17 +++--
11
target/arm/helper.h | 18 +++++++
11
target/arm/translate.h | 5 ++
12
target/arm/sve.decode | 5 ++
12
target/arm/neon_helper.c | 10 ---
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
13
target/arm/translate-a64.c | 17 ++---
14
target/arm/vec_helper.c | 24 +++++++++
14
target/arm/translate.c | 134 +++++++++++++++++++++++++++++++++++--
15
4 files changed, 149 insertions(+)
15
target/arm/vec_helper.c | 24 +++++++
16
6 files changed, 174 insertions(+), 33 deletions(-)
17
16
18
diff --git a/target/arm/helper.h b/target/arm/helper.h
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
19
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper.h
19
--- a/target/arm/helper.h
21
+++ b/target/arm/helper.h
20
+++ b/target/arm/helper.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
23
DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
24
DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
23
void, ptr, ptr, ptr, ptr, ptr, i32)
25
24
26
-DEF_HELPER_2(neon_abd_u8, i32, i32, i32)
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
27
-DEF_HELPER_2(neon_abd_s8, i32, i32, i32)
26
+ void, ptr, ptr, ptr, ptr, i32)
28
-DEF_HELPER_2(neon_abd_u16, i32, i32, i32)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
29
-DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
28
+ void, ptr, ptr, ptr, ptr, i32)
30
-DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
31
-DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
30
+ void, ptr, ptr, ptr, ptr, i32)
32
-
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
33
DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
32
+ void, ptr, ptr, ptr, ptr, i32)
34
DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
33
+
35
DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
36
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+ void, ptr, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
38
DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+ void, ptr, ptr, ptr, ptr, i32)
39
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
40
+DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+ void, ptr, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
42
+DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+ void, ptr, ptr, ptr, ptr, i32)
43
+DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
44
+
45
+DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
+DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
47
+DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
48
+DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
+
42
+
50
#ifdef TARGET_AARCH64
43
#ifdef TARGET_AARCH64
51
#include "helper-a64.h"
44
#include "helper-a64.h"
52
#include "helper-sve.h"
45
#include "helper-sve.h"
53
diff --git a/target/arm/translate.h b/target/arm/translate.h
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
54
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/translate.h
48
--- a/target/arm/sve.decode
56
+++ b/target/arm/translate.h
49
+++ b/target/arm/sve.decode
57
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
58
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
51
@psel esz=2 imm=%psel_imm_s
59
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
60
53
@psel esz=3 imm=%psel_imm_d
61
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
54
+
62
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
55
+### SVE clamp
63
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
56
+
64
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
65
+
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
66
/*
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
* Forward to the isar_feature_* tests given a DisasContext pointer.
60
index XXXXXXX..XXXXXXX 100644
68
*/
61
--- a/target/arm/translate-sve.c
69
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
62
+++ b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
71
--- a/target/arm/neon_helper.c
64
tcg_temp_free_ptr(ptr);
72
+++ b/target/arm/neon_helper.c
65
return true;
73
@@ -XXX,XX +XXX,XX @@ NEON_POP(pmax_s16, neon_s16, 2)
74
NEON_POP(pmax_u16, neon_u16, 2)
75
#undef NEON_FN
76
77
-#define NEON_FN(dest, src1, src2) \
78
- dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
79
-NEON_VOP(abd_s8, neon_s8, 4)
80
-NEON_VOP(abd_u8, neon_u8, 4)
81
-NEON_VOP(abd_s16, neon_s16, 2)
82
-NEON_VOP(abd_u16, neon_u16, 2)
83
-NEON_VOP(abd_s32, neon_s32, 1)
84
-NEON_VOP(abd_u32, neon_u32, 1)
85
-#undef NEON_FN
86
-
87
#define NEON_FN(dest, src1, src2) do { \
88
int8_t tmp; \
89
tmp = (int8_t)src2; \
90
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/translate-a64.c
93
+++ b/target/arm/translate-a64.c
94
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
95
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
96
}
97
return;
98
+ case 0xf: /* SABA, UABA */
99
+ if (u) {
100
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
101
+ } else {
102
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
103
+ }
104
+ return;
105
case 0x10: /* ADD, SUB */
106
if (u) {
107
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
108
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
109
genenvfn = fns[size][u];
110
break;
111
}
112
- case 0xf: /* SABA, UABA */
113
- {
114
- static NeonGenTwoOpFn * const fns[3][2] = {
115
- { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
116
- { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
117
- { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
118
- };
119
- genfn = fns[size][u];
120
- break;
121
- }
122
case 0x16: /* SQDMULH, SQRDMULH */
123
{
124
static NeonGenTwoOpEnvFn * const fns[2][2] = {
125
diff --git a/target/arm/translate.c b/target/arm/translate.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/translate.c
128
+++ b/target/arm/translate.c
129
@@ -XXX,XX +XXX,XX @@ void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
130
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
131
}
66
}
132
67
+
133
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
134
+{
69
+{
135
+ TCGv_i32 t = tcg_temp_new_i32();
70
+ tcg_gen_smax_i32(d, a, n);
136
+ gen_sabd_i32(t, a, b);
71
+ tcg_gen_smin_i32(d, d, m);
137
+ tcg_gen_add_i32(d, d, t);
72
+}
138
+ tcg_temp_free_i32(t);
73
+
139
+}
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
140
+
75
+{
141
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
76
+ tcg_gen_smax_i64(d, a, n);
142
+{
77
+ tcg_gen_smin_i64(d, d, m);
143
+ TCGv_i64 t = tcg_temp_new_i64();
78
+}
144
+ gen_sabd_i64(t, a, b);
79
+
145
+ tcg_gen_add_i64(d, d, t);
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
146
+ tcg_temp_free_i64(t);
81
+ TCGv_vec m, TCGv_vec a)
147
+}
82
+{
148
+
83
+ tcg_gen_smax_vec(vece, d, a, n);
149
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
84
+ tcg_gen_smin_vec(vece, d, d, m);
150
+{
85
+}
151
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
86
+
152
+ gen_sabd_vec(vece, t, a, b);
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
153
+ tcg_gen_add_vec(vece, d, d, t);
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
154
+ tcg_temp_free_vec(t);
89
+{
155
+}
90
+ static const TCGOpcode vecop[] = {
156
+
157
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
158
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
159
+{
160
+ static const TCGOpcode vecop_list[] = {
161
+ INDEX_op_sub_vec, INDEX_op_add_vec,
162
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
163
+ };
92
+ };
164
+ static const GVecGen3 ops[4] = {
93
+ static const GVecGen4 ops[4] = {
165
+ { .fniv = gen_saba_vec,
94
+ { .fniv = gen_sclamp_vec,
166
+ .fno = gen_helper_gvec_saba_b,
95
+ .fno = gen_helper_gvec_sclamp_b,
167
+ .opt_opc = vecop_list,
96
+ .opt_opc = vecop,
168
+ .load_dest = true,
169
+ .vece = MO_8 },
97
+ .vece = MO_8 },
170
+ { .fniv = gen_saba_vec,
98
+ { .fniv = gen_sclamp_vec,
171
+ .fno = gen_helper_gvec_saba_h,
99
+ .fno = gen_helper_gvec_sclamp_h,
172
+ .opt_opc = vecop_list,
100
+ .opt_opc = vecop,
173
+ .load_dest = true,
174
+ .vece = MO_16 },
101
+ .vece = MO_16 },
175
+ { .fni4 = gen_saba_i32,
102
+ { .fni4 = gen_sclamp_i32,
176
+ .fniv = gen_saba_vec,
103
+ .fniv = gen_sclamp_vec,
177
+ .fno = gen_helper_gvec_saba_s,
104
+ .fno = gen_helper_gvec_sclamp_s,
178
+ .opt_opc = vecop_list,
105
+ .opt_opc = vecop,
179
+ .load_dest = true,
180
+ .vece = MO_32 },
106
+ .vece = MO_32 },
181
+ { .fni8 = gen_saba_i64,
107
+ { .fni8 = gen_sclamp_i64,
182
+ .fniv = gen_saba_vec,
108
+ .fniv = gen_sclamp_vec,
183
+ .fno = gen_helper_gvec_saba_d,
109
+ .fno = gen_helper_gvec_sclamp_d,
184
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
110
+ .opt_opc = vecop,
185
+ .opt_opc = vecop_list,
111
+ .vece = MO_64,
186
+ .load_dest = true,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
187
+ .vece = MO_64 },
113
+ };
188
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
189
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
115
+}
190
+}
116
+
191
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
192
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
118
+
193
+{
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
194
+ TCGv_i32 t = tcg_temp_new_i32();
120
+{
195
+ gen_uabd_i32(t, a, b);
121
+ tcg_gen_umax_i32(d, a, n);
196
+ tcg_gen_add_i32(d, d, t);
122
+ tcg_gen_umin_i32(d, d, m);
197
+ tcg_temp_free_i32(t);
123
+}
198
+}
124
+
199
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
200
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
126
+{
201
+{
127
+ tcg_gen_umax_i64(d, a, n);
202
+ TCGv_i64 t = tcg_temp_new_i64();
128
+ tcg_gen_umin_i64(d, d, m);
203
+ gen_uabd_i64(t, a, b);
129
+}
204
+ tcg_gen_add_i64(d, d, t);
130
+
205
+ tcg_temp_free_i64(t);
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
206
+}
132
+ TCGv_vec m, TCGv_vec a)
207
+
133
+{
208
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
134
+ tcg_gen_umax_vec(vece, d, a, n);
209
+{
135
+ tcg_gen_umin_vec(vece, d, d, m);
210
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
136
+}
211
+ gen_uabd_vec(vece, t, a, b);
137
+
212
+ tcg_gen_add_vec(vece, d, d, t);
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
213
+ tcg_temp_free_vec(t);
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
214
+}
140
+{
215
+
141
+ static const TCGOpcode vecop[] = {
216
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
217
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
218
+{
219
+ static const TCGOpcode vecop_list[] = {
220
+ INDEX_op_sub_vec, INDEX_op_add_vec,
221
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
222
+ };
143
+ };
223
+ static const GVecGen3 ops[4] = {
144
+ static const GVecGen4 ops[4] = {
224
+ { .fniv = gen_uaba_vec,
145
+ { .fniv = gen_uclamp_vec,
225
+ .fno = gen_helper_gvec_uaba_b,
146
+ .fno = gen_helper_gvec_uclamp_b,
226
+ .opt_opc = vecop_list,
147
+ .opt_opc = vecop,
227
+ .load_dest = true,
228
+ .vece = MO_8 },
148
+ .vece = MO_8 },
229
+ { .fniv = gen_uaba_vec,
149
+ { .fniv = gen_uclamp_vec,
230
+ .fno = gen_helper_gvec_uaba_h,
150
+ .fno = gen_helper_gvec_uclamp_h,
231
+ .opt_opc = vecop_list,
151
+ .opt_opc = vecop,
232
+ .load_dest = true,
233
+ .vece = MO_16 },
152
+ .vece = MO_16 },
234
+ { .fni4 = gen_uaba_i32,
153
+ { .fni4 = gen_uclamp_i32,
235
+ .fniv = gen_uaba_vec,
154
+ .fniv = gen_uclamp_vec,
236
+ .fno = gen_helper_gvec_uaba_s,
155
+ .fno = gen_helper_gvec_uclamp_s,
237
+ .opt_opc = vecop_list,
156
+ .opt_opc = vecop,
238
+ .load_dest = true,
239
+ .vece = MO_32 },
157
+ .vece = MO_32 },
240
+ { .fni8 = gen_uaba_i64,
158
+ { .fni8 = gen_uclamp_i64,
241
+ .fniv = gen_uaba_vec,
159
+ .fniv = gen_uclamp_vec,
242
+ .fno = gen_helper_gvec_uaba_d,
160
+ .fno = gen_helper_gvec_uclamp_d,
243
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
161
+ .opt_opc = vecop,
244
+ .opt_opc = vecop_list,
162
+ .vece = MO_64,
245
+ .load_dest = true,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
246
+ .vece = MO_64 },
164
+ };
247
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
248
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
166
+}
249
+}
167
+
250
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
251
/* Translate a NEON data processing instruction. Return nonzero if the
252
instruction is invalid.
253
We process data in a mixture of 32-bit and 64-bit chunks.
254
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
255
}
256
return 0;
257
258
+ case NEON_3R_VABA:
259
+ if (u) {
260
+ gen_gvec_uaba(size, rd_ofs, rn_ofs, rm_ofs,
261
+ vec_size, vec_size);
262
+ } else {
263
+ gen_gvec_saba(size, rd_ofs, rn_ofs, rm_ofs,
264
+ vec_size, vec_size);
265
+ }
266
+ return 0;
267
+
268
case NEON_3R_VADD_VSUB:
269
case NEON_3R_LOGIC:
270
case NEON_3R_VMAX:
271
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
272
case NEON_3R_VQRSHL:
273
GEN_NEON_INTEGER_OP_ENV(qrshl);
274
break;
275
- case NEON_3R_VABA:
276
- GEN_NEON_INTEGER_OP(abd);
277
- tcg_temp_free_i32(tmp2);
278
- tmp2 = neon_load_reg(rd, pass);
279
- gen_neon_add(size, tmp, tmp2);
280
- break;
281
case NEON_3R_VPMAX:
282
GEN_NEON_INTEGER_OP(pmax);
283
break;
284
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
285
index XXXXXXX..XXXXXXX 100644
170
index XXXXXXX..XXXXXXX 100644
286
--- a/target/arm/vec_helper.c
171
--- a/target/arm/vec_helper.c
287
+++ b/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
288
@@ -XXX,XX +XXX,XX @@ DO_ABD(gvec_uabd_s, uint32_t)
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
289
DO_ABD(gvec_uabd_d, uint64_t)
174
}
290
175
clear_tail(d, opr_sz, simd_maxsz(desc));
291
#undef DO_ABD
176
}
292
+
177
+
293
+#define DO_ABA(NAME, TYPE) \
178
+#define DO_CLAMP(NAME, TYPE) \
294
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
295
+{ \
180
+{ \
296
+ intptr_t i, opr_sz = simd_oprsz(desc); \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
297
+ TYPE *d = vd, *n = vn, *m = vm; \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
298
+ \
183
+ TYPE aa = *(TYPE *)(a + i); \
299
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
184
+ TYPE nn = *(TYPE *)(n + i); \
300
+ d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
185
+ TYPE mm = *(TYPE *)(m + i); \
301
+ } \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
302
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
187
+ *(TYPE *)(d + i) = dd; \
303
+}
188
+ } \
304
+
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
305
+DO_ABA(gvec_saba_b, int8_t)
190
+}
306
+DO_ABA(gvec_saba_h, int16_t)
191
+
307
+DO_ABA(gvec_saba_s, int32_t)
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
308
+DO_ABA(gvec_saba_d, int64_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
309
+
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
310
+DO_ABA(gvec_uaba_b, uint8_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
311
+DO_ABA(gvec_uaba_h, uint16_t)
196
+
312
+DO_ABA(gvec_uaba_s, uint32_t)
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
313
+DO_ABA(gvec_uaba_d, uint64_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
314
+
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
315
+#undef DO_ABA
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
316
--
201
--
317
2.20.1
202
2.25.1
318
319
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add a SIGBUS signal handler. In this handler, it checks the SIGBUS type,
3
We can handle both exception entry and exception return by
4
translates the host VA delivered by host to guest PA, then fills this PA
4
hooking into aarch64_sve_change_el.
5
to guest APEI GHES memory, then notifies guest according to the SIGBUS
6
type.
7
5
8
When guest accesses the poisoned memory, it will generate a Synchronous
9
External Abort(SEA). Then host kernel gets an APEI notification and calls
10
memory_failure() to unmap the affected page in stage 2, finally
11
returns to guest.
12
13
If the guest continues to access the PG_hwpoison page, it will trap to KVM as
14
stage2 fault, then a SIGBUS_MCEERR_AR synchronous signal is delivered to
15
Qemu, Qemu records this error address into guest APEI GHES memory and
16
notifies guest using Synchronous-External-Abort(SEA).
17
18
In order to inject a vSEA, we introduce the kvm_inject_arm_sea() function
19
in which we can setup the type of exception and the syndrome information.
20
When switching to guest, the target vcpu will jump to the synchronous
21
external abort vector table entry.
22
23
The ESR_ELx.DFSC is set to synchronous external abort(0x10), and the
24
ESR_ELx.FnV is set to not valid(0x1), which will tell guest that FAR is
25
not valid and holds an UNKNOWN value. These values will be set to KVM
26
register structures through KVM_SET_ONE_REG IOCTL.
27
28
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
29
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
30
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
31
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
32
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
33
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
34
Message-id: 20200512030609.19593-10-gengdongjiu@huawei.com
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
35
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
36
---
10
---
37
include/sysemu/kvm.h | 3 +-
11
target/arm/helper.c | 15 +++++++++++++--
38
target/arm/cpu.h | 4 +++
12
1 file changed, 13 insertions(+), 2 deletions(-)
39
target/arm/internals.h | 5 +--
40
target/i386/cpu.h | 2 ++
41
target/arm/helper.c | 2 +-
42
target/arm/kvm64.c | 77 +++++++++++++++++++++++++++++++++++++++++
43
target/arm/tlb_helper.c | 2 +-
44
7 files changed, 89 insertions(+), 6 deletions(-)
45
13
46
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/include/sysemu/kvm.h
49
+++ b/include/sysemu/kvm.h
50
@@ -XXX,XX +XXX,XX @@ bool kvm_vcpu_id_is_valid(int vcpu_id);
51
/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
52
unsigned long kvm_arch_vcpu_id(CPUState *cpu);
53
54
-#ifdef TARGET_I386
55
-#define KVM_HAVE_MCE_INJECTION 1
56
+#ifdef KVM_HAVE_MCE_INJECTION
57
void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
58
#endif
59
60
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/cpu.h
63
+++ b/target/arm/cpu.h
64
@@ -XXX,XX +XXX,XX @@
65
/* ARM processors have a weak memory model */
66
#define TCG_GUEST_DEFAULT_MO (0)
67
68
+#ifdef TARGET_AARCH64
69
+#define KVM_HAVE_MCE_INJECTION 1
70
+#endif
71
+
72
#define EXCP_UDEF 1 /* undefined instruction */
73
#define EXCP_SWI 2 /* software interrupt */
74
#define EXCP_PREFETCH_ABORT 3
75
diff --git a/target/arm/internals.h b/target/arm/internals.h
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/internals.h
78
+++ b/target/arm/internals.h
79
@@ -XXX,XX +XXX,XX @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
80
| ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc;
81
}
82
83
-static inline uint32_t syn_data_abort_no_iss(int same_el,
84
+static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv,
85
int ea, int cm, int s1ptw,
86
int wnr, int fsc)
87
{
88
return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
89
| ARM_EL_IL
90
- | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
91
+ | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7)
92
+ | (wnr << 6) | fsc;
93
}
94
95
static inline uint32_t syn_data_abort_with_iss(int same_el,
96
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/i386/cpu.h
99
+++ b/target/i386/cpu.h
100
@@ -XXX,XX +XXX,XX @@
101
/* The x86 has a strong memory model with some store-after-load re-ordering */
102
#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
103
104
+#define KVM_HAVE_MCE_INJECTION 1
105
+
106
/* Maximum instruction code size */
107
#define TARGET_MAX_INSN_SIZE 16
108
109
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
110
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
111
--- a/target/arm/helper.c
16
--- a/target/arm/helper.c
112
+++ b/target/arm/helper.c
17
+++ b/target/arm/helper.c
113
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
114
* Report exception with ESR indicating a fault due to a
19
return;
115
* translation table walk for a cache maintenance instruction.
20
}
116
*/
21
117
- syn = syn_data_abort_no_iss(current_el == target_el,
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
118
+ syn = syn_data_abort_no_iss(current_el == target_el, 0,
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
119
fi.ea, 1, fi.s1ptw, 1, fsc);
120
env->exception.vaddress = value;
121
env->exception.fsr = fsr;
122
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/target/arm/kvm64.c
125
+++ b/target/arm/kvm64.c
126
@@ -XXX,XX +XXX,XX @@
127
#include "sysemu/kvm_int.h"
128
#include "kvm_arm.h"
129
#include "internals.h"
130
+#include "hw/acpi/acpi.h"
131
+#include "hw/acpi/ghes.h"
132
+#include "hw/arm/virt.h"
133
134
static bool have_guest_debug;
135
136
@@ -XXX,XX +XXX,XX @@ int kvm_arm_cpreg_level(uint64_t regidx)
137
return KVM_PUT_RUNTIME_STATE;
138
}
139
140
+/* Callers must hold the iothread mutex lock */
141
+static void kvm_inject_arm_sea(CPUState *c)
142
+{
143
+ ARMCPU *cpu = ARM_CPU(c);
144
+ CPUARMState *env = &cpu->env;
145
+ CPUClass *cc = CPU_GET_CLASS(c);
146
+ uint32_t esr;
147
+ bool same_el;
148
+
149
+ c->exception_index = EXCP_DATA_ABORT;
150
+ env->exception.target_el = 1;
151
+
24
+
152
+ /*
25
+ /*
153
+ * Set the DFSC to synchronous external abort and set FnV to not valid,
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
154
+ * this will tell guest the FAR_ELx is UNKNOWN for this abort.
27
+ * invoke ResetSVEState when taking an exception from, or
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
155
+ */
29
+ */
156
+ same_el = arm_current_el(env) == env->exception.target_el;
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
157
+ esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
31
+ arm_reset_sve_state(env);
158
+
32
+ return;
159
+ env->exception.syndrome = esr;
160
+
161
+ cc->do_interrupt(c);
162
+}
163
+
164
#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
165
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
166
167
@@ -XXX,XX +XXX,XX @@ int kvm_arch_get_registers(CPUState *cs)
168
return ret;
169
}
170
171
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
172
+{
173
+ ram_addr_t ram_addr;
174
+ hwaddr paddr;
175
+ Object *obj = qdev_get_machine();
176
+ VirtMachineState *vms = VIRT_MACHINE(obj);
177
+ bool acpi_enabled = virt_is_acpi_enabled(vms);
178
+
179
+ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
180
+
181
+ if (acpi_enabled && addr &&
182
+ object_property_get_bool(obj, "ras", NULL)) {
183
+ ram_addr = qemu_ram_addr_from_host(addr);
184
+ if (ram_addr != RAM_ADDR_INVALID &&
185
+ kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
186
+ kvm_hwpoison_page_add(ram_addr);
187
+ /*
188
+ * If this is a BUS_MCEERR_AR, we know we have been called
189
+ * synchronously from the vCPU thread, so we can easily
190
+ * synchronize the state and inject an error.
191
+ *
192
+ * TODO: we currently don't tell the guest at all about
193
+ * BUS_MCEERR_AO. In that case we might either be being
194
+ * called synchronously from the vCPU thread, or a bit
195
+ * later from the main thread, so doing the injection of
196
+ * the error would be more complicated.
197
+ */
198
+ if (code == BUS_MCEERR_AR) {
199
+ kvm_cpu_synchronize_state(c);
200
+ if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
201
+ kvm_inject_arm_sea(c);
202
+ } else {
203
+ error_report("failed to record the error");
204
+ abort();
205
+ }
206
+ }
207
+ return;
208
+ }
209
+ if (code == BUS_MCEERR_AO) {
210
+ error_report("Hardware memory error at addr %p for memory used by "
211
+ "QEMU itself instead of guest system!", addr);
212
+ }
213
+ }
33
+ }
214
+
34
+
215
+ if (code == BUS_MCEERR_AR) {
35
/*
216
+ error_report("Hardware memory error!");
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
217
+ exit(1);
37
* at ELx, or not available because the EL is in AArch32 state, then
218
+ }
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
219
+}
39
* we already have the correct register contents when encountering the
220
+
40
* vq0->vq0 transition between EL0->EL1.
221
/* C6.6.29 BRK instruction */
222
static const uint32_t brk_insn = 0xd4200000;
223
224
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
225
index XXXXXXX..XXXXXXX 100644
226
--- a/target/arm/tlb_helper.c
227
+++ b/target/arm/tlb_helper.c
228
@@ -XXX,XX +XXX,XX @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
229
* ISV field.
230
*/
41
*/
231
if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) {
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
232
- syn = syn_data_abort_no_iss(same_el,
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
233
+ syn = syn_data_abort_no_iss(same_el, 0,
44
? sve_vqm1_for_el(env, old_el) : 0);
234
ea, 0, s1ptw, is_write, fsc);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
235
} else {
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
236
/*
47
? sve_vqm1_for_el(env, new_el) : 0);
48
237
--
49
--
238
2.20.1
50
2.25.1
239
240
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
These operations do not touch fp_status.
3
Note that SME remains effectively disabled for user-only,
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
4
6
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200513163245.17915-12-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
11
---
10
target/arm/helper.h | 4 ++--
12
docs/system/arm/emulation.rst | 4 ++++
11
target/arm/translate-a64.c | 5 ++---
13
target/arm/cpu64.c | 11 +++++++++++
12
target/arm/translate.c | 12 ++----------
14
2 files changed, 15 insertions(+)
13
target/arm/vfp_helper.c | 5 ++---
14
4 files changed, 8 insertions(+), 18 deletions(-)
15
15
16
diff --git a/target/arm/helper.h b/target/arm/helper.h
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper.h
18
--- a/docs/system/arm/emulation.rst
19
+++ b/target/arm/helper.h
19
+++ b/docs/system/arm/emulation.rst
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
21
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
22
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
23
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
24
-DEF_HELPER_2(recpe_u32, i32, i32, ptr)
24
+- FEAT_SME (Scalable Matrix Extension)
25
-DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
26
+DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
27
+DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
28
DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32)
28
- FEAT_SPECRES (Speculation restriction instructions)
29
29
- FEAT_SSBS (Speculative Store Bypass Safe)
30
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
31
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
32
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/translate-a64.c
33
--- a/target/arm/cpu64.c
34
+++ b/target/arm/translate-a64.c
34
+++ b/target/arm/cpu64.c
35
@@ -XXX,XX +XXX,XX @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
36
36
*/
37
switch (opcode) {
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
38
case 0x3c: /* URECPE */
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
39
- gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
40
+ gen_helper_recpe_u32(tcg_res, tcg_op);
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
41
break;
41
cpu->isar.id_aa64pfr1 = t;
42
case 0x3d: /* FRECPE */
42
43
gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
44
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
45
unallocated_encoding(s);
45
cpu->isar.id_aa64dfr0 = t;
46
return;
46
47
}
47
+ t = cpu->isar.id_aa64smfr0;
48
- need_fpstatus = true;
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
49
break;
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
50
case 0x1e: /* FRINT32Z */
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
51
case 0x1f: /* FRINT64Z */
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
52
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
53
gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
54
break;
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
55
case 0x7c: /* URSQRTE */
55
+ cpu->isar.id_aa64smfr0 = t;
56
- gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
56
+
57
+ gen_helper_rsqrte_u32(tcg_res, tcg_op);
57
/* Replicate the same data to the 32-bit id registers. */
58
break;
58
aa32_max_features(cpu);
59
case 0x1e: /* FRINT32Z */
60
case 0x5e: /* FRINT32X */
61
diff --git a/target/arm/translate.c b/target/arm/translate.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.c
64
+++ b/target/arm/translate.c
65
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
66
break;
67
}
68
case NEON_2RM_VRECPE:
69
- {
70
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
71
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
72
- tcg_temp_free_ptr(fpstatus);
73
+ gen_helper_recpe_u32(tmp, tmp);
74
break;
75
- }
76
case NEON_2RM_VRSQRTE:
77
- {
78
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
79
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
80
- tcg_temp_free_ptr(fpstatus);
81
+ gen_helper_rsqrte_u32(tmp, tmp);
82
break;
83
- }
84
case NEON_2RM_VRECPE_F:
85
{
86
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
87
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/vfp_helper.c
90
+++ b/target/arm/vfp_helper.c
91
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
92
return make_float64(val);
93
}
94
95
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
96
+uint32_t HELPER(recpe_u32)(uint32_t a)
97
{
98
- /* float_status *s = fpstp; */
99
int input, estimate;
100
101
if ((a & 0x80000000) == 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
103
return deposit32(0, (32 - 9), 9, estimate);
104
}
105
106
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
107
+uint32_t HELPER(rsqrte_u32)(uint32_t a)
108
{
109
int estimate;
110
59
111
--
60
--
112
2.20.1
61
2.25.1
113
114
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Must clear the tail for AdvSIMD when SVE is enabled.
4
5
Fixes: ca40a6e6e39
6
Cc: qemu-stable@nongnu.org
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-15-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
7
---
12
target/arm/vec_helper.c | 2 ++
8
linux-user/aarch64/target_cpu.h | 5 ++++-
13
1 file changed, 2 insertions(+)
9
1 file changed, 4 insertions(+), 1 deletion(-)
14
10
15
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/vec_helper.c
13
--- a/linux-user/aarch64/target_cpu.h
18
+++ b/target/arm/vec_helper.c
14
+++ b/linux-user/aarch64/target_cpu.h
19
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
20
d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
16
21
} \
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
22
} \
18
{
23
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
20
+ /*
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
* different from AArch32 Linux, which uses TPIDRRO.
23
*/
24
env->cp15.tpidr_el[0] = newtls;
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
26
+ env->cp15.tpidr2_el0 = 0;
24
}
27
}
25
28
26
DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
27
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
28
mm, a[i + j], 0, stat); \
29
} \
30
} \
31
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
32
}
33
34
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
35
--
30
--
36
2.20.1
31
2.25.1
37
38
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Xiang and I are willing to review the APEI-related patches and
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
volunteer as the reviewers for the HEST/GHES part.
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
7
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Message-id: 20200512030609.19593-11-gengdongjiu@huawei.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
7
---
13
MAINTAINERS | 9 +++++++++
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
14
1 file changed, 9 insertions(+)
9
1 file changed, 9 insertions(+)
15
10
16
diff --git a/MAINTAINERS b/MAINTAINERS
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
17
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
13
--- a/linux-user/aarch64/cpu_loop.c
19
+++ b/MAINTAINERS
14
+++ b/linux-user/aarch64/cpu_loop.c
20
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/bios-tables-test.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
21
F: tests/qtest/acpi-utils.[hc]
16
22
F: tests/data/acpi/
17
switch (trapnr) {
23
18
case EXCP_SWI:
24
+ACPI/HEST/GHES
19
+ /*
25
+R: Dongjiu Geng <gengdongjiu@huawei.com>
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
26
+R: Xiang Zheng <zhengxiang9@huawei.com>
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
27
+L: qemu-arm@nongnu.org
22
+ */
28
+S: Maintained
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
29
+F: hw/acpi/ghes.c
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
30
+F: include/hw/acpi/ghes.h
25
+ arm_rebuild_hflags(env);
31
+F: docs/specs/acpi_hest_ghes.rst
26
+ arm_reset_sve_state(env);
32
+
27
+ }
33
ppc4xx
28
ret = do_syscall(env,
34
M: David Gibson <david@gibson.dropbear.id.au>
29
env->xregs[8],
35
L: qemu-ppc@nongnu.org
30
env->xregs[0],
36
--
31
--
37
2.20.1
32
2.25.1
38
39
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Provide a functional interface for the vector expansion.
3
Make sure to zero the currently reserved fields.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Macro-ize the 5 nearly identical comparisons.
8
4
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-7-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
9
---
14
target/arm/translate.h | 16 ++-
10
linux-user/aarch64/signal.c | 9 ++++++++-
15
target/arm/translate-a64.c | 22 ++--
11
1 file changed, 8 insertions(+), 1 deletion(-)
16
target/arm/translate.c | 254 ++++++++-----------------------------
17
3 files changed, 74 insertions(+), 218 deletions(-)
18
12
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
20
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
15
--- a/linux-user/aarch64/signal.c
22
+++ b/target/arm/translate.h
16
+++ b/linux-user/aarch64/signal.c
23
@@ -XXX,XX +XXX,XX @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
24
uint64_t vfp_expand_imm(int size, uint8_t imm8);
18
struct target_sve_context {
25
19
struct target_aarch64_ctx head;
26
/* Vector operations shared between ARM and AArch64. */
20
uint16_t vl;
27
-extern const GVecGen2 ceq0_op[4];
21
- uint16_t reserved[3];
28
-extern const GVecGen2 clt0_op[4];
22
+ uint16_t flags;
29
-extern const GVecGen2 cgt0_op[4];
23
+ uint16_t reserved[2];
30
-extern const GVecGen2 cle0_op[4];
24
/* The actual SVE data immediately follows. It is laid out
31
-extern const GVecGen2 cge0_op[4];
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
32
+void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
26
* the original struct pointer.
33
+ uint32_t opr_sz, uint32_t max_sz);
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
34
+void gen_gvec_clt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
35
+ uint32_t opr_sz, uint32_t max_sz);
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
36
+void gen_gvec_cgt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
30
37
+ uint32_t opr_sz, uint32_t max_sz);
31
+#define TARGET_SVE_SIG_FLAG_SM 1
38
+void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
39
+ uint32_t opr_sz, uint32_t max_sz);
40
+void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
41
+ uint32_t opr_sz, uint32_t max_sz);
42
+
32
+
43
extern const GVecGen3 mla_op[4];
33
struct target_rt_sigframe {
44
extern const GVecGen3 mls_op[4];
34
struct target_siginfo info;
45
extern const GVecGen3 cmtst_op[4];
35
struct target_ucontext uc;
46
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
47
index XXXXXXX..XXXXXXX 100644
37
{
48
--- a/target/arm/translate-a64.c
38
int i, j;
49
+++ b/target/arm/translate-a64.c
39
50
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
40
+ memset(sve, 0, sizeof(*sve));
51
is_q ? 16 : 8, vec_full_reg_size(s));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
52
}
42
__put_user(size, &sve->head.size);
53
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
54
-/* Expand a 2-operand AdvSIMD vector operation using an op descriptor. */
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
55
-static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
56
- int rn, const GVecGen2 *gvec_op)
57
-{
58
- tcg_gen_gvec_2(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
59
- is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
60
-}
61
-
62
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
63
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
64
int rn, int rm, const GVecGen3 *gvec_op)
65
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
66
}
67
break;
68
case 0x8: /* CMGT, CMGE */
69
- gen_gvec_op2(s, is_q, rd, rn, u ? &cge0_op[size] : &cgt0_op[size]);
70
+ if (u) {
71
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
72
+ } else {
73
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
74
+ }
75
return;
76
case 0x9: /* CMEQ, CMLE */
77
- gen_gvec_op2(s, is_q, rd, rn, u ? &cle0_op[size] : &ceq0_op[size]);
78
+ if (u) {
79
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
80
+ } else {
81
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
82
+ }
83
return;
84
case 0xa: /* CMLT */
85
- gen_gvec_op2(s, is_q, rd, rn, &clt0_op[size]);
86
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
87
return;
88
case 0xb:
89
if (u) { /* ABS, NEG */
90
diff --git a/target/arm/translate.c b/target/arm/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/translate.c
93
+++ b/target/arm/translate.c
94
@@ -XXX,XX +XXX,XX @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
95
return 1;
96
}
97
98
-static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
99
-{
100
- tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
101
- tcg_gen_neg_i32(d, d);
102
-}
103
-
104
-static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
105
-{
106
- tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
107
- tcg_gen_neg_i64(d, d);
108
-}
109
-
110
-static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
111
-{
112
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
113
- tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
114
- tcg_temp_free_vec(zero);
115
-}
116
+#define GEN_CMP0(NAME, COND) \
117
+ static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
118
+ { \
119
+ tcg_gen_setcondi_i32(COND, d, a, 0); \
120
+ tcg_gen_neg_i32(d, d); \
121
+ } \
122
+ static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
123
+ { \
124
+ tcg_gen_setcondi_i64(COND, d, a, 0); \
125
+ tcg_gen_neg_i64(d, d); \
126
+ } \
127
+ static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
128
+ { \
129
+ TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
130
+ tcg_gen_cmp_vec(COND, vece, d, a, zero); \
131
+ tcg_temp_free_vec(zero); \
132
+ } \
133
+ void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
134
+ uint32_t opr_sz, uint32_t max_sz) \
135
+ { \
136
+ const GVecGen2 op[4] = { \
137
+ { .fno = gen_helper_gvec_##NAME##0_b, \
138
+ .fniv = gen_##NAME##0_vec, \
139
+ .opt_opc = vecop_list_cmp, \
140
+ .vece = MO_8 }, \
141
+ { .fno = gen_helper_gvec_##NAME##0_h, \
142
+ .fniv = gen_##NAME##0_vec, \
143
+ .opt_opc = vecop_list_cmp, \
144
+ .vece = MO_16 }, \
145
+ { .fni4 = gen_##NAME##0_i32, \
146
+ .fniv = gen_##NAME##0_vec, \
147
+ .opt_opc = vecop_list_cmp, \
148
+ .vece = MO_32 }, \
149
+ { .fni8 = gen_##NAME##0_i64, \
150
+ .fniv = gen_##NAME##0_vec, \
151
+ .opt_opc = vecop_list_cmp, \
152
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
153
+ .vece = MO_64 }, \
154
+ }; \
155
+ tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
156
+ }
46
+ }
157
47
158
static const TCGOpcode vecop_list_cmp[] = {
48
/* Note that SVE regs are stored as a byte stream, with each byte element
159
INDEX_op_cmp_vec, 0
49
* at a subsequent address. This corresponds to a little-endian store
160
};
161
162
-const GVecGen2 ceq0_op[4] = {
163
- { .fno = gen_helper_gvec_ceq0_b,
164
- .fniv = gen_ceq0_vec,
165
- .opt_opc = vecop_list_cmp,
166
- .vece = MO_8 },
167
- { .fno = gen_helper_gvec_ceq0_h,
168
- .fniv = gen_ceq0_vec,
169
- .opt_opc = vecop_list_cmp,
170
- .vece = MO_16 },
171
- { .fni4 = gen_ceq0_i32,
172
- .fniv = gen_ceq0_vec,
173
- .opt_opc = vecop_list_cmp,
174
- .vece = MO_32 },
175
- { .fni8 = gen_ceq0_i64,
176
- .fniv = gen_ceq0_vec,
177
- .opt_opc = vecop_list_cmp,
178
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
179
- .vece = MO_64 },
180
-};
181
+GEN_CMP0(ceq, TCG_COND_EQ)
182
+GEN_CMP0(cle, TCG_COND_LE)
183
+GEN_CMP0(cge, TCG_COND_GE)
184
+GEN_CMP0(clt, TCG_COND_LT)
185
+GEN_CMP0(cgt, TCG_COND_GT)
186
187
-static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
188
-{
189
- tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
190
- tcg_gen_neg_i32(d, d);
191
-}
192
-
193
-static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
194
-{
195
- tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
196
- tcg_gen_neg_i64(d, d);
197
-}
198
-
199
-static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
200
-{
201
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
202
- tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
203
- tcg_temp_free_vec(zero);
204
-}
205
-
206
-const GVecGen2 cle0_op[4] = {
207
- { .fno = gen_helper_gvec_cle0_b,
208
- .fniv = gen_cle0_vec,
209
- .opt_opc = vecop_list_cmp,
210
- .vece = MO_8 },
211
- { .fno = gen_helper_gvec_cle0_h,
212
- .fniv = gen_cle0_vec,
213
- .opt_opc = vecop_list_cmp,
214
- .vece = MO_16 },
215
- { .fni4 = gen_cle0_i32,
216
- .fniv = gen_cle0_vec,
217
- .opt_opc = vecop_list_cmp,
218
- .vece = MO_32 },
219
- { .fni8 = gen_cle0_i64,
220
- .fniv = gen_cle0_vec,
221
- .opt_opc = vecop_list_cmp,
222
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
223
- .vece = MO_64 },
224
-};
225
-
226
-static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
227
-{
228
- tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
229
- tcg_gen_neg_i32(d, d);
230
-}
231
-
232
-static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
233
-{
234
- tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
235
- tcg_gen_neg_i64(d, d);
236
-}
237
-
238
-static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
239
-{
240
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
241
- tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
242
- tcg_temp_free_vec(zero);
243
-}
244
-
245
-const GVecGen2 cge0_op[4] = {
246
- { .fno = gen_helper_gvec_cge0_b,
247
- .fniv = gen_cge0_vec,
248
- .opt_opc = vecop_list_cmp,
249
- .vece = MO_8 },
250
- { .fno = gen_helper_gvec_cge0_h,
251
- .fniv = gen_cge0_vec,
252
- .opt_opc = vecop_list_cmp,
253
- .vece = MO_16 },
254
- { .fni4 = gen_cge0_i32,
255
- .fniv = gen_cge0_vec,
256
- .opt_opc = vecop_list_cmp,
257
- .vece = MO_32 },
258
- { .fni8 = gen_cge0_i64,
259
- .fniv = gen_cge0_vec,
260
- .opt_opc = vecop_list_cmp,
261
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
262
- .vece = MO_64 },
263
-};
264
-
265
-static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
266
-{
267
- tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
268
- tcg_gen_neg_i32(d, d);
269
-}
270
-
271
-static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
272
-{
273
- tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
274
- tcg_gen_neg_i64(d, d);
275
-}
276
-
277
-static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
278
-{
279
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
280
- tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
281
- tcg_temp_free_vec(zero);
282
-}
283
-
284
-const GVecGen2 clt0_op[4] = {
285
- { .fno = gen_helper_gvec_clt0_b,
286
- .fniv = gen_clt0_vec,
287
- .opt_opc = vecop_list_cmp,
288
- .vece = MO_8 },
289
- { .fno = gen_helper_gvec_clt0_h,
290
- .fniv = gen_clt0_vec,
291
- .opt_opc = vecop_list_cmp,
292
- .vece = MO_16 },
293
- { .fni4 = gen_clt0_i32,
294
- .fniv = gen_clt0_vec,
295
- .opt_opc = vecop_list_cmp,
296
- .vece = MO_32 },
297
- { .fni8 = gen_clt0_i64,
298
- .fniv = gen_clt0_vec,
299
- .opt_opc = vecop_list_cmp,
300
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
301
- .vece = MO_64 },
302
-};
303
-
304
-static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
305
-{
306
- tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
307
- tcg_gen_neg_i32(d, d);
308
-}
309
-
310
-static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
311
-{
312
- tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
313
- tcg_gen_neg_i64(d, d);
314
-}
315
-
316
-static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
317
-{
318
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
319
- tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
320
- tcg_temp_free_vec(zero);
321
-}
322
-
323
-const GVecGen2 cgt0_op[4] = {
324
- { .fno = gen_helper_gvec_cgt0_b,
325
- .fniv = gen_cgt0_vec,
326
- .opt_opc = vecop_list_cmp,
327
- .vece = MO_8 },
328
- { .fno = gen_helper_gvec_cgt0_h,
329
- .fniv = gen_cgt0_vec,
330
- .opt_opc = vecop_list_cmp,
331
- .vece = MO_16 },
332
- { .fni4 = gen_cgt0_i32,
333
- .fniv = gen_cgt0_vec,
334
- .opt_opc = vecop_list_cmp,
335
- .vece = MO_32 },
336
- { .fni8 = gen_cgt0_i64,
337
- .fniv = gen_cgt0_vec,
338
- .opt_opc = vecop_list_cmp,
339
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
340
- .vece = MO_64 },
341
-};
342
+#undef GEN_CMP0
343
344
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
345
{
346
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
347
break;
348
349
case NEON_2RM_VCEQ0:
350
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
351
- vec_size, &ceq0_op[size]);
352
+ gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
353
break;
354
case NEON_2RM_VCGT0:
355
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
356
- vec_size, &cgt0_op[size]);
357
+ gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
358
break;
359
case NEON_2RM_VCLE0:
360
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
361
- vec_size, &cle0_op[size]);
362
+ gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
363
break;
364
case NEON_2RM_VCGE0:
365
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
366
- vec_size, &cge0_op[size]);
367
+ gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
368
break;
369
case NEON_2RM_VCLT0:
370
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
371
- vec_size, &clt0_op[size]);
372
+ gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
373
break;
374
375
default:
376
--
50
--
377
2.20.1
51
2.25.1
378
379
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Provide a functional interface for the vector expansion.
3
Fold the return value setting into the goto, so each
4
This fits better with the existing set of helpers that
4
point of failure need not do both.
5
we provide for other operations.
6
5
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-8-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/translate.h | 7 +-
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
13
target/arm/translate-a64.c | 4 +-
12
1 file changed, 11 insertions(+), 15 deletions(-)
14
target/arm/translate-neon.inc.c | 16 +----
15
target/arm/translate.c | 117 +++++++++++++++++---------------
16
4 files changed, 71 insertions(+), 73 deletions(-)
17
13
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
16
--- a/linux-user/aarch64/signal.c
21
+++ b/target/arm/translate.h
17
+++ b/linux-user/aarch64/signal.c
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
23
void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
19
struct target_sve_context *sve = NULL;
24
uint32_t opr_sz, uint32_t max_sz);
20
uint64_t extra_datap = 0;
25
21
bool used_extra = false;
26
-extern const GVecGen3 mla_op[4];
22
- bool err = false;
27
-extern const GVecGen3 mls_op[4];
23
int vq = 0, sve_size = 0;
28
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
29
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
25
target_restore_general_frame(env, sf);
30
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
31
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
27
switch (magic) {
28
case 0:
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
84
-
85
- exit:
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
32
+
89
+
33
extern const GVecGen3 cmtst_op[4];
90
+ err:
34
extern const GVecGen3 sshl_op[4];
91
+ unlock_user(extra, extra_datap, 0);
35
extern const GVecGen3 ushl_op[4];
92
+ return 1;
36
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-a64.c
39
+++ b/target/arm/translate-a64.c
40
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
41
return;
42
case 0x12: /* MLA, MLS */
43
if (u) {
44
- gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
45
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
46
} else {
47
- gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
48
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
49
}
50
return;
51
case 0x11:
52
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/translate-neon.inc.c
55
+++ b/target/arm/translate-neon.inc.c
56
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
57
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
58
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
59
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
60
+DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
61
+DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
62
63
#define DO_3SAME_CMP(INSN, COND) \
64
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
65
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
66
return do_3same(s, a, gen_VMUL_p_3s);
67
}
93
}
68
94
69
-#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
70
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
71
- uint32_t rn_ofs, uint32_t rm_ofs, \
72
- uint32_t oprsz, uint32_t maxsz) \
73
- { \
74
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
75
- oprsz, maxsz, &OPARRAY[vece]); \
76
- } \
77
- DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
78
-
79
-
80
-DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
81
-DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
82
-
83
#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
84
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
85
uint32_t rn_ofs, uint32_t rm_ofs, \
86
diff --git a/target/arm/translate.c b/target/arm/translate.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/target/arm/translate.c
89
+++ b/target/arm/translate.c
90
@@ -XXX,XX +XXX,XX @@ static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
91
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
92
* these tables are shared with AArch64 which does support them.
93
*/
94
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
95
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
96
+{
97
+ static const TCGOpcode vecop_list[] = {
98
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
99
+ };
100
+ static const GVecGen3 ops[4] = {
101
+ { .fni4 = gen_mla8_i32,
102
+ .fniv = gen_mla_vec,
103
+ .load_dest = true,
104
+ .opt_opc = vecop_list,
105
+ .vece = MO_8 },
106
+ { .fni4 = gen_mla16_i32,
107
+ .fniv = gen_mla_vec,
108
+ .load_dest = true,
109
+ .opt_opc = vecop_list,
110
+ .vece = MO_16 },
111
+ { .fni4 = gen_mla32_i32,
112
+ .fniv = gen_mla_vec,
113
+ .load_dest = true,
114
+ .opt_opc = vecop_list,
115
+ .vece = MO_32 },
116
+ { .fni8 = gen_mla64_i64,
117
+ .fniv = gen_mla_vec,
118
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
119
+ .load_dest = true,
120
+ .opt_opc = vecop_list,
121
+ .vece = MO_64 },
122
+ };
123
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
124
+}
125
126
-static const TCGOpcode vecop_list_mla[] = {
127
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
128
-};
129
-
130
-static const TCGOpcode vecop_list_mls[] = {
131
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
132
-};
133
-
134
-const GVecGen3 mla_op[4] = {
135
- { .fni4 = gen_mla8_i32,
136
- .fniv = gen_mla_vec,
137
- .load_dest = true,
138
- .opt_opc = vecop_list_mla,
139
- .vece = MO_8 },
140
- { .fni4 = gen_mla16_i32,
141
- .fniv = gen_mla_vec,
142
- .load_dest = true,
143
- .opt_opc = vecop_list_mla,
144
- .vece = MO_16 },
145
- { .fni4 = gen_mla32_i32,
146
- .fniv = gen_mla_vec,
147
- .load_dest = true,
148
- .opt_opc = vecop_list_mla,
149
- .vece = MO_32 },
150
- { .fni8 = gen_mla64_i64,
151
- .fniv = gen_mla_vec,
152
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
153
- .load_dest = true,
154
- .opt_opc = vecop_list_mla,
155
- .vece = MO_64 },
156
-};
157
-
158
-const GVecGen3 mls_op[4] = {
159
- { .fni4 = gen_mls8_i32,
160
- .fniv = gen_mls_vec,
161
- .load_dest = true,
162
- .opt_opc = vecop_list_mls,
163
- .vece = MO_8 },
164
- { .fni4 = gen_mls16_i32,
165
- .fniv = gen_mls_vec,
166
- .load_dest = true,
167
- .opt_opc = vecop_list_mls,
168
- .vece = MO_16 },
169
- { .fni4 = gen_mls32_i32,
170
- .fniv = gen_mls_vec,
171
- .load_dest = true,
172
- .opt_opc = vecop_list_mls,
173
- .vece = MO_32 },
174
- { .fni8 = gen_mls64_i64,
175
- .fniv = gen_mls_vec,
176
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
177
- .load_dest = true,
178
- .opt_opc = vecop_list_mls,
179
- .vece = MO_64 },
180
-};
181
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
182
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
183
+{
184
+ static const TCGOpcode vecop_list[] = {
185
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
186
+ };
187
+ static const GVecGen3 ops[4] = {
188
+ { .fni4 = gen_mls8_i32,
189
+ .fniv = gen_mls_vec,
190
+ .load_dest = true,
191
+ .opt_opc = vecop_list,
192
+ .vece = MO_8 },
193
+ { .fni4 = gen_mls16_i32,
194
+ .fniv = gen_mls_vec,
195
+ .load_dest = true,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_16 },
198
+ { .fni4 = gen_mls32_i32,
199
+ .fniv = gen_mls_vec,
200
+ .load_dest = true,
201
+ .opt_opc = vecop_list,
202
+ .vece = MO_32 },
203
+ { .fni8 = gen_mls64_i64,
204
+ .fniv = gen_mls_vec,
205
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
206
+ .load_dest = true,
207
+ .opt_opc = vecop_list,
208
+ .vece = MO_64 },
209
+ };
210
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
211
+}
212
213
/* CMTST : test is "if (X & Y != 0)". */
214
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
215
--
96
--
216
2.20.1
97
2.25.1
217
218
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
In 1dc8425e551, while converting to gvec, I added an extra range check
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
against the shift count. This was unnecessary because the encoding of
4
or records that are smaller than the header. We were silently
5
the shift count produces 0 to the element size - 1.
5
allowing these cases to pass, dropping the record.
6
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-5-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
target/arm/translate.c | 12 ++----------
12
linux-user/aarch64/signal.c | 5 ++++-
13
1 file changed, 2 insertions(+), 10 deletions(-)
13
1 file changed, 4 insertions(+), 1 deletion(-)
14
14
15
diff --git a/target/arm/translate.c b/target/arm/translate.c
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate.c
17
--- a/linux-user/aarch64/signal.c
18
+++ b/target/arm/translate.c
18
+++ b/linux-user/aarch64/signal.c
19
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
20
gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
20
break;
21
vec_size, vec_size);
21
22
} else { /* VSHL */
22
case TARGET_SVE_MAGIC:
23
- /* Shifts larger than the element size are
23
+ if (sve || size < sizeof(struct target_sve_context)) {
24
- * architecturally valid and results in zero.
24
+ goto err;
25
- */
25
+ }
26
- if (shift >= 8 << size) {
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
27
- tcg_gen_gvec_dup_imm(size, rd_ofs,
27
vq = sve_vq(env);
28
- vec_size, vec_size, 0);
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
29
- } else {
29
- if (!sve && size == sve_size) {
30
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
30
+ if (size == sve_size) {
31
- vec_size, vec_size);
31
sve = (struct target_sve_context *)ctx;
32
- }
32
break;
33
+ tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
34
+ vec_size, vec_size);
35
}
36
return 0;
37
}
33
}
38
--
34
--
39
2.20.1
35
2.25.1
40
41
diff view generated by jsdifflib
1
Convert the Neon VADD, VSUB, VABD 3-reg-same insns to decodetree.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
We already have gvec helpers for addition and subtraction, but must
3
add one for fabd.
4
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-12-peter.maydell@linaro.org
8
---
7
---
9
target/arm/helper.h | 3 ++-
8
linux-user/aarch64/signal.c | 3 +++
10
target/arm/neon-dp.decode | 8 ++++++++
9
1 file changed, 3 insertions(+)
11
target/arm/neon_helper.c | 7 -------
12
target/arm/translate-neon.inc.c | 28 ++++++++++++++++++++++++++++
13
target/arm/translate.c | 10 +++-------
14
target/arm/vec_helper.c | 7 +++++++
15
6 files changed, 48 insertions(+), 15 deletions(-)
16
10
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
13
--- a/linux-user/aarch64/signal.c
20
+++ b/target/arm/helper.h
14
+++ b/linux-user/aarch64/signal.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
22
DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
16
__get_user(extra_size,
23
DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
17
&((struct target_extra_context *)ctx)->size);
24
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
25
-DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
19
+ if (!extra) {
26
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
20
+ return 1;
27
DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
28
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
29
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
32
33
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
34
+
35
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
36
void, ptr, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
38
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/neon-dp.decode
41
+++ b/target/arm/neon-dp.decode
42
@@ -XXX,XX +XXX,XX @@
43
@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
44
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
45
46
+# For FP insns the high bit of 'size' is used as part of opcode decode
47
+@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
48
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
49
+
50
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
51
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
52
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
53
@@ -XXX,XX +XXX,XX @@ SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
54
vm=%vm_dp vn=%vn_dp vd=%vd_dp
55
56
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
57
+
58
+VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
59
+VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
60
+VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
61
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/neon_helper.c
64
+++ b/target/arm/neon_helper.c
65
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
66
}
67
68
/* NEON Float helpers. */
69
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
70
-{
71
- float_status *fpst = fpstp;
72
- float32 f0 = make_float32(a);
73
- float32 f1 = make_float32(b);
74
- return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
75
-}
76
77
/* Floating point comparisons produce an integer result.
78
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
79
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/target/arm/translate-neon.inc.c
82
+++ b/target/arm/translate-neon.inc.c
83
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPADD, padd_u)
84
85
DO_3SAME_VQDMULH(VQDMULH, qdmulh)
86
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
87
+
88
+/*
89
+ * For all the functions using this macro, size == 1 means fp16,
90
+ * which is an architecture extension we don't implement yet.
91
+ */
92
+#define DO_3S_FP_GVEC(INSN,FUNC) \
93
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
94
+ uint32_t rn_ofs, uint32_t rm_ofs, \
95
+ uint32_t oprsz, uint32_t maxsz) \
96
+ { \
97
+ TCGv_ptr fpst = get_fpstatus_ptr(1); \
98
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
99
+ oprsz, maxsz, 0, FUNC); \
100
+ tcg_temp_free_ptr(fpst); \
101
+ } \
102
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
103
+ { \
104
+ if (a->size != 0) { \
105
+ /* TODO fp16 support */ \
106
+ return false; \
107
+ } \
108
+ return do_3same(s, a, gen_##INSN##_3s); \
109
+ }
110
+
111
+
112
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
113
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
114
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
115
diff --git a/target/arm/translate.c b/target/arm/translate.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/target/arm/translate.c
118
+++ b/target/arm/translate.c
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
switch (op) {
121
case NEON_3R_FLOAT_ARITH:
122
pairwise = (u && size < 2); /* if VPADD (float) */
123
+ if (!pairwise) {
124
+ return 1; /* handled by decodetree */
125
+ }
21
+ }
126
break;
22
break;
127
case NEON_3R_FLOAT_MINMAX:
23
128
pairwise = u; /* if VPMIN/VPMAX (float) */
24
default:
129
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
130
{
131
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
132
switch ((u << 2) | size) {
133
- case 0: /* VADD */
134
case 4: /* VPADD */
135
gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
136
break;
137
- case 2: /* VSUB */
138
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
139
- break;
140
- case 6: /* VABD */
141
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
142
- break;
143
default:
144
abort();
145
}
146
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/target/arm/vec_helper.c
149
+++ b/target/arm/vec_helper.c
150
@@ -XXX,XX +XXX,XX @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
151
return result;
152
}
153
154
+static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
155
+{
156
+ return float32_abs(float32_sub(op1, op2, stat));
157
+}
158
+
159
#define DO_3OP(NAME, FUNC, TYPE) \
160
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
161
{ \
162
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
163
DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
164
DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
165
166
+DO_3OP(gvec_fabd_s, float32_abd, float32)
167
+
168
#ifdef TARGET_AARCH64
169
170
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
171
--
25
--
172
2.20.1
26
2.25.1
173
174
diff view generated by jsdifflib
1
Convert the Neon floating point VFMA and VFMS insn to decodetree.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
These are the last insns in the 3-reg-same group so we can
3
remove all the support/loop code from the old decoder.
4
2
3
Move the checks out of the parsing loop and into the
4
restore function. This more closely mirrors the code
5
structure in the kernel, and is slightly clearer.
6
7
Reject rather than silently skip incorrect VL and SVE record sizes,
8
bringing our checks in to line with those the kernel does.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-18-peter.maydell@linaro.org
8
---
14
---
9
target/arm/neon-dp.decode | 3 +
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
10
target/arm/translate-neon.inc.c | 41 ++++++++
16
1 file changed, 35 insertions(+), 16 deletions(-)
11
target/arm/translate.c | 176 +-------------------------------
12
3 files changed, 46 insertions(+), 174 deletions(-)
13
17
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
20
--- a/linux-user/aarch64/signal.c
17
+++ b/target/arm/neon-dp.decode
21
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
19
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
23
}
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
21
22
+VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
23
+VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
24
+
25
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
26
27
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-neon.inc.c
31
+++ b/target/arm/translate-neon.inc.c
32
@@ -XXX,XX +XXX,XX @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
33
return do_3same(s, a, gen_VRSQRTS_fp_3s);
34
}
24
}
35
25
36
+static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
26
-static void target_restore_sve_record(CPUARMState *env,
37
+ TCGv_ptr fpstatus)
27
- struct target_sve_context *sve, int vq)
38
+{
28
+static bool target_restore_sve_record(CPUARMState *env,
39
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
29
+ struct target_sve_context *sve,
40
+}
30
+ int size)
41
+
31
{
42
+static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
32
- int i, j;
43
+{
33
+ int i, j, vl, vq;
44
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
34
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
45
+ return false;
37
+ return false;
46
+ }
38
+ }
47
+
39
+
48
+ if (a->size != 0) {
40
+ __get_user(vl, &sve->vl);
49
+ /* TODO fp16 support */
41
+ vq = sve_vq(env);
42
+
43
+ /* Reject mismatched VL. */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
50
+ return false;
45
+ return false;
51
+ }
46
+ }
52
+
47
+
53
+ return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
54
+}
49
+ if (size <= sizeof(*sve)) {
50
+ return true;
51
+ }
55
+
52
+
56
+static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
53
+ /* Reject non-empty but incomplete record. */
57
+ TCGv_ptr fpstatus)
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
58
+{
59
+ gen_helper_vfp_negs(vn, vn);
60
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
61
+}
62
+
63
+static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
64
+{
65
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
66
+ return false;
55
+ return false;
67
+ }
56
+ }
68
+
57
+
69
+ if (a->size != 0) {
58
+ /*
70
+ /* TODO fp16 support */
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
71
+ return false;
60
* at a subsequent address. This corresponds to a little-endian load
72
+ }
61
* of our 64-bit hunks.
73
+
62
*/
74
+ return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
75
+}
64
}
76
+
65
}
77
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
78
{
79
/* FP operations handled pairwise 32 bits at a time */
80
diff --git a/target/arm/translate.c b/target/arm/translate.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/arm/translate.c
83
+++ b/target/arm/translate.c
84
@@ -XXX,XX +XXX,XX @@ static void gen_neon_narrow_op(int op, int u, int size,
85
}
66
}
67
+ return true;
86
}
68
}
87
69
88
-/* Symbolic constants for op fields for Neon 3-register same-length.
70
static int target_restore_sigframe(CPUARMState *env,
89
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
90
- * table A7-9.
72
struct target_sve_context *sve = NULL;
91
- */
73
uint64_t extra_datap = 0;
92
-#define NEON_3R_VHADD 0
74
bool used_extra = false;
93
-#define NEON_3R_VQADD 1
75
- int vq = 0, sve_size = 0;
94
-#define NEON_3R_VRHADD 2
76
+ int sve_size = 0;
95
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
77
96
-#define NEON_3R_VHSUB 4
78
target_restore_general_frame(env, sf);
97
-#define NEON_3R_VQSUB 5
79
98
-#define NEON_3R_VCGT 6
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
99
-#define NEON_3R_VCGE 7
81
if (sve || size < sizeof(struct target_sve_context)) {
100
-#define NEON_3R_VSHL 8
82
goto err;
101
-#define NEON_3R_VQSHL 9
83
}
102
-#define NEON_3R_VRSHL 10
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
103
-#define NEON_3R_VQRSHL 11
85
- vq = sve_vq(env);
104
-#define NEON_3R_VMAX 12
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
105
-#define NEON_3R_VMIN 13
87
- if (size == sve_size) {
106
-#define NEON_3R_VABD 14
88
- sve = (struct target_sve_context *)ctx;
107
-#define NEON_3R_VABA 15
89
- break;
108
-#define NEON_3R_VADD_VSUB 16
109
-#define NEON_3R_VTST_VCEQ 17
110
-#define NEON_3R_VML 18 /* VMLA, VMLS */
111
-#define NEON_3R_VMUL 19
112
-#define NEON_3R_VPMAX 20
113
-#define NEON_3R_VPMIN 21
114
-#define NEON_3R_VQDMULH_VQRDMULH 22
115
-#define NEON_3R_VPADD_VQRDMLAH 23
116
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
117
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
118
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
119
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
120
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
121
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
122
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
123
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
124
-
125
-static const uint8_t neon_3r_sizes[] = {
126
- [NEON_3R_VHADD] = 0x7,
127
- [NEON_3R_VQADD] = 0xf,
128
- [NEON_3R_VRHADD] = 0x7,
129
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
130
- [NEON_3R_VHSUB] = 0x7,
131
- [NEON_3R_VQSUB] = 0xf,
132
- [NEON_3R_VCGT] = 0x7,
133
- [NEON_3R_VCGE] = 0x7,
134
- [NEON_3R_VSHL] = 0xf,
135
- [NEON_3R_VQSHL] = 0xf,
136
- [NEON_3R_VRSHL] = 0xf,
137
- [NEON_3R_VQRSHL] = 0xf,
138
- [NEON_3R_VMAX] = 0x7,
139
- [NEON_3R_VMIN] = 0x7,
140
- [NEON_3R_VABD] = 0x7,
141
- [NEON_3R_VABA] = 0x7,
142
- [NEON_3R_VADD_VSUB] = 0xf,
143
- [NEON_3R_VTST_VCEQ] = 0x7,
144
- [NEON_3R_VML] = 0x7,
145
- [NEON_3R_VMUL] = 0x7,
146
- [NEON_3R_VPMAX] = 0x7,
147
- [NEON_3R_VPMIN] = 0x7,
148
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
149
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
150
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
151
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
152
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
153
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
154
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
155
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
156
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
157
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
158
-};
159
-
160
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
161
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
162
* table A7-13.
163
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
164
rm_ofs = neon_reg_offset(rm, 0);
165
166
if ((insn & (1 << 23)) == 0) {
167
- /* Three register same length. */
168
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
169
- /* Catch invalid op and bad size combinations: UNDEF */
170
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
171
- return 1;
172
- }
173
- /* All insns of this form UNDEF for either this condition or the
174
- * superset of cases "Q==1"; we catch the latter later.
175
- */
176
- if (q && ((rd | rn | rm) & 1)) {
177
- return 1;
178
- }
179
- switch (op) {
180
- case NEON_3R_VFM_VQRDMLSH:
181
- if (!u) {
182
- /* VFM, VFMS */
183
- if (size == 1) {
184
- return 1;
185
- }
90
- }
186
- break;
187
- }
91
- }
188
- /* VQRDMLSH : handled by decodetree */
92
- goto err;
189
- return 1;
93
+ sve = (struct target_sve_context *)ctx;
190
-
94
+ sve_size = size;
191
- case NEON_3R_VADD_VSUB:
95
+ break;
192
- case NEON_3R_LOGIC:
96
193
- case NEON_3R_VMAX:
97
case TARGET_EXTRA_MAGIC:
194
- case NEON_3R_VMIN:
98
if (extra || size != sizeof(struct target_extra_context)) {
195
- case NEON_3R_VTST_VCEQ:
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
196
- case NEON_3R_VCGT:
100
}
197
- case NEON_3R_VCGE:
101
198
- case NEON_3R_VQADD:
102
/* SVE data, if present, overwrites FPSIMD data. */
199
- case NEON_3R_VQSUB:
103
- if (sve) {
200
- case NEON_3R_VMUL:
104
- target_restore_sve_record(env, sve, vq);
201
- case NEON_3R_VML:
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
202
- case NEON_3R_VSHL:
106
+ goto err;
203
- case NEON_3R_SHA:
107
}
204
- case NEON_3R_VHADD:
108
unlock_user(extra, extra_datap, 0);
205
- case NEON_3R_VRHADD:
109
return 0;
206
- case NEON_3R_VHSUB:
207
- case NEON_3R_VABD:
208
- case NEON_3R_VABA:
209
- case NEON_3R_VQSHL:
210
- case NEON_3R_VRSHL:
211
- case NEON_3R_VQRSHL:
212
- case NEON_3R_VPMAX:
213
- case NEON_3R_VPMIN:
214
- case NEON_3R_VPADD_VQRDMLAH:
215
- case NEON_3R_VQDMULH_VQRDMULH:
216
- case NEON_3R_FLOAT_ARITH:
217
- case NEON_3R_FLOAT_MULTIPLY:
218
- case NEON_3R_FLOAT_CMP:
219
- case NEON_3R_FLOAT_ACMP:
220
- case NEON_3R_FLOAT_MINMAX:
221
- case NEON_3R_FLOAT_MISC:
222
- /* Already handled by decodetree */
223
- return 1;
224
- }
225
-
226
- if (size == 3) {
227
- /* 64-bit element instructions: handled by decodetree */
228
- return 1;
229
- }
230
- switch (op) {
231
- case NEON_3R_VFM_VQRDMLSH:
232
- if (!dc_isar_feature(aa32_simdfmac, s)) {
233
- return 1;
234
- }
235
- break;
236
- default:
237
- break;
238
- }
239
-
240
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
241
-
242
- /* Elementwise. */
243
- tmp = neon_load_reg(rn, pass);
244
- tmp2 = neon_load_reg(rm, pass);
245
- switch (op) {
246
- case NEON_3R_VFM_VQRDMLSH:
247
- {
248
- /* VFMA, VFMS: fused multiply-add */
249
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
250
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
251
- if (size) {
252
- /* VFMS */
253
- gen_helper_vfp_negs(tmp, tmp);
254
- }
255
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
256
- tcg_temp_free_i32(tmp3);
257
- tcg_temp_free_ptr(fpstatus);
258
- break;
259
- }
260
- default:
261
- abort();
262
- }
263
- tcg_temp_free_i32(tmp2);
264
-
265
- neon_store_reg(rd, pass, tmp);
266
-
267
- } /* for pass */
268
- /* End of 3 register same size operations. */
269
+ /* Three register same length: handled by decodetree */
270
+ return 1;
271
} else if (insn & (1 << 4)) {
272
if ((insn & 0x00380080) != 0) {
273
/* Two registers and shift. */
274
--
110
--
275
2.20.1
111
2.25.1
276
277
diff view generated by jsdifflib
1
Convert the Neon integer VPMAX and VPMIN 3-reg-same insns to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
decodetree. These are 'pairwise' operations.
3
2
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
4
Restore SM and ZA state according to the records present on return.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-9-peter.maydell@linaro.org
7
---
10
---
8
target/arm/neon-dp.decode | 9 +++++
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
9
target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++++
12
1 file changed, 154 insertions(+), 13 deletions(-)
10
target/arm/translate.c | 17 +-------
11
3 files changed, 82 insertions(+), 15 deletions(-)
12
13
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
--- a/linux-user/aarch64/signal.c
16
+++ b/target/arm/neon-dp.decode
17
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
18
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
19
19
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
#define TARGET_SVE_SIG_FLAG_SM 1
20
21
21
+@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
22
+#define TARGET_ZA_MAGIC 0x54366345
22
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
23
+
23
+
24
+struct target_za_context {
24
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
25
+ struct target_aarch64_ctx head;
25
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
26
+ uint16_t vl;
26
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
27
+ uint16_t reserved[3];
27
@@ -XXX,XX +XXX,XX @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
28
+ /* The actual ZA data immediately follows. */
28
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
29
+};
29
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
30
+
30
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
31
+VPMAX_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
32
+VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
33
+
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
34
+VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
35
+VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
36
+
37
+
37
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
38
struct target_rt_sigframe {
38
39
struct target_siginfo info;
39
SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
40
struct target_ucontext uc;
40
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
41
index XXXXXXX..XXXXXXX 100644
42
}
42
--- a/target/arm/translate-neon.inc.c
43
43
+++ b/target/arm/translate-neon.inc.c
44
static void target_setup_sve_record(struct target_sve_context *sve,
44
@@ -XXX,XX +XXX,XX @@ DO_3SAME_32_ENV(VQSHL_S, qshl_s)
45
- CPUARMState *env, int vq, int size)
45
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
46
+ CPUARMState *env, int size)
46
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
47
{
47
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
48
- int i, j;
48
+
49
+ int i, j, vq = sve_vq(env);
49
+static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
50
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
55
}
56
57
+static void target_setup_za_record(struct target_za_context *za,
58
+ CPUARMState *env, int size)
50
+{
59
+{
51
+ /* Operations handled pairwise 32 bits at a time */
60
+ int vq = sme_vq(env);
52
+ TCGv_i32 tmp, tmp2, tmp3;
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
53
+
62
+ int i, j;
54
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
63
+
64
+ memset(za, 0, sizeof(*za));
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
66
+ __put_user(size, &za->head.size);
67
+ __put_user(vl, &za->vl);
68
+
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
70
+ return;
71
+ }
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
73
+
74
+ /*
75
+ * Note that ZA vectors are stored as a byte stream,
76
+ * with each byte element at a subsequent address.
77
+ */
78
+ for (i = 0; i < vl; ++i) {
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
80
+ for (j = 0; j < vq * 2; ++j) {
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
82
+ }
83
+ }
84
+}
85
+
86
static void target_restore_general_frame(CPUARMState *env,
87
struct target_rt_sigframe *sf)
88
{
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
90
91
static bool target_restore_sve_record(CPUARMState *env,
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
112
113
- __get_user(vl, &sve->vl);
114
- vq = sve_vq(env);
115
+ /*
116
+ * Note that we cannot use sve_vq() because that depends on the
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
133
return true;
134
}
135
136
+static bool target_restore_za_record(CPUARMState *env,
137
+ struct target_za_context *za,
138
+ int size, int *svcr)
139
+{
140
+ int i, j, vl, vq;
141
+
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
55
+ return false;
143
+ return false;
56
+ }
144
+ }
57
+
145
+
58
+ /* UNDEF accesses to D16-D31 if they don't exist. */
146
+ __get_user(vl, &za->vl);
59
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
147
+ vq = sme_vq(env);
60
+ ((a->vd | a->vn | a->vm) & 0x10)) {
148
+
149
+ /* Reject mismatched VL. */
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
61
+ return false;
151
+ return false;
62
+ }
152
+ }
63
+
153
+
64
+ if (a->size == 3) {
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
156
+ return true;
157
+ }
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
65
+ return false;
161
+ return false;
66
+ }
162
+ }
67
+
163
+
68
+ if (!vfp_access_check(s)) {
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
69
+ return true;
165
+
70
+ }
166
+ for (i = 0; i < vl; ++i) {
71
+
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
72
+ assert(a->q == 0); /* enforced by decode patterns */
168
+ for (j = 0; j < vq * 2; ++j) {
73
+
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
74
+ /*
170
+ }
75
+ * Note that we have to be careful not to clobber the source operands
171
+ }
76
+ * in the "vm == vd" case by storing the result of the first pass too
77
+ * early. Since Q is 0 there are always just two passes, so instead
78
+ * of a complicated loop over each pass we just unroll.
79
+ */
80
+ tmp = neon_load_reg(a->vn, 0);
81
+ tmp2 = neon_load_reg(a->vn, 1);
82
+ fn(tmp, tmp, tmp2);
83
+ tcg_temp_free_i32(tmp2);
84
+
85
+ tmp3 = neon_load_reg(a->vm, 0);
86
+ tmp2 = neon_load_reg(a->vm, 1);
87
+ fn(tmp3, tmp3, tmp2);
88
+ tcg_temp_free_i32(tmp2);
89
+
90
+ neon_store_reg(a->vd, 0, tmp);
91
+ neon_store_reg(a->vd, 1, tmp3);
92
+ return true;
172
+ return true;
93
+}
173
+}
94
+
174
+
95
+#define DO_3SAME_PAIR(INSN, func) \
175
static int target_restore_sigframe(CPUARMState *env,
96
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
176
struct target_rt_sigframe *sf)
97
+ { \
177
{
98
+ static NeonGenTwoOpFn * const fns[] = { \
178
struct target_aarch64_ctx *ctx, *extra = NULL;
99
+ gen_helper_neon_##func##8, \
179
struct target_fpsimd_context *fpsimd = NULL;
100
+ gen_helper_neon_##func##16, \
180
struct target_sve_context *sve = NULL;
101
+ gen_helper_neon_##func##32, \
181
+ struct target_za_context *za = NULL;
102
+ }; \
182
uint64_t extra_datap = 0;
103
+ if (a->size > 2) { \
183
bool used_extra = false;
104
+ return false; \
184
int sve_size = 0;
105
+ } \
185
+ int za_size = 0;
106
+ return do_3same_pair(s, a, fns[a->size]); \
186
+ int svcr = 0;
107
+ }
187
108
+
188
target_restore_general_frame(env, sf);
109
+/* 32-bit pairwise ops end up the same as the elementwise versions. */
189
110
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
111
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
191
sve_size = size;
112
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
113
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
114
+
115
+DO_3SAME_PAIR(VPMAX_S, pmax_s)
116
+DO_3SAME_PAIR(VPMIN_S, pmin_s)
117
+DO_3SAME_PAIR(VPMAX_U, pmax_u)
118
+DO_3SAME_PAIR(VPMIN_U, pmin_u)
119
diff --git a/target/arm/translate.c b/target/arm/translate.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/translate.c
122
+++ b/target/arm/translate.c
123
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
124
}
125
}
126
127
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
128
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
129
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
130
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
131
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
132
-
133
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
134
switch ((size << 1) | u) { \
135
case 0: \
136
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
137
case NEON_3R_VQSHL:
138
case NEON_3R_VRSHL:
139
case NEON_3R_VQRSHL:
140
+ case NEON_3R_VPMAX:
141
+ case NEON_3R_VPMIN:
142
/* Already handled by decodetree */
143
return 1;
144
}
145
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
146
pairwise = 0;
147
switch (op) {
148
case NEON_3R_VPADD_VQRDMLAH:
149
- case NEON_3R_VPMAX:
150
- case NEON_3R_VPMIN:
151
pairwise = 1;
152
break;
192
break;
153
case NEON_3R_FLOAT_ARITH:
193
154
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
194
+ case TARGET_ZA_MAGIC:
155
tmp2 = neon_load_reg(rm, pass);
195
+ if (za || size < sizeof(struct target_za_context)) {
156
}
196
+ goto err;
157
switch (op) {
197
+ }
158
- break;
198
+ za = (struct target_za_context *)ctx;
159
- case NEON_3R_VPMAX:
199
+ za_size = size;
160
- GEN_NEON_INTEGER_OP(pmax);
200
+ break;
161
- break;
201
+
162
- case NEON_3R_VPMIN:
202
case TARGET_EXTRA_MAGIC:
163
- GEN_NEON_INTEGER_OP(pmin);
203
if (extra || size != sizeof(struct target_extra_context)) {
164
- break;
204
goto err;
165
case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
166
if (!u) { /* VQDMULH */
206
}
167
switch (size) {
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
168
--
288
--
169
2.20.1
289
2.25.1
170
171
diff view generated by jsdifflib
1
Convert the Neon VQRDMLAH and VQRDMLSH insns in the 3-reg-same group
1
From: Richard Henderson <richard.henderson@linaro.org>
2
to decodetree. These don't use do_3same() because they want to
3
operate on VFP double registers, whose offsets are different from the
4
neon_reg_offset() calculations do_3same does.
5
2
3
Add "sve" to the sve prctl functions, to distinguish
4
them from the coming "sme" prctls with similar names.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20200512163904.10918-2-peter.maydell@linaro.org
9
---
10
---
10
target/arm/neon-dp.decode | 3 +++
11
linux-user/aarch64/target_prctl.h | 8 ++++----
11
target/arm/translate-neon.inc.c | 15 +++++++++++++++
12
linux-user/syscall.c | 12 ++++++------
12
target/arm/translate.c | 14 ++------------
13
2 files changed, 10 insertions(+), 10 deletions(-)
13
3 files changed, 20 insertions(+), 12 deletions(-)
14
14
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/target/arm/neon-dp.decode
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
19
@@ -XXX,XX +XXX,XX @@
20
20
#ifndef AARCH64_TARGET_PRCTL_H
21
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
21
#define AARCH64_TARGET_PRCTL_H
22
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
22
23
+
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
24
+VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
+VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
25
{
26
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
26
ARMCPU *cpu = env_archcpu(env);
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
29
}
30
return -TARGET_EINVAL;
31
}
32
-#define do_prctl_get_vl do_prctl_get_vl
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
27
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-neon.inc.c
51
--- a/linux-user/syscall.c
29
+++ b/target/arm/translate-neon.inc.c
52
+++ b/linux-user/syscall.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
31
}
54
#ifndef do_prctl_set_fp_mode
32
return do_3same(s, a, gen_VMUL_p_3s);
55
#define do_prctl_set_fp_mode do_prctl_inval1
33
}
56
#endif
34
+
57
-#ifndef do_prctl_get_vl
35
+#define DO_VQRDMLAH(INSN, FUNC) \
58
-#define do_prctl_get_vl do_prctl_inval0
36
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
59
+#ifndef do_prctl_sve_get_vl
37
+ { \
60
+#define do_prctl_sve_get_vl do_prctl_inval0
38
+ if (!dc_isar_feature(aa32_rdm, s)) { \
61
#endif
39
+ return false; \
62
-#ifndef do_prctl_set_vl
40
+ } \
63
-#define do_prctl_set_vl do_prctl_inval1
41
+ if (a->size != 1 && a->size != 2) { \
64
+#ifndef do_prctl_sve_set_vl
42
+ return false; \
65
+#define do_prctl_sve_set_vl do_prctl_inval1
43
+ } \
66
#endif
44
+ return do_3same(s, a, FUNC); \
67
#ifndef do_prctl_reset_keys
45
+ }
68
#define do_prctl_reset_keys do_prctl_inval1
46
+
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
47
+DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
70
case PR_SET_FP_MODE:
48
+DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
71
return do_prctl_set_fp_mode(env, arg2);
49
diff --git a/target/arm/translate.c b/target/arm/translate.c
72
case PR_SVE_GET_VL:
50
index XXXXXXX..XXXXXXX 100644
73
- return do_prctl_get_vl(env);
51
--- a/target/arm/translate.c
74
+ return do_prctl_sve_get_vl(env);
52
+++ b/target/arm/translate.c
75
case PR_SVE_SET_VL:
53
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
76
- return do_prctl_set_vl(env, arg2);
54
if (!u) {
77
+ return do_prctl_sve_set_vl(env, arg2);
55
break; /* VPADD */
78
case PR_PAC_RESET_KEYS:
56
}
79
if (arg3 || arg4 || arg5) {
57
- /* VQRDMLAH */
80
return -TARGET_EINVAL;
58
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
59
- gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
60
- vec_size, vec_size);
61
- return 0;
62
- }
63
+ /* VQRDMLAH : handled by decodetree */
64
return 1;
65
66
case NEON_3R_VFM_VQRDMLSH:
67
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
68
}
69
break;
70
}
71
- /* VQRDMLSH */
72
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
73
- gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
74
- vec_size, vec_size);
75
- return 0;
76
- }
77
+ /* VQRDMLSH : handled by decodetree */
78
return 1;
79
80
case NEON_3R_VABD:
81
--
81
--
82
2.20.1
82
2.25.1
83
84
diff view generated by jsdifflib
1
GDB's remote protocol requires M-profile cores to use the feature
1
From: Richard Henderson <richard.henderson@linaro.org>
2
name 'org.gnu.gdb.arm.m-profile' instead of the 'org.gnu.gdb.arm.core'
3
feature used for A- and R-profile cores. We weren't doing this, which
4
meant GDB treated our M-profile cores like A-profile ones. This mostly
5
doesn't matter, but for instance means that it doesn't correctly
6
handle backtraces where an M-profile exception frame is involved.
7
2
8
Ship a copy of GDB's arm-m-profile.xml and use it on the M-profile
3
These prctls set the Streaming SVE vector length, which may
9
cores. The integer registers have the same offsets as the
4
be completely different from the Normal SVE vector length.
10
arm-core.xml, but register 25 is the M-profile XPSR rather than the
11
A-profile CPSR, so we need to update arm_cpu_gdb_read_register() and
12
arm_cpu_gdb_write_register() to handle XPSR reads and writes.
13
5
14
Fixes: https://bugs.launchpad.net/qemu/+bug/1877136
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
17
Message-id: 20200507134755.13997-1-peter.maydell@linaro.org
18
---
10
---
19
configure | 4 ++--
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
20
target/arm/cpu_tcg.c | 1 +
12
linux-user/syscall.c | 16 +++++++++
21
target/arm/gdbstub.c | 22 ++++++++++++++++++----
13
2 files changed, 70 insertions(+)
22
gdb-xml/arm-m-profile.xml | 27 +++++++++++++++++++++++++++
23
4 files changed, 48 insertions(+), 6 deletions(-)
24
create mode 100644 gdb-xml/arm-m-profile.xml
25
14
26
diff --git a/configure b/configure
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
27
index XXXXXXX..XXXXXXX 100755
28
--- a/configure
29
+++ b/configure
30
@@ -XXX,XX +XXX,XX @@ case "$target_name" in
31
TARGET_SYSTBL_ABI=common,oabi
32
bflt="yes"
33
mttcg="yes"
34
- gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
35
+ gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
36
;;
37
aarch64|aarch64_be)
38
TARGET_ARCH=aarch64
39
TARGET_BASE_ARCH=arm
40
bflt="yes"
41
mttcg="yes"
42
- gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
43
+ gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
44
;;
45
cris)
46
;;
47
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
48
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/cpu_tcg.c
17
--- a/linux-user/aarch64/target_prctl.h
50
+++ b/target/arm/cpu_tcg.c
18
+++ b/linux-user/aarch64/target_prctl.h
51
@@ -XXX,XX +XXX,XX @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
{
21
ARMCPU *cpu = env_archcpu(env);
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
return sve_vq(env) * 16;
25
}
26
return -TARGET_EINVAL;
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
52
#endif
101
#endif
53
102
+#ifndef PR_SME_SET_VL
54
cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt;
103
+# define PR_SME_SET_VL 63
55
+ cc->gdb_core_xml_file = "arm-m-profile.xml";
104
+# define PR_SME_GET_VL 64
56
}
105
+# define PR_SME_VL_LEN_MASK 0xffff
57
106
+# define PR_SME_VL_INHERIT (1 << 17)
58
static const ARMCPUInfo arm_tcg_cpus[] = {
107
+#endif
59
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
108
60
index XXXXXXX..XXXXXXX 100644
109
#include "target_prctl.h"
61
--- a/target/arm/gdbstub.c
110
62
+++ b/target/arm/gdbstub.c
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
63
@@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
112
#ifndef do_prctl_set_unalign
64
}
113
#define do_prctl_set_unalign do_prctl_inval1
65
return gdb_get_reg32(mem_buf, 0);
114
#endif
66
case 25:
115
+#ifndef do_prctl_sme_get_vl
67
- /* CPSR */
116
+#define do_prctl_sme_get_vl do_prctl_inval0
68
- return gdb_get_reg32(mem_buf, cpsr_read(env));
117
+#endif
69
+ /* CPSR, or XPSR for M-profile */
118
+#ifndef do_prctl_sme_set_vl
70
+ if (arm_feature(env, ARM_FEATURE_M)) {
119
+#define do_prctl_sme_set_vl do_prctl_inval1
71
+ return gdb_get_reg32(mem_buf, xpsr_read(env));
120
+#endif
72
+ } else {
121
73
+ return gdb_get_reg32(mem_buf, cpsr_read(env));
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
74
+ }
123
abi_long arg3, abi_long arg4, abi_long arg5)
75
}
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
76
/* Unknown register. */
125
return do_prctl_sve_get_vl(env);
77
return 0;
126
case PR_SVE_SET_VL:
78
@@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
127
return do_prctl_sve_set_vl(env, arg2);
79
}
128
+ case PR_SME_GET_VL:
80
return 4;
129
+ return do_prctl_sme_get_vl(env);
81
case 25:
130
+ case PR_SME_SET_VL:
82
- /* CPSR */
131
+ return do_prctl_sme_set_vl(env, arg2);
83
- cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
132
case PR_PAC_RESET_KEYS:
84
+ /* CPSR, or XPSR for M-profile */
133
if (arg3 || arg4 || arg5) {
85
+ if (arm_feature(env, ARM_FEATURE_M)) {
134
return -TARGET_EINVAL;
86
+ /*
87
+ * Don't allow writing to XPSR.Exception as it can cause
88
+ * a transition into or out of handler mode (it's not
89
+ * writeable via the MSR insn so this is a reasonable
90
+ * restriction). Other fields are safe to update.
91
+ */
92
+ xpsr_write(env, tmp, ~XPSR_EXCP);
93
+ } else {
94
+ cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
95
+ }
96
return 4;
97
}
98
/* Unknown register. */
99
diff --git a/gdb-xml/arm-m-profile.xml b/gdb-xml/arm-m-profile.xml
100
new file mode 100644
101
index XXXXXXX..XXXXXXX
102
--- /dev/null
103
+++ b/gdb-xml/arm-m-profile.xml
104
@@ -XXX,XX +XXX,XX @@
105
+<?xml version="1.0"?>
106
+<!-- Copyright (C) 2010-2020 Free Software Foundation, Inc.
107
+
108
+ Copying and distribution of this file, with or without modification,
109
+ are permitted in any medium without royalty provided the copyright
110
+ notice and this notice are preserved. -->
111
+
112
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
113
+<feature name="org.gnu.gdb.arm.m-profile">
114
+ <reg name="r0" bitsize="32"/>
115
+ <reg name="r1" bitsize="32"/>
116
+ <reg name="r2" bitsize="32"/>
117
+ <reg name="r3" bitsize="32"/>
118
+ <reg name="r4" bitsize="32"/>
119
+ <reg name="r5" bitsize="32"/>
120
+ <reg name="r6" bitsize="32"/>
121
+ <reg name="r7" bitsize="32"/>
122
+ <reg name="r8" bitsize="32"/>
123
+ <reg name="r9" bitsize="32"/>
124
+ <reg name="r10" bitsize="32"/>
125
+ <reg name="r11" bitsize="32"/>
126
+ <reg name="r12" bitsize="32"/>
127
+ <reg name="sp" bitsize="32" type="data_ptr"/>
128
+ <reg name="lr" bitsize="32"/>
129
+ <reg name="pc" bitsize="32" type="code_ptr"/>
130
+ <reg name="xpsr" bitsize="32" regnum="25"/>
131
+</feature>
132
--
135
--
133
2.20.1
136
2.25.1
134
135
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add APEI/GHES detailed design document
3
There's no reason to set CPACR_EL1.ZEN if SVE disabled.
4
4
5
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
8
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
9
Message-id: 20200512030609.19593-4-gengdongjiu@huawei.com
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
9
---
12
docs/specs/acpi_hest_ghes.rst | 110 ++++++++++++++++++++++++++++++++++
10
target/arm/cpu.c | 7 +++----
13
docs/specs/index.rst | 1 +
11
1 file changed, 3 insertions(+), 4 deletions(-)
14
2 files changed, 111 insertions(+)
15
create mode 100644 docs/specs/acpi_hest_ghes.rst
16
12
17
diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/docs/specs/acpi_hest_ghes.rst
22
@@ -XXX,XX +XXX,XX @@
23
+APEI tables generating and CPER record
24
+======================================
25
+
26
+..
27
+ Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
28
+
29
+ This work is licensed under the terms of the GNU GPL, version 2 or later.
30
+ See the COPYING file in the top-level directory.
31
+
32
+Design Details
33
+--------------
34
+
35
+::
36
+
37
+ etc/acpi/tables etc/hardware_errors
38
+ ==================== ===============================
39
+ + +--------------------------+ +----------------------------+
40
+ | | HEST | +--------->| error_block_address1 |------+
41
+ | +--------------------------+ | +----------------------------+ |
42
+ | | GHES1 | | +------->| error_block_address2 |------+-+
43
+ | +--------------------------+ | | +----------------------------+ | |
44
+ | | ................. | | | | .............. | | |
45
+ | | error_status_address-----+-+ | -----------------------------+ | |
46
+ | | ................. | | +--->| error_block_addressN |------+-+---+
47
+ | | read_ack_register--------+-+ | | +----------------------------+ | | |
48
+ | | read_ack_preserve | +-+---+--->| read_ack_register1 | | | |
49
+ | | read_ack_write | | | +----------------------------+ | | |
50
+ + +--------------------------+ | +-+--->| read_ack_register2 | | | |
51
+ | | GHES2 | | | | +----------------------------+ | | |
52
+ + +--------------------------+ | | | | ............. | | | |
53
+ | | ................. | | | | +----------------------------+ | | |
54
+ | | error_status_address-----+---+ | | +->| read_ack_registerN | | | |
55
+ | | ................. | | | | +----------------------------+ | | |
56
+ | | read_ack_register--------+-----+ | | |Generic Error Status Block 1|<-----+ | |
57
+ | | read_ack_preserve | | | |-+------------------------+-+ | |
58
+ | | read_ack_write | | | | | CPER | | | |
59
+ + +--------------------------| | | | | CPER | | | |
60
+ | | ............... | | | | | .... | | | |
61
+ + +--------------------------+ | | | | CPER | | | |
62
+ | | GHESN | | | |-+------------------------+-| | |
63
+ + +--------------------------+ | | |Generic Error Status Block 2|<-------+ |
64
+ | | ................. | | | |-+------------------------+-+ |
65
+ | | error_status_address-----+-------+ | | | CPER | | |
66
+ | | ................. | | | | CPER | | |
67
+ | | read_ack_register--------+---------+ | | .... | | |
68
+ | | read_ack_preserve | | | CPER | | |
69
+ | | read_ack_write | +-+------------------------+-+ |
70
+ + +--------------------------+ | .......... | |
71
+ |----------------------------+ |
72
+ |Generic Error Status Block N |<----------+
73
+ |-+-------------------------+-+
74
+ | | CPER | |
75
+ | | CPER | |
76
+ | | .... | |
77
+ | | CPER | |
78
+ +-+-------------------------+-+
79
+
80
+
81
+(1) QEMU generates the ACPI HEST table. This table goes in the current
82
+ "etc/acpi/tables" fw_cfg blob. Each error source has different
83
+ notification types.
84
+
85
+(2) A new fw_cfg blob called "etc/hardware_errors" is introduced. QEMU
86
+ also needs to populate this blob. The "etc/hardware_errors" fw_cfg blob
87
+ contains an address registers table and an Error Status Data Block table.
88
+
89
+(3) The address registers table contains N Error Block Address entries
90
+ and N Read Ack Register entries. The size of each entry is 8 bytes.
91
+ The Error Status Data Block table contains N Error Status Data Block
92
+ entries. The size for each entry is 4096(0x1000) bytes. The total size
93
+ for the "etc/hardware_errors" fw_cfg blob is (N * 8 * 2 + N * 4096) bytes.
94
+ N is the number of the kinds of hardware error sources.
95
+
96
+(4) QEMU generates the ACPI linker/loader script for the firmware. The
97
+ firmware pre-allocates memory for "etc/acpi/tables", "etc/hardware_errors"
98
+ and copies blob contents there.
99
+
100
+(5) QEMU generates N ADD_POINTER commands, which patch addresses in the
101
+ "error_status_address" fields of the HEST table with a pointer to the
102
+ corresponding "address registers" in the "etc/hardware_errors" blob.
103
+
104
+(6) QEMU generates N ADD_POINTER commands, which patch addresses in the
105
+ "read_ack_register" fields of the HEST table with a pointer to the
106
+ corresponding "read_ack_register" within the "etc/hardware_errors" blob.
107
+
108
+(7) QEMU generates N ADD_POINTER commands for the firmware, which patch
109
+ addresses in the "error_block_address" fields with a pointer to the
110
+ respective "Error Status Data Block" in the "etc/hardware_errors" blob.
111
+
112
+(8) QEMU defines a third and write-only fw_cfg blob which is called
113
+ "etc/hardware_errors_addr". Through that blob, the firmware can send back
114
+ the guest-side allocation addresses to QEMU. The "etc/hardware_errors_addr"
115
+ blob contains an 8-byte entry. QEMU generates a single WRITE_POINTER command
116
+ for the firmware. The firmware will write back the start address of
117
+ "etc/hardware_errors" blob to the fw_cfg file "etc/hardware_errors_addr".
118
+
119
+(9) When QEMU gets a SIGBUS from the kernel, QEMU writes the CPER into the corresponding
120
+ "Error Status Data Block", guest memory, and then injects platform specific
121
+ interrupt (in case of arm/virt machine it's Synchronous External Abort) as a
122
+ notification which is necessary for notifying the guest.
123
+
124
+(10) This notification (in virtual hardware) will be handled by the guest
125
+ kernel; on receiving the notification, the guest APEI driver can read the CPER error
126
+ and take appropriate action.
127
+
128
+(11) kvm_arch_on_sigbus_vcpu() uses source_id as index in "etc/hardware_errors" to
129
+ find out "Error Status Data Block" entry corresponding to error source. So supported
130
+ source_id values should be assigned here and not be changed afterwards to make sure
131
+ that guest will write error into expected "Error Status Data Block" even if guest was
132
+ migrated to a newer QEMU.
133
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
134
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
135
--- a/docs/specs/index.rst
15
--- a/target/arm/cpu.c
136
+++ b/docs/specs/index.rst
16
+++ b/target/arm/cpu.c
137
@@ -XXX,XX +XXX,XX @@ Contents:
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
138
ppc-spapr-xive
18
/* and to the FP/Neon instructions */
139
acpi_hw_reduced_hotplug
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
140
tpm
20
CPACR_EL1, FPEN, 3);
141
+ acpi_hest_ghes
21
- /* and to the SVE instructions */
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
- CPACR_EL1, ZEN, 3);
24
- /* with reasonable vector length */
25
+ /* and to the SVE instructions, with default vector length */
26
if (cpu_isar_feature(aa64_sve, cpu)) {
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
+ CPACR_EL1, ZEN, 3);
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
30
}
31
/*
142
--
32
--
143
2.20.1
33
2.25.1
144
145
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Rather than perform the argument swap during code generation,
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
perform it during decode. This means it doesn't have to be
5
special cased later, and we can share code with aarch64 code
6
generation. Hopefully the decode comment addresses any confusion
7
that might arise in between.
8
4
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-9-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
9
---
14
target/arm/neon-dp.decode | 17 +++++++++++++++--
10
target/arm/cpu.c | 11 +++++++++++
15
target/arm/translate-neon.inc.c | 3 +--
11
1 file changed, 11 insertions(+)
16
2 files changed, 16 insertions(+), 4 deletions(-)
17
12
18
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
19
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/neon-dp.decode
15
--- a/target/arm/cpu.c
21
+++ b/target/arm/neon-dp.decode
16
+++ b/target/arm/cpu.c
22
@@ -XXX,XX +XXX,XX @@ VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
23
VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
18
CPACR_EL1, ZEN, 3);
24
VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
25
20
}
26
-VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
27
-VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
28
+# The _rev suffix indicates that Vn and Vm are reversed. This is
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
29
+# the case for shifts. In the Arm ARM these insns are documented
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
30
+# with the Vm and Vn fields in their usual places, but in the
25
+ CPACR_EL1, SMEN, 3);
31
+# assembly the operands are listed "backwards", ie in the order
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
32
+# Dd, Dm, Dn where other insns use Dd, Dn, Dm. For QEMU we choose
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
33
+# to consider Vm and Vn as being in different fields in the insn,
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
34
+# which allows us to avoid special-casing shifts in the trans_
29
+ SMCR, FA64, 1);
35
+# function code. We would otherwise need to manually swap the operands
30
+ }
36
+# over to call Neon helper functions that are shared with AArch64,
31
+ }
37
+# which does not have this odd reversed-operand situation.
32
/*
38
+@3same_rev .... ... . . . size:2 .... .... .... . q:1 . . .... \
33
* Enable 48-bit address space (TODO: take reserved_va into account).
39
+ &3same vn=%vm_dp vm=%vn_dp vd=%vd_dp
34
* Enable TBI0 but not TBI1.
40
+
41
+VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
42
+VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
43
44
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
45
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
51
uint32_t rn_ofs, uint32_t rm_ofs, \
52
uint32_t oprsz, uint32_t maxsz) \
53
{ \
54
- /* Note the operation is vshl vd,vm,vn */ \
55
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \
56
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
57
oprsz, maxsz, &OPARRAY[vece]); \
58
} \
59
DO_3SAME(INSN, gen_##INSN##_3s)
60
--
35
--
61
2.20.1
36
2.25.1
62
63
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
3
Pass a pointer directly to env->vfp.qc[0], rather than env.
4
This will allow SVE2, which does not modify QC, to pass a
5
pointer to dummy storage.
6
7
Change the return type of inl_qrdml.h_s16 to match the
8
sense of the operation: signed.
9
2
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20200513163245.17915-14-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
7
---
15
target/arm/translate.c | 18 ++++++++---
8
linux-user/elfload.c | 20 ++++++++++++++++++++
16
target/arm/vec_helper.c | 70 +++++++++++++++++++++++------------------
9
1 file changed, 20 insertions(+)
17
2 files changed, 54 insertions(+), 34 deletions(-)
18
10
19
diff --git a/target/arm/translate.c b/target/arm/translate.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
20
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.c
13
--- a/linux-user/elfload.c
22
+++ b/target/arm/translate.c
14
+++ b/linux-user/elfload.c
23
@@ -XXX,XX +XXX,XX @@ static const uint8_t neon_2rm_sizes[] = {
15
@@ -XXX,XX +XXX,XX @@ enum {
24
[NEON_2RM_VCVT_UF] = 0x4,
16
ARM_HWCAP2_A64_RNG = 1 << 16,
17
ARM_HWCAP2_A64_BTI = 1 << 17,
18
ARM_HWCAP2_A64_MTE = 1 << 18,
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
25
};
31
};
26
32
27
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
33
#define ELF_HWCAP get_elf_hwcap()
28
+ uint32_t opr_sz, uint32_t max_sz,
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
29
+ gen_helper_gvec_3_ptr *fn)
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
30
+{
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
31
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
32
+
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
33
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
39
+ ARM_HWCAP2_A64_SME_F32F32 |
34
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
40
+ ARM_HWCAP2_A64_SME_B16F32 |
35
+ opr_sz, max_sz, 0, fn);
41
+ ARM_HWCAP2_A64_SME_F16F32 |
36
+ tcg_temp_free_ptr(qc_ptr);
42
+ ARM_HWCAP2_A64_SME_I8I32));
37
+}
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
38
+
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
39
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
40
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
46
41
{
47
return hwcaps;
42
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
43
gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
44
};
45
tcg_debug_assert(vece >= 1 && vece <= 2);
46
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
47
- opr_sz, max_sz, 0, fns[vece - 1]);
48
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
49
}
50
51
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
52
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
53
gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
54
};
55
tcg_debug_assert(vece >= 1 && vece <= 2);
56
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
57
- opr_sz, max_sz, 0, fns[vece - 1]);
58
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
59
}
60
61
#define GEN_CMP0(NAME, COND) \
62
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/vec_helper.c
65
+++ b/target/arm/vec_helper.c
66
@@ -XXX,XX +XXX,XX @@
67
#define H4(x) (x)
68
#endif
69
70
-#define SET_QC() env->vfp.qc[0] = 1
71
-
72
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
73
{
74
uint64_t *d = vd + opr_sz;
75
@@ -XXX,XX +XXX,XX @@ static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
76
}
77
78
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
79
-static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
80
- int16_t src2, int16_t src3)
81
+static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
82
+ int16_t src3, uint32_t *sat)
83
{
84
/* Simplify:
85
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
86
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
87
ret = ((int32_t)src3 << 15) + ret + (1 << 14);
88
ret >>= 15;
89
if (ret != (int16_t)ret) {
90
- SET_QC();
91
+ *sat = 1;
92
ret = (ret < 0 ? -0x8000 : 0x7fff);
93
}
94
return ret;
95
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
96
uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
97
uint32_t src2, uint32_t src3)
98
{
99
- uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
100
- uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
101
+ uint32_t *sat = &env->vfp.qc[0];
102
+ uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
103
+ uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
104
return deposit32(e1, 16, 16, e2);
105
}
106
107
void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
108
- void *ve, uint32_t desc)
109
+ void *vq, uint32_t desc)
110
{
111
uintptr_t opr_sz = simd_oprsz(desc);
112
int16_t *d = vd;
113
int16_t *n = vn;
114
int16_t *m = vm;
115
- CPUARMState *env = ve;
116
uintptr_t i;
117
118
for (i = 0; i < opr_sz / 2; ++i) {
119
- d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
120
+ d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
121
}
122
clear_tail(d, opr_sz, simd_maxsz(desc));
123
}
124
125
/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
126
-static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
127
- int16_t src2, int16_t src3)
128
+static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
129
+ int16_t src3, uint32_t *sat)
130
{
131
/* Similarly, using subtraction:
132
* = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
133
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
134
ret = ((int32_t)src3 << 15) - ret + (1 << 14);
135
ret >>= 15;
136
if (ret != (int16_t)ret) {
137
- SET_QC();
138
+ *sat = 1;
139
ret = (ret < 0 ? -0x8000 : 0x7fff);
140
}
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
143
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
144
uint32_t src2, uint32_t src3)
145
{
146
- uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
147
- uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
148
+ uint32_t *sat = &env->vfp.qc[0];
149
+ uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
150
+ uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
151
return deposit32(e1, 16, 16, e2);
152
}
153
154
void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
155
- void *ve, uint32_t desc)
156
+ void *vq, uint32_t desc)
157
{
158
uintptr_t opr_sz = simd_oprsz(desc);
159
int16_t *d = vd;
160
int16_t *n = vn;
161
int16_t *m = vm;
162
- CPUARMState *env = ve;
163
uintptr_t i;
164
165
for (i = 0; i < opr_sz / 2; ++i) {
166
- d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
167
+ d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
168
}
169
clear_tail(d, opr_sz, simd_maxsz(desc));
170
}
171
172
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
173
-uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
174
- int32_t src2, int32_t src3)
175
+static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
176
+ int32_t src3, uint32_t *sat)
177
{
178
/* Simplify similarly to int_qrdmlah_s16 above. */
179
int64_t ret = (int64_t)src1 * src2;
180
ret = ((int64_t)src3 << 31) + ret + (1 << 30);
181
ret >>= 31;
182
if (ret != (int32_t)ret) {
183
- SET_QC();
184
+ *sat = 1;
185
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
186
}
187
return ret;
188
}
189
190
+uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
191
+ int32_t src2, int32_t src3)
192
+{
193
+ uint32_t *sat = &env->vfp.qc[0];
194
+ return inl_qrdmlah_s32(src1, src2, src3, sat);
195
+}
196
+
197
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
198
- void *ve, uint32_t desc)
199
+ void *vq, uint32_t desc)
200
{
201
uintptr_t opr_sz = simd_oprsz(desc);
202
int32_t *d = vd;
203
int32_t *n = vn;
204
int32_t *m = vm;
205
- CPUARMState *env = ve;
206
uintptr_t i;
207
208
for (i = 0; i < opr_sz / 4; ++i) {
209
- d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
210
+ d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
211
}
212
clear_tail(d, opr_sz, simd_maxsz(desc));
213
}
214
215
/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
216
-uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
217
- int32_t src2, int32_t src3)
218
+static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
219
+ int32_t src3, uint32_t *sat)
220
{
221
/* Simplify similarly to int_qrdmlsh_s16 above. */
222
int64_t ret = (int64_t)src1 * src2;
223
ret = ((int64_t)src3 << 31) - ret + (1 << 30);
224
ret >>= 31;
225
if (ret != (int32_t)ret) {
226
- SET_QC();
227
+ *sat = 1;
228
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
229
}
230
return ret;
231
}
232
233
+uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
234
+ int32_t src2, int32_t src3)
235
+{
236
+ uint32_t *sat = &env->vfp.qc[0];
237
+ return inl_qrdmlsh_s32(src1, src2, src3, sat);
238
+}
239
+
240
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
241
- void *ve, uint32_t desc)
242
+ void *vq, uint32_t desc)
243
{
244
uintptr_t opr_sz = simd_oprsz(desc);
245
int32_t *d = vd;
246
int32_t *n = vn;
247
int32_t *m = vm;
248
- CPUARMState *env = ve;
249
uintptr_t i;
250
251
for (i = 0; i < opr_sz / 4; ++i) {
252
- d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
253
+ d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
254
}
255
clear_tail(d, opr_sz, simd_maxsz(desc));
256
}
48
}
257
--
49
--
258
2.20.1
50
2.25.1
259
260
diff view generated by jsdifflib