1
Mostly this is patches from me and RTH cleaning up and doing
1
The following changes since commit 8f6330a807f2642dc2a3cdf33347aa28a4c00a87:
2
more decodetree conversion for AArch32 Neon. The major new feature
3
is Dongjiu Geng's patchset to report host memory errors to KVM guests;
4
also a new aspeed board from Patrick Williams.
5
2
6
thanks
3
Merge tag 'pull-maintainer-updates-060324-1' of https://gitlab.com/stsquad/qemu into staging (2024-03-06 16:56:20 +0000)
7
-- PMM
8
9
The following changes since commit 035b448b84f3557206abc44d786c5d3db2638f7d:
10
11
Merge remote-tracking branch 'remotes/gkurz/tags/9p-next-2020-05-14' into staging (2020-05-14 10:58:30 +0100)
12
4
13
are available in the Git repository at:
5
are available in the Git repository at:
14
6
15
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20200514
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240308
16
8
17
for you to fetch changes up to e95485f85657be21135c17a9226e297c21e73360:
9
for you to fetch changes up to bbf6c6dbead82292a20951eb1204442a6b838de9:
18
10
19
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree (2020-05-14 15:03:09 +0100)
11
target/arm: Move v7m-related code from cpu32.c into a separate file (2024-03-08 14:45:03 +0000)
20
12
21
----------------------------------------------------------------
13
----------------------------------------------------------------
22
target-arm queue:
14
target-arm queue:
23
* target/arm: Use correct GDB XML for M-profile cores
15
* Implement FEAT_ECV
24
* target/arm: Code cleanup to use gvec APIs better
16
* STM32L4x5: Implement GPIO device
25
* aspeed: Add support for the sonorapass-bmc board
17
* Fix 32-bit SMOPA
26
* target/arm: Support reporting KVM host memory errors
18
* Refactor v7m related code from cpu32.c into its own file
27
to the guest via ACPI notifications
19
* hw/rtc/sun4v-rtc: Relicense to GPLv2-or-later
28
* target/arm: Finish conversion of Neon 3-reg-same insns to decodetree
29
20
30
----------------------------------------------------------------
21
----------------------------------------------------------------
31
Dongjiu Geng (10):
22
Inès Varhol (3):
32
acpi: nvdimm: change NVDIMM_UUID_LE to a common macro
23
hw/gpio: Implement STM32L4x5 GPIO
33
hw/arm/virt: Introduce a RAS machine option
24
hw/arm: Connect STM32L4x5 GPIO to STM32L4x5 SoC
34
docs: APEI GHES generation and CPER record description
25
tests/qtest: Add STM32L4x5 GPIO QTest testcase
35
ACPI: Build related register address fields via hardware error fw_cfg blob
36
ACPI: Build Hardware Error Source Table
37
ACPI: Record the Generic Error Status Block address
38
KVM: Move hwpoison page related functions into kvm-all.c
39
ACPI: Record Generic Error Status Block(GESB) table
40
target-arm: kvm64: handle SIGBUS signal from kernel or KVM
41
MAINTAINERS: Add ACPI/HEST/GHES entries
42
26
43
Patrick Williams (1):
27
Peter Maydell (9):
44
aspeed: Add support for the sonorapass-bmc board
28
target/arm: Move some register related defines to internals.h
29
target/arm: Timer _EL02 registers UNDEF for E2H == 0
30
target/arm: use FIELD macro for CNTHCTL bit definitions
31
target/arm: Don't allow RES0 CNTHCTL_EL2 bits to be written
32
target/arm: Implement new FEAT_ECV trap bits
33
target/arm: Define CNTPCTSS_EL0 and CNTVCTSS_EL0
34
target/arm: Implement FEAT_ECV CNTPOFF_EL2 handling
35
target/arm: Enable FEAT_ECV for 'max' CPU
36
hw/rtc/sun4v-rtc: Relicense to GPLv2-or-later
45
37
46
Peter Maydell (18):
38
Richard Henderson (1):
47
target/arm: Use correct GDB XML for M-profile cores
39
target/arm: Fix 32-bit SMOPA
48
target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree
49
target/arm: Convert Neon 3-reg-same SHA to decodetree
50
target/arm: Convert Neon 64-bit element 3-reg-same insns
51
target/arm: Convert Neon VHADD 3-reg-same insns
52
target/arm: Convert Neon VABA/VABD 3-reg-same to decodetree
53
target/arm: Convert Neon VRHADD, VHSUB 3-reg-same insns to decodetree
54
target/arm: Convert Neon VQSHL, VRSHL, VQRSHL 3-reg-same insns to decodetree
55
target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns to decodetree
56
target/arm: Convert Neon VPADD 3-reg-same insns to decodetree
57
target/arm: Convert Neon VQDMULH/VQRDMULH 3-reg-same to decodetree
58
target/arm: Convert Neon VADD, VSUB, VABD 3-reg-same insns to decodetree
59
target/arm: Convert Neon VPMIN/VPMAX/VPADD float 3-reg-same insns to decodetree
60
target/arm: Convert Neon fp VMUL, VMLA, VMLS 3-reg-same insns to decodetree
61
target/arm: Convert Neon 3-reg-same compare insns to decodetree
62
target/arm: Move 'env' argument of recps_f32 and rsqrts_f32 helpers to usual place
63
target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree
64
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree
65
40
66
Richard Henderson (16):
41
Thomas Huth (1):
67
target/arm: Create gen_gvec_[us]sra
42
target/arm: Move v7m-related code from cpu32.c into a separate file
68
target/arm: Create gen_gvec_{u,s}{rshr,rsra}
69
target/arm: Create gen_gvec_{sri,sli}
70
target/arm: Remove unnecessary range check for VSHL
71
target/arm: Tidy handle_vec_simd_shri
72
target/arm: Create gen_gvec_{ceq,clt,cle,cgt,cge}0
73
target/arm: Create gen_gvec_{mla,mls}
74
target/arm: Swap argument order for VSHL during decode
75
target/arm: Create gen_gvec_{cmtst,ushl,sshl}
76
target/arm: Create gen_gvec_{uqadd, sqadd, uqsub, sqsub}
77
target/arm: Remove fp_status from helper_{recpe, rsqrte}_u32
78
target/arm: Create gen_gvec_{qrdmla,qrdmls}
79
target/arm: Pass pointer to qc to qrdmla/qrdmls
80
target/arm: Clear tail in gvec_fmul_idx_*, gvec_fmla_idx_*
81
target/arm: Vectorize SABD/UABD
82
target/arm: Vectorize SABA/UABA
83
43
84
docs/specs/acpi_hest_ghes.rst | 110 ++
44
MAINTAINERS | 1 +
85
docs/specs/index.rst | 1 +
45
docs/system/arm/b-l475e-iot01a.rst | 2 +-
86
configure | 4 +-
46
docs/system/arm/emulation.rst | 1 +
87
default-configs/arm-softmmu.mak | 1 +
47
include/hw/arm/stm32l4x5_soc.h | 2 +
88
include/hw/acpi/aml-build.h | 1 +
48
include/hw/gpio/stm32l4x5_gpio.h | 71 +++++
89
include/hw/acpi/generic_event_device.h | 2 +
49
include/hw/misc/stm32l4x5_syscfg.h | 3 +-
90
include/hw/acpi/ghes.h | 74 +
50
include/hw/rtc/sun4v-rtc.h | 2 +-
91
include/hw/arm/virt.h | 1 +
51
target/arm/cpu-features.h | 10 +
92
include/qemu/uuid.h | 27 +
52
target/arm/cpu.h | 129 +--------
93
include/sysemu/kvm.h | 3 +-
53
target/arm/internals.h | 151 ++++++++++
94
include/sysemu/kvm_int.h | 12 +
54
hw/arm/stm32l4x5_soc.c | 71 ++++-
95
target/arm/cpu.h | 4 +
55
hw/gpio/stm32l4x5_gpio.c | 477 ++++++++++++++++++++++++++++++++
96
target/arm/helper.h | 78 +-
56
hw/misc/stm32l4x5_syscfg.c | 1 +
97
target/arm/internals.h | 5 +-
57
hw/rtc/sun4v-rtc.c | 2 +-
98
target/arm/translate.h | 84 +-
58
target/arm/helper.c | 189 ++++++++++++-
99
target/i386/cpu.h | 2 +
59
target/arm/tcg/cpu-v7m.c | 290 +++++++++++++++++++
100
target/arm/neon-dp.decode | 119 +-
60
target/arm/tcg/cpu32.c | 261 ------------------
101
accel/kvm/kvm-all.c | 36 +
61
target/arm/tcg/cpu64.c | 1 +
102
hw/acpi/aml-build.c | 2 +
62
target/arm/tcg/sme_helper.c | 77 +++---
103
hw/acpi/generic_event_device.c | 19 +
63
tests/qtest/stm32l4x5_gpio-test.c | 551 +++++++++++++++++++++++++++++++++++++
104
hw/acpi/ghes.c | 448 ++++++
64
tests/tcg/aarch64/sme-smopa-1.c | 47 ++++
105
hw/acpi/nvdimm.c | 10 +-
65
tests/tcg/aarch64/sme-smopa-2.c | 54 ++++
106
hw/arm/aspeed.c | 78 ++
66
hw/arm/Kconfig | 3 +-
107
hw/arm/virt-acpi-build.c | 15 +
67
hw/gpio/Kconfig | 3 +
108
hw/arm/virt.c | 23 +
68
hw/gpio/meson.build | 1 +
109
target/arm/cpu_tcg.c | 1 +
69
hw/gpio/trace-events | 6 +
110
target/arm/gdbstub.c | 22 +-
70
target/arm/meson.build | 3 +
111
target/arm/helper.c | 2 +-
71
target/arm/tcg/meson.build | 3 +
112
target/arm/kvm64.c | 77 ++
72
target/arm/trace-events | 1 +
113
target/arm/neon_helper.c | 17 -
73
tests/qtest/meson.build | 3 +-
114
target/arm/tlb_helper.c | 2 +-
74
tests/tcg/aarch64/Makefile.target | 2 +-
115
target/arm/translate-a64.c | 210 +--
75
31 files changed, 1962 insertions(+), 456 deletions(-)
116
target/arm/translate-neon.inc.c | 682 +++++++++-
76
create mode 100644 include/hw/gpio/stm32l4x5_gpio.h
117
target/arm/translate.c | 2349 +++++++++++++++++---------------
77
create mode 100644 hw/gpio/stm32l4x5_gpio.c
118
target/arm/vec_helper.c | 240 +++-
78
create mode 100644 target/arm/tcg/cpu-v7m.c
119
target/arm/vfp_helper.c | 9 +-
79
create mode 100644 tests/qtest/stm32l4x5_gpio-test.c
120
target/i386/kvm.c | 36 -
80
create mode 100644 tests/tcg/aarch64/sme-smopa-1.c
121
MAINTAINERS | 9 +
81
create mode 100644 tests/tcg/aarch64/sme-smopa-2.c
122
gdb-xml/arm-m-profile.xml | 27 +
123
hw/acpi/Kconfig | 4 +
124
hw/acpi/Makefile.objs | 1 +
125
41 files changed, 3402 insertions(+), 1445 deletions(-)
126
create mode 100644 docs/specs/acpi_hest_ghes.rst
127
create mode 100644 include/hw/acpi/ghes.h
128
create mode 100644 hw/acpi/ghes.c
129
create mode 100644 gdb-xml/arm-m-profile.xml
130
82
diff view generated by jsdifflib
Deleted patch
1
GDB's remote protocol requires M-profile cores to use the feature
2
name 'org.gnu.gdb.arm.m-profile' instead of the 'org.gnu.gdb.arm.core'
3
feature used for A- and R-profile cores. We weren't doing this, which
4
meant GDB treated our M-profile cores like A-profile ones. This mostly
5
doesn't matter, but for instance means that it doesn't correctly
6
handle backtraces where an M-profile exception frame is involved.
7
1
8
Ship a copy of GDB's arm-m-profile.xml and use it on the M-profile
9
cores. The integer registers have the same offsets as the
10
arm-core.xml, but register 25 is the M-profile XPSR rather than the
11
A-profile CPSR, so we need to update arm_cpu_gdb_read_register() and
12
arm_cpu_gdb_write_register() to handle XSPR reads and writes.
13
14
Fixes: https://bugs.launchpad.net/qemu/+bug/1877136
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
17
Message-id: 20200507134755.13997-1-peter.maydell@linaro.org
18
---
19
configure | 4 ++--
20
target/arm/cpu_tcg.c | 1 +
21
target/arm/gdbstub.c | 22 ++++++++++++++++++----
22
gdb-xml/arm-m-profile.xml | 27 +++++++++++++++++++++++++++
23
4 files changed, 48 insertions(+), 6 deletions(-)
24
create mode 100644 gdb-xml/arm-m-profile.xml
25
26
diff --git a/configure b/configure
27
index XXXXXXX..XXXXXXX 100755
28
--- a/configure
29
+++ b/configure
30
@@ -XXX,XX +XXX,XX @@ case "$target_name" in
31
TARGET_SYSTBL_ABI=common,oabi
32
bflt="yes"
33
mttcg="yes"
34
- gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
35
+ gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
36
;;
37
aarch64|aarch64_be)
38
TARGET_ARCH=aarch64
39
TARGET_BASE_ARCH=arm
40
bflt="yes"
41
mttcg="yes"
42
- gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
43
+ gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
44
;;
45
cris)
46
;;
47
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/cpu_tcg.c
50
+++ b/target/arm/cpu_tcg.c
51
@@ -XXX,XX +XXX,XX @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
52
#endif
53
54
cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt;
55
+ cc->gdb_core_xml_file = "arm-m-profile.xml";
56
}
57
58
static const ARMCPUInfo arm_tcg_cpus[] = {
59
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/gdbstub.c
62
+++ b/target/arm/gdbstub.c
63
@@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
64
}
65
return gdb_get_reg32(mem_buf, 0);
66
case 25:
67
- /* CPSR */
68
- return gdb_get_reg32(mem_buf, cpsr_read(env));
69
+ /* CPSR, or XPSR for M-profile */
70
+ if (arm_feature(env, ARM_FEATURE_M)) {
71
+ return gdb_get_reg32(mem_buf, xpsr_read(env));
72
+ } else {
73
+ return gdb_get_reg32(mem_buf, cpsr_read(env));
74
+ }
75
}
76
/* Unknown register. */
77
return 0;
78
@@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
79
}
80
return 4;
81
case 25:
82
- /* CPSR */
83
- cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
84
+ /* CPSR, or XPSR for M-profile */
85
+ if (arm_feature(env, ARM_FEATURE_M)) {
86
+ /*
87
+ * Don't allow writing to XPSR.Exception as it can cause
88
+ * a transition into or out of handler mode (it's not
89
+ * writeable via the MSR insn so this is a reasonable
90
+ * restriction). Other fields are safe to update.
91
+ */
92
+ xpsr_write(env, tmp, ~XPSR_EXCP);
93
+ } else {
94
+ cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
95
+ }
96
return 4;
97
}
98
/* Unknown register. */
99
diff --git a/gdb-xml/arm-m-profile.xml b/gdb-xml/arm-m-profile.xml
100
new file mode 100644
101
index XXXXXXX..XXXXXXX
102
--- /dev/null
103
+++ b/gdb-xml/arm-m-profile.xml
104
@@ -XXX,XX +XXX,XX @@
105
+<?xml version="1.0"?>
106
+<!-- Copyright (C) 2010-2020 Free Software Foundation, Inc.
107
+
108
+ Copying and distribution of this file, with or without modification,
109
+ are permitted in any medium without royalty provided the copyright
110
+ notice and this notice are preserved. -->
111
+
112
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
113
+<feature name="org.gnu.gdb.arm.m-profile">
114
+ <reg name="r0" bitsize="32"/>
115
+ <reg name="r1" bitsize="32"/>
116
+ <reg name="r2" bitsize="32"/>
117
+ <reg name="r3" bitsize="32"/>
118
+ <reg name="r4" bitsize="32"/>
119
+ <reg name="r5" bitsize="32"/>
120
+ <reg name="r6" bitsize="32"/>
121
+ <reg name="r7" bitsize="32"/>
122
+ <reg name="r8" bitsize="32"/>
123
+ <reg name="r9" bitsize="32"/>
124
+ <reg name="r10" bitsize="32"/>
125
+ <reg name="r11" bitsize="32"/>
126
+ <reg name="r12" bitsize="32"/>
127
+ <reg name="sp" bitsize="32" type="data_ptr"/>
128
+ <reg name="lr" bitsize="32"/>
129
+ <reg name="pc" bitsize="32" type="code_ptr"/>
130
+ <reg name="xpsr" bitsize="32" regnum="25"/>
131
+</feature>
132
--
133
2.20.1
134
135
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
The functions eliminate duplication of the special cases for
4
this operation. They match up with the GVecGen2iFn typedef.
5
6
Add out-of-line helpers. We got away with only having inline
7
expanders because the neon vector size is only 16 bytes, and
8
we know that the inline expansion will always succeed.
9
When we reuse this for SVE, tcg-gvec-op may decide to use an
10
out-of-line helper due to longer vector lengths.
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20200513163245.17915-2-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
17
target/arm/helper.h | 10 +++
18
target/arm/translate.h | 7 +-
19
target/arm/translate-a64.c | 15 +---
20
target/arm/translate.c | 161 ++++++++++++++++++++++---------------
21
target/arm/vec_helper.c | 25 ++++++
22
5 files changed, 139 insertions(+), 79 deletions(-)
23
24
diff --git a/target/arm/helper.h b/target/arm/helper.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/helper.h
27
+++ b/target/arm/helper.h
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
30
DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
32
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
+
37
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
41
+
42
#ifdef TARGET_AARCH64
43
#include "helper-a64.h"
44
#include "helper-sve.h"
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/translate.h
48
+++ b/target/arm/translate.h
49
@@ -XXX,XX +XXX,XX @@ extern const GVecGen3 mls_op[4];
50
extern const GVecGen3 cmtst_op[4];
51
extern const GVecGen3 sshl_op[4];
52
extern const GVecGen3 ushl_op[4];
53
-extern const GVecGen2i ssra_op[4];
54
-extern const GVecGen2i usra_op[4];
55
extern const GVecGen2i sri_op[4];
56
extern const GVecGen2i sli_op[4];
57
extern const GVecGen4 uqadd_op[4];
58
@@ -XXX,XX +XXX,XX @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
59
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
60
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
61
62
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
64
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
66
+
67
/*
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
69
*/
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-a64.c
73
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
75
76
switch (opcode) {
77
case 0x02: /* SSRA / USRA (accumulate) */
78
- if (is_u) {
79
- /* Shift count same as element size produces zero to add. */
80
- if (shift == 8 << size) {
81
- goto done;
82
- }
83
- gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
84
- } else {
85
- /* Shift count same as element size produces all sign to add. */
86
- if (shift == 8 << size) {
87
- shift -= 1;
88
- }
89
- gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
90
- }
91
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
92
+ is_u ? gen_gvec_usra : gen_gvec_ssra, size);
93
return;
94
case 0x08: /* SRI */
95
/* Shift count same as element size is valid but does nothing. */
96
diff --git a/target/arm/translate.c b/target/arm/translate.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/arm/translate.c
99
+++ b/target/arm/translate.c
100
@@ -XXX,XX +XXX,XX @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
101
tcg_gen_add_vec(vece, d, d, a);
102
}
103
104
-static const TCGOpcode vecop_list_ssra[] = {
105
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
106
-};
107
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
108
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
109
+{
110
+ static const TCGOpcode vecop_list[] = {
111
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
112
+ };
113
+ static const GVecGen2i ops[4] = {
114
+ { .fni8 = gen_ssra8_i64,
115
+ .fniv = gen_ssra_vec,
116
+ .fno = gen_helper_gvec_ssra_b,
117
+ .load_dest = true,
118
+ .opt_opc = vecop_list,
119
+ .vece = MO_8 },
120
+ { .fni8 = gen_ssra16_i64,
121
+ .fniv = gen_ssra_vec,
122
+ .fno = gen_helper_gvec_ssra_h,
123
+ .load_dest = true,
124
+ .opt_opc = vecop_list,
125
+ .vece = MO_16 },
126
+ { .fni4 = gen_ssra32_i32,
127
+ .fniv = gen_ssra_vec,
128
+ .fno = gen_helper_gvec_ssra_s,
129
+ .load_dest = true,
130
+ .opt_opc = vecop_list,
131
+ .vece = MO_32 },
132
+ { .fni8 = gen_ssra64_i64,
133
+ .fniv = gen_ssra_vec,
134
+ .fno = gen_helper_gvec_ssra_b,
135
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
136
+ .opt_opc = vecop_list,
137
+ .load_dest = true,
138
+ .vece = MO_64 },
139
+ };
140
141
-const GVecGen2i ssra_op[4] = {
142
- { .fni8 = gen_ssra8_i64,
143
- .fniv = gen_ssra_vec,
144
- .load_dest = true,
145
- .opt_opc = vecop_list_ssra,
146
- .vece = MO_8 },
147
- { .fni8 = gen_ssra16_i64,
148
- .fniv = gen_ssra_vec,
149
- .load_dest = true,
150
- .opt_opc = vecop_list_ssra,
151
- .vece = MO_16 },
152
- { .fni4 = gen_ssra32_i32,
153
- .fniv = gen_ssra_vec,
154
- .load_dest = true,
155
- .opt_opc = vecop_list_ssra,
156
- .vece = MO_32 },
157
- { .fni8 = gen_ssra64_i64,
158
- .fniv = gen_ssra_vec,
159
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
160
- .opt_opc = vecop_list_ssra,
161
- .load_dest = true,
162
- .vece = MO_64 },
163
-};
164
+ /* tszimm encoding produces immediates in the range [1..esize]. */
165
+ tcg_debug_assert(shift > 0);
166
+ tcg_debug_assert(shift <= (8 << vece));
167
+
168
+ /*
169
+ * Shifts larger than the element size are architecturally valid.
170
+ * Signed results in all sign bits.
171
+ */
172
+ shift = MIN(shift, (8 << vece) - 1);
173
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
174
+}
175
176
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
177
{
178
@@ -XXX,XX +XXX,XX @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
179
tcg_gen_add_vec(vece, d, d, a);
180
}
181
182
-static const TCGOpcode vecop_list_usra[] = {
183
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
184
-};
185
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
186
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
187
+{
188
+ static const TCGOpcode vecop_list[] = {
189
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
190
+ };
191
+ static const GVecGen2i ops[4] = {
192
+ { .fni8 = gen_usra8_i64,
193
+ .fniv = gen_usra_vec,
194
+ .fno = gen_helper_gvec_usra_b,
195
+ .load_dest = true,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_8, },
198
+ { .fni8 = gen_usra16_i64,
199
+ .fniv = gen_usra_vec,
200
+ .fno = gen_helper_gvec_usra_h,
201
+ .load_dest = true,
202
+ .opt_opc = vecop_list,
203
+ .vece = MO_16, },
204
+ { .fni4 = gen_usra32_i32,
205
+ .fniv = gen_usra_vec,
206
+ .fno = gen_helper_gvec_usra_s,
207
+ .load_dest = true,
208
+ .opt_opc = vecop_list,
209
+ .vece = MO_32, },
210
+ { .fni8 = gen_usra64_i64,
211
+ .fniv = gen_usra_vec,
212
+ .fno = gen_helper_gvec_usra_d,
213
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
214
+ .load_dest = true,
215
+ .opt_opc = vecop_list,
216
+ .vece = MO_64, },
217
+ };
218
219
-const GVecGen2i usra_op[4] = {
220
- { .fni8 = gen_usra8_i64,
221
- .fniv = gen_usra_vec,
222
- .load_dest = true,
223
- .opt_opc = vecop_list_usra,
224
- .vece = MO_8, },
225
- { .fni8 = gen_usra16_i64,
226
- .fniv = gen_usra_vec,
227
- .load_dest = true,
228
- .opt_opc = vecop_list_usra,
229
- .vece = MO_16, },
230
- { .fni4 = gen_usra32_i32,
231
- .fniv = gen_usra_vec,
232
- .load_dest = true,
233
- .opt_opc = vecop_list_usra,
234
- .vece = MO_32, },
235
- { .fni8 = gen_usra64_i64,
236
- .fniv = gen_usra_vec,
237
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
238
- .load_dest = true,
239
- .opt_opc = vecop_list_usra,
240
- .vece = MO_64, },
241
-};
242
+ /* tszimm encoding produces immediates in the range [1..esize]. */
243
+ tcg_debug_assert(shift > 0);
244
+ tcg_debug_assert(shift <= (8 << vece));
245
+
246
+ /*
247
+ * Shifts larger than the element size are architecturally valid.
248
+ * Unsigned results in all zeros as input to accumulate: nop.
249
+ */
250
+ if (shift < (8 << vece)) {
251
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
252
+ } else {
253
+ /* Nop, but we do need to clear the tail. */
254
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
255
+ }
256
+}
257
258
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
259
{
260
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
261
case 1: /* VSRA */
262
/* Right shift comes here negative. */
263
shift = -shift;
264
- /* Shifts larger than the element size are architecturally
265
- * valid. Unsigned results in all zeros; signed results
266
- * in all sign bits.
267
- */
268
- if (!u) {
269
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
270
- MIN(shift, (8 << size) - 1),
271
- &ssra_op[size]);
272
- } else if (shift >= 8 << size) {
273
- /* rd += 0 */
274
+ if (u) {
275
+ gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
276
+ vec_size, vec_size);
277
} else {
278
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
279
- shift, &usra_op[size]);
280
+ gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
281
+ vec_size, vec_size);
282
}
283
return 0;
284
285
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
286
index XXXXXXX..XXXXXXX 100644
287
--- a/target/arm/vec_helper.c
288
+++ b/target/arm/vec_helper.c
289
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
290
clear_tail(d, oprsz, simd_maxsz(desc));
291
}
292
293
+
294
+#define DO_SRA(NAME, TYPE) \
295
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
296
+{ \
297
+ intptr_t i, oprsz = simd_oprsz(desc); \
298
+ int shift = simd_data(desc); \
299
+ TYPE *d = vd, *n = vn; \
300
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
301
+ d[i] += n[i] >> shift; \
302
+ } \
303
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
304
+}
305
+
306
+DO_SRA(gvec_ssra_b, int8_t)
307
+DO_SRA(gvec_ssra_h, int16_t)
308
+DO_SRA(gvec_ssra_s, int32_t)
309
+DO_SRA(gvec_ssra_d, int64_t)
310
+
311
+DO_SRA(gvec_usra_b, uint8_t)
312
+DO_SRA(gvec_usra_h, uint16_t)
313
+DO_SRA(gvec_usra_s, uint32_t)
314
+DO_SRA(gvec_usra_d, uint64_t)
315
+
316
+#undef DO_SRA
317
+
318
/*
319
* Convert float16 to float32, raising no exceptions and
320
* preserving exceptional values, including SNaN.
321
--
322
2.20.1
323
324
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
cpu.h has a lot of #defines relating to CPU register fields.
2
Most of these aren't actually used outside target/arm code,
3
so there's no point in cluttering up the cpu.h file with them.
4
Move some easy ones to internals.h.
2
5
3
Add a SIGBUS signal handler. In this handler, it checks the SIGBUS type,
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
translates the host VA delivered by host to guest PA, then fills this PA
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
to guest APEI GHES memory, then notifies guest according to the SIGBUS
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
type.
9
Message-id: 20240301183219.2424889-2-peter.maydell@linaro.org
10
---
11
target/arm/cpu.h | 128 -----------------------------------------
12
target/arm/internals.h | 128 +++++++++++++++++++++++++++++++++++++++++
13
2 files changed, 128 insertions(+), 128 deletions(-)
7
14
8
When guest accesses the poisoned memory, it will generate a Synchronous
9
External Abort(SEA). Then host kernel gets an APEI notification and calls
10
memory_failure() to unmapped the affected page in stage 2, finally
11
returns to guest.
12
13
Guest continues to access the PG_hwpoison page, it will trap to KVM as
14
stage2 fault, then a SIGBUS_MCEERR_AR synchronous signal is delivered to
15
Qemu, Qemu records this error address into guest APEI GHES memory and
16
notifes guest using Synchronous-External-Abort(SEA).
17
18
In order to inject a vSEA, we introduce the kvm_inject_arm_sea() function
19
in which we can setup the type of exception and the syndrome information.
20
When switching to guest, the target vcpu will jump to the synchronous
21
external abort vector table entry.
22
23
The ESR_ELx.DFSC is set to synchronous external abort(0x10), and the
24
ESR_ELx.FnV is set to not valid(0x1), which will tell guest that FAR is
25
not valid and hold an UNKNOWN value. These values will be set to KVM
26
register structures through KVM_SET_ONE_REG IOCTL.
27
28
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
29
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
30
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
31
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
32
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
33
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
34
Message-id: 20200512030609.19593-10-gengdongjiu@huawei.com
35
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
36
---
37
include/sysemu/kvm.h | 3 +-
38
target/arm/cpu.h | 4 +++
39
target/arm/internals.h | 5 +--
40
target/i386/cpu.h | 2 ++
41
target/arm/helper.c | 2 +-
42
target/arm/kvm64.c | 77 +++++++++++++++++++++++++++++++++++++++++
43
target/arm/tlb_helper.c | 2 +-
44
7 files changed, 89 insertions(+), 6 deletions(-)
45
46
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/include/sysemu/kvm.h
49
+++ b/include/sysemu/kvm.h
50
@@ -XXX,XX +XXX,XX @@ bool kvm_vcpu_id_is_valid(int vcpu_id);
51
/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
52
unsigned long kvm_arch_vcpu_id(CPUState *cpu);
53
54
-#ifdef TARGET_I386
55
-#define KVM_HAVE_MCE_INJECTION 1
56
+#ifdef KVM_HAVE_MCE_INJECTION
57
void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
58
#endif
59
60
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
61
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/cpu.h
17
--- a/target/arm/cpu.h
63
+++ b/target/arm/cpu.h
18
+++ b/target/arm/cpu.h
64
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ typedef struct ARMGenericTimer {
65
/* ARM processors have a weak memory model */
20
uint64_t ctl; /* Timer Control register */
66
#define TCG_GUEST_DEFAULT_MO (0)
21
} ARMGenericTimer;
67
22
68
+#ifdef TARGET_AARCH64
23
-#define VTCR_NSW (1u << 29)
69
+#define KVM_HAVE_MCE_INJECTION 1
24
-#define VTCR_NSA (1u << 30)
70
+#endif
25
-#define VSTCR_SW VTCR_NSW
71
+
26
-#define VSTCR_SA VTCR_NSA
72
#define EXCP_UDEF 1 /* undefined instruction */
27
-
73
#define EXCP_SWI 2 /* software interrupt */
28
/* Define a maximum sized vector register.
74
#define EXCP_PREFETCH_ABORT 3
29
* For 32-bit, this is a 128-bit NEON/AdvSIMD register.
30
* For 64-bit, this is a 2048-bit SVE register.
31
@@ -XXX,XX +XXX,XX @@ void pmu_init(ARMCPU *cpu);
32
#define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */
33
#define SCTLR_TIDCP (1ULL << 63) /* FEAT_TIDCP1 */
34
35
-/* Bit definitions for CPACR (AArch32 only) */
36
-FIELD(CPACR, CP10, 20, 2)
37
-FIELD(CPACR, CP11, 22, 2)
38
-FIELD(CPACR, TRCDIS, 28, 1) /* matches CPACR_EL1.TTA */
39
-FIELD(CPACR, D32DIS, 30, 1) /* up to v7; RAZ in v8 */
40
-FIELD(CPACR, ASEDIS, 31, 1)
41
-
42
-/* Bit definitions for CPACR_EL1 (AArch64 only) */
43
-FIELD(CPACR_EL1, ZEN, 16, 2)
44
-FIELD(CPACR_EL1, FPEN, 20, 2)
45
-FIELD(CPACR_EL1, SMEN, 24, 2)
46
-FIELD(CPACR_EL1, TTA, 28, 1) /* matches CPACR.TRCDIS */
47
-
48
-/* Bit definitions for HCPTR (AArch32 only) */
49
-FIELD(HCPTR, TCP10, 10, 1)
50
-FIELD(HCPTR, TCP11, 11, 1)
51
-FIELD(HCPTR, TASE, 15, 1)
52
-FIELD(HCPTR, TTA, 20, 1)
53
-FIELD(HCPTR, TAM, 30, 1) /* matches CPTR_EL2.TAM */
54
-FIELD(HCPTR, TCPAC, 31, 1) /* matches CPTR_EL2.TCPAC */
55
-
56
-/* Bit definitions for CPTR_EL2 (AArch64 only) */
57
-FIELD(CPTR_EL2, TZ, 8, 1) /* !E2H */
58
-FIELD(CPTR_EL2, TFP, 10, 1) /* !E2H, matches HCPTR.TCP10 */
59
-FIELD(CPTR_EL2, TSM, 12, 1) /* !E2H */
60
-FIELD(CPTR_EL2, ZEN, 16, 2) /* E2H */
61
-FIELD(CPTR_EL2, FPEN, 20, 2) /* E2H */
62
-FIELD(CPTR_EL2, SMEN, 24, 2) /* E2H */
63
-FIELD(CPTR_EL2, TTA, 28, 1)
64
-FIELD(CPTR_EL2, TAM, 30, 1) /* matches HCPTR.TAM */
65
-FIELD(CPTR_EL2, TCPAC, 31, 1) /* matches HCPTR.TCPAC */
66
-
67
-/* Bit definitions for CPTR_EL3 (AArch64 only) */
68
-FIELD(CPTR_EL3, EZ, 8, 1)
69
-FIELD(CPTR_EL3, TFP, 10, 1)
70
-FIELD(CPTR_EL3, ESM, 12, 1)
71
-FIELD(CPTR_EL3, TTA, 20, 1)
72
-FIELD(CPTR_EL3, TAM, 30, 1)
73
-FIELD(CPTR_EL3, TCPAC, 31, 1)
74
-
75
-#define MDCR_MTPME (1U << 28)
76
-#define MDCR_TDCC (1U << 27)
77
-#define MDCR_HLP (1U << 26) /* MDCR_EL2 */
78
-#define MDCR_SCCD (1U << 23) /* MDCR_EL3 */
79
-#define MDCR_HCCD (1U << 23) /* MDCR_EL2 */
80
-#define MDCR_EPMAD (1U << 21)
81
-#define MDCR_EDAD (1U << 20)
82
-#define MDCR_TTRF (1U << 19)
83
-#define MDCR_STE (1U << 18) /* MDCR_EL3 */
84
-#define MDCR_SPME (1U << 17) /* MDCR_EL3 */
85
-#define MDCR_HPMD (1U << 17) /* MDCR_EL2 */
86
-#define MDCR_SDD (1U << 16)
87
-#define MDCR_SPD (3U << 14)
88
-#define MDCR_TDRA (1U << 11)
89
-#define MDCR_TDOSA (1U << 10)
90
-#define MDCR_TDA (1U << 9)
91
-#define MDCR_TDE (1U << 8)
92
-#define MDCR_HPME (1U << 7)
93
-#define MDCR_TPM (1U << 6)
94
-#define MDCR_TPMCR (1U << 5)
95
-#define MDCR_HPMN (0x1fU)
96
-
97
-/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
98
-#define SDCR_VALID_MASK (MDCR_MTPME | MDCR_TDCC | MDCR_SCCD | \
99
- MDCR_EPMAD | MDCR_EDAD | MDCR_TTRF | \
100
- MDCR_STE | MDCR_SPME | MDCR_SPD)
101
-
102
#define CPSR_M (0x1fU)
103
#define CPSR_T (1U << 5)
104
#define CPSR_F (1U << 6)
105
@@ -XXX,XX +XXX,XX @@ FIELD(CPTR_EL3, TCPAC, 31, 1)
106
#define XPSR_NZCV CPSR_NZCV
107
#define XPSR_IT CPSR_IT
108
109
-#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */
110
-#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */
111
-#define TTBCR_PD0 (1U << 4)
112
-#define TTBCR_PD1 (1U << 5)
113
-#define TTBCR_EPD0 (1U << 7)
114
-#define TTBCR_IRGN0 (3U << 8)
115
-#define TTBCR_ORGN0 (3U << 10)
116
-#define TTBCR_SH0 (3U << 12)
117
-#define TTBCR_T1SZ (3U << 16)
118
-#define TTBCR_A1 (1U << 22)
119
-#define TTBCR_EPD1 (1U << 23)
120
-#define TTBCR_IRGN1 (3U << 24)
121
-#define TTBCR_ORGN1 (3U << 26)
122
-#define TTBCR_SH1 (1U << 28)
123
-#define TTBCR_EAE (1U << 31)
124
-
125
-FIELD(VTCR, T0SZ, 0, 6)
126
-FIELD(VTCR, SL0, 6, 2)
127
-FIELD(VTCR, IRGN0, 8, 2)
128
-FIELD(VTCR, ORGN0, 10, 2)
129
-FIELD(VTCR, SH0, 12, 2)
130
-FIELD(VTCR, TG0, 14, 2)
131
-FIELD(VTCR, PS, 16, 3)
132
-FIELD(VTCR, VS, 19, 1)
133
-FIELD(VTCR, HA, 21, 1)
134
-FIELD(VTCR, HD, 22, 1)
135
-FIELD(VTCR, HWU59, 25, 1)
136
-FIELD(VTCR, HWU60, 26, 1)
137
-FIELD(VTCR, HWU61, 27, 1)
138
-FIELD(VTCR, HWU62, 28, 1)
139
-FIELD(VTCR, NSW, 29, 1)
140
-FIELD(VTCR, NSA, 30, 1)
141
-FIELD(VTCR, DS, 32, 1)
142
-FIELD(VTCR, SL2, 33, 1)
143
-
144
/* Bit definitions for ARMv8 SPSR (PSTATE) format.
145
* Only these are valid when in AArch64 mode; in
146
* AArch32 mode SPSRs are basically CPSR-format.
147
@@ -XXX,XX +XXX,XX @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
148
#define HCR_TWEDEN (1ULL << 59)
149
#define HCR_TWEDEL MAKE_64BIT_MASK(60, 4)
150
151
-#define HCRX_ENAS0 (1ULL << 0)
152
-#define HCRX_ENALS (1ULL << 1)
153
-#define HCRX_ENASR (1ULL << 2)
154
-#define HCRX_FNXS (1ULL << 3)
155
-#define HCRX_FGTNXS (1ULL << 4)
156
-#define HCRX_SMPME (1ULL << 5)
157
-#define HCRX_TALLINT (1ULL << 6)
158
-#define HCRX_VINMI (1ULL << 7)
159
-#define HCRX_VFNMI (1ULL << 8)
160
-#define HCRX_CMOW (1ULL << 9)
161
-#define HCRX_MCE2 (1ULL << 10)
162
-#define HCRX_MSCEN (1ULL << 11)
163
-
164
-#define HPFAR_NS (1ULL << 63)
165
-
166
#define SCR_NS (1ULL << 0)
167
#define SCR_IRQ (1ULL << 1)
168
#define SCR_FIQ (1ULL << 2)
169
@@ -XXX,XX +XXX,XX @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
170
#define SCR_GPF (1ULL << 48)
171
#define SCR_NSE (1ULL << 62)
172
173
-#define HSTR_TTEE (1 << 16)
174
-#define HSTR_TJDBX (1 << 17)
175
-
176
-#define CNTHCTL_CNTVMASK (1 << 18)
177
-#define CNTHCTL_CNTPMASK (1 << 19)
178
-
179
/* Return the current FPSCR value. */
180
uint32_t vfp_get_fpscr(CPUARMState *env);
181
void vfp_set_fpscr(CPUARMState *env, uint32_t val);
75
diff --git a/target/arm/internals.h b/target/arm/internals.h
182
diff --git a/target/arm/internals.h b/target/arm/internals.h
76
index XXXXXXX..XXXXXXX 100644
183
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/internals.h
184
--- a/target/arm/internals.h
78
+++ b/target/arm/internals.h
185
+++ b/target/arm/internals.h
79
@@ -XXX,XX +XXX,XX @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
186
@@ -XXX,XX +XXX,XX @@ FIELD(DBGWCR, WT, 20, 1)
80
| ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc;
187
FIELD(DBGWCR, MASK, 24, 5)
81
}
188
FIELD(DBGWCR, SSCE, 29, 1)
82
189
83
-static inline uint32_t syn_data_abort_no_iss(int same_el,
190
+#define VTCR_NSW (1u << 29)
84
+static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv,
191
+#define VTCR_NSA (1u << 30)
85
int ea, int cm, int s1ptw,
192
+#define VSTCR_SW VTCR_NSW
86
int wnr, int fsc)
193
+#define VSTCR_SA VTCR_NSA
87
{
194
+
88
return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
195
+/* Bit definitions for CPACR (AArch32 only) */
89
| ARM_EL_IL
196
+FIELD(CPACR, CP10, 20, 2)
90
- | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
197
+FIELD(CPACR, CP11, 22, 2)
91
+ | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7)
198
+FIELD(CPACR, TRCDIS, 28, 1) /* matches CPACR_EL1.TTA */
92
+ | (wnr << 6) | fsc;
199
+FIELD(CPACR, D32DIS, 30, 1) /* up to v7; RAZ in v8 */
93
}
200
+FIELD(CPACR, ASEDIS, 31, 1)
94
201
+
95
static inline uint32_t syn_data_abort_with_iss(int same_el,
202
+/* Bit definitions for CPACR_EL1 (AArch64 only) */
96
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
203
+FIELD(CPACR_EL1, ZEN, 16, 2)
97
index XXXXXXX..XXXXXXX 100644
204
+FIELD(CPACR_EL1, FPEN, 20, 2)
98
--- a/target/i386/cpu.h
205
+FIELD(CPACR_EL1, SMEN, 24, 2)
99
+++ b/target/i386/cpu.h
206
+FIELD(CPACR_EL1, TTA, 28, 1) /* matches CPACR.TRCDIS */
100
@@ -XXX,XX +XXX,XX @@
207
+
101
/* The x86 has a strong memory model with some store-after-load re-ordering */
208
+/* Bit definitions for HCPTR (AArch32 only) */
102
#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
209
+FIELD(HCPTR, TCP10, 10, 1)
103
210
+FIELD(HCPTR, TCP11, 11, 1)
104
+#define KVM_HAVE_MCE_INJECTION 1
211
+FIELD(HCPTR, TASE, 15, 1)
105
+
212
+FIELD(HCPTR, TTA, 20, 1)
106
/* Maximum instruction code size */
213
+FIELD(HCPTR, TAM, 30, 1) /* matches CPTR_EL2.TAM */
107
#define TARGET_MAX_INSN_SIZE 16
214
+FIELD(HCPTR, TCPAC, 31, 1) /* matches CPTR_EL2.TCPAC */
108
215
+
109
diff --git a/target/arm/helper.c b/target/arm/helper.c
216
+/* Bit definitions for CPTR_EL2 (AArch64 only) */
110
index XXXXXXX..XXXXXXX 100644
217
+FIELD(CPTR_EL2, TZ, 8, 1) /* !E2H */
111
--- a/target/arm/helper.c
218
+FIELD(CPTR_EL2, TFP, 10, 1) /* !E2H, matches HCPTR.TCP10 */
112
+++ b/target/arm/helper.c
219
+FIELD(CPTR_EL2, TSM, 12, 1) /* !E2H */
113
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
220
+FIELD(CPTR_EL2, ZEN, 16, 2) /* E2H */
114
* Report exception with ESR indicating a fault due to a
221
+FIELD(CPTR_EL2, FPEN, 20, 2) /* E2H */
115
* translation table walk for a cache maintenance instruction.
222
+FIELD(CPTR_EL2, SMEN, 24, 2) /* E2H */
116
*/
223
+FIELD(CPTR_EL2, TTA, 28, 1)
117
- syn = syn_data_abort_no_iss(current_el == target_el,
224
+FIELD(CPTR_EL2, TAM, 30, 1) /* matches HCPTR.TAM */
118
+ syn = syn_data_abort_no_iss(current_el == target_el, 0,
225
+FIELD(CPTR_EL2, TCPAC, 31, 1) /* matches HCPTR.TCPAC */
119
fi.ea, 1, fi.s1ptw, 1, fsc);
226
+
120
env->exception.vaddress = value;
227
+/* Bit definitions for CPTR_EL3 (AArch64 only) */
121
env->exception.fsr = fsr;
228
+FIELD(CPTR_EL3, EZ, 8, 1)
122
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
229
+FIELD(CPTR_EL3, TFP, 10, 1)
123
index XXXXXXX..XXXXXXX 100644
230
+FIELD(CPTR_EL3, ESM, 12, 1)
124
--- a/target/arm/kvm64.c
231
+FIELD(CPTR_EL3, TTA, 20, 1)
125
+++ b/target/arm/kvm64.c
232
+FIELD(CPTR_EL3, TAM, 30, 1)
126
@@ -XXX,XX +XXX,XX @@
233
+FIELD(CPTR_EL3, TCPAC, 31, 1)
127
#include "sysemu/kvm_int.h"
234
+
128
#include "kvm_arm.h"
235
+#define MDCR_MTPME (1U << 28)
129
#include "internals.h"
236
+#define MDCR_TDCC (1U << 27)
130
+#include "hw/acpi/acpi.h"
237
+#define MDCR_HLP (1U << 26) /* MDCR_EL2 */
131
+#include "hw/acpi/ghes.h"
238
+#define MDCR_SCCD (1U << 23) /* MDCR_EL3 */
132
+#include "hw/arm/virt.h"
239
+#define MDCR_HCCD (1U << 23) /* MDCR_EL2 */
133
240
+#define MDCR_EPMAD (1U << 21)
134
static bool have_guest_debug;
241
+#define MDCR_EDAD (1U << 20)
135
242
+#define MDCR_TTRF (1U << 19)
136
@@ -XXX,XX +XXX,XX @@ int kvm_arm_cpreg_level(uint64_t regidx)
243
+#define MDCR_STE (1U << 18) /* MDCR_EL3 */
137
return KVM_PUT_RUNTIME_STATE;
244
+#define MDCR_SPME (1U << 17) /* MDCR_EL3 */
138
}
245
+#define MDCR_HPMD (1U << 17) /* MDCR_EL2 */
139
246
+#define MDCR_SDD (1U << 16)
140
+/* Callers must hold the iothread mutex lock */
247
+#define MDCR_SPD (3U << 14)
141
+static void kvm_inject_arm_sea(CPUState *c)
248
+#define MDCR_TDRA (1U << 11)
142
+{
249
+#define MDCR_TDOSA (1U << 10)
143
+ ARMCPU *cpu = ARM_CPU(c);
250
+#define MDCR_TDA (1U << 9)
144
+ CPUARMState *env = &cpu->env;
251
+#define MDCR_TDE (1U << 8)
145
+ CPUClass *cc = CPU_GET_CLASS(c);
252
+#define MDCR_HPME (1U << 7)
146
+ uint32_t esr;
253
+#define MDCR_TPM (1U << 6)
147
+ bool same_el;
254
+#define MDCR_TPMCR (1U << 5)
148
+
255
+#define MDCR_HPMN (0x1fU)
149
+ c->exception_index = EXCP_DATA_ABORT;
256
+
150
+ env->exception.target_el = 1;
257
+/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
151
+
258
+#define SDCR_VALID_MASK (MDCR_MTPME | MDCR_TDCC | MDCR_SCCD | \
152
+ /*
259
+ MDCR_EPMAD | MDCR_EDAD | MDCR_TTRF | \
153
+ * Set the DFSC to synchronous external abort and set FnV to not valid,
260
+ MDCR_STE | MDCR_SPME | MDCR_SPD)
154
+ * this will tell guest the FAR_ELx is UNKNOWN for this abort.
261
+
155
+ */
262
+#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */
156
+ same_el = arm_current_el(env) == env->exception.target_el;
263
+#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */
157
+ esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
264
+#define TTBCR_PD0 (1U << 4)
158
+
265
+#define TTBCR_PD1 (1U << 5)
159
+ env->exception.syndrome = esr;
266
+#define TTBCR_EPD0 (1U << 7)
160
+
267
+#define TTBCR_IRGN0 (3U << 8)
161
+ cc->do_interrupt(c);
268
+#define TTBCR_ORGN0 (3U << 10)
162
+}
269
+#define TTBCR_SH0 (3U << 12)
163
+
270
+#define TTBCR_T1SZ (3U << 16)
164
#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
271
+#define TTBCR_A1 (1U << 22)
165
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
272
+#define TTBCR_EPD1 (1U << 23)
166
273
+#define TTBCR_IRGN1 (3U << 24)
167
@@ -XXX,XX +XXX,XX @@ int kvm_arch_get_registers(CPUState *cs)
274
+#define TTBCR_ORGN1 (3U << 26)
168
return ret;
275
+#define TTBCR_SH1 (1U << 28)
169
}
276
+#define TTBCR_EAE (1U << 31)
170
277
+
171
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
278
+FIELD(VTCR, T0SZ, 0, 6)
172
+{
279
+FIELD(VTCR, SL0, 6, 2)
173
+ ram_addr_t ram_addr;
280
+FIELD(VTCR, IRGN0, 8, 2)
174
+ hwaddr paddr;
281
+FIELD(VTCR, ORGN0, 10, 2)
175
+ Object *obj = qdev_get_machine();
282
+FIELD(VTCR, SH0, 12, 2)
176
+ VirtMachineState *vms = VIRT_MACHINE(obj);
283
+FIELD(VTCR, TG0, 14, 2)
177
+ bool acpi_enabled = virt_is_acpi_enabled(vms);
284
+FIELD(VTCR, PS, 16, 3)
178
+
285
+FIELD(VTCR, VS, 19, 1)
179
+ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
286
+FIELD(VTCR, HA, 21, 1)
180
+
287
+FIELD(VTCR, HD, 22, 1)
181
+ if (acpi_enabled && addr &&
288
+FIELD(VTCR, HWU59, 25, 1)
182
+ object_property_get_bool(obj, "ras", NULL)) {
289
+FIELD(VTCR, HWU60, 26, 1)
183
+ ram_addr = qemu_ram_addr_from_host(addr);
290
+FIELD(VTCR, HWU61, 27, 1)
184
+ if (ram_addr != RAM_ADDR_INVALID &&
291
+FIELD(VTCR, HWU62, 28, 1)
185
+ kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
292
+FIELD(VTCR, NSW, 29, 1)
186
+ kvm_hwpoison_page_add(ram_addr);
293
+FIELD(VTCR, NSA, 30, 1)
187
+ /*
294
+FIELD(VTCR, DS, 32, 1)
188
+ * If this is a BUS_MCEERR_AR, we know we have been called
295
+FIELD(VTCR, SL2, 33, 1)
189
+ * synchronously from the vCPU thread, so we can easily
296
+
190
+ * synchronize the state and inject an error.
297
+#define HCRX_ENAS0 (1ULL << 0)
191
+ *
298
+#define HCRX_ENALS (1ULL << 1)
192
+ * TODO: we currently don't tell the guest at all about
299
+#define HCRX_ENASR (1ULL << 2)
193
+ * BUS_MCEERR_AO. In that case we might either be being
300
+#define HCRX_FNXS (1ULL << 3)
194
+ * called synchronously from the vCPU thread, or a bit
301
+#define HCRX_FGTNXS (1ULL << 4)
195
+ * later from the main thread, so doing the injection of
302
+#define HCRX_SMPME (1ULL << 5)
196
+ * the error would be more complicated.
303
+#define HCRX_TALLINT (1ULL << 6)
197
+ */
304
+#define HCRX_VINMI (1ULL << 7)
198
+ if (code == BUS_MCEERR_AR) {
305
+#define HCRX_VFNMI (1ULL << 8)
199
+ kvm_cpu_synchronize_state(c);
306
+#define HCRX_CMOW (1ULL << 9)
200
+ if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
307
+#define HCRX_MCE2 (1ULL << 10)
201
+ kvm_inject_arm_sea(c);
308
+#define HCRX_MSCEN (1ULL << 11)
202
+ } else {
309
+
203
+ error_report("failed to record the error");
310
+#define HPFAR_NS (1ULL << 63)
204
+ abort();
311
+
205
+ }
312
+#define HSTR_TTEE (1 << 16)
206
+ }
313
+#define HSTR_TJDBX (1 << 17)
207
+ return;
314
+
208
+ }
315
+#define CNTHCTL_CNTVMASK (1 << 18)
209
+ if (code == BUS_MCEERR_AO) {
316
+#define CNTHCTL_CNTPMASK (1 << 19)
210
+ error_report("Hardware memory error at addr %p for memory used by "
317
+
211
+ "QEMU itself instead of guest system!", addr);
318
/* We use a few fake FSR values for internal purposes in M profile.
212
+ }
319
* M profile cores don't have A/R format FSRs, but currently our
213
+ }
320
* get_phys_addr() code assumes A/R profile and reports failures via
214
+
215
+ if (code == BUS_MCEERR_AR) {
216
+ error_report("Hardware memory error!");
217
+ exit(1);
218
+ }
219
+}
220
+
221
/* C6.6.29 BRK instruction */
222
static const uint32_t brk_insn = 0xd4200000;
223
224
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
225
index XXXXXXX..XXXXXXX 100644
226
--- a/target/arm/tlb_helper.c
227
+++ b/target/arm/tlb_helper.c
228
@@ -XXX,XX +XXX,XX @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
229
* ISV field.
230
*/
231
if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) {
232
- syn = syn_data_abort_no_iss(same_el,
233
+ syn = syn_data_abort_no_iss(same_el, 0,
234
ea, 0, s1ptw, is_write, fsc);
235
} else {
236
/*
237
--
321
--
238
2.20.1
322
2.34.1
239
323
240
324
diff view generated by jsdifflib
1
The usual location for the env argument in the argument list of a TCG helper
1
The timer _EL02 registers should UNDEF for invalid accesses from EL2
2
is immediately after the return-value argument. recps_f32 and rsqrts_f32
2
or EL3 when HCR_EL2.E2H == 0, not take a cp access trap. We were
3
differ in that they put it at the end.
3
delivering the exception to EL2 with the wrong syndrome.
4
5
Move the env argument to its usual place; this will allow us to
6
more easily use these helper functions with the gvec APIs.
7
4
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20200512163904.10918-16-peter.maydell@linaro.org
7
Message-id: 20240301183219.2424889-3-peter.maydell@linaro.org
11
---
8
---
12
target/arm/helper.h | 4 ++--
9
target/arm/helper.c | 2 +-
13
target/arm/translate.c | 4 ++--
10
1 file changed, 1 insertion(+), 1 deletion(-)
14
target/arm/vfp_helper.c | 4 ++--
15
3 files changed, 6 insertions(+), 6 deletions(-)
16
11
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
12
diff --git a/target/arm/helper.c b/target/arm/helper.c
18
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
14
--- a/target/arm/helper.c
20
+++ b/target/arm/helper.h
15
+++ b/target/arm/helper.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
16
@@ -XXX,XX +XXX,XX @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
22
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
17
return CP_ACCESS_OK;
23
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
18
}
24
19
if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
25
-DEF_HELPER_3(recps_f32, f32, f32, f32, env)
20
- return CP_ACCESS_TRAP;
26
-DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
21
+ return CP_ACCESS_TRAP_UNCATEGORIZED;
27
+DEF_HELPER_3(recps_f32, f32, env, f32, f32)
22
}
28
+DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
23
return CP_ACCESS_OK;
29
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
30
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
31
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
32
diff --git a/target/arm/translate.c b/target/arm/translate.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate.c
35
+++ b/target/arm/translate.c
36
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
37
tcg_temp_free_ptr(fpstatus);
38
} else {
39
if (size == 0) {
40
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
41
+ gen_helper_recps_f32(tmp, cpu_env, tmp, tmp2);
42
} else {
43
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
44
+ gen_helper_rsqrts_f32(tmp, cpu_env, tmp, tmp2);
45
}
46
}
47
break;
48
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/vfp_helper.c
51
+++ b/target/arm/vfp_helper.c
52
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
53
#define float32_three make_float32(0x40400000)
54
#define float32_one_point_five make_float32(0x3fc00000)
55
56
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
57
+float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b)
58
{
59
float_status *s = &env->vfp.standard_fp_status;
60
if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
61
@@ -XXX,XX +XXX,XX @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
62
return float32_sub(float32_two, float32_mul(a, b, s), s);
63
}
24
}
64
65
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
66
+float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
67
{
68
float_status *s = &env->vfp.standard_fp_status;
69
float32 product;
70
--
25
--
71
2.20.1
26
2.34.1
72
73
diff view generated by jsdifflib
1
Convert the Neon integer 3-reg-same compare insns VCGE, VCGT,
1
We prefer the FIELD macro over ad-hoc #defines for register bits;
2
VCEQ, VACGE and VACGT to decodetree.
2
switch CNTHCTL to that style before we add any more bits.
3
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-15-peter.maydell@linaro.org
7
Message-id: 20240301183219.2424889-4-peter.maydell@linaro.org
7
---
8
---
8
target/arm/neon-dp.decode | 5 +++++
9
target/arm/internals.h | 27 +++++++++++++++++++++++++--
9
target/arm/translate-neon.inc.c | 6 +++++
10
target/arm/helper.c | 9 ++++-----
10
target/arm/translate.c | 39 ++-------------------------------
11
2 files changed, 29 insertions(+), 7 deletions(-)
11
3 files changed, 13 insertions(+), 37 deletions(-)
12
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
13
diff --git a/target/arm/internals.h b/target/arm/internals.h
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
15
--- a/target/arm/internals.h
16
+++ b/target/arm/neon-dp.decode
16
+++ b/target/arm/internals.h
17
@@ -XXX,XX +XXX,XX @@ VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
17
@@ -XXX,XX +XXX,XX @@ FIELD(VTCR, SL2, 33, 1)
18
VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
18
#define HSTR_TTEE (1 << 16)
19
VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
19
#define HSTR_TJDBX (1 << 17)
20
VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
20
21
+VCEQ_fp_3s 1111 001 0 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
21
-#define CNTHCTL_CNTVMASK (1 << 18)
22
+VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
22
-#define CNTHCTL_CNTPMASK (1 << 19)
23
+VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
23
+/*
24
+VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
24
+ * Depending on the value of HCR_EL2.E2H, bits 0 and 1
25
+VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
25
+ * have different bit definitions, and EL1PCTEN might be
26
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
26
+ * bit 0 or bit 10. We use _E2H1 and _E2H0 suffixes to
27
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
27
+ * disambiguate if necessary.
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
28
+ */
29
+FIELD(CNTHCTL, EL0PCTEN_E2H1, 0, 1)
30
+FIELD(CNTHCTL, EL0VCTEN_E2H1, 1, 1)
31
+FIELD(CNTHCTL, EL1PCTEN_E2H0, 0, 1)
32
+FIELD(CNTHCTL, EL1PCEN_E2H0, 1, 1)
33
+FIELD(CNTHCTL, EVNTEN, 2, 1)
34
+FIELD(CNTHCTL, EVNTDIR, 3, 1)
35
+FIELD(CNTHCTL, EVNTI, 4, 4)
36
+FIELD(CNTHCTL, EL0VTEN, 8, 1)
37
+FIELD(CNTHCTL, EL0PTEN, 9, 1)
38
+FIELD(CNTHCTL, EL1PCTEN_E2H1, 10, 1)
39
+FIELD(CNTHCTL, EL1PTEN, 11, 1)
40
+FIELD(CNTHCTL, ECV, 12, 1)
41
+FIELD(CNTHCTL, EL1TVT, 13, 1)
42
+FIELD(CNTHCTL, EL1TVCT, 14, 1)
43
+FIELD(CNTHCTL, EL1NVPCT, 15, 1)
44
+FIELD(CNTHCTL, EL1NVVCT, 16, 1)
45
+FIELD(CNTHCTL, EVNTIS, 17, 1)
46
+FIELD(CNTHCTL, CNTVMASK, 18, 1)
47
+FIELD(CNTHCTL, CNTPMASK, 19, 1)
48
49
/* We use a few fake FSR values for internal purposes in M profile.
50
* M profile cores don't have A/R format FSRs, but currently our
51
diff --git a/target/arm/helper.c b/target/arm/helper.c
29
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-neon.inc.c
53
--- a/target/arm/helper.c
31
+++ b/target/arm/translate-neon.inc.c
54
+++ b/target/arm/helper.c
32
@@ -XXX,XX +XXX,XX @@ DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
55
@@ -XXX,XX +XXX,XX @@ static void gt_update_irq(ARMCPU *cpu, int timeridx)
33
return do_3same_fp(s, a, FUNC, READS_VD); \
56
* It is RES0 in Secure and NonSecure state.
57
*/
58
if ((ss == ARMSS_Root || ss == ARMSS_Realm) &&
59
- ((timeridx == GTIMER_VIRT && (cnthctl & CNTHCTL_CNTVMASK)) ||
60
- (timeridx == GTIMER_PHYS && (cnthctl & CNTHCTL_CNTPMASK)))) {
61
+ ((timeridx == GTIMER_VIRT && (cnthctl & R_CNTHCTL_CNTVMASK_MASK)) ||
62
+ (timeridx == GTIMER_PHYS && (cnthctl & R_CNTHCTL_CNTPMASK_MASK)))) {
63
irqstate = 0;
34
}
64
}
35
65
36
+DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
66
@@ -XXX,XX +XXX,XX @@ static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
37
+DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
38
+DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
39
+DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
40
+DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
41
+
42
static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
43
TCGv_ptr fpstatus)
44
{
67
{
45
diff --git a/target/arm/translate.c b/target/arm/translate.c
68
ARMCPU *cpu = env_archcpu(env);
46
index XXXXXXX..XXXXXXX 100644
69
uint32_t oldval = env->cp15.cnthctl_el2;
47
--- a/target/arm/translate.c
70
-
48
+++ b/target/arm/translate.c
71
raw_write(env, ri, value);
49
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
72
50
case NEON_3R_VQDMULH_VQRDMULH:
73
- if ((oldval ^ value) & CNTHCTL_CNTVMASK) {
51
case NEON_3R_FLOAT_ARITH:
74
+ if ((oldval ^ value) & R_CNTHCTL_CNTVMASK_MASK) {
52
case NEON_3R_FLOAT_MULTIPLY:
75
gt_update_irq(cpu, GTIMER_VIRT);
53
+ case NEON_3R_FLOAT_CMP:
76
- } else if ((oldval ^ value) & CNTHCTL_CNTPMASK) {
54
+ case NEON_3R_FLOAT_ACMP:
77
+ } else if ((oldval ^ value) & R_CNTHCTL_CNTPMASK_MASK) {
55
/* Already handled by decodetree */
78
gt_update_irq(cpu, GTIMER_PHYS);
56
return 1;
79
}
57
}
80
}
58
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
59
return 1; /* VPMIN/VPMAX handled by decodetree */
60
}
61
break;
62
- case NEON_3R_FLOAT_CMP:
63
- if (!u && size) {
64
- /* no encoding for U=0 C=1x */
65
- return 1;
66
- }
67
- break;
68
- case NEON_3R_FLOAT_ACMP:
69
- if (!u) {
70
- return 1;
71
- }
72
- break;
73
case NEON_3R_FLOAT_MISC:
74
/* VMAXNM/VMINNM in ARMv8 */
75
if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
76
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
77
tmp = neon_load_reg(rn, pass);
78
tmp2 = neon_load_reg(rm, pass);
79
switch (op) {
80
- case NEON_3R_FLOAT_CMP:
81
- {
82
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
83
- if (!u) {
84
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
85
- } else {
86
- if (size == 0) {
87
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
88
- } else {
89
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
90
- }
91
- }
92
- tcg_temp_free_ptr(fpstatus);
93
- break;
94
- }
95
- case NEON_3R_FLOAT_ACMP:
96
- {
97
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
98
- if (size == 0) {
99
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
100
- } else {
101
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
102
- }
103
- tcg_temp_free_ptr(fpstatus);
104
- break;
105
- }
106
case NEON_3R_FLOAT_MINMAX:
107
{
108
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
109
--
81
--
110
2.20.1
82
2.34.1
111
83
112
84
diff view generated by jsdifflib
1
Convert the Neon integer VMUL, VMLA, and VMLS 3-reg-same inssn to
1
Don't allow the guest to write CNTHCTL_EL2 bits which don't exist.
2
decodetree.
2
This is not strictly architecturally required, but it is how we've
3
tended to implement registers more recently.
3
4
4
We don't have a gvec helper for multiply-accumulate, so VMLA and VMLS
5
In particular, bits [19:18] are only present with FEAT_RME,
5
need a loop function do_3same_fp(). This takes a reads_vd parameter
6
and bits [17:12] will only be present with FEAT_ECV.
6
to do_3same_fp() which tells it to load the old value into vd before
7
calling the callback function, in the same way that the do_vfp_3op_sp()
8
and do_vfp_3op_dp() functions in translate-vfp.inc.c work. (The
9
only uses in this patch pass reads_vd == true, but later commits
10
will use reads_vd == false.)
11
12
This conversion fixes in passing an underdecoding for VMUL
13
(originally reported by Fredrik Strupe <fredrik@strupe.net>): bit 1
14
of the 'size' field must be 0. The old decoder didn't enforce this,
15
but the decodetree pattern does.
16
17
The gen_VMLA_fp_reg() function performs the addition operation
18
with the operands in the opposite order to the old decoder:
19
since Neon sets 'default NaN mode' float32_add operations are
20
commutative so there is no behaviour difference, but putting
21
them this way around matches the Arm ARM pseudocode and the
22
required operation order for the subtraction in gen_VMLS_fp_reg().
23
7
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
26
Message-id: 20200512163904.10918-14-peter.maydell@linaro.org
10
Message-id: 20240301183219.2424889-5-peter.maydell@linaro.org
27
---
11
---
28
target/arm/neon-dp.decode | 3 ++
12
target/arm/helper.c | 18 ++++++++++++++++++
29
target/arm/translate-neon.inc.c | 81 +++++++++++++++++++++++++++++++++
13
1 file changed, 18 insertions(+)
30
target/arm/translate.c | 17 +------
31
3 files changed, 85 insertions(+), 16 deletions(-)
32
14
33
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/helper.c b/target/arm/helper.c
34
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/helper.c
36
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/helper.c
37
@@ -XXX,XX +XXX,XX @@ VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
19
@@ -XXX,XX +XXX,XX @@ static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
38
VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
20
{
39
VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
21
ARMCPU *cpu = env_archcpu(env);
40
VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
22
uint32_t oldval = env->cp15.cnthctl_el2;
41
+VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
23
+ uint32_t valid_mask =
42
+VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
24
+ R_CNTHCTL_EL0PCTEN_E2H1_MASK |
43
+VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
25
+ R_CNTHCTL_EL0VCTEN_E2H1_MASK |
44
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
26
+ R_CNTHCTL_EVNTEN_MASK |
45
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
27
+ R_CNTHCTL_EVNTDIR_MASK |
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
28
+ R_CNTHCTL_EVNTI_MASK |
47
index XXXXXXX..XXXXXXX 100644
29
+ R_CNTHCTL_EL0VTEN_MASK |
48
--- a/target/arm/translate-neon.inc.c
30
+ R_CNTHCTL_EL0PTEN_MASK |
49
+++ b/target/arm/translate-neon.inc.c
31
+ R_CNTHCTL_EL1PCTEN_E2H1_MASK |
50
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPADD, padd_u)
32
+ R_CNTHCTL_EL1PTEN_MASK;
51
DO_3SAME_VQDMULH(VQDMULH, qdmulh)
52
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
53
54
+static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
55
+ bool reads_vd)
56
+{
57
+ /*
58
+ * FP operations handled elementwise 32 bits at a time.
59
+ * If reads_vd is true then the old value of Vd will be
60
+ * loaded before calling the callback function. This is
61
+ * used for multiply-accumulate type operations.
62
+ */
63
+ TCGv_i32 tmp, tmp2;
64
+ int pass;
65
+
33
+
66
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
34
+ if (cpu_isar_feature(aa64_rme, cpu)) {
67
+ return false;
35
+ valid_mask |= R_CNTHCTL_CNTVMASK_MASK | R_CNTHCTL_CNTPMASK_MASK;
68
+ }
36
+ }
69
+
37
+
70
+ /* UNDEF accesses to D16-D31 if they don't exist. */
38
+ /* Clear RES0 bits */
71
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
39
+ value &= valid_mask;
72
+ ((a->vd | a->vn | a->vm) & 0x10)) {
73
+ return false;
74
+ }
75
+
40
+
76
+ if ((a->vn | a->vm | a->vd) & a->q) {
41
raw_write(env, ri, value);
77
+ return false;
42
78
+ }
43
if ((oldval ^ value) & R_CNTHCTL_CNTVMASK_MASK) {
79
+
80
+ if (!vfp_access_check(s)) {
81
+ return true;
82
+ }
83
+
84
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
85
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
86
+ tmp = neon_load_reg(a->vn, pass);
87
+ tmp2 = neon_load_reg(a->vm, pass);
88
+ if (reads_vd) {
89
+ TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
90
+ fn(tmp_rd, tmp, tmp2, fpstatus);
91
+ neon_store_reg(a->vd, pass, tmp_rd);
92
+ tcg_temp_free_i32(tmp);
93
+ } else {
94
+ fn(tmp, tmp, tmp2, fpstatus);
95
+ neon_store_reg(a->vd, pass, tmp);
96
+ }
97
+ tcg_temp_free_i32(tmp2);
98
+ }
99
+ tcg_temp_free_ptr(fpstatus);
100
+ return true;
101
+}
102
+
103
/*
104
* For all the functions using this macro, size == 1 means fp16,
105
* which is an architecture extension we don't implement yet.
106
@@ -XXX,XX +XXX,XX @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
107
DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
108
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
109
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
110
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
111
+
112
+/*
113
+ * For all the functions using this macro, size == 1 means fp16,
114
+ * which is an architecture extension we don't implement yet.
115
+ */
116
+#define DO_3S_FP(INSN,FUNC,READS_VD) \
117
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
118
+ { \
119
+ if (a->size != 0) { \
120
+ /* TODO fp16 support */ \
121
+ return false; \
122
+ } \
123
+ return do_3same_fp(s, a, FUNC, READS_VD); \
124
+ }
125
+
126
+static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
127
+ TCGv_ptr fpstatus)
128
+{
129
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
130
+ gen_helper_vfp_adds(vd, vd, vn, fpstatus);
131
+}
132
+
133
+static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
134
+ TCGv_ptr fpstatus)
135
+{
136
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
137
+ gen_helper_vfp_subs(vd, vd, vn, fpstatus);
138
+}
139
+
140
+DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
141
+DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
142
143
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
144
{
145
diff --git a/target/arm/translate.c b/target/arm/translate.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/target/arm/translate.c
148
+++ b/target/arm/translate.c
149
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
150
case NEON_3R_VPADD_VQRDMLAH:
151
case NEON_3R_VQDMULH_VQRDMULH:
152
case NEON_3R_FLOAT_ARITH:
153
+ case NEON_3R_FLOAT_MULTIPLY:
154
/* Already handled by decodetree */
155
return 1;
156
}
157
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
158
tmp = neon_load_reg(rn, pass);
159
tmp2 = neon_load_reg(rm, pass);
160
switch (op) {
161
- case NEON_3R_FLOAT_MULTIPLY:
162
- {
163
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
164
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
165
- if (!u) {
166
- tcg_temp_free_i32(tmp2);
167
- tmp2 = neon_load_reg(rd, pass);
168
- if (size == 0) {
169
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
170
- } else {
171
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
172
- }
173
- }
174
- tcg_temp_free_ptr(fpstatus);
175
- break;
176
- }
177
case NEON_3R_FLOAT_CMP:
178
{
179
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
180
--
44
--
181
2.20.1
45
2.34.1
182
183
diff view generated by jsdifflib
1
Convert the Neon floating point VFMA and VFMS insn to decodetree.
1
The functionality defined by ID_AA64MMFR0_EL1.ECV == 1 is:
2
These are the last insns in the 3-reg-same group so we can
2
* four new trap bits for various counter and timer registers
3
remove all the support/loop code from the old decoder.
3
* the CNTHCTL_EL2.EVNTIS and CNTKCTL_EL1.EVNTIS bits which control
4
scaling of the event stream. This is a no-op for us, because we don't
5
implement the event stream (our WFE is a NOP): all we need to do is
6
allow CNTHCTL_EL2.ENVTIS to be read and written.
7
* extensions to PMSCR_EL1.PCT, PMSCR_EL2.PCT, TRFCR_EL1.TS and
8
TRFCR_EL2.TS: these are all no-ops for us, because we don't implement
9
FEAT_SPE or FEAT_TRF.
10
* new registers CNTPCTSS_EL0 and NCTVCTSS_EL0 which are
11
"self-sychronizing" views of the CNTPCT_EL0 and CNTVCT_EL0, meaning
12
that no barriers are needed around their accesses. For us these
13
are just the same as the normal views, because all our sysregs are
14
inherently self-sychronizing.
15
16
In this commit we implement the trap handling and permit the new
17
CNTHCTL_EL2 bits to be written.
4
18
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-18-peter.maydell@linaro.org
21
Message-id: 20240301183219.2424889-6-peter.maydell@linaro.org
8
---
22
---
9
target/arm/neon-dp.decode | 3 +
23
target/arm/cpu-features.h | 5 ++++
10
target/arm/translate-neon.inc.c | 41 ++++++++
24
target/arm/helper.c | 51 +++++++++++++++++++++++++++++++++++----
11
target/arm/translate.c | 176 +-------------------------------
25
2 files changed, 51 insertions(+), 5 deletions(-)
12
3 files changed, 46 insertions(+), 174 deletions(-)
13
26
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
27
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
15
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
29
--- a/target/arm/cpu-features.h
17
+++ b/target/arm/neon-dp.decode
30
+++ b/target/arm/cpu-features.h
18
@@ -XXX,XX +XXX,XX @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
31
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id)
19
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
32
return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0;
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
21
22
+VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
23
+VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
24
+
25
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
26
27
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-neon.inc.c
31
+++ b/target/arm/translate-neon.inc.c
32
@@ -XXX,XX +XXX,XX @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
33
return do_3same(s, a, gen_VRSQRTS_fp_3s);
34
}
33
}
35
34
36
+static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
35
+static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id)
37
+ TCGv_ptr fpstatus)
38
+{
36
+{
39
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
37
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0;
40
+}
38
+}
41
+
39
+
42
+static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
40
static inline bool isar_feature_aa64_vh(const ARMISARegisters *id)
41
{
42
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0;
43
diff --git a/target/arm/helper.c b/target/arm/helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/helper.c
46
+++ b/target/arm/helper.c
47
@@ -XXX,XX +XXX,XX @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
48
: !extract32(env->cp15.cnthctl_el2, 0, 1))) {
49
return CP_ACCESS_TRAP_EL2;
50
}
51
+ if (has_el2 && timeridx == GTIMER_VIRT) {
52
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVCT)) {
53
+ return CP_ACCESS_TRAP_EL2;
54
+ }
55
+ }
56
break;
57
}
58
return CP_ACCESS_OK;
59
@@ -XXX,XX +XXX,XX @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx,
60
}
61
}
62
}
63
+ if (has_el2 && timeridx == GTIMER_VIRT) {
64
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVT)) {
65
+ return CP_ACCESS_TRAP_EL2;
66
+ }
67
+ }
68
break;
69
}
70
return CP_ACCESS_OK;
71
@@ -XXX,XX +XXX,XX @@ static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
72
if (cpu_isar_feature(aa64_rme, cpu)) {
73
valid_mask |= R_CNTHCTL_CNTVMASK_MASK | R_CNTHCTL_CNTPMASK_MASK;
74
}
75
+ if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
76
+ valid_mask |=
77
+ R_CNTHCTL_EL1TVT_MASK |
78
+ R_CNTHCTL_EL1TVCT_MASK |
79
+ R_CNTHCTL_EL1NVPCT_MASK |
80
+ R_CNTHCTL_EL1NVVCT_MASK |
81
+ R_CNTHCTL_EVNTIS_MASK;
82
+ }
83
84
/* Clear RES0 bits */
85
value &= valid_mask;
86
@@ -XXX,XX +XXX,XX @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
87
{
88
if (arm_current_el(env) == 1) {
89
/* This must be a FEAT_NV access */
90
- /* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */
91
return CP_ACCESS_OK;
92
}
93
if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
94
@@ -XXX,XX +XXX,XX @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
95
return CP_ACCESS_OK;
96
}
97
98
+static CPAccessResult access_el1nvpct(CPUARMState *env, const ARMCPRegInfo *ri,
99
+ bool isread)
43
+{
100
+{
44
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
101
+ if (arm_current_el(env) == 1) {
45
+ return false;
102
+ /* This must be a FEAT_NV access with NVx == 101 */
103
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVPCT)) {
104
+ return CP_ACCESS_TRAP_EL2;
105
+ }
46
+ }
106
+ }
47
+
107
+ return e2h_access(env, ri, isread);
48
+ if (a->size != 0) {
49
+ /* TODO fp16 support */
50
+ return false;
51
+ }
52
+
53
+ return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
54
+}
108
+}
55
+
109
+
56
+static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
110
+static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri,
57
+ TCGv_ptr fpstatus)
111
+ bool isread)
58
+{
112
+{
59
+ gen_helper_vfp_negs(vn, vn);
113
+ if (arm_current_el(env) == 1) {
60
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
114
+ /* This must be a FEAT_NV access with NVx == 101 */
115
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVVCT)) {
116
+ return CP_ACCESS_TRAP_EL2;
117
+ }
118
+ }
119
+ return e2h_access(env, ri, isread);
61
+}
120
+}
62
+
121
+
63
+static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
122
/* Test if system register redirection is to occur in the current state. */
64
+{
123
static bool redirect_for_e2h(CPUARMState *env)
65
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
66
+ return false;
67
+ }
68
+
69
+ if (a->size != 0) {
70
+ /* TODO fp16 support */
71
+ return false;
72
+ }
73
+
74
+ return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
75
+}
76
+
77
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
78
{
124
{
79
/* FP operations handled pairwise 32 bits at a time */
125
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo vhe_reginfo[] = {
80
diff --git a/target/arm/translate.c b/target/arm/translate.c
126
{ .name = "CNTP_CTL_EL02", .state = ARM_CP_STATE_AA64,
81
index XXXXXXX..XXXXXXX 100644
127
.opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 1,
82
--- a/target/arm/translate.c
128
.type = ARM_CP_IO | ARM_CP_ALIAS,
83
+++ b/target/arm/translate.c
129
- .access = PL2_RW, .accessfn = e2h_access,
84
@@ -XXX,XX +XXX,XX @@ static void gen_neon_narrow_op(int op, int u, int size,
130
+ .access = PL2_RW, .accessfn = access_el1nvpct,
85
}
131
.nv2_redirect_offset = 0x180 | NV2_REDIR_NO_NV1,
86
}
132
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
87
133
.writefn = gt_phys_ctl_write, .raw_writefn = raw_write },
88
-/* Symbolic constants for op fields for Neon 3-register same-length.
134
{ .name = "CNTV_CTL_EL02", .state = ARM_CP_STATE_AA64,
89
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
135
.opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 1,
90
- * table A7-9.
136
.type = ARM_CP_IO | ARM_CP_ALIAS,
91
- */
137
- .access = PL2_RW, .accessfn = e2h_access,
92
-#define NEON_3R_VHADD 0
138
+ .access = PL2_RW, .accessfn = access_el1nvvct,
93
-#define NEON_3R_VQADD 1
139
.nv2_redirect_offset = 0x170 | NV2_REDIR_NO_NV1,
94
-#define NEON_3R_VRHADD 2
140
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
95
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
141
.writefn = gt_virt_ctl_write, .raw_writefn = raw_write },
96
-#define NEON_3R_VHSUB 4
142
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo vhe_reginfo[] = {
97
-#define NEON_3R_VQSUB 5
143
.type = ARM_CP_IO | ARM_CP_ALIAS,
98
-#define NEON_3R_VCGT 6
144
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
99
-#define NEON_3R_VCGE 7
145
.nv2_redirect_offset = 0x178 | NV2_REDIR_NO_NV1,
100
-#define NEON_3R_VSHL 8
146
- .access = PL2_RW, .accessfn = e2h_access,
101
-#define NEON_3R_VQSHL 9
147
+ .access = PL2_RW, .accessfn = access_el1nvpct,
102
-#define NEON_3R_VRSHL 10
148
.writefn = gt_phys_cval_write, .raw_writefn = raw_write },
103
-#define NEON_3R_VQRSHL 11
149
{ .name = "CNTV_CVAL_EL02", .state = ARM_CP_STATE_AA64,
104
-#define NEON_3R_VMAX 12
150
.opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 2,
105
-#define NEON_3R_VMIN 13
151
.type = ARM_CP_IO | ARM_CP_ALIAS,
106
-#define NEON_3R_VABD 14
152
.nv2_redirect_offset = 0x168 | NV2_REDIR_NO_NV1,
107
-#define NEON_3R_VABA 15
153
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
108
-#define NEON_3R_VADD_VSUB 16
154
- .access = PL2_RW, .accessfn = e2h_access,
109
-#define NEON_3R_VTST_VCEQ 17
155
+ .access = PL2_RW, .accessfn = access_el1nvvct,
110
-#define NEON_3R_VML 18 /* VMLA, VMLS */
156
.writefn = gt_virt_cval_write, .raw_writefn = raw_write },
111
-#define NEON_3R_VMUL 19
157
#endif
112
-#define NEON_3R_VPMAX 20
158
};
113
-#define NEON_3R_VPMIN 21
114
-#define NEON_3R_VQDMULH_VQRDMULH 22
115
-#define NEON_3R_VPADD_VQRDMLAH 23
116
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
117
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
118
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
119
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
120
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
121
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
122
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
123
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
124
-
125
-static const uint8_t neon_3r_sizes[] = {
126
- [NEON_3R_VHADD] = 0x7,
127
- [NEON_3R_VQADD] = 0xf,
128
- [NEON_3R_VRHADD] = 0x7,
129
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
130
- [NEON_3R_VHSUB] = 0x7,
131
- [NEON_3R_VQSUB] = 0xf,
132
- [NEON_3R_VCGT] = 0x7,
133
- [NEON_3R_VCGE] = 0x7,
134
- [NEON_3R_VSHL] = 0xf,
135
- [NEON_3R_VQSHL] = 0xf,
136
- [NEON_3R_VRSHL] = 0xf,
137
- [NEON_3R_VQRSHL] = 0xf,
138
- [NEON_3R_VMAX] = 0x7,
139
- [NEON_3R_VMIN] = 0x7,
140
- [NEON_3R_VABD] = 0x7,
141
- [NEON_3R_VABA] = 0x7,
142
- [NEON_3R_VADD_VSUB] = 0xf,
143
- [NEON_3R_VTST_VCEQ] = 0x7,
144
- [NEON_3R_VML] = 0x7,
145
- [NEON_3R_VMUL] = 0x7,
146
- [NEON_3R_VPMAX] = 0x7,
147
- [NEON_3R_VPMIN] = 0x7,
148
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
149
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
150
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
151
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
152
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
153
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
154
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
155
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
156
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
157
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
158
-};
159
-
160
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
161
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
162
* table A7-13.
163
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
164
rm_ofs = neon_reg_offset(rm, 0);
165
166
if ((insn & (1 << 23)) == 0) {
167
- /* Three register same length. */
168
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
169
- /* Catch invalid op and bad size combinations: UNDEF */
170
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
171
- return 1;
172
- }
173
- /* All insns of this form UNDEF for either this condition or the
174
- * superset of cases "Q==1"; we catch the latter later.
175
- */
176
- if (q && ((rd | rn | rm) & 1)) {
177
- return 1;
178
- }
179
- switch (op) {
180
- case NEON_3R_VFM_VQRDMLSH:
181
- if (!u) {
182
- /* VFM, VFMS */
183
- if (size == 1) {
184
- return 1;
185
- }
186
- break;
187
- }
188
- /* VQRDMLSH : handled by decodetree */
189
- return 1;
190
-
191
- case NEON_3R_VADD_VSUB:
192
- case NEON_3R_LOGIC:
193
- case NEON_3R_VMAX:
194
- case NEON_3R_VMIN:
195
- case NEON_3R_VTST_VCEQ:
196
- case NEON_3R_VCGT:
197
- case NEON_3R_VCGE:
198
- case NEON_3R_VQADD:
199
- case NEON_3R_VQSUB:
200
- case NEON_3R_VMUL:
201
- case NEON_3R_VML:
202
- case NEON_3R_VSHL:
203
- case NEON_3R_SHA:
204
- case NEON_3R_VHADD:
205
- case NEON_3R_VRHADD:
206
- case NEON_3R_VHSUB:
207
- case NEON_3R_VABD:
208
- case NEON_3R_VABA:
209
- case NEON_3R_VQSHL:
210
- case NEON_3R_VRSHL:
211
- case NEON_3R_VQRSHL:
212
- case NEON_3R_VPMAX:
213
- case NEON_3R_VPMIN:
214
- case NEON_3R_VPADD_VQRDMLAH:
215
- case NEON_3R_VQDMULH_VQRDMULH:
216
- case NEON_3R_FLOAT_ARITH:
217
- case NEON_3R_FLOAT_MULTIPLY:
218
- case NEON_3R_FLOAT_CMP:
219
- case NEON_3R_FLOAT_ACMP:
220
- case NEON_3R_FLOAT_MINMAX:
221
- case NEON_3R_FLOAT_MISC:
222
- /* Already handled by decodetree */
223
- return 1;
224
- }
225
-
226
- if (size == 3) {
227
- /* 64-bit element instructions: handled by decodetree */
228
- return 1;
229
- }
230
- switch (op) {
231
- case NEON_3R_VFM_VQRDMLSH:
232
- if (!dc_isar_feature(aa32_simdfmac, s)) {
233
- return 1;
234
- }
235
- break;
236
- default:
237
- break;
238
- }
239
-
240
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
241
-
242
- /* Elementwise. */
243
- tmp = neon_load_reg(rn, pass);
244
- tmp2 = neon_load_reg(rm, pass);
245
- switch (op) {
246
- case NEON_3R_VFM_VQRDMLSH:
247
- {
248
- /* VFMA, VFMS: fused multiply-add */
249
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
250
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
251
- if (size) {
252
- /* VFMS */
253
- gen_helper_vfp_negs(tmp, tmp);
254
- }
255
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
256
- tcg_temp_free_i32(tmp3);
257
- tcg_temp_free_ptr(fpstatus);
258
- break;
259
- }
260
- default:
261
- abort();
262
- }
263
- tcg_temp_free_i32(tmp2);
264
-
265
- neon_store_reg(rd, pass, tmp);
266
-
267
- } /* for pass */
268
- /* End of 3 register same size operations. */
269
+ /* Three register same length: handled by decodetree */
270
+ return 1;
271
} else if (insn & (1 << 4)) {
272
if ((insn & 0x00380080) != 0) {
273
/* Two registers and shift. */
274
--
159
--
275
2.20.1
160
2.34.1
276
277
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
For FEAT_ECV, new registers CNTPCTSS_EL0 and CNTVCTSS_EL0 are
2
defined, which are "self-synchronized" views of the physical and
3
virtual counts as seen in the CNTPCT_EL0 and CNTVCT_EL0 registers
4
(meaning that no barriers are needed around accesses to them to
5
ensure that reads of them do not occur speculatively and out-of-order
6
with other instructions).
2
7
3
This patch builds Hardware Error Source Table(HEST) via fw_cfg blobs.
8
For QEMU, all our system registers are self-synchronized, so we can
4
Now it only supports ARMv8 SEA, a type of Generic Hardware Error
9
simply copy the existing implementation of CNTPCT_EL0 and CNTVCT_EL0
5
Source version 2(GHESv2) error source. Afterwards, we can extend
10
to the new register encodings.
6
the supported types if needed. For the CPER section, currently it
7
is memory section because kernel mainly wants userspace to handle
8
the memory errors.
9
11
10
This patch follows the spec ACPI 6.2 to build the Hardware Error
12
This means we now implement all the functionality required for
11
Source table. For more detailed information, please refer to
13
ID_AA64MMFR0_EL1.ECV == 0b0001.
12
document: docs/specs/acpi_hest_ghes.rst
13
14
14
build_ghes_hw_error_notification() helper will help to add Hardware
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Error Notification to ACPI tables without using packed C structures
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
and avoid endianness issues as API doesn't need explicit conversion.
17
Message-id: 20240301183219.2424889-7-peter.maydell@linaro.org
18
---
19
target/arm/helper.c | 43 +++++++++++++++++++++++++++++++++++++++++++
20
1 file changed, 43 insertions(+)
17
21
18
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
22
diff --git a/target/arm/helper.c b/target/arm/helper.c
19
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
20
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
21
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
22
Message-id: 20200512030609.19593-6-gengdongjiu@huawei.com
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
---
25
include/hw/acpi/ghes.h | 39 ++++++++++++
26
hw/acpi/ghes.c | 126 +++++++++++++++++++++++++++++++++++++++
27
hw/arm/virt-acpi-build.c | 2 +
28
3 files changed, 167 insertions(+)
29
30
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
31
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
32
--- a/include/hw/acpi/ghes.h
24
--- a/target/arm/helper.c
33
+++ b/include/hw/acpi/ghes.h
25
+++ b/target/arm/helper.c
34
@@ -XXX,XX +XXX,XX @@
26
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
35
27
},
36
#include "hw/acpi/bios-linker-loader.h"
28
};
37
29
38
+/*
30
+/*
39
+ * Values for Hardware Error Notification Type field
31
+ * FEAT_ECV adds extra views of CNTVCT_EL0 and CNTPCT_EL0 which
32
+ * are "self-synchronizing". For QEMU all sysregs are self-synchronizing,
33
+ * so our implementations here are identical to the normal registers.
40
+ */
34
+ */
41
+enum AcpiGhesNotifyType {
35
+static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = {
42
+ /* Polled */
36
+ { .name = "CNTVCTSS", .cp = 15, .crm = 14, .opc1 = 9,
43
+ ACPI_GHES_NOTIFY_POLLED = 0,
37
+ .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO,
44
+ /* External Interrupt */
38
+ .accessfn = gt_vct_access,
45
+ ACPI_GHES_NOTIFY_EXTERNAL = 1,
39
+ .readfn = gt_virt_cnt_read, .resetfn = arm_cp_reset_ignore,
46
+ /* Local Interrupt */
40
+ },
47
+ ACPI_GHES_NOTIFY_LOCAL = 2,
41
+ { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64,
48
+ /* SCI */
42
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6,
49
+ ACPI_GHES_NOTIFY_SCI = 3,
43
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
50
+ /* NMI */
44
+ .accessfn = gt_vct_access, .readfn = gt_virt_cnt_read,
51
+ ACPI_GHES_NOTIFY_NMI = 4,
45
+ },
52
+ /* CMCI, ACPI 5.0: 18.3.2.7, Table 18-290 */
46
+ { .name = "CNTPCTSS", .cp = 15, .crm = 14, .opc1 = 8,
53
+ ACPI_GHES_NOTIFY_CMCI = 5,
47
+ .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO,
54
+ /* MCE, ACPI 5.0: 18.3.2.7, Table 18-290 */
48
+ .accessfn = gt_pct_access,
55
+ ACPI_GHES_NOTIFY_MCE = 6,
49
+ .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
56
+ /* GPIO-Signal, ACPI 6.0: 18.3.2.7, Table 18-332 */
50
+ },
57
+ ACPI_GHES_NOTIFY_GPIO = 7,
51
+ { .name = "CNTPCTSS_EL0", .state = ARM_CP_STATE_AA64,
58
+ /* ARMv8 SEA, ACPI 6.1: 18.3.2.9, Table 18-345 */
52
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 5,
59
+ ACPI_GHES_NOTIFY_SEA = 8,
53
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
60
+ /* ARMv8 SEI, ACPI 6.1: 18.3.2.9, Table 18-345 */
54
+ .accessfn = gt_pct_access, .readfn = gt_cnt_read,
61
+ ACPI_GHES_NOTIFY_SEI = 9,
55
+ },
62
+ /* External Interrupt - GSIV, ACPI 6.1: 18.3.2.9, Table 18-345 */
63
+ ACPI_GHES_NOTIFY_GSIV = 10,
64
+ /* Software Delegated Exception, ACPI 6.2: 18.3.2.9, Table 18-383 */
65
+ ACPI_GHES_NOTIFY_SDEI = 11,
66
+ /* 12 and greater are reserved */
67
+ ACPI_GHES_NOTIFY_RESERVED = 12
68
+};
56
+};
69
+
57
+
70
+enum {
58
#else
71
+ ACPI_HEST_SRC_ID_SEA = 0,
59
72
+ /* future ids go here */
60
/*
73
+ ACPI_HEST_SRC_ID_RESERVED,
61
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
62
},
63
};
64
65
+/*
66
+ * CNTVCTSS_EL0 has the same trap conditions as CNTVCT_EL0, so it also
67
+ * is exposed to userspace by Linux.
68
+ */
69
+static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = {
70
+ { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64,
71
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6,
72
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
73
+ .readfn = gt_virt_cnt_read,
74
+ },
74
+};
75
+};
75
+
76
+
76
void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
77
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
78
#endif
77
#endif
79
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
78
80
index XXXXXXX..XXXXXXX 100644
79
static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
81
--- a/hw/acpi/ghes.c
80
@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
82
+++ b/hw/acpi/ghes.c
81
if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
83
@@ -XXX,XX +XXX,XX @@
82
define_arm_cp_regs(cpu, generic_timer_cp_reginfo);
84
#include "qemu/units.h"
83
}
85
#include "hw/acpi/ghes.h"
84
+ if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
86
#include "hw/acpi/aml-build.h"
85
+ define_arm_cp_regs(cpu, gen_timer_ecv_cp_reginfo);
87
+#include "qemu/error-report.h"
88
89
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
90
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
91
@@ -XXX,XX +XXX,XX @@
92
/* Now only support ARMv8 SEA notification type error source */
93
#define ACPI_GHES_ERROR_SOURCE_COUNT 1
94
95
+/* Generic Hardware Error Source version 2 */
96
+#define ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10
97
+
98
+/* Address offset in Generic Address Structure(GAS) */
99
+#define GAS_ADDR_OFFSET 4
100
+
101
+/*
102
+ * Hardware Error Notification
103
+ * ACPI 4.0: 17.3.2.7 Hardware Error Notification
104
+ * Composes dummy Hardware Error Notification descriptor of specified type
105
+ */
106
+static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
107
+{
108
+ /* Type */
109
+ build_append_int_noprefix(table, type, 1);
110
+ /*
111
+ * Length:
112
+ * Total length of the structure in bytes
113
+ */
114
+ build_append_int_noprefix(table, 28, 1);
115
+ /* Configuration Write Enable */
116
+ build_append_int_noprefix(table, 0, 2);
117
+ /* Poll Interval */
118
+ build_append_int_noprefix(table, 0, 4);
119
+ /* Vector */
120
+ build_append_int_noprefix(table, 0, 4);
121
+ /* Switch To Polling Threshold Value */
122
+ build_append_int_noprefix(table, 0, 4);
123
+ /* Switch To Polling Threshold Window */
124
+ build_append_int_noprefix(table, 0, 4);
125
+ /* Error Threshold Value */
126
+ build_append_int_noprefix(table, 0, 4);
127
+ /* Error Threshold Window */
128
+ build_append_int_noprefix(table, 0, 4);
129
+}
130
+
131
/*
132
* Build table for the hardware error fw_cfg blob.
133
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
134
@@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
135
bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
136
0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
137
}
138
+
139
+/* Build Generic Hardware Error Source version 2 (GHESv2) */
140
+static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker)
141
+{
142
+ uint64_t address_offset;
143
+ /*
144
+ * Type:
145
+ * Generic Hardware Error Source version 2(GHESv2 - Type 10)
146
+ */
147
+ build_append_int_noprefix(table_data, ACPI_GHES_SOURCE_GENERIC_ERROR_V2, 2);
148
+ /* Source Id */
149
+ build_append_int_noprefix(table_data, source_id, 2);
150
+ /* Related Source Id */
151
+ build_append_int_noprefix(table_data, 0xffff, 2);
152
+ /* Flags */
153
+ build_append_int_noprefix(table_data, 0, 1);
154
+ /* Enabled */
155
+ build_append_int_noprefix(table_data, 1, 1);
156
+
157
+ /* Number of Records To Pre-allocate */
158
+ build_append_int_noprefix(table_data, 1, 4);
159
+ /* Max Sections Per Record */
160
+ build_append_int_noprefix(table_data, 1, 4);
161
+ /* Max Raw Data Length */
162
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
163
+
164
+ address_offset = table_data->len;
165
+ /* Error Status Address */
166
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
167
+ 4 /* QWord access */, 0);
168
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
169
+ address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t),
170
+ ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t));
171
+
172
+ switch (source_id) {
173
+ case ACPI_HEST_SRC_ID_SEA:
174
+ /*
175
+ * Notification Structure
176
+ * Now only enable ARMv8 SEA notification type
177
+ */
178
+ build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA);
179
+ break;
180
+ default:
181
+ error_report("Not support this error source");
182
+ abort();
183
+ }
86
+ }
184
+
87
if (arm_feature(env, ARM_FEATURE_VAPA)) {
185
+ /* Error Status Block Length */
88
ARMCPRegInfo vapa_cp_reginfo[] = {
186
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
89
{ .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
187
+
188
+ /*
189
+ * Read Ack Register
190
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source
191
+ * version 2 (GHESv2 - Type 10)
192
+ */
193
+ address_offset = table_data->len;
194
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
195
+ 4 /* QWord access */, 0);
196
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
197
+ address_offset + GAS_ADDR_OFFSET,
198
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
199
+ (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t));
200
+
201
+ /*
202
+ * Read Ack Preserve field
203
+ * We only provide the first bit in Read Ack Register to OSPM to write
204
+ * while the other bits are preserved.
205
+ */
206
+ build_append_int_noprefix(table_data, ~0x1ULL, 8);
207
+ /* Read Ack Write */
208
+ build_append_int_noprefix(table_data, 0x1, 8);
209
+}
210
+
211
+/* Build Hardware Error Source Table */
212
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker)
213
+{
214
+ uint64_t hest_start = table_data->len;
215
+
216
+ /* Hardware Error Source Table header*/
217
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
218
+
219
+ /* Error Source Count */
220
+ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
221
+
222
+ build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
223
+
224
+ build_header(linker, table_data, (void *)(table_data->data + hest_start),
225
+ "HEST", table_data->len - hest_start, 1, NULL, NULL);
226
+}
227
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
228
index XXXXXXX..XXXXXXX 100644
229
--- a/hw/arm/virt-acpi-build.c
230
+++ b/hw/arm/virt-acpi-build.c
231
@@ -XXX,XX +XXX,XX @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
232
233
if (vms->ras) {
234
build_ghes_error_table(tables->hardware_errors, tables->linker);
235
+ acpi_add_table(table_offsets, tables_blob);
236
+ acpi_build_hest(tables_blob, tables->linker);
237
}
238
239
if (ms->numa_state->num_nodes > 0) {
240
--
90
--
241
2.20.1
91
2.34.1
242
243
diff view generated by jsdifflib
1
Convert the Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS 3-reg-same
1
When ID_AA64MMFR0_EL1.ECV is 0b0010, a new register CNTPOFF_EL2 is
2
insns to decodetree. (These are all the remaining non-accumulation
2
implemented. This is similar to the existing CNTVOFF_EL2, except
3
instructions in this group.)
3
that it controls a hypervisor-adjustable offset made to the physical
4
counter and timer.
5
6
Implement the handling for this register, which includes control/trap
7
bits in SCR_EL3 and CNTHCTL_EL2.
4
8
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-17-peter.maydell@linaro.org
11
Message-id: 20240301183219.2424889-8-peter.maydell@linaro.org
8
---
12
---
9
target/arm/neon-dp.decode | 6 +++
13
target/arm/cpu-features.h | 5 +++
10
target/arm/translate-neon.inc.c | 70 +++++++++++++++++++++++++++++++++
14
target/arm/cpu.h | 1 +
11
target/arm/translate.c | 42 +-------------------
15
target/arm/helper.c | 68 +++++++++++++++++++++++++++++++++++++--
12
3 files changed, 78 insertions(+), 40 deletions(-)
16
target/arm/trace-events | 1 +
17
4 files changed, 73 insertions(+), 2 deletions(-)
13
18
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
19
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
15
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
21
--- a/target/arm/cpu-features.h
17
+++ b/target/arm/neon-dp.decode
22
+++ b/target/arm/cpu-features.h
18
@@ -XXX,XX +XXX,XX @@ VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
23
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id)
19
VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
24
return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0;
20
VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
25
}
21
VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
26
22
+VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
27
+static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id)
23
+VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
24
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
25
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
26
+VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
27
+VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
28
+VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
29
+VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
30
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-neon.inc.c
33
+++ b/target/arm/translate-neon.inc.c
34
@@ -XXX,XX +XXX,XX @@ DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
35
DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
36
DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
37
DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
38
+DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
39
+DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
40
41
static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
42
TCGv_ptr fpstatus)
43
@@ -XXX,XX +XXX,XX @@ static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
44
DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
45
DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
46
47
+static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
48
+{
28
+{
49
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
29
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1;
50
+ return false;
51
+ }
52
+
53
+ if (a->size != 0) {
54
+ /* TODO fp16 support */
55
+ return false;
56
+ }
57
+
58
+ return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
59
+}
30
+}
60
+
31
+
61
+static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
32
static inline bool isar_feature_aa64_vh(const ARMISARegisters *id)
33
{
34
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0;
35
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/cpu.h
38
+++ b/target/arm/cpu.h
39
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
40
uint64_t c14_cntkctl; /* Timer Control register */
41
uint64_t cnthctl_el2; /* Counter/Timer Hyp Control register */
42
uint64_t cntvoff_el2; /* Counter Virtual Offset register */
43
+ uint64_t cntpoff_el2; /* Counter Physical Offset register */
44
ARMGenericTimer c14_timer[NUM_GTIMERS];
45
uint32_t c15_cpar; /* XScale Coprocessor Access Register */
46
uint32_t c15_ticonfig; /* TI925T configuration byte. */
47
diff --git a/target/arm/helper.c b/target/arm/helper.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/helper.c
50
+++ b/target/arm/helper.c
51
@@ -XXX,XX +XXX,XX @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
52
if (cpu_isar_feature(aa64_rme, cpu)) {
53
valid_mask |= SCR_NSE | SCR_GPF;
54
}
55
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
56
+ valid_mask |= SCR_ECVEN;
57
+ }
58
} else {
59
valid_mask &= ~(SCR_RW | SCR_ST);
60
if (cpu_isar_feature(aa32_ras, cpu)) {
61
@@ -XXX,XX +XXX,XX @@ void gt_rme_post_el_change(ARMCPU *cpu, void *ignored)
62
gt_update_irq(cpu, GTIMER_PHYS);
63
}
64
65
+static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env)
62
+{
66
+{
63
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
67
+ if ((env->cp15.scr_el3 & SCR_ECVEN) &&
64
+ return false;
68
+ FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, ECV) &&
69
+ arm_is_el2_enabled(env) &&
70
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
71
+ return env->cp15.cntpoff_el2;
65
+ }
72
+ }
66
+
73
+ return 0;
67
+ if (a->size != 0) {
68
+ /* TODO fp16 support */
69
+ return false;
70
+ }
71
+
72
+ return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
73
+}
74
+}
74
+
75
+
75
+WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)
76
+static uint64_t gt_phys_cnt_offset(CPUARMState *env)
76
+
77
+static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
78
+ uint32_t rn_ofs, uint32_t rm_ofs,
79
+ uint32_t oprsz, uint32_t maxsz)
80
+{
77
+{
81
+ static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
78
+ if (arm_current_el(env) >= 2) {
82
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
79
+ return 0;
80
+ }
81
+ return gt_phys_raw_cnt_offset(env);
83
+}
82
+}
84
+
83
+
85
+static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
84
static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
85
{
86
ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx];
87
@@ -XXX,XX +XXX,XX @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
88
* reset timer to when ISTATUS next has to change
89
*/
90
uint64_t offset = timeridx == GTIMER_VIRT ?
91
- cpu->env.cp15.cntvoff_el2 : 0;
92
+ cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env);
93
uint64_t count = gt_get_countervalue(&cpu->env);
94
/* Note that this must be unsigned 64 bit arithmetic: */
95
int istatus = count - offset >= gt->cval;
96
@@ -XXX,XX +XXX,XX @@ static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri,
97
98
static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
99
{
100
- return gt_get_countervalue(env);
101
+ return gt_get_countervalue(env) - gt_phys_cnt_offset(env);
102
}
103
104
static uint64_t gt_virt_cnt_offset(CPUARMState *env)
105
@@ -XXX,XX +XXX,XX @@ static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
106
case GTIMER_HYPVIRT:
107
offset = gt_virt_cnt_offset(env);
108
break;
109
+ case GTIMER_PHYS:
110
+ offset = gt_phys_cnt_offset(env);
111
+ break;
112
}
113
114
return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
115
@@ -XXX,XX +XXX,XX @@ static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
116
case GTIMER_HYPVIRT:
117
offset = gt_virt_cnt_offset(env);
118
break;
119
+ case GTIMER_PHYS:
120
+ offset = gt_phys_cnt_offset(env);
121
+ break;
122
}
123
124
trace_arm_gt_tval_write(timeridx, value);
125
@@ -XXX,XX +XXX,XX @@ static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
126
R_CNTHCTL_EL1NVVCT_MASK |
127
R_CNTHCTL_EVNTIS_MASK;
128
}
129
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
130
+ valid_mask |= R_CNTHCTL_ECV_MASK;
131
+ }
132
133
/* Clear RES0 bits */
134
value &= valid_mask;
135
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = {
136
},
137
};
138
139
+static CPAccessResult gt_cntpoff_access(CPUARMState *env,
140
+ const ARMCPRegInfo *ri,
141
+ bool isread)
86
+{
142
+{
87
+ if (a->size != 0) {
143
+ if (arm_current_el(env) == 2 && !(env->cp15.scr_el3 & SCR_ECVEN)) {
88
+ /* TODO fp16 support */
144
+ return CP_ACCESS_TRAP_EL3;
89
+ return false;
90
+ }
145
+ }
91
+
146
+ return CP_ACCESS_OK;
92
+ return do_3same(s, a, gen_VRECPS_fp_3s);
93
+}
147
+}
94
+
148
+
95
+WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
149
+static void gt_cntpoff_write(CPUARMState *env, const ARMCPRegInfo *ri,
150
+ uint64_t value)
151
+{
152
+ ARMCPU *cpu = env_archcpu(env);
96
+
153
+
97
+static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
154
+ trace_arm_gt_cntpoff_write(value);
98
+ uint32_t rn_ofs, uint32_t rm_ofs,
155
+ raw_write(env, ri, value);
99
+ uint32_t oprsz, uint32_t maxsz)
156
+ gt_recalc_timer(cpu, GTIMER_PHYS);
100
+{
101
+ static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
102
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
103
+}
157
+}
104
+
158
+
105
+static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
159
+static const ARMCPRegInfo gen_timer_cntpoff_reginfo = {
106
+{
160
+ .name = "CNTPOFF_EL2", .state = ARM_CP_STATE_AA64,
107
+ if (a->size != 0) {
161
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 6,
108
+ /* TODO fp16 support */
162
+ .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0,
109
+ return false;
163
+ .accessfn = gt_cntpoff_access, .writefn = gt_cntpoff_write,
164
+ .nv2_redirect_offset = 0x1a8,
165
+ .fieldoffset = offsetof(CPUARMState, cp15.cntpoff_el2),
166
+};
167
#else
168
169
/*
170
@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
171
if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
172
define_arm_cp_regs(cpu, gen_timer_ecv_cp_reginfo);
173
}
174
+#ifndef CONFIG_USER_ONLY
175
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
176
+ define_one_arm_cp_reg(cpu, &gen_timer_cntpoff_reginfo);
110
+ }
177
+ }
111
+
178
+#endif
112
+ return do_3same(s, a, gen_VRSQRTS_fp_3s);
179
if (arm_feature(env, ARM_FEATURE_VAPA)) {
113
+}
180
ARMCPRegInfo vapa_cp_reginfo[] = {
114
+
181
{ .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
115
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
182
diff --git a/target/arm/trace-events b/target/arm/trace-events
116
{
117
/* FP operations handled pairwise 32 bits at a time */
118
diff --git a/target/arm/translate.c b/target/arm/translate.c
119
index XXXXXXX..XXXXXXX 100644
183
index XXXXXXX..XXXXXXX 100644
120
--- a/target/arm/translate.c
184
--- a/target/arm/trace-events
121
+++ b/target/arm/translate.c
185
+++ b/target/arm/trace-events
122
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
186
@@ -XXX,XX +XXX,XX @@ arm_gt_tval_write(int timer, uint64_t value) "gt_tval_write: timer %d value 0x%"
123
case NEON_3R_FLOAT_MULTIPLY:
187
arm_gt_ctl_write(int timer, uint64_t value) "gt_ctl_write: timer %d value 0x%" PRIx64
124
case NEON_3R_FLOAT_CMP:
188
arm_gt_imask_toggle(int timer) "gt_ctl_write: timer %d IMASK toggle"
125
case NEON_3R_FLOAT_ACMP:
189
arm_gt_cntvoff_write(uint64_t value) "gt_cntvoff_write: value 0x%" PRIx64
126
+ case NEON_3R_FLOAT_MINMAX:
190
+arm_gt_cntpoff_write(uint64_t value) "gt_cntpoff_write: value 0x%" PRIx64
127
+ case NEON_3R_FLOAT_MISC:
191
arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d"
128
/* Already handled by decodetree */
192
129
return 1;
193
# kvm.c
130
}
131
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
132
return 1;
133
}
134
switch (op) {
135
- case NEON_3R_FLOAT_MINMAX:
136
- if (u) {
137
- return 1; /* VPMIN/VPMAX handled by decodetree */
138
- }
139
- break;
140
- case NEON_3R_FLOAT_MISC:
141
- /* VMAXNM/VMINNM in ARMv8 */
142
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
143
- return 1;
144
- }
145
- break;
146
case NEON_3R_VFM_VQRDMLSH:
147
if (!dc_isar_feature(aa32_simdfmac, s)) {
148
return 1;
149
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
150
tmp = neon_load_reg(rn, pass);
151
tmp2 = neon_load_reg(rm, pass);
152
switch (op) {
153
- case NEON_3R_FLOAT_MINMAX:
154
- {
155
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
156
- if (size == 0) {
157
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
158
- } else {
159
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
160
- }
161
- tcg_temp_free_ptr(fpstatus);
162
- break;
163
- }
164
- case NEON_3R_FLOAT_MISC:
165
- if (u) {
166
- /* VMAXNM/VMINNM */
167
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
168
- if (size == 0) {
169
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
170
- } else {
171
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
172
- }
173
- tcg_temp_free_ptr(fpstatus);
174
- } else {
175
- if (size == 0) {
176
- gen_helper_recps_f32(tmp, cpu_env, tmp, tmp2);
177
- } else {
178
- gen_helper_rsqrts_f32(tmp, cpu_env, tmp, tmp2);
179
- }
180
- }
181
- break;
182
case NEON_3R_VFM_VQRDMLSH:
183
{
184
/* VFMA, VFMS: fused multiply-add */
185
--
194
--
186
2.20.1
195
2.34.1
187
188
diff view generated by jsdifflib
1
Convert the Neon float VPMIN, VPMAX and VPADD 3-reg-same insns to
1
Enable all FEAT_ECV features on the 'max' CPU.
2
decodetree. These are the only remaining 'pairwise' operations,
3
so we can delete the pairwise-specific bits of the old decoder's
4
for-each-element loop now.
5
2
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20200512163904.10918-13-peter.maydell@linaro.org
6
Message-id: 20240301183219.2424889-9-peter.maydell@linaro.org
9
---
7
---
10
target/arm/neon-dp.decode | 5 +++
8
docs/system/arm/emulation.rst | 1 +
11
target/arm/translate-neon.inc.c | 63 +++++++++++++++++++++++++++++++++
9
target/arm/tcg/cpu64.c | 1 +
12
target/arm/translate.c | 63 +++++----------------------------
10
2 files changed, 2 insertions(+)
13
3 files changed, 76 insertions(+), 55 deletions(-)
14
11
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
12
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
14
--- a/docs/system/arm/emulation.rst
18
+++ b/target/arm/neon-dp.decode
15
+++ b/docs/system/arm/emulation.rst
19
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
20
# For FP insns the high bit of 'size' is used as part of opcode decode
17
- FEAT_DotProd (Advanced SIMD dot product instructions)
21
@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
18
- FEAT_DoubleFault (Double Fault Extension)
22
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
19
- FEAT_E0PD (Preventing EL0 access to halves of address maps)
23
+@3same_fp_q0 .... ... . . . . size:1 .... .... .... . 0 . . .... \
20
+- FEAT_ECV (Enhanced Counter Virtualization)
24
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
21
- FEAT_EPAC (Enhanced pointer authentication)
25
22
- FEAT_ETS (Enhanced Translation Synchronization)
26
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
23
- FEAT_EVT (Enhanced Virtualization Traps)
27
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
24
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
28
@@ -XXX,XX +XXX,XX @@ VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
29
30
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
31
VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
32
+VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
33
VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
34
+VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
35
+VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
36
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
37
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-neon.inc.c
26
--- a/target/arm/tcg/cpu64.c
39
+++ b/target/arm/translate-neon.inc.c
27
+++ b/target/arm/tcg/cpu64.c
40
@@ -XXX,XX +XXX,XX @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
28
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
41
DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
29
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN64_2, 2); /* 64k stage2 supported */
42
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
30
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */
43
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
31
t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */
44
+
32
+ t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */
45
+static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
33
cpu->isar.id_aa64mmfr0 = t;
46
+{
34
47
+ /* FP operations handled pairwise 32 bits at a time */
35
t = cpu->isar.id_aa64mmfr1;
48
+ TCGv_i32 tmp, tmp2, tmp3;
49
+ TCGv_ptr fpstatus;
50
+
51
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
52
+ return false;
53
+ }
54
+
55
+ /* UNDEF accesses to D16-D31 if they don't exist. */
56
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
57
+ ((a->vd | a->vn | a->vm) & 0x10)) {
58
+ return false;
59
+ }
60
+
61
+ if (!vfp_access_check(s)) {
62
+ return true;
63
+ }
64
+
65
+ assert(a->q == 0); /* enforced by decode patterns */
66
+
67
+ /*
68
+ * Note that we have to be careful not to clobber the source operands
69
+ * in the "vm == vd" case by storing the result of the first pass too
70
+ * early. Since Q is 0 there are always just two passes, so instead
71
+ * of a complicated loop over each pass we just unroll.
72
+ */
73
+ fpstatus = get_fpstatus_ptr(1);
74
+ tmp = neon_load_reg(a->vn, 0);
75
+ tmp2 = neon_load_reg(a->vn, 1);
76
+ fn(tmp, tmp, tmp2, fpstatus);
77
+ tcg_temp_free_i32(tmp2);
78
+
79
+ tmp3 = neon_load_reg(a->vm, 0);
80
+ tmp2 = neon_load_reg(a->vm, 1);
81
+ fn(tmp3, tmp3, tmp2, fpstatus);
82
+ tcg_temp_free_i32(tmp2);
83
+ tcg_temp_free_ptr(fpstatus);
84
+
85
+ neon_store_reg(a->vd, 0, tmp);
86
+ neon_store_reg(a->vd, 1, tmp3);
87
+ return true;
88
+}
89
+
90
+/*
91
+ * For all the functions using this macro, size == 1 means fp16,
92
+ * which is an architecture extension we don't implement yet.
93
+ */
94
+#define DO_3S_FP_PAIR(INSN,FUNC) \
95
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
96
+ { \
97
+ if (a->size != 0) { \
98
+ /* TODO fp16 support */ \
99
+ return false; \
100
+ } \
101
+ return do_3same_fp_pair(s, a, FUNC); \
102
+ }
103
+
104
+DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
105
+DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
106
+DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
int shift;
113
int pass;
114
int count;
115
- int pairwise;
116
int u;
117
int vec_size;
118
uint32_t imm;
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
case NEON_3R_VPMIN:
121
case NEON_3R_VPADD_VQRDMLAH:
122
case NEON_3R_VQDMULH_VQRDMULH:
123
+ case NEON_3R_FLOAT_ARITH:
124
/* Already handled by decodetree */
125
return 1;
126
}
127
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
128
/* 64-bit element instructions: handled by decodetree */
129
return 1;
130
}
131
- pairwise = 0;
132
switch (op) {
133
- case NEON_3R_FLOAT_ARITH:
134
- pairwise = (u && size < 2); /* if VPADD (float) */
135
- if (!pairwise) {
136
- return 1; /* handled by decodetree */
137
- }
138
- break;
139
case NEON_3R_FLOAT_MINMAX:
140
- pairwise = u; /* if VPMIN/VPMAX (float) */
141
+ if (u) {
142
+ return 1; /* VPMIN/VPMAX handled by decodetree */
143
+ }
144
break;
145
case NEON_3R_FLOAT_CMP:
146
if (!u && size) {
147
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
148
break;
149
}
150
151
- if (pairwise && q) {
152
- /* All the pairwise insns UNDEF if Q is set */
153
- return 1;
154
- }
155
-
156
for (pass = 0; pass < (q ? 4 : 2); pass++) {
157
158
- if (pairwise) {
159
- /* Pairwise. */
160
- if (pass < 1) {
161
- tmp = neon_load_reg(rn, 0);
162
- tmp2 = neon_load_reg(rn, 1);
163
- } else {
164
- tmp = neon_load_reg(rm, 0);
165
- tmp2 = neon_load_reg(rm, 1);
166
- }
167
- } else {
168
- /* Elementwise. */
169
- tmp = neon_load_reg(rn, pass);
170
- tmp2 = neon_load_reg(rm, pass);
171
- }
172
+ /* Elementwise. */
173
+ tmp = neon_load_reg(rn, pass);
174
+ tmp2 = neon_load_reg(rm, pass);
175
switch (op) {
176
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
177
- {
178
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
179
- switch ((u << 2) | size) {
180
- case 4: /* VPADD */
181
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
182
- break;
183
- default:
184
- abort();
185
- }
186
- tcg_temp_free_ptr(fpstatus);
187
- break;
188
- }
189
case NEON_3R_FLOAT_MULTIPLY:
190
{
191
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
192
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
193
}
194
tcg_temp_free_i32(tmp2);
195
196
- /* Save the result. For elementwise operations we can put it
197
- straight into the destination register. For pairwise operations
198
- we have to be careful to avoid clobbering the source operands. */
199
- if (pairwise && rd == rm) {
200
- neon_store_scratch(pass, tmp);
201
- } else {
202
- neon_store_reg(rd, pass, tmp);
203
- }
204
+ neon_store_reg(rd, pass, tmp);
205
206
} /* for pass */
207
- if (pairwise && rd == rm) {
208
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
209
- tmp = neon_load_scratch(pass);
210
- neon_store_reg(rd, pass, tmp);
211
- }
212
- }
213
/* End of 3 register same size operations. */
214
} else if (insn & (1 << 4)) {
215
if ((insn & 0x00380080) != 0) {
216
--
36
--
217
2.20.1
37
2.34.1
218
38
219
39
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Inès Varhol <ines.varhol@telecom-paris.fr>
2
2
3
This patch builds error_block_address and read_ack_register fields
3
Features supported :
4
in hardware errors table , the error_block_address points to Generic
4
- the 8 STM32L4x5 GPIOs are initialized with their reset values
5
Error Status Block(GESB) via bios_linker. The max size for one GESB
5
(except IDR, see below)
6
is 1kb, For more detailed information, please refer to
6
- input mode : setting a pin in input mode "externally" (using input
7
document: docs/specs/acpi_hest_ghes.rst
7
irqs) results in an out irq (transmitted to SYSCFG)
8
- output mode : setting a bit in ODR sets the corresponding out irq
9
(if this line is configured in output mode)
10
- pull-up, pull-down
11
- push-pull, open-drain
8
12
9
Now we only support one Error source, if necessary, we can extend to
13
Difference with the real GPIOs :
10
support more.
14
- Alternate Function and Analog mode aren't implemented :
15
pins in AF/Analog behave like pins in input mode
16
- floating pins stay at their last value
17
- register IDR reset values differ from the real one :
18
values are coherent with the other registers reset values
19
and the fact that AF/Analog modes aren't implemented
20
- setting I/O output speed isn't supported
21
- locking port bits isn't supported
22
- ADC function isn't supported
23
- GPIOH has 16 pins instead of 2 pins
24
- writing to registers LCKR, AFRL, AFRH and ASCR is ineffective
11
25
12
Suggested-by: Laszlo Ersek <lersek@redhat.com>
26
Signed-off-by: Arnaud Minier <arnaud.minier@telecom-paris.fr>
13
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
27
Signed-off-by: Inès Varhol <ines.varhol@telecom-paris.fr>
14
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
28
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
29
Acked-by: Alistair Francis <alistair.francis@wdc.com>
16
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
30
Message-id: 20240305210444.310665-2-ines.varhol@telecom-paris.fr
17
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
18
Message-id: 20200512030609.19593-5-gengdongjiu@huawei.com
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
---
32
---
21
default-configs/arm-softmmu.mak | 1 +
33
MAINTAINERS | 1 +
22
include/hw/acpi/aml-build.h | 1 +
34
docs/system/arm/b-l475e-iot01a.rst | 2 +-
23
include/hw/acpi/ghes.h | 28 +++++++++++
35
include/hw/gpio/stm32l4x5_gpio.h | 70 +++++
24
hw/acpi/aml-build.c | 2 +
36
hw/gpio/stm32l4x5_gpio.c | 477 +++++++++++++++++++++++++++++
25
hw/acpi/ghes.c | 89 +++++++++++++++++++++++++++++++++
37
hw/gpio/Kconfig | 3 +
26
hw/arm/virt-acpi-build.c | 5 ++
38
hw/gpio/meson.build | 1 +
27
hw/acpi/Kconfig | 4 ++
39
hw/gpio/trace-events | 6 +
28
hw/acpi/Makefile.objs | 1 +
40
7 files changed, 559 insertions(+), 1 deletion(-)
29
8 files changed, 131 insertions(+)
41
create mode 100644 include/hw/gpio/stm32l4x5_gpio.h
30
create mode 100644 include/hw/acpi/ghes.h
42
create mode 100644 hw/gpio/stm32l4x5_gpio.c
31
create mode 100644 hw/acpi/ghes.c
32
43
33
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
44
diff --git a/MAINTAINERS b/MAINTAINERS
34
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
35
--- a/default-configs/arm-softmmu.mak
46
--- a/MAINTAINERS
36
+++ b/default-configs/arm-softmmu.mak
47
+++ b/MAINTAINERS
37
@@ -XXX,XX +XXX,XX @@ CONFIG_FSL_IMX7=y
48
@@ -XXX,XX +XXX,XX @@ F: hw/arm/stm32l4x5_soc.c
38
CONFIG_FSL_IMX6UL=y
49
F: hw/misc/stm32l4x5_exti.c
39
CONFIG_SEMIHOSTING=y
50
F: hw/misc/stm32l4x5_syscfg.c
40
CONFIG_ALLWINNER_H3=y
51
F: hw/misc/stm32l4x5_rcc.c
41
+CONFIG_ACPI_APEI=y
52
+F: hw/gpio/stm32l4x5_gpio.c
42
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
53
F: include/hw/*/stm32l4x5_*.h
54
55
B-L475E-IOT01A IoT Node
56
diff --git a/docs/system/arm/b-l475e-iot01a.rst b/docs/system/arm/b-l475e-iot01a.rst
43
index XXXXXXX..XXXXXXX 100644
57
index XXXXXXX..XXXXXXX 100644
44
--- a/include/hw/acpi/aml-build.h
58
--- a/docs/system/arm/b-l475e-iot01a.rst
45
+++ b/include/hw/acpi/aml-build.h
59
+++ b/docs/system/arm/b-l475e-iot01a.rst
46
@@ -XXX,XX +XXX,XX @@ struct AcpiBuildTables {
60
@@ -XXX,XX +XXX,XX @@ Currently B-L475E-IOT01A machine's only supports the following devices:
47
GArray *rsdp;
61
- STM32L4x5 EXTI (Extended interrupts and events controller)
48
GArray *tcpalog;
62
- STM32L4x5 SYSCFG (System configuration controller)
49
GArray *vmgenid;
63
- STM32L4x5 RCC (Reset and clock control)
50
+ GArray *hardware_errors;
64
+- STM32L4x5 GPIOs (General-purpose I/Os)
51
BIOSLinker *linker;
65
52
} AcpiBuildTables;
66
Missing devices
53
67
"""""""""""""""
54
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
68
@@ -XXX,XX +XXX,XX @@ Missing devices
69
The B-L475E-IOT01A does *not* support the following devices:
70
71
- Serial ports (UART)
72
-- General-purpose I/Os (GPIO)
73
- Analog to Digital Converter (ADC)
74
- SPI controller
75
- Timer controller (TIMER)
76
diff --git a/include/hw/gpio/stm32l4x5_gpio.h b/include/hw/gpio/stm32l4x5_gpio.h
55
new file mode 100644
77
new file mode 100644
56
index XXXXXXX..XXXXXXX
78
index XXXXXXX..XXXXXXX
57
--- /dev/null
79
--- /dev/null
58
+++ b/include/hw/acpi/ghes.h
80
+++ b/include/hw/gpio/stm32l4x5_gpio.h
59
@@ -XXX,XX +XXX,XX @@
81
@@ -XXX,XX +XXX,XX @@
60
+/*
82
+/*
61
+ * Support for generating APEI tables and recording CPER for Guests
83
+ * STM32L4x5 GPIO (General Purpose Input/Ouput)
62
+ *
84
+ *
63
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
85
+ * Copyright (c) 2024 Arnaud Minier <arnaud.minier@telecom-paris.fr>
86
+ * Copyright (c) 2024 Inès Varhol <ines.varhol@telecom-paris.fr>
64
+ *
87
+ *
65
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
88
+ * SPDX-License-Identifier: GPL-2.0-or-later
66
+ *
89
+ *
67
+ * This program is free software; you can redistribute it and/or modify
90
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
68
+ * it under the terms of the GNU General Public License as published by
91
+ * See the COPYING file in the top-level directory.
69
+ * the Free Software Foundation; either version 2 of the License, or
70
+ * (at your option) any later version.
71
+
72
+ * This program is distributed in the hope that it will be useful,
73
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
74
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
75
+ * GNU General Public License for more details.
76
+
77
+ * You should have received a copy of the GNU General Public License along
78
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
79
+ */
92
+ */
80
+
93
+
81
+#ifndef ACPI_GHES_H
94
+/*
82
+#define ACPI_GHES_H
95
+ * The reference used is the STMicroElectronics RM0351 Reference manual
83
+
96
+ * for STM32L4x5 and STM32L4x6 advanced Arm ® -based 32-bit MCUs.
84
+#include "hw/acpi/bios-linker-loader.h"
97
+ * https://www.st.com/en/microcontrollers-microprocessors/stm32l4x5/documentation.html
85
+
98
+ */
86
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
99
+
100
+#ifndef HW_STM32L4X5_GPIO_H
101
+#define HW_STM32L4X5_GPIO_H
102
+
103
+#include "hw/sysbus.h"
104
+#include "qom/object.h"
105
+
106
+#define TYPE_STM32L4X5_GPIO "stm32l4x5-gpio"
107
+OBJECT_DECLARE_SIMPLE_TYPE(Stm32l4x5GpioState, STM32L4X5_GPIO)
108
+
109
+#define GPIO_NUM_PINS 16
110
+
111
+struct Stm32l4x5GpioState {
112
+ SysBusDevice parent_obj;
113
+
114
+ MemoryRegion mmio;
115
+
116
+ /* GPIO registers */
117
+ uint32_t moder;
118
+ uint32_t otyper;
119
+ uint32_t ospeedr;
120
+ uint32_t pupdr;
121
+ uint32_t idr;
122
+ uint32_t odr;
123
+ uint32_t lckr;
124
+ uint32_t afrl;
125
+ uint32_t afrh;
126
+ uint32_t ascr;
127
+
128
+ /* GPIO registers reset values */
129
+ uint32_t moder_reset;
130
+ uint32_t ospeedr_reset;
131
+ uint32_t pupdr_reset;
132
+
133
+ /*
134
+ * External driving of pins.
135
+ * The pins can be set externally through the device
136
+ * anonymous input GPIOs lines under certain conditions.
137
+ * The pin must not be in push-pull output mode,
138
+ * and can't be set high in open-drain mode.
139
+ * Pins driven externally and configured to
140
+ * output mode will in general be "disconnected"
141
+ * (see `get_gpio_pinmask_to_disconnect()`)
142
+ */
143
+ uint16_t disconnected_pins;
144
+ uint16_t pins_connected_high;
145
+
146
+ char *name;
147
+ Clock *clk;
148
+ qemu_irq pin[GPIO_NUM_PINS];
149
+};
150
+
87
+#endif
151
+#endif
88
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
152
diff --git a/hw/gpio/stm32l4x5_gpio.c b/hw/gpio/stm32l4x5_gpio.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/acpi/aml-build.c
91
+++ b/hw/acpi/aml-build.c
92
@@ -XXX,XX +XXX,XX @@ void acpi_build_tables_init(AcpiBuildTables *tables)
93
tables->table_data = g_array_new(false, true /* clear */, 1);
94
tables->tcpalog = g_array_new(false, true /* clear */, 1);
95
tables->vmgenid = g_array_new(false, true /* clear */, 1);
96
+ tables->hardware_errors = g_array_new(false, true /* clear */, 1);
97
tables->linker = bios_linker_loader_init();
98
}
99
100
@@ -XXX,XX +XXX,XX @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
101
g_array_free(tables->table_data, true);
102
g_array_free(tables->tcpalog, mfre);
103
g_array_free(tables->vmgenid, mfre);
104
+ g_array_free(tables->hardware_errors, mfre);
105
}
106
107
/*
108
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
109
new file mode 100644
153
new file mode 100644
110
index XXXXXXX..XXXXXXX
154
index XXXXXXX..XXXXXXX
111
--- /dev/null
155
--- /dev/null
112
+++ b/hw/acpi/ghes.c
156
+++ b/hw/gpio/stm32l4x5_gpio.c
113
@@ -XXX,XX +XXX,XX @@
157
@@ -XXX,XX +XXX,XX @@
114
+/*
158
+/*
115
+ * Support for generating APEI tables and recording CPER for Guests
159
+ * STM32L4x5 GPIO (General Purpose Input/Ouput)
116
+ *
160
+ *
117
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
161
+ * Copyright (c) 2024 Arnaud Minier <arnaud.minier@telecom-paris.fr>
162
+ * Copyright (c) 2024 Inès Varhol <ines.varhol@telecom-paris.fr>
118
+ *
163
+ *
119
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
164
+ * SPDX-License-Identifier: GPL-2.0-or-later
120
+ *
165
+ *
121
+ * This program is free software; you can redistribute it and/or modify
166
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
122
+ * it under the terms of the GNU General Public License as published by
167
+ * See the COPYING file in the top-level directory.
123
+ * the Free Software Foundation; either version 2 of the License, or
124
+ * (at your option) any later version.
125
+
126
+ * This program is distributed in the hope that it will be useful,
127
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
128
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
129
+ * GNU General Public License for more details.
130
+
131
+ * You should have received a copy of the GNU General Public License along
132
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
133
+ */
168
+ */
134
+
169
+
170
+/*
171
+ * The reference used is the STMicroElectronics RM0351 Reference manual
172
+ * for STM32L4x5 and STM32L4x6 advanced Arm ® -based 32-bit MCUs.
173
+ * https://www.st.com/en/microcontrollers-microprocessors/stm32l4x5/documentation.html
174
+ */
175
+
135
+#include "qemu/osdep.h"
176
+#include "qemu/osdep.h"
136
+#include "qemu/units.h"
177
+#include "qemu/log.h"
137
+#include "hw/acpi/ghes.h"
178
+#include "hw/gpio/stm32l4x5_gpio.h"
138
+#include "hw/acpi/aml-build.h"
179
+#include "hw/irq.h"
139
+
180
+#include "hw/qdev-clock.h"
140
+#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
181
+#include "hw/qdev-properties.h"
141
+#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
182
+#include "qapi/visitor.h"
142
+
183
+#include "qapi/error.h"
143
+/* The max size in bytes for one error block */
184
+#include "migration/vmstate.h"
144
+#define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB)
185
+#include "trace.h"
145
+
186
+
146
+/* Now only support ARMv8 SEA notification type error source */
187
+#define GPIO_MODER 0x00
147
+#define ACPI_GHES_ERROR_SOURCE_COUNT 1
188
+#define GPIO_OTYPER 0x04
189
+#define GPIO_OSPEEDR 0x08
190
+#define GPIO_PUPDR 0x0C
191
+#define GPIO_IDR 0x10
192
+#define GPIO_ODR 0x14
193
+#define GPIO_BSRR 0x18
194
+#define GPIO_LCKR 0x1C
195
+#define GPIO_AFRL 0x20
196
+#define GPIO_AFRH 0x24
197
+#define GPIO_BRR 0x28
198
+#define GPIO_ASCR 0x2C
199
+
200
+/* 0b11111111_11111111_00000000_00000000 */
201
+#define RESERVED_BITS_MASK 0xFFFF0000
202
+
203
+static void update_gpio_idr(Stm32l4x5GpioState *s);
204
+
205
+static bool is_pull_up(Stm32l4x5GpioState *s, unsigned pin)
206
+{
207
+ return extract32(s->pupdr, 2 * pin, 2) == 1;
208
+}
209
+
210
+static bool is_pull_down(Stm32l4x5GpioState *s, unsigned pin)
211
+{
212
+ return extract32(s->pupdr, 2 * pin, 2) == 2;
213
+}
214
+
215
+static bool is_output(Stm32l4x5GpioState *s, unsigned pin)
216
+{
217
+ return extract32(s->moder, 2 * pin, 2) == 1;
218
+}
219
+
220
+static bool is_open_drain(Stm32l4x5GpioState *s, unsigned pin)
221
+{
222
+ return extract32(s->otyper, pin, 1) == 1;
223
+}
224
+
225
+static bool is_push_pull(Stm32l4x5GpioState *s, unsigned pin)
226
+{
227
+ return extract32(s->otyper, pin, 1) == 0;
228
+}
229
+
230
+static void stm32l4x5_gpio_reset_hold(Object *obj)
231
+{
232
+ Stm32l4x5GpioState *s = STM32L4X5_GPIO(obj);
233
+
234
+ s->moder = s->moder_reset;
235
+ s->otyper = 0x00000000;
236
+ s->ospeedr = s->ospeedr_reset;
237
+ s->pupdr = s->pupdr_reset;
238
+ s->idr = 0x00000000;
239
+ s->odr = 0x00000000;
240
+ s->lckr = 0x00000000;
241
+ s->afrl = 0x00000000;
242
+ s->afrh = 0x00000000;
243
+ s->ascr = 0x00000000;
244
+
245
+ s->disconnected_pins = 0xFFFF;
246
+ s->pins_connected_high = 0x0000;
247
+ update_gpio_idr(s);
248
+}
249
+
250
+static void stm32l4x5_gpio_set(void *opaque, int line, int level)
251
+{
252
+ Stm32l4x5GpioState *s = opaque;
253
+ /*
254
+ * The pin isn't set if line is configured in output mode
255
+ * except if level is 0 and the output is open-drain.
256
+ * This way there will be no short-circuit prone situations.
257
+ */
258
+ if (is_output(s, line) && !(is_open_drain(s, line) && (level == 0))) {
259
+ qemu_log_mask(LOG_GUEST_ERROR, "Line %d can't be driven externally\n",
260
+ line);
261
+ return;
262
+ }
263
+
264
+ s->disconnected_pins &= ~(1 << line);
265
+ if (level) {
266
+ s->pins_connected_high |= (1 << line);
267
+ } else {
268
+ s->pins_connected_high &= ~(1 << line);
269
+ }
270
+ trace_stm32l4x5_gpio_pins(s->name, s->disconnected_pins,
271
+ s->pins_connected_high);
272
+ update_gpio_idr(s);
273
+}
274
+
275
+
276
+static void update_gpio_idr(Stm32l4x5GpioState *s)
277
+{
278
+ uint32_t new_idr_mask = 0;
279
+ uint32_t new_idr = s->odr;
280
+ uint32_t old_idr = s->idr;
281
+ int new_pin_state, old_pin_state;
282
+
283
+ for (int i = 0; i < GPIO_NUM_PINS; i++) {
284
+ if (is_output(s, i)) {
285
+ if (is_push_pull(s, i)) {
286
+ new_idr_mask |= (1 << i);
287
+ } else if (!(s->odr & (1 << i))) {
288
+ /* open-drain ODR 0 */
289
+ new_idr_mask |= (1 << i);
290
+ /* open-drain ODR 1 */
291
+ } else if (!(s->disconnected_pins & (1 << i)) &&
292
+ !(s->pins_connected_high & (1 << i))) {
293
+ /* open-drain ODR 1 with pin connected low */
294
+ new_idr_mask |= (1 << i);
295
+ new_idr &= ~(1 << i);
296
+ /* open-drain ODR 1 with unactive pin */
297
+ } else if (is_pull_up(s, i)) {
298
+ new_idr_mask |= (1 << i);
299
+ } else if (is_pull_down(s, i)) {
300
+ new_idr_mask |= (1 << i);
301
+ new_idr &= ~(1 << i);
302
+ }
303
+ /*
304
+ * The only case left is for open-drain ODR 1
305
+ * with unactive pin without pull-up or pull-down :
306
+ * the value is floating.
307
+ */
308
+ /* input or analog mode with connected pin */
309
+ } else if (!(s->disconnected_pins & (1 << i))) {
310
+ if (s->pins_connected_high & (1 << i)) {
311
+ /* pin high */
312
+ new_idr_mask |= (1 << i);
313
+ new_idr |= (1 << i);
314
+ } else {
315
+ /* pin low */
316
+ new_idr_mask |= (1 << i);
317
+ new_idr &= ~(1 << i);
318
+ }
319
+ /* input or analog mode with disconnected pin */
320
+ } else {
321
+ if (is_pull_up(s, i)) {
322
+ /* pull-up */
323
+ new_idr_mask |= (1 << i);
324
+ new_idr |= (1 << i);
325
+ } else if (is_pull_down(s, i)) {
326
+ /* pull-down */
327
+ new_idr_mask |= (1 << i);
328
+ new_idr &= ~(1 << i);
329
+ }
330
+ /*
331
+ * The only case left is for a disconnected pin
332
+ * without pull-up or pull-down :
333
+ * the value is floating.
334
+ */
335
+ }
336
+ }
337
+
338
+ s->idr = (old_idr & ~new_idr_mask) | (new_idr & new_idr_mask);
339
+ trace_stm32l4x5_gpio_update_idr(s->name, old_idr, s->idr);
340
+
341
+ for (int i = 0; i < GPIO_NUM_PINS; i++) {
342
+ if (new_idr_mask & (1 << i)) {
343
+ new_pin_state = (new_idr & (1 << i)) > 0;
344
+ old_pin_state = (old_idr & (1 << i)) > 0;
345
+ if (new_pin_state > old_pin_state) {
346
+ qemu_irq_raise(s->pin[i]);
347
+ } else if (new_pin_state < old_pin_state) {
348
+ qemu_irq_lower(s->pin[i]);
349
+ }
350
+ }
351
+ }
352
+}
148
+
353
+
149
+/*
354
+/*
150
+ * Build table for the hardware error fw_cfg blob.
355
+ * Return mask of pins that are both configured in output
151
+ * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
356
+ * mode and externally driven (except pins in open-drain
152
+ * See docs/specs/acpi_hest_ghes.rst for blobs format.
357
+ * mode externally set to 0).
153
+ */
358
+ */
154
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
359
+static uint32_t get_gpio_pinmask_to_disconnect(Stm32l4x5GpioState *s)
155
+{
360
+{
156
+ int i, error_status_block_offset;
361
+ uint32_t pins_to_disconnect = 0;
157
+
362
+ for (int i = 0; i < GPIO_NUM_PINS; i++) {
158
+ /* Build error_block_address */
363
+ /* for each connected pin in output mode */
159
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
364
+ if (!(s->disconnected_pins & (1 << i)) && is_output(s, i)) {
160
+ build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
365
+ /* if either push-pull or high level */
161
+ }
366
+ if (is_push_pull(s, i) || s->pins_connected_high & (1 << i)) {
162
+
367
+ pins_to_disconnect |= (1 << i);
163
+ /* Build read_ack_register */
368
+ qemu_log_mask(LOG_GUEST_ERROR,
164
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
369
+ "Line %d can't be driven externally\n",
165
+ /*
370
+ i);
166
+ * Initialize the value of read_ack_register to 1, so GHES can be
371
+ }
167
+ * writeable after (re)boot.
372
+ }
168
+ * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
373
+ }
169
+ * (GHESv2 - Type 10)
374
+ return pins_to_disconnect;
170
+ */
375
+}
171
+ build_append_int_noprefix(hardware_errors, 1, sizeof(uint64_t));
376
+
172
+ }
377
+/*
173
+
378
+ * Set field `disconnected_pins` and call `update_gpio_idr()`
174
+ /* Generic Error Status Block offset in the hardware error fw_cfg blob */
379
+ */
175
+ error_status_block_offset = hardware_errors->len;
380
+static void disconnect_gpio_pins(Stm32l4x5GpioState *s, uint16_t lines)
176
+
381
+{
177
+ /* Reserve space for Error Status Data Block */
382
+ s->disconnected_pins |= lines;
178
+ acpi_data_push(hardware_errors,
383
+ trace_stm32l4x5_gpio_pins(s->name, s->disconnected_pins,
179
+ ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
384
+ s->pins_connected_high);
180
+
385
+ update_gpio_idr(s);
181
+ /* Tell guest firmware to place hardware_errors blob into RAM */
386
+}
182
+ bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE,
387
+
183
+ hardware_errors, sizeof(uint64_t), false);
388
+static void disconnected_pins_set(Object *obj, Visitor *v,
184
+
389
+ const char *name, void *opaque, Error **errp)
185
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
390
+{
186
+ /*
391
+ Stm32l4x5GpioState *s = STM32L4X5_GPIO(obj);
187
+ * Tell firmware to patch error_block_address entries to point to
392
+ uint16_t value;
188
+ * corresponding "Generic Error Status Block"
393
+ if (!visit_type_uint16(v, name, &value, errp)) {
189
+ */
394
+ return;
190
+ bios_linker_loader_add_pointer(linker,
395
+ }
191
+ ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i,
396
+ disconnect_gpio_pins(s, value);
192
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
397
+}
193
+ error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
398
+
194
+ }
399
+static void disconnected_pins_get(Object *obj, Visitor *v,
195
+
400
+ const char *name, void *opaque, Error **errp)
196
+ /*
401
+{
197
+ * tell firmware to write hardware_errors GPA into
402
+ visit_type_uint16(v, name, (uint16_t *)opaque, errp);
198
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
403
+}
199
+ */
404
+
200
+ bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
405
+static void clock_freq_get(Object *obj, Visitor *v,
201
+ 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
406
+ const char *name, void *opaque, Error **errp)
202
+}
407
+{
203
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
408
+ Stm32l4x5GpioState *s = STM32L4X5_GPIO(obj);
409
+ uint32_t clock_freq_hz = clock_get_hz(s->clk);
410
+ visit_type_uint32(v, name, &clock_freq_hz, errp);
411
+}
412
+
413
+static void stm32l4x5_gpio_write(void *opaque, hwaddr addr,
414
+ uint64_t val64, unsigned int size)
415
+{
416
+ Stm32l4x5GpioState *s = opaque;
417
+
418
+ uint32_t value = val64;
419
+ trace_stm32l4x5_gpio_write(s->name, addr, val64);
420
+
421
+ switch (addr) {
422
+ case GPIO_MODER:
423
+ s->moder = value;
424
+ disconnect_gpio_pins(s, get_gpio_pinmask_to_disconnect(s));
425
+ qemu_log_mask(LOG_UNIMP,
426
+ "%s: Analog and AF modes aren't supported\n\
427
+ Analog and AF mode behave like input mode\n",
428
+ __func__);
429
+ return;
430
+ case GPIO_OTYPER:
431
+ s->otyper = value & ~RESERVED_BITS_MASK;
432
+ disconnect_gpio_pins(s, get_gpio_pinmask_to_disconnect(s));
433
+ return;
434
+ case GPIO_OSPEEDR:
435
+ qemu_log_mask(LOG_UNIMP,
436
+ "%s: Changing I/O output speed isn't supported\n\
437
+ I/O speed is already maximal\n",
438
+ __func__);
439
+ s->ospeedr = value;
440
+ return;
441
+ case GPIO_PUPDR:
442
+ s->pupdr = value;
443
+ update_gpio_idr(s);
444
+ return;
445
+ case GPIO_IDR:
446
+ qemu_log_mask(LOG_UNIMP,
447
+ "%s: GPIO->IDR is read-only\n",
448
+ __func__);
449
+ return;
450
+ case GPIO_ODR:
451
+ s->odr = value & ~RESERVED_BITS_MASK;
452
+ update_gpio_idr(s);
453
+ return;
454
+ case GPIO_BSRR: {
455
+ uint32_t bits_to_reset = (value & RESERVED_BITS_MASK) >> GPIO_NUM_PINS;
456
+ uint32_t bits_to_set = value & ~RESERVED_BITS_MASK;
457
+ /* If both BSx and BRx are set, BSx has priority.*/
458
+ s->odr &= ~bits_to_reset;
459
+ s->odr |= bits_to_set;
460
+ update_gpio_idr(s);
461
+ return;
462
+ }
463
+ case GPIO_LCKR:
464
+ qemu_log_mask(LOG_UNIMP,
465
+ "%s: Locking port bits configuration isn't supported\n",
466
+ __func__);
467
+ s->lckr = value & ~RESERVED_BITS_MASK;
468
+ return;
469
+ case GPIO_AFRL:
470
+ qemu_log_mask(LOG_UNIMP,
471
+ "%s: Alternate functions aren't supported\n",
472
+ __func__);
473
+ s->afrl = value;
474
+ return;
475
+ case GPIO_AFRH:
476
+ qemu_log_mask(LOG_UNIMP,
477
+ "%s: Alternate functions aren't supported\n",
478
+ __func__);
479
+ s->afrh = value;
480
+ return;
481
+ case GPIO_BRR: {
482
+ uint32_t bits_to_reset = value & ~RESERVED_BITS_MASK;
483
+ s->odr &= ~bits_to_reset;
484
+ update_gpio_idr(s);
485
+ return;
486
+ }
487
+ case GPIO_ASCR:
488
+ qemu_log_mask(LOG_UNIMP,
489
+ "%s: ADC function isn't supported\n",
490
+ __func__);
491
+ s->ascr = value & ~RESERVED_BITS_MASK;
492
+ return;
493
+ default:
494
+ qemu_log_mask(LOG_GUEST_ERROR,
495
+ "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, addr);
496
+ }
497
+}
498
+
499
+static uint64_t stm32l4x5_gpio_read(void *opaque, hwaddr addr,
500
+ unsigned int size)
501
+{
502
+ Stm32l4x5GpioState *s = opaque;
503
+
504
+ trace_stm32l4x5_gpio_read(s->name, addr);
505
+
506
+ switch (addr) {
507
+ case GPIO_MODER:
508
+ return s->moder;
509
+ case GPIO_OTYPER:
510
+ return s->otyper;
511
+ case GPIO_OSPEEDR:
512
+ return s->ospeedr;
513
+ case GPIO_PUPDR:
514
+ return s->pupdr;
515
+ case GPIO_IDR:
516
+ return s->idr;
517
+ case GPIO_ODR:
518
+ return s->odr;
519
+ case GPIO_BSRR:
520
+ return 0;
521
+ case GPIO_LCKR:
522
+ return s->lckr;
523
+ case GPIO_AFRL:
524
+ return s->afrl;
525
+ case GPIO_AFRH:
526
+ return s->afrh;
527
+ case GPIO_BRR:
528
+ return 0;
529
+ case GPIO_ASCR:
530
+ return s->ascr;
531
+ default:
532
+ qemu_log_mask(LOG_GUEST_ERROR,
533
+ "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, addr);
534
+ return 0;
535
+ }
536
+}
537
+
538
+static const MemoryRegionOps stm32l4x5_gpio_ops = {
539
+ .read = stm32l4x5_gpio_read,
540
+ .write = stm32l4x5_gpio_write,
541
+ .endianness = DEVICE_NATIVE_ENDIAN,
542
+ .impl = {
543
+ .min_access_size = 4,
544
+ .max_access_size = 4,
545
+ .unaligned = false,
546
+ },
547
+ .valid = {
548
+ .min_access_size = 4,
549
+ .max_access_size = 4,
550
+ .unaligned = false,
551
+ },
552
+};
553
+
554
+static void stm32l4x5_gpio_init(Object *obj)
555
+{
556
+ Stm32l4x5GpioState *s = STM32L4X5_GPIO(obj);
557
+
558
+ memory_region_init_io(&s->mmio, obj, &stm32l4x5_gpio_ops, s,
559
+ TYPE_STM32L4X5_GPIO, 0x400);
560
+
561
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
562
+
563
+ qdev_init_gpio_out(DEVICE(obj), s->pin, GPIO_NUM_PINS);
564
+ qdev_init_gpio_in(DEVICE(obj), stm32l4x5_gpio_set, GPIO_NUM_PINS);
565
+
566
+ s->clk = qdev_init_clock_in(DEVICE(s), "clk", NULL, s, 0);
567
+
568
+ object_property_add(obj, "disconnected-pins", "uint16",
569
+ disconnected_pins_get, disconnected_pins_set,
570
+ NULL, &s->disconnected_pins);
571
+ object_property_add(obj, "clock-freq-hz", "uint32",
572
+ clock_freq_get, NULL, NULL, NULL);
573
+}
574
+
575
+static void stm32l4x5_gpio_realize(DeviceState *dev, Error **errp)
576
+{
577
+ Stm32l4x5GpioState *s = STM32L4X5_GPIO(dev);
578
+ if (!clock_has_source(s->clk)) {
579
+ error_setg(errp, "GPIO: clk input must be connected");
580
+ return;
581
+ }
582
+}
583
+
584
+static const VMStateDescription vmstate_stm32l4x5_gpio = {
585
+ .name = TYPE_STM32L4X5_GPIO,
586
+ .version_id = 1,
587
+ .minimum_version_id = 1,
588
+ .fields = (VMStateField[]){
589
+ VMSTATE_UINT32(moder, Stm32l4x5GpioState),
590
+ VMSTATE_UINT32(otyper, Stm32l4x5GpioState),
591
+ VMSTATE_UINT32(ospeedr, Stm32l4x5GpioState),
592
+ VMSTATE_UINT32(pupdr, Stm32l4x5GpioState),
593
+ VMSTATE_UINT32(idr, Stm32l4x5GpioState),
594
+ VMSTATE_UINT32(odr, Stm32l4x5GpioState),
595
+ VMSTATE_UINT32(lckr, Stm32l4x5GpioState),
596
+ VMSTATE_UINT32(afrl, Stm32l4x5GpioState),
597
+ VMSTATE_UINT32(afrh, Stm32l4x5GpioState),
598
+ VMSTATE_UINT32(ascr, Stm32l4x5GpioState),
599
+ VMSTATE_UINT16(disconnected_pins, Stm32l4x5GpioState),
600
+ VMSTATE_UINT16(pins_connected_high, Stm32l4x5GpioState),
601
+ VMSTATE_END_OF_LIST()
602
+ }
603
+};
604
+
605
+static Property stm32l4x5_gpio_properties[] = {
606
+ DEFINE_PROP_STRING("name", Stm32l4x5GpioState, name),
607
+ DEFINE_PROP_UINT32("mode-reset", Stm32l4x5GpioState, moder_reset, 0),
608
+ DEFINE_PROP_UINT32("ospeed-reset", Stm32l4x5GpioState, ospeedr_reset, 0),
609
+ DEFINE_PROP_UINT32("pupd-reset", Stm32l4x5GpioState, pupdr_reset, 0),
610
+ DEFINE_PROP_END_OF_LIST(),
611
+};
612
+
613
+static void stm32l4x5_gpio_class_init(ObjectClass *klass, void *data)
614
+{
615
+ DeviceClass *dc = DEVICE_CLASS(klass);
616
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
617
+
618
+ device_class_set_props(dc, stm32l4x5_gpio_properties);
619
+ dc->vmsd = &vmstate_stm32l4x5_gpio;
620
+ dc->realize = stm32l4x5_gpio_realize;
621
+ rc->phases.hold = stm32l4x5_gpio_reset_hold;
622
+}
623
+
624
+static const TypeInfo stm32l4x5_gpio_types[] = {
625
+ {
626
+ .name = TYPE_STM32L4X5_GPIO,
627
+ .parent = TYPE_SYS_BUS_DEVICE,
628
+ .instance_size = sizeof(Stm32l4x5GpioState),
629
+ .instance_init = stm32l4x5_gpio_init,
630
+ .class_init = stm32l4x5_gpio_class_init,
631
+ },
632
+};
633
+
634
+DEFINE_TYPES(stm32l4x5_gpio_types)
635
diff --git a/hw/gpio/Kconfig b/hw/gpio/Kconfig
204
index XXXXXXX..XXXXXXX 100644
636
index XXXXXXX..XXXXXXX 100644
205
--- a/hw/arm/virt-acpi-build.c
637
--- a/hw/gpio/Kconfig
206
+++ b/hw/arm/virt-acpi-build.c
638
+++ b/hw/gpio/Kconfig
207
@@ -XXX,XX +XXX,XX @@
639
@@ -XXX,XX +XXX,XX @@ config GPIO_PWR
208
#include "sysemu/reset.h"
640
209
#include "kvm_arm.h"
641
config SIFIVE_GPIO
210
#include "migration/vmstate.h"
642
bool
211
+#include "hw/acpi/ghes.h"
643
+
212
644
+config STM32L4X5_GPIO
213
#define ARM_SPI_BASE 32
645
+ bool
214
646
diff --git a/hw/gpio/meson.build b/hw/gpio/meson.build
215
@@ -XXX,XX +XXX,XX @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
216
acpi_add_table(table_offsets, tables_blob);
217
build_spcr(tables_blob, tables->linker, vms);
218
219
+ if (vms->ras) {
220
+ build_ghes_error_table(tables->hardware_errors, tables->linker);
221
+ }
222
+
223
if (ms->numa_state->num_nodes > 0) {
224
acpi_add_table(table_offsets, tables_blob);
225
build_srat(tables_blob, tables->linker, vms);
226
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
227
index XXXXXXX..XXXXXXX 100644
647
index XXXXXXX..XXXXXXX 100644
228
--- a/hw/acpi/Kconfig
648
--- a/hw/gpio/meson.build
229
+++ b/hw/acpi/Kconfig
649
+++ b/hw/gpio/meson.build
230
@@ -XXX,XX +XXX,XX @@ config ACPI_HMAT
650
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_RASPI', if_true: files(
231
bool
651
'bcm2835_gpio.c',
232
depends on ACPI
652
'bcm2838_gpio.c'
233
653
))
234
+config ACPI_APEI
654
+system_ss.add(when: 'CONFIG_STM32L4X5_SOC', if_true: files('stm32l4x5_gpio.c'))
235
+ bool
655
system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_gpio.c'))
236
+ depends on ACPI
656
system_ss.add(when: 'CONFIG_SIFIVE_GPIO', if_true: files('sifive_gpio.c'))
237
+
657
diff --git a/hw/gpio/trace-events b/hw/gpio/trace-events
238
config ACPI_PCI
239
bool
240
depends on ACPI && PCI
241
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
242
index XXXXXXX..XXXXXXX 100644
658
index XXXXXXX..XXXXXXX 100644
243
--- a/hw/acpi/Makefile.objs
659
--- a/hw/gpio/trace-events
244
+++ b/hw/acpi/Makefile.objs
660
+++ b/hw/gpio/trace-events
245
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
661
@@ -XXX,XX +XXX,XX @@ sifive_gpio_update_output_irq(int64_t line, int64_t value) "line %" PRIi64 " val
246
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
662
# aspeed_gpio.c
247
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
663
aspeed_gpio_read(uint64_t offset, uint64_t value) "offset: 0x%" PRIx64 " value 0x%" PRIx64
248
common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
664
aspeed_gpio_write(uint64_t offset, uint64_t value) "offset: 0x%" PRIx64 " value 0x%" PRIx64
249
+common-obj-$(CONFIG_ACPI_APEI) += ghes.o
665
+
250
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
666
+# stm32l4x5_gpio.c
251
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o
667
+stm32l4x5_gpio_read(char *gpio, uint64_t addr) "GPIO%s addr: 0x%" PRIx64 " "
252
668
+stm32l4x5_gpio_write(char *gpio, uint64_t addr, uint64_t data) "GPIO%s addr: 0x%" PRIx64 " val: 0x%" PRIx64 ""
669
+stm32l4x5_gpio_update_idr(char *gpio, uint32_t old_idr, uint32_t new_idr) "GPIO%s from: 0x%x to: 0x%x"
670
+stm32l4x5_gpio_pins(char *gpio, uint16_t disconnected, uint16_t high) "GPIO%s disconnected pins: 0x%x levels: 0x%x"
253
--
671
--
254
2.20.1
672
2.34.1
255
673
256
674
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Inès Varhol <ines.varhol@telecom-paris.fr>
2
2
3
Record the GHEB address via fw_cfg file, when recording
3
Signed-off-by: Arnaud Minier <arnaud.minier@telecom-paris.fr>
4
a error to CPER, it will use this address to find out
4
Signed-off-by: Inès Varhol <ines.varhol@telecom-paris.fr>
5
Generic Error Data Entries and write the error.
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
6
Acked-by: Alistair Francis <alistair.francis@wdc.com>
7
In order to avoid migration failure, make hardware
7
Message-id: 20240305210444.310665-3-ines.varhol@telecom-paris.fr
8
error table address to a part of GED device instead
9
of global variable, then this address will be migrated
10
to target QEMU.
11
12
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
13
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
14
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
15
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
16
Message-id: 20200512030609.19593-7-gengdongjiu@huawei.com
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
9
---
19
include/hw/acpi/generic_event_device.h | 2 ++
10
include/hw/arm/stm32l4x5_soc.h | 2 +
20
include/hw/acpi/ghes.h | 6 ++++++
11
include/hw/gpio/stm32l4x5_gpio.h | 1 +
21
hw/acpi/generic_event_device.c | 19 +++++++++++++++++++
12
include/hw/misc/stm32l4x5_syscfg.h | 3 +-
22
hw/acpi/ghes.c | 14 ++++++++++++++
13
hw/arm/stm32l4x5_soc.c | 71 +++++++++++++++++++++++-------
23
hw/arm/virt-acpi-build.c | 8 ++++++++
14
hw/misc/stm32l4x5_syscfg.c | 1 +
24
5 files changed, 49 insertions(+)
15
hw/arm/Kconfig | 3 +-
25
16
6 files changed, 63 insertions(+), 18 deletions(-)
26
diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h
17
27
index XXXXXXX..XXXXXXX 100644
18
diff --git a/include/hw/arm/stm32l4x5_soc.h b/include/hw/arm/stm32l4x5_soc.h
28
--- a/include/hw/acpi/generic_event_device.h
19
index XXXXXXX..XXXXXXX 100644
29
+++ b/include/hw/acpi/generic_event_device.h
20
--- a/include/hw/arm/stm32l4x5_soc.h
30
@@ -XXX,XX +XXX,XX @@
21
+++ b/include/hw/arm/stm32l4x5_soc.h
22
@@ -XXX,XX +XXX,XX @@
23
#include "hw/misc/stm32l4x5_syscfg.h"
24
#include "hw/misc/stm32l4x5_exti.h"
25
#include "hw/misc/stm32l4x5_rcc.h"
26
+#include "hw/gpio/stm32l4x5_gpio.h"
27
#include "qom/object.h"
28
29
#define TYPE_STM32L4X5_SOC "stm32l4x5-soc"
30
@@ -XXX,XX +XXX,XX @@ struct Stm32l4x5SocState {
31
OrIRQState exti_or_gates[NUM_EXTI_OR_GATES];
32
Stm32l4x5SyscfgState syscfg;
33
Stm32l4x5RccState rcc;
34
+ Stm32l4x5GpioState gpio[NUM_GPIOS];
35
36
MemoryRegion sram1;
37
MemoryRegion sram2;
38
diff --git a/include/hw/gpio/stm32l4x5_gpio.h b/include/hw/gpio/stm32l4x5_gpio.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/include/hw/gpio/stm32l4x5_gpio.h
41
+++ b/include/hw/gpio/stm32l4x5_gpio.h
42
@@ -XXX,XX +XXX,XX @@
43
#define TYPE_STM32L4X5_GPIO "stm32l4x5-gpio"
44
OBJECT_DECLARE_SIMPLE_TYPE(Stm32l4x5GpioState, STM32L4X5_GPIO)
45
46
+#define NUM_GPIOS 8
47
#define GPIO_NUM_PINS 16
48
49
struct Stm32l4x5GpioState {
50
diff --git a/include/hw/misc/stm32l4x5_syscfg.h b/include/hw/misc/stm32l4x5_syscfg.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/include/hw/misc/stm32l4x5_syscfg.h
53
+++ b/include/hw/misc/stm32l4x5_syscfg.h
54
@@ -XXX,XX +XXX,XX @@
31
55
32
#include "hw/sysbus.h"
56
#include "hw/sysbus.h"
33
#include "hw/acpi/memory_hotplug.h"
57
#include "qom/object.h"
34
+#include "hw/acpi/ghes.h"
58
+#include "hw/gpio/stm32l4x5_gpio.h"
35
59
36
#define ACPI_POWER_BUTTON_DEVICE "PWRB"
60
#define TYPE_STM32L4X5_SYSCFG "stm32l4x5-syscfg"
37
61
OBJECT_DECLARE_SIMPLE_TYPE(Stm32l4x5SyscfgState, STM32L4X5_SYSCFG)
38
@@ -XXX,XX +XXX,XX @@ typedef struct AcpiGedState {
62
39
GEDState ged_state;
63
-#define NUM_GPIOS 8
40
uint32_t ged_event_bitmap;
64
-#define GPIO_NUM_PINS 16
41
qemu_irq irq;
65
#define SYSCFG_NUM_EXTICR 4
42
+ AcpiGhesState ghes_state;
66
43
} AcpiGedState;
67
struct Stm32l4x5SyscfgState {
44
68
diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
45
void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev,
69
index XXXXXXX..XXXXXXX 100644
46
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
70
--- a/hw/arm/stm32l4x5_soc.c
47
index XXXXXXX..XXXXXXX 100644
71
+++ b/hw/arm/stm32l4x5_soc.c
48
--- a/include/hw/acpi/ghes.h
72
@@ -XXX,XX +XXX,XX @@
49
+++ b/include/hw/acpi/ghes.h
73
#include "sysemu/sysemu.h"
50
@@ -XXX,XX +XXX,XX @@ enum {
74
#include "hw/or-irq.h"
51
ACPI_HEST_SRC_ID_RESERVED,
75
#include "hw/arm/stm32l4x5_soc.h"
76
+#include "hw/gpio/stm32l4x5_gpio.h"
77
#include "hw/qdev-clock.h"
78
#include "hw/misc/unimp.h"
79
80
@@ -XXX,XX +XXX,XX @@ static const int exti_or_gate1_lines_in[EXTI_OR_GATE1_NUM_LINES_IN] = {
81
16, 35, 36, 37, 38,
52
};
82
};
53
83
54
+typedef struct AcpiGhesState {
84
+static const struct {
55
+ uint64_t ghes_addr_le;
85
+ uint32_t addr;
56
+} AcpiGhesState;
86
+ uint32_t moder_reset;
57
+
87
+ uint32_t ospeedr_reset;
58
void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
88
+ uint32_t pupdr_reset;
59
void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
89
+} stm32l4x5_gpio_cfg[NUM_GPIOS] = {
60
+void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
90
+ { 0x48000000, 0xABFFFFFF, 0x0C000000, 0x64000000 },
61
+ GArray *hardware_errors);
91
+ { 0x48000400, 0xFFFFFEBF, 0x00000000, 0x00000100 },
62
#endif
92
+ { 0x48000800, 0xFFFFFFFF, 0x00000000, 0x00000000 },
63
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
93
+ { 0x48000C00, 0xFFFFFFFF, 0x00000000, 0x00000000 },
64
index XXXXXXX..XXXXXXX 100644
94
+ { 0x48001000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
65
--- a/hw/acpi/generic_event_device.c
95
+ { 0x48001400, 0xFFFFFFFF, 0x00000000, 0x00000000 },
66
+++ b/hw/acpi/generic_event_device.c
96
+ { 0x48001800, 0xFFFFFFFF, 0x00000000, 0x00000000 },
67
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_ged_state = {
97
+ { 0x48001C00, 0x0000000F, 0x00000000, 0x00000000 },
68
}
98
+};
69
};
99
+
70
100
static void stm32l4x5_soc_initfn(Object *obj)
71
+static bool ghes_needed(void *opaque)
101
{
72
+{
102
Stm32l4x5SocState *s = STM32L4X5_SOC(obj);
73
+ AcpiGedState *s = opaque;
103
@@ -XXX,XX +XXX,XX @@ static void stm32l4x5_soc_initfn(Object *obj)
74
+ return s->ghes_state.ghes_addr_le;
104
}
75
+}
105
object_initialize_child(obj, "syscfg", &s->syscfg, TYPE_STM32L4X5_SYSCFG);
76
+
106
object_initialize_child(obj, "rcc", &s->rcc, TYPE_STM32L4X5_RCC);
77
+static const VMStateDescription vmstate_ghes_state = {
107
+
78
+ .name = "acpi-ged/ghes",
108
+ for (unsigned i = 0; i < NUM_GPIOS; i++) {
79
+ .version_id = 1,
109
+ g_autofree char *name = g_strdup_printf("gpio%c", 'a' + i);
80
+ .minimum_version_id = 1,
110
+ object_initialize_child(obj, name, &s->gpio[i], TYPE_STM32L4X5_GPIO);
81
+ .needed = ghes_needed,
82
+ .fields = (VMStateField[]) {
83
+ VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
84
+ vmstate_ghes_state, AcpiGhesState),
85
+ VMSTATE_END_OF_LIST()
86
+ }
111
+ }
87
+};
88
+
89
static const VMStateDescription vmstate_acpi_ged = {
90
.name = "acpi-ged",
91
.version_id = 1,
92
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_acpi_ged = {
93
},
94
.subsections = (const VMStateDescription * []) {
95
&vmstate_memhp_state,
96
+ &vmstate_ghes_state,
97
NULL
98
}
99
};
100
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/hw/acpi/ghes.c
103
+++ b/hw/acpi/ghes.c
104
@@ -XXX,XX +XXX,XX @@
105
#include "hw/acpi/ghes.h"
106
#include "hw/acpi/aml-build.h"
107
#include "qemu/error-report.h"
108
+#include "hw/acpi/generic_event_device.h"
109
+#include "hw/nvram/fw_cfg.h"
110
111
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
112
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
113
@@ -XXX,XX +XXX,XX @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker)
114
build_header(linker, table_data, (void *)(table_data->data + hest_start),
115
"HEST", table_data->len - hest_start, 1, NULL, NULL);
116
}
112
}
117
+
113
118
+void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
114
static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
119
+ GArray *hardware_error)
115
@@ -XXX,XX +XXX,XX @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
120
+{
116
Stm32l4x5SocState *s = STM32L4X5_SOC(dev_soc);
121
+ /* Create a read-only fw_cfg file for GHES */
117
const Stm32l4x5SocClass *sc = STM32L4X5_SOC_GET_CLASS(dev_soc);
122
+ fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data,
118
MemoryRegion *system_memory = get_system_memory();
123
+ hardware_error->len);
119
- DeviceState *armv7m;
124
+
120
+ DeviceState *armv7m, *dev;
125
+ /* Create a read-write fw_cfg file for Address */
121
SysBusDevice *busdev;
126
+ fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
122
+ uint32_t pin_index;
127
+ NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
123
128
+}
124
if (!memory_region_init_rom(&s->flash, OBJECT(dev_soc), "flash",
129
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
125
sc->flash_size, errp)) {
130
index XXXXXXX..XXXXXXX 100644
126
@@ -XXX,XX +XXX,XX @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
131
--- a/hw/arm/virt-acpi-build.c
127
return;
132
+++ b/hw/arm/virt-acpi-build.c
128
}
133
@@ -XXX,XX +XXX,XX @@ void virt_acpi_setup(VirtMachineState *vms)
129
134
{
130
+ /* GPIOs */
135
AcpiBuildTables tables;
131
+ for (unsigned i = 0; i < NUM_GPIOS; i++) {
136
AcpiBuildState *build_state;
132
+ g_autofree char *name = g_strdup_printf("%c", 'A' + i);
137
+ AcpiGedState *acpi_ged_state;
133
+ dev = DEVICE(&s->gpio[i]);
138
134
+ qdev_prop_set_string(dev, "name", name);
139
if (!vms->fw_cfg) {
135
+ qdev_prop_set_uint32(dev, "mode-reset",
140
trace_virt_acpi_setup();
136
+ stm32l4x5_gpio_cfg[i].moder_reset);
141
@@ -XXX,XX +XXX,XX @@ void virt_acpi_setup(VirtMachineState *vms)
137
+ qdev_prop_set_uint32(dev, "ospeed-reset",
142
fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
138
+ stm32l4x5_gpio_cfg[i].ospeedr_reset);
143
acpi_data_len(tables.tcpalog));
139
+ qdev_prop_set_uint32(dev, "pupd-reset",
144
140
+ stm32l4x5_gpio_cfg[i].pupdr_reset);
145
+ if (vms->ras) {
141
+ busdev = SYS_BUS_DEVICE(&s->gpio[i]);
146
+ assert(vms->acpi_dev);
142
+ g_free(name);
147
+ acpi_ged_state = ACPI_GED(vms->acpi_dev);
143
+ name = g_strdup_printf("gpio%c-out", 'a' + i);
148
+ acpi_ghes_add_fw_cfg(&acpi_ged_state->ghes_state,
144
+ qdev_connect_clock_in(DEVICE(&s->gpio[i]), "clk",
149
+ vms->fw_cfg, tables.hardware_errors);
145
+ qdev_get_clock_out(DEVICE(&(s->rcc)), name));
146
+ if (!sysbus_realize(busdev, errp)) {
147
+ return;
148
+ }
149
+ sysbus_mmio_map(busdev, 0, stm32l4x5_gpio_cfg[i].addr);
150
+ }
150
+ }
151
+
151
+
152
build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update,
152
/* System configuration controller */
153
build_state, tables.rsdp,
153
busdev = SYS_BUS_DEVICE(&s->syscfg);
154
ACPI_BUILD_RSDP_FILE, 0);
154
if (!sysbus_realize(busdev, errp)) {
155
return;
156
}
157
sysbus_mmio_map(busdev, 0, SYSCFG_ADDR);
158
- /*
159
- * TODO: when the GPIO device is implemented, connect it
160
- * to SYCFG using `qdev_connect_gpio_out`, NUM_GPIOS and
161
- * GPIO_NUM_PINS.
162
- */
163
+
164
+ for (unsigned i = 0; i < NUM_GPIOS; i++) {
165
+ for (unsigned j = 0; j < GPIO_NUM_PINS; j++) {
166
+ pin_index = GPIO_NUM_PINS * i + j;
167
+ qdev_connect_gpio_out(DEVICE(&s->gpio[i]), j,
168
+ qdev_get_gpio_in(DEVICE(&s->syscfg),
169
+ pin_index));
170
+ }
171
+ }
172
173
/* EXTI device */
174
busdev = SYS_BUS_DEVICE(&s->exti);
175
@@ -XXX,XX +XXX,XX @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
176
}
177
}
178
179
- for (unsigned i = 0; i < 16; i++) {
180
+ for (unsigned i = 0; i < GPIO_NUM_PINS; i++) {
181
qdev_connect_gpio_out(DEVICE(&s->syscfg), i,
182
qdev_get_gpio_in(DEVICE(&s->exti), i));
183
}
184
@@ -XXX,XX +XXX,XX @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
185
/* RESERVED: 0x40024400, 0x7FDBC00 */
186
187
/* AHB2 BUS */
188
- create_unimplemented_device("GPIOA", 0x48000000, 0x400);
189
- create_unimplemented_device("GPIOB", 0x48000400, 0x400);
190
- create_unimplemented_device("GPIOC", 0x48000800, 0x400);
191
- create_unimplemented_device("GPIOD", 0x48000C00, 0x400);
192
- create_unimplemented_device("GPIOE", 0x48001000, 0x400);
193
- create_unimplemented_device("GPIOF", 0x48001400, 0x400);
194
- create_unimplemented_device("GPIOG", 0x48001800, 0x400);
195
- create_unimplemented_device("GPIOH", 0x48001C00, 0x400);
196
/* RESERVED: 0x48002000, 0x7FDBC00 */
197
create_unimplemented_device("OTG_FS", 0x50000000, 0x40000);
198
create_unimplemented_device("ADC", 0x50040000, 0x400);
199
diff --git a/hw/misc/stm32l4x5_syscfg.c b/hw/misc/stm32l4x5_syscfg.c
200
index XXXXXXX..XXXXXXX 100644
201
--- a/hw/misc/stm32l4x5_syscfg.c
202
+++ b/hw/misc/stm32l4x5_syscfg.c
203
@@ -XXX,XX +XXX,XX @@
204
#include "hw/irq.h"
205
#include "migration/vmstate.h"
206
#include "hw/misc/stm32l4x5_syscfg.h"
207
+#include "hw/gpio/stm32l4x5_gpio.h"
208
209
#define SYSCFG_MEMRMP 0x00
210
#define SYSCFG_CFGR1 0x04
211
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
212
index XXXXXXX..XXXXXXX 100644
213
--- a/hw/arm/Kconfig
214
+++ b/hw/arm/Kconfig
215
@@ -XXX,XX +XXX,XX @@ config STM32L4X5_SOC
216
bool
217
select ARM_V7M
218
select OR_IRQ
219
- select STM32L4X5_SYSCFG
220
select STM32L4X5_EXTI
221
+ select STM32L4X5_SYSCFG
222
select STM32L4X5_RCC
223
+ select STM32L4X5_GPIO
224
225
config XLNX_ZYNQMP_ARM
226
bool
155
--
227
--
156
2.20.1
228
2.34.1
157
229
158
230
diff view generated by jsdifflib
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
1
From: Inès Varhol <ines.varhol@telecom-paris.fr>
2
2
3
Add APEI/GHES detailed design document
3
The testcase contains :
4
- `test_idr_reset_value()` :
5
Checks the reset values of MODER, OTYPER, PUPDR, ODR and IDR.
6
- `test_gpio_output_mode()` :
7
Checks that writing a bit in register ODR results in the corresponding
8
pin rising or lowering, if this pin is configured in output mode.
9
- `test_gpio_input_mode()` :
10
Checks that a input pin set high or low externally results
11
in the pin rising and lowering.
12
- `test_pull_up_pull_down()` :
13
Checks that a floating pin in pull-up/down mode is actually high/down.
14
- `test_push_pull()` :
15
Checks that a pin set externally is disconnected when configured in
16
push-pull output mode, and can't be set externally while in this mode.
17
- `test_open_drain()` :
18
Checks that a pin set externally high is disconnected when configured
19
in open-drain output mode, and can't be set high while in this mode.
20
- `test_bsrr_brr()` :
21
Checks that writing to BSRR and BRR has the desired result in ODR.
22
- `test_clock_enable()` :
23
Checks that GPIO clock is at the right frequency after enabling it.
4
24
5
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
25
Acked-by: Thomas Huth <thuth@redhat.com>
6
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
26
Signed-off-by: Arnaud Minier <arnaud.minier@telecom-paris.fr>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
27
Signed-off-by: Inès Varhol <ines.varhol@telecom-paris.fr>
8
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
28
Message-id: 20240305210444.310665-4-ines.varhol@telecom-paris.fr
9
Message-id: 20200512030609.19593-4-gengdongjiu@huawei.com
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
29
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
30
---
12
docs/specs/acpi_hest_ghes.rst | 110 ++++++++++++++++++++++++++++++++++
31
tests/qtest/stm32l4x5_gpio-test.c | 551 ++++++++++++++++++++++++++++++
13
docs/specs/index.rst | 1 +
32
tests/qtest/meson.build | 3 +-
14
2 files changed, 111 insertions(+)
33
2 files changed, 553 insertions(+), 1 deletion(-)
15
create mode 100644 docs/specs/acpi_hest_ghes.rst
34
create mode 100644 tests/qtest/stm32l4x5_gpio-test.c
16
35
17
diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst
36
diff --git a/tests/qtest/stm32l4x5_gpio-test.c b/tests/qtest/stm32l4x5_gpio-test.c
18
new file mode 100644
37
new file mode 100644
19
index XXXXXXX..XXXXXXX
38
index XXXXXXX..XXXXXXX
20
--- /dev/null
39
--- /dev/null
21
+++ b/docs/specs/acpi_hest_ghes.rst
40
+++ b/tests/qtest/stm32l4x5_gpio-test.c
22
@@ -XXX,XX +XXX,XX @@
41
@@ -XXX,XX +XXX,XX @@
23
+APEI tables generating and CPER record
42
+/*
24
+======================================
43
+ * QTest testcase for STM32L4x5_GPIO
25
+
44
+ *
26
+..
45
+ * Copyright (c) 2024 Arnaud Minier <arnaud.minier@telecom-paris.fr>
27
+ Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
46
+ * Copyright (c) 2024 Inès Varhol <ines.varhol@telecom-paris.fr>
28
+
47
+ *
29
+ This work is licensed under the terms of the GNU GPL, version 2 or later.
48
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
30
+ See the COPYING file in the top-level directory.
49
+ * See the COPYING file in the top-level directory.
31
+
50
+ */
32
+Design Details
51
+
33
+--------------
52
+#include "qemu/osdep.h"
34
+
53
+#include "libqtest-single.h"
35
+::
54
+
36
+
55
+#define GPIO_BASE_ADDR 0x48000000
37
+ etc/acpi/tables etc/hardware_errors
56
+#define GPIO_SIZE 0x400
38
+ ==================== ===============================
57
+#define NUM_GPIOS 8
39
+ + +--------------------------+ +----------------------------+
58
+#define NUM_GPIO_PINS 16
40
+ | | HEST | +--------->| error_block_address1 |------+
59
+
41
+ | +--------------------------+ | +----------------------------+ |
60
+#define GPIO_A 0x48000000
42
+ | | GHES1 | | +------->| error_block_address2 |------+-+
61
+#define GPIO_B 0x48000400
43
+ | +--------------------------+ | | +----------------------------+ | |
62
+#define GPIO_C 0x48000800
44
+ | | ................. | | | | .............. | | |
63
+#define GPIO_D 0x48000C00
45
+ | | error_status_address-----+-+ | -----------------------------+ | |
64
+#define GPIO_E 0x48001000
46
+ | | ................. | | +--->| error_block_addressN |------+-+---+
65
+#define GPIO_F 0x48001400
47
+ | | read_ack_register--------+-+ | | +----------------------------+ | | |
66
+#define GPIO_G 0x48001800
48
+ | | read_ack_preserve | +-+---+--->| read_ack_register1 | | | |
67
+#define GPIO_H 0x48001C00
49
+ | | read_ack_write | | | +----------------------------+ | | |
68
+
50
+ + +--------------------------+ | +-+--->| read_ack_register2 | | | |
69
+#define MODER 0x00
51
+ | | GHES2 | | | | +----------------------------+ | | |
70
+#define OTYPER 0x04
52
+ + +--------------------------+ | | | | ............. | | | |
71
+#define PUPDR 0x0C
53
+ | | ................. | | | | +----------------------------+ | | |
72
+#define IDR 0x10
54
+ | | error_status_address-----+---+ | | +->| read_ack_registerN | | | |
73
+#define ODR 0x14
55
+ | | ................. | | | | +----------------------------+ | | |
74
+#define BSRR 0x18
56
+ | | read_ack_register--------+-----+ | | |Generic Error Status Block 1|<-----+ | |
75
+#define BRR 0x28
57
+ | | read_ack_preserve | | | |-+------------------------+-+ | |
76
+
58
+ | | read_ack_write | | | | | CPER | | | |
77
+#define MODER_INPUT 0
59
+ + +--------------------------| | | | | CPER | | | |
78
+#define MODER_OUTPUT 1
60
+ | | ............... | | | | | .... | | | |
79
+
61
+ + +--------------------------+ | | | | CPER | | | |
80
+#define PUPDR_NONE 0
62
+ | | GHESN | | | |-+------------------------+-| | |
81
+#define PUPDR_PULLUP 1
63
+ + +--------------------------+ | | |Generic Error Status Block 2|<-------+ |
82
+#define PUPDR_PULLDOWN 2
64
+ | | ................. | | | |-+------------------------+-+ |
83
+
65
+ | | error_status_address-----+-------+ | | | CPER | | |
84
+#define OTYPER_PUSH_PULL 0
66
+ | | ................. | | | | CPER | | |
85
+#define OTYPER_OPEN_DRAIN 1
67
+ | | read_ack_register--------+---------+ | | .... | | |
86
+
68
+ | | read_ack_preserve | | | CPER | | |
87
+const uint32_t moder_reset[NUM_GPIOS] = {
69
+ | | read_ack_write | +-+------------------------+-+ |
88
+ 0xABFFFFFF,
70
+ + +--------------------------+ | .......... | |
89
+ 0xFFFFFEBF,
71
+ |----------------------------+ |
90
+ 0xFFFFFFFF,
72
+ |Generic Error Status Block N |<----------+
91
+ 0xFFFFFFFF,
73
+ |-+-------------------------+-+
92
+ 0xFFFFFFFF,
74
+ | | CPER | |
93
+ 0xFFFFFFFF,
75
+ | | CPER | |
94
+ 0xFFFFFFFF,
76
+ | | .... | |
95
+ 0x0000000F
77
+ | | CPER | |
96
+};
78
+ +-+-------------------------+-+
97
+
79
+
98
+const uint32_t pupdr_reset[NUM_GPIOS] = {
80
+
99
+ 0x64000000,
81
+(1) QEMU generates the ACPI HEST table. This table goes in the current
100
+ 0x00000100,
82
+ "etc/acpi/tables" fw_cfg blob. Each error source has different
101
+ 0x00000000,
83
+ notification types.
102
+ 0x00000000,
84
+
103
+ 0x00000000,
85
+(2) A new fw_cfg blob called "etc/hardware_errors" is introduced. QEMU
104
+ 0x00000000,
86
+ also needs to populate this blob. The "etc/hardware_errors" fw_cfg blob
105
+ 0x00000000,
87
+ contains an address registers table and an Error Status Data Block table.
106
+ 0x00000000
88
+
107
+};
89
+(3) The address registers table contains N Error Block Address entries
108
+
90
+ and N Read Ack Register entries. The size for each entry is 8-byte.
109
+const uint32_t idr_reset[NUM_GPIOS] = {
91
+ The Error Status Data Block table contains N Error Status Data Block
110
+ 0x0000A000,
92
+ entries. The size for each entry is 4096(0x1000) bytes. The total size
111
+ 0x00000010,
93
+ for the "etc/hardware_errors" fw_cfg blob is (N * 8 * 2 + N * 4096) bytes.
112
+ 0x00000000,
94
+ N is the number of the kinds of hardware error sources.
113
+ 0x00000000,
95
+
114
+ 0x00000000,
96
+(4) QEMU generates the ACPI linker/loader script for the firmware. The
115
+ 0x00000000,
97
+ firmware pre-allocates memory for "etc/acpi/tables", "etc/hardware_errors"
116
+ 0x00000000,
98
+ and copies blob contents there.
117
+ 0x00000000
99
+
118
+};
100
+(5) QEMU generates N ADD_POINTER commands, which patch addresses in the
119
+
101
+ "error_status_address" fields of the HEST table with a pointer to the
120
+static uint32_t gpio_readl(unsigned int gpio, unsigned int offset)
102
+ corresponding "address registers" in the "etc/hardware_errors" blob.
121
+{
103
+
122
+ return readl(gpio + offset);
104
+(6) QEMU generates N ADD_POINTER commands, which patch addresses in the
123
+}
105
+ "read_ack_register" fields of the HEST table with a pointer to the
124
+
106
+ corresponding "read_ack_register" within the "etc/hardware_errors" blob.
125
+static void gpio_writel(unsigned int gpio, unsigned int offset, uint32_t value)
107
+
126
+{
108
+(7) QEMU generates N ADD_POINTER commands for the firmware, which patch
127
+ writel(gpio + offset, value);
109
+ addresses in the "error_block_address" fields with a pointer to the
128
+}
110
+ respective "Error Status Data Block" in the "etc/hardware_errors" blob.
129
+
111
+
130
+static void gpio_set_bit(unsigned int gpio, unsigned int reg,
112
+(8) QEMU defines a third and write-only fw_cfg blob which is called
131
+ unsigned int pin, uint32_t value)
113
+ "etc/hardware_errors_addr". Through that blob, the firmware can send back
132
+{
114
+ the guest-side allocation addresses to QEMU. The "etc/hardware_errors_addr"
133
+ uint32_t mask = 0xFFFFFFFF & ~(0x1 << pin);
115
+ blob contains a 8-byte entry. QEMU generates a single WRITE_POINTER command
134
+ gpio_writel(gpio, reg, (gpio_readl(gpio, reg) & mask) | value << pin);
116
+ for the firmware. The firmware will write back the start address of
135
+}
117
+ "etc/hardware_errors" blob to the fw_cfg file "etc/hardware_errors_addr".
136
+
118
+
137
+static void gpio_set_2bits(unsigned int gpio, unsigned int reg,
119
+(9) When QEMU gets a SIGBUS from the kernel, QEMU writes CPER into corresponding
138
+ unsigned int pin, uint32_t value)
120
+ "Error Status Data Block", guest memory, and then injects platform specific
139
+{
121
+ interrupt (in case of arm/virt machine it's Synchronous External Abort) as a
140
+ uint32_t offset = 2 * pin;
122
+ notification which is necessary for notifying the guest.
141
+ uint32_t mask = 0xFFFFFFFF & ~(0x3 << offset);
123
+
142
+ gpio_writel(gpio, reg, (gpio_readl(gpio, reg) & mask) | value << offset);
124
+(10) This notification (in virtual hardware) will be handled by the guest
143
+}
125
+ kernel, on receiving notification, guest APEI driver could read the CPER error
144
+
126
+ and take appropriate action.
145
+static unsigned int get_gpio_id(uint32_t gpio_addr)
127
+
146
+{
128
+(11) kvm_arch_on_sigbus_vcpu() uses source_id as index in "etc/hardware_errors" to
147
+ return (gpio_addr - GPIO_BASE_ADDR) / GPIO_SIZE;
129
+ find out "Error Status Data Block" entry corresponding to error source. So supported
148
+}
130
+ source_id values should be assigned here and not be changed afterwards to make sure
149
+
131
+ that guest will write error into expected "Error Status Data Block" even if guest was
150
+static void gpio_set_irq(unsigned int gpio, int num, int level)
132
+ migrated to a newer QEMU.
151
+{
133
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
152
+ g_autofree char *name = g_strdup_printf("/machine/soc/gpio%c",
153
+ get_gpio_id(gpio) + 'a');
154
+ qtest_set_irq_in(global_qtest, name, NULL, num, level);
155
+}
156
+
157
+static void disconnect_all_pins(unsigned int gpio)
158
+{
159
+ g_autofree char *path = g_strdup_printf("/machine/soc/gpio%c",
160
+ get_gpio_id(gpio) + 'a');
161
+ QDict *r;
162
+
163
+ r = qtest_qmp(global_qtest, "{ 'execute': 'qom-set', 'arguments': "
164
+ "{ 'path': %s, 'property': 'disconnected-pins', 'value': %d } }",
165
+ path, 0xFFFF);
166
+ g_assert_false(qdict_haskey(r, "error"));
167
+ qobject_unref(r);
168
+}
169
+
170
+static uint32_t get_disconnected_pins(unsigned int gpio)
171
+{
172
+ g_autofree char *path = g_strdup_printf("/machine/soc/gpio%c",
173
+ get_gpio_id(gpio) + 'a');
174
+ uint32_t disconnected_pins = 0;
175
+ QDict *r;
176
+
177
+ r = qtest_qmp(global_qtest, "{ 'execute': 'qom-get', 'arguments':"
178
+ " { 'path': %s, 'property': 'disconnected-pins'} }", path);
179
+ g_assert_false(qdict_haskey(r, "error"));
180
+ disconnected_pins = qdict_get_int(r, "return");
181
+ qobject_unref(r);
182
+ return disconnected_pins;
183
+}
184
+
185
+static uint32_t reset(uint32_t gpio, unsigned int offset)
186
+{
187
+ switch (offset) {
188
+ case MODER:
189
+ return moder_reset[get_gpio_id(gpio)];
190
+ case PUPDR:
191
+ return pupdr_reset[get_gpio_id(gpio)];
192
+ case IDR:
193
+ return idr_reset[get_gpio_id(gpio)];
194
+ }
195
+ return 0x0;
196
+}
197
+
198
+static void system_reset(void)
199
+{
200
+ QDict *r;
201
+ r = qtest_qmp(global_qtest, "{'execute': 'system_reset'}");
202
+ g_assert_false(qdict_haskey(r, "error"));
203
+ qobject_unref(r);
204
+}
205
+
206
+static void test_idr_reset_value(void)
207
+{
208
+ /*
209
+ * Checks that the values in MODER, OTYPER, PUPDR and ODR
210
+ * after reset are correct, and that the value in IDR is
211
+ * coherent.
212
+ * Since AF and analog modes aren't implemented, IDR reset
213
+ * values aren't the same as with a real board.
214
+ *
215
+ * Register IDR contains the actual values of all GPIO pins.
216
+ * Its value depends on the pins' configuration
217
+ * (intput/output/analog : register MODER, push-pull/open-drain :
218
+ * register OTYPER, pull-up/pull-down/none : register PUPDR)
219
+ * and on the values stored in register ODR
220
+ * (in case the pin is in output mode).
221
+ */
222
+
223
+ gpio_writel(GPIO_A, MODER, 0xDEADBEEF);
224
+ gpio_writel(GPIO_A, ODR, 0xDEADBEEF);
225
+ gpio_writel(GPIO_A, OTYPER, 0xDEADBEEF);
226
+ gpio_writel(GPIO_A, PUPDR, 0xDEADBEEF);
227
+
228
+ gpio_writel(GPIO_B, MODER, 0xDEADBEEF);
229
+ gpio_writel(GPIO_B, ODR, 0xDEADBEEF);
230
+ gpio_writel(GPIO_B, OTYPER, 0xDEADBEEF);
231
+ gpio_writel(GPIO_B, PUPDR, 0xDEADBEEF);
232
+
233
+ gpio_writel(GPIO_C, MODER, 0xDEADBEEF);
234
+ gpio_writel(GPIO_C, ODR, 0xDEADBEEF);
235
+ gpio_writel(GPIO_C, OTYPER, 0xDEADBEEF);
236
+ gpio_writel(GPIO_C, PUPDR, 0xDEADBEEF);
237
+
238
+ gpio_writel(GPIO_H, MODER, 0xDEADBEEF);
239
+ gpio_writel(GPIO_H, ODR, 0xDEADBEEF);
240
+ gpio_writel(GPIO_H, OTYPER, 0xDEADBEEF);
241
+ gpio_writel(GPIO_H, PUPDR, 0xDEADBEEF);
242
+
243
+ system_reset();
244
+
245
+ uint32_t moder = gpio_readl(GPIO_A, MODER);
246
+ uint32_t odr = gpio_readl(GPIO_A, ODR);
247
+ uint32_t otyper = gpio_readl(GPIO_A, OTYPER);
248
+ uint32_t pupdr = gpio_readl(GPIO_A, PUPDR);
249
+ uint32_t idr = gpio_readl(GPIO_A, IDR);
250
+ /* 15: AF, 14: AF, 13: AF, 12: Analog ... */
251
+ /* here AF is the same as Analog and Input mode */
252
+ g_assert_cmphex(moder, ==, reset(GPIO_A, MODER));
253
+ g_assert_cmphex(odr, ==, reset(GPIO_A, ODR));
254
+ g_assert_cmphex(otyper, ==, reset(GPIO_A, OTYPER));
255
+ /* 15: pull-up, 14: pull-down, 13: pull-up, 12: neither ... */
256
+ g_assert_cmphex(pupdr, ==, reset(GPIO_A, PUPDR));
257
+ /* 15 : 1, 14: 0, 13: 1, 12 : reset value ... */
258
+ g_assert_cmphex(idr, ==, reset(GPIO_A, IDR));
259
+
260
+ moder = gpio_readl(GPIO_B, MODER);
261
+ odr = gpio_readl(GPIO_B, ODR);
262
+ otyper = gpio_readl(GPIO_B, OTYPER);
263
+ pupdr = gpio_readl(GPIO_B, PUPDR);
264
+ idr = gpio_readl(GPIO_B, IDR);
265
+ /* ... 5: Analog, 4: AF, 3: AF, 2: Analog ... */
266
+ /* here AF is the same as Analog and Input mode */
267
+ g_assert_cmphex(moder, ==, reset(GPIO_B, MODER));
268
+ g_assert_cmphex(odr, ==, reset(GPIO_B, ODR));
269
+ g_assert_cmphex(otyper, ==, reset(GPIO_B, OTYPER));
270
+ /* ... 5: neither, 4: pull-up, 3: neither ... */
271
+ g_assert_cmphex(pupdr, ==, reset(GPIO_B, PUPDR));
272
+ /* ... 5 : reset value, 4 : 1, 3 : reset value ... */
273
+ g_assert_cmphex(idr, ==, reset(GPIO_B, IDR));
274
+
275
+ moder = gpio_readl(GPIO_C, MODER);
276
+ odr = gpio_readl(GPIO_C, ODR);
277
+ otyper = gpio_readl(GPIO_C, OTYPER);
278
+ pupdr = gpio_readl(GPIO_C, PUPDR);
279
+ idr = gpio_readl(GPIO_C, IDR);
280
+ /* Analog, same as Input mode*/
281
+ g_assert_cmphex(moder, ==, reset(GPIO_C, MODER));
282
+ g_assert_cmphex(odr, ==, reset(GPIO_C, ODR));
283
+ g_assert_cmphex(otyper, ==, reset(GPIO_C, OTYPER));
284
+ /* no pull-up or pull-down */
285
+ g_assert_cmphex(pupdr, ==, reset(GPIO_C, PUPDR));
286
+ /* reset value */
287
+ g_assert_cmphex(idr, ==, reset(GPIO_C, IDR));
288
+
289
+ moder = gpio_readl(GPIO_H, MODER);
290
+ odr = gpio_readl(GPIO_H, ODR);
291
+ otyper = gpio_readl(GPIO_H, OTYPER);
292
+ pupdr = gpio_readl(GPIO_H, PUPDR);
293
+ idr = gpio_readl(GPIO_H, IDR);
294
+ /* Analog, same as Input mode */
295
+ g_assert_cmphex(moder, ==, reset(GPIO_H, MODER));
296
+ g_assert_cmphex(odr, ==, reset(GPIO_H, ODR));
297
+ g_assert_cmphex(otyper, ==, reset(GPIO_H, OTYPER));
298
+ /* no pull-up or pull-down */
299
+ g_assert_cmphex(pupdr, ==, reset(GPIO_H, PUPDR));
300
+ /* reset value */
301
+ g_assert_cmphex(idr, ==, reset(GPIO_H, IDR));
302
+}
303
+
304
+static void test_gpio_output_mode(const void *data)
305
+{
306
+ /*
307
+ * Checks that setting a bit in ODR sets the corresponding
308
+ * GPIO line high : it should set the right bit in IDR
309
+ * and send an irq to syscfg.
310
+ * Additionally, it checks that values written to ODR
311
+ * when not in output mode are stored and not discarded.
312
+ */
313
+ unsigned int pin = ((uint64_t)data) & 0xF;
314
+ uint32_t gpio = ((uint64_t)data) >> 32;
315
+ unsigned int gpio_id = get_gpio_id(gpio);
316
+
317
+ qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
318
+
319
+ /* Set a bit in ODR and check nothing happens */
320
+ gpio_set_bit(gpio, ODR, pin, 1);
321
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR));
322
+ g_assert_false(get_irq(gpio_id * NUM_GPIO_PINS + pin));
323
+
324
+ /* Configure the relevant line as output and check the pin is high */
325
+ gpio_set_2bits(gpio, MODER, pin, MODER_OUTPUT);
326
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) | (1 << pin));
327
+ g_assert_true(get_irq(gpio_id * NUM_GPIO_PINS + pin));
328
+
329
+ /* Reset the bit in ODR and check the pin is low */
330
+ gpio_set_bit(gpio, ODR, pin, 0);
331
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
332
+ g_assert_false(get_irq(gpio_id * NUM_GPIO_PINS + pin));
333
+
334
+ /* Clean the test */
335
+ gpio_writel(gpio, ODR, reset(gpio, ODR));
336
+ gpio_writel(gpio, MODER, reset(gpio, MODER));
337
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR));
338
+ g_assert_false(get_irq(gpio_id * NUM_GPIO_PINS + pin));
339
+}
340
+
341
+static void test_gpio_input_mode(const void *data)
342
+{
343
+ /*
344
+ * Test that setting a line high/low externally sets the
345
+ * corresponding GPIO line high/low : it should set the
346
+ * right bit in IDR and send an irq to syscfg.
347
+ */
348
+ unsigned int pin = ((uint64_t)data) & 0xF;
349
+ uint32_t gpio = ((uint64_t)data) >> 32;
350
+ unsigned int gpio_id = get_gpio_id(gpio);
351
+
352
+ qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
353
+
354
+ /* Configure a line as input, raise it, and check that the pin is high */
355
+ gpio_set_2bits(gpio, MODER, pin, MODER_INPUT);
356
+ gpio_set_irq(gpio, pin, 1);
357
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) | (1 << pin));
358
+ g_assert_true(get_irq(gpio_id * NUM_GPIO_PINS + pin));
359
+
360
+ /* Lower the line and check that the pin is low */
361
+ gpio_set_irq(gpio, pin, 0);
362
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
363
+ g_assert_false(get_irq(gpio_id * NUM_GPIO_PINS + pin));
364
+
365
+ /* Clean the test */
366
+ gpio_writel(gpio, MODER, reset(gpio, MODER));
367
+ disconnect_all_pins(gpio);
368
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR));
369
+}
370
+
371
+static void test_pull_up_pull_down(const void *data)
372
+{
373
+ /*
374
+ * Test that a floating pin with pull-up sets the pin
375
+ * high and vice-versa.
376
+ */
377
+ unsigned int pin = ((uint64_t)data) & 0xF;
378
+ uint32_t gpio = ((uint64_t)data) >> 32;
379
+ unsigned int gpio_id = get_gpio_id(gpio);
380
+
381
+ qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
382
+
383
+ /* Configure a line as input with pull-up, check the line is set high */
384
+ gpio_set_2bits(gpio, MODER, pin, MODER_INPUT);
385
+ gpio_set_2bits(gpio, PUPDR, pin, PUPDR_PULLUP);
386
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) | (1 << pin));
387
+ g_assert_true(get_irq(gpio_id * NUM_GPIO_PINS + pin));
388
+
389
+ /* Configure the line with pull-down, check the line is low */
390
+ gpio_set_2bits(gpio, PUPDR, pin, PUPDR_PULLDOWN);
391
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
392
+ g_assert_false(get_irq(gpio_id * NUM_GPIO_PINS + pin));
393
+
394
+ /* Clean the test */
395
+ gpio_writel(gpio, MODER, reset(gpio, MODER));
396
+ gpio_writel(gpio, PUPDR, reset(gpio, PUPDR));
397
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR));
398
+}
399
+
400
+static void test_push_pull(const void *data)
401
+{
402
+ /*
403
+ * Test that configuring a line in push-pull output mode
404
+ * disconnects the pin, that the pin can't be set or reset
405
+ * externally afterwards.
406
+ */
407
+ unsigned int pin = ((uint64_t)data) & 0xF;
408
+ uint32_t gpio = ((uint64_t)data) >> 32;
409
+ uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
410
+
411
+ qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
412
+
413
+ /* Setting a line high externally, configuring it in push-pull output */
414
+ /* And checking the pin was disconnected */
415
+ gpio_set_irq(gpio, pin, 1);
416
+ gpio_set_2bits(gpio, MODER, pin, MODER_OUTPUT);
417
+ g_assert_cmphex(get_disconnected_pins(gpio), ==, 0xFFFF);
418
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
419
+
420
+ /* Setting a line low externally, configuring it in push-pull output */
421
+ /* And checking the pin was disconnected */
422
+ gpio_set_irq(gpio2, pin, 0);
423
+ gpio_set_bit(gpio2, ODR, pin, 1);
424
+ gpio_set_2bits(gpio2, MODER, pin, MODER_OUTPUT);
425
+ g_assert_cmphex(get_disconnected_pins(gpio2), ==, 0xFFFF);
426
+ g_assert_cmphex(gpio_readl(gpio2, IDR), ==, reset(gpio2, IDR) | (1 << pin));
427
+
428
+ /* Trying to set a push-pull output pin, checking it doesn't work */
429
+ gpio_set_irq(gpio, pin, 1);
430
+ g_assert_cmphex(get_disconnected_pins(gpio), ==, 0xFFFF);
431
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
432
+
433
+ /* Trying to reset a push-pull output pin, checking it doesn't work */
434
+ gpio_set_irq(gpio2, pin, 0);
435
+ g_assert_cmphex(get_disconnected_pins(gpio2), ==, 0xFFFF);
436
+ g_assert_cmphex(gpio_readl(gpio2, IDR), ==, reset(gpio2, IDR) | (1 << pin));
437
+
438
+ /* Clean the test */
439
+ gpio_writel(gpio, MODER, reset(gpio, MODER));
440
+ gpio_writel(gpio2, ODR, reset(gpio2, ODR));
441
+ gpio_writel(gpio2, MODER, reset(gpio2, MODER));
442
+}
443
+
444
+static void test_open_drain(const void *data)
445
+{
446
+ /*
447
+ * Test that configuring a line in open-drain output mode
448
+ * disconnects a pin set high externally and that the pin
449
+ * can't be set high externally while configured in open-drain.
450
+ *
451
+ * However a pin set low externally shouldn't be disconnected,
452
+ * and it can be set low externally when in open-drain mode.
453
+ */
454
+ unsigned int pin = ((uint64_t)data) & 0xF;
455
+ uint32_t gpio = ((uint64_t)data) >> 32;
456
+ uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
457
+
458
+ qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
459
+
460
+ /* Setting a line high externally, configuring it in open-drain output */
461
+ /* And checking the pin was disconnected */
462
+ gpio_set_irq(gpio, pin, 1);
463
+ gpio_set_bit(gpio, OTYPER, pin, OTYPER_OPEN_DRAIN);
464
+ gpio_set_2bits(gpio, MODER, pin, MODER_OUTPUT);
465
+ g_assert_cmphex(get_disconnected_pins(gpio), ==, 0xFFFF);
466
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
467
+
468
+ /* Setting a line low externally, configuring it in open-drain output */
469
+ /* And checking the pin wasn't disconnected */
470
+ gpio_set_irq(gpio2, pin, 0);
471
+ gpio_set_bit(gpio2, ODR, pin, 1);
472
+ gpio_set_bit(gpio2, OTYPER, pin, OTYPER_OPEN_DRAIN);
473
+ gpio_set_2bits(gpio2, MODER, pin, MODER_OUTPUT);
474
+ g_assert_cmphex(get_disconnected_pins(gpio2), ==, 0xFFFF & ~(1 << pin));
475
+ g_assert_cmphex(gpio_readl(gpio2, IDR), ==,
476
+ reset(gpio2, IDR) & ~(1 << pin));
477
+
478
+ /* Trying to set a open-drain output pin, checking it doesn't work */
479
+ gpio_set_irq(gpio, pin, 1);
480
+ g_assert_cmphex(get_disconnected_pins(gpio), ==, 0xFFFF);
481
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR) & ~(1 << pin));
482
+
483
+ /* Trying to reset a open-drain output pin, checking it works */
484
+ gpio_set_bit(gpio, ODR, pin, 1);
485
+ gpio_set_irq(gpio, pin, 0);
486
+ g_assert_cmphex(get_disconnected_pins(gpio2), ==, 0xFFFF & ~(1 << pin));
487
+ g_assert_cmphex(gpio_readl(gpio2, IDR), ==,
488
+ reset(gpio2, IDR) & ~(1 << pin));
489
+
490
+ /* Clean the test */
491
+ disconnect_all_pins(gpio2);
492
+ gpio_writel(gpio2, OTYPER, reset(gpio2, OTYPER));
493
+ gpio_writel(gpio2, ODR, reset(gpio2, ODR));
494
+ gpio_writel(gpio2, MODER, reset(gpio2, MODER));
495
+ g_assert_cmphex(gpio_readl(gpio2, IDR), ==, reset(gpio2, IDR));
496
+ disconnect_all_pins(gpio);
497
+ gpio_writel(gpio, OTYPER, reset(gpio, OTYPER));
498
+ gpio_writel(gpio, ODR, reset(gpio, ODR));
499
+ gpio_writel(gpio, MODER, reset(gpio, MODER));
500
+ g_assert_cmphex(gpio_readl(gpio, IDR), ==, reset(gpio, IDR));
501
+}
502
+
503
+static void test_bsrr_brr(const void *data)
504
+{
505
+ /*
506
+ * Test that writing a '1' in BSS and BSRR
507
+ * has the desired effect on ODR.
508
+ * In BSRR, BSx has priority over BRx.
509
+ */
510
+ unsigned int pin = ((uint64_t)data) & 0xF;
511
+ uint32_t gpio = ((uint64_t)data) >> 32;
512
+
513
+ gpio_writel(gpio, BSRR, (1 << pin));
514
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR) | (1 << pin));
515
+
516
+ gpio_writel(gpio, BSRR, (1 << (pin + NUM_GPIO_PINS)));
517
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR));
518
+
519
+ gpio_writel(gpio, BSRR, (1 << pin));
520
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR) | (1 << pin));
521
+
522
+ gpio_writel(gpio, BRR, (1 << pin));
523
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR));
524
+
525
+ /* BSx should have priority over BRx */
526
+ gpio_writel(gpio, BSRR, (1 << pin) | (1 << (pin + NUM_GPIO_PINS)));
527
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR) | (1 << pin));
528
+
529
+ gpio_writel(gpio, BRR, (1 << pin));
530
+ g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR));
531
+
532
+ gpio_writel(gpio, ODR, reset(gpio, ODR));
533
+}
534
+
535
+int main(int argc, char **argv)
536
+{
537
+ int ret;
538
+
539
+ g_test_init(&argc, &argv, NULL);
540
+ g_test_set_nonfatal_assertions();
541
+ qtest_add_func("stm32l4x5/gpio/test_idr_reset_value",
542
+ test_idr_reset_value);
543
+ /*
544
+ * The inputs for the tests (gpio and pin) can be changed,
545
+ * but the tests don't work for pins that are high at reset
546
+ * (GPIOA15, GPIO13 and GPIOB5).
547
+ * Specifically, rising the pin then checking `get_irq()`
548
+ * is problematic since the pin was already high.
549
+ */
550
+ qtest_add_data_func("stm32l4x5/gpio/test_gpioc5_output_mode",
551
+ (void *)((uint64_t)GPIO_C << 32 | 5),
552
+ test_gpio_output_mode);
553
+ qtest_add_data_func("stm32l4x5/gpio/test_gpioh3_output_mode",
554
+ (void *)((uint64_t)GPIO_H << 32 | 3),
555
+ test_gpio_output_mode);
556
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_input_mode1",
557
+ (void *)((uint64_t)GPIO_D << 32 | 6),
558
+ test_gpio_input_mode);
559
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_input_mode2",
560
+ (void *)((uint64_t)GPIO_C << 32 | 10),
561
+ test_gpio_input_mode);
562
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_pull_up_pull_down1",
563
+ (void *)((uint64_t)GPIO_B << 32 | 5),
564
+ test_pull_up_pull_down);
565
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_pull_up_pull_down2",
566
+ (void *)((uint64_t)GPIO_F << 32 | 1),
567
+ test_pull_up_pull_down);
568
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_push_pull1",
569
+ (void *)((uint64_t)GPIO_G << 32 | 6),
570
+ test_push_pull);
571
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_push_pull2",
572
+ (void *)((uint64_t)GPIO_H << 32 | 3),
573
+ test_push_pull);
574
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_open_drain1",
575
+ (void *)((uint64_t)GPIO_C << 32 | 4),
576
+ test_open_drain);
577
+ qtest_add_data_func("stm32l4x5/gpio/test_gpio_open_drain2",
578
+ (void *)((uint64_t)GPIO_E << 32 | 11),
579
+ test_open_drain);
580
+ qtest_add_data_func("stm32l4x5/gpio/test_bsrr_brr1",
581
+ (void *)((uint64_t)GPIO_A << 32 | 12),
582
+ test_bsrr_brr);
583
+ qtest_add_data_func("stm32l4x5/gpio/test_bsrr_brr2",
584
+ (void *)((uint64_t)GPIO_D << 32 | 0),
585
+ test_bsrr_brr);
586
+
587
+ qtest_start("-machine b-l475e-iot01a");
588
+ ret = g_test_run();
589
+ qtest_end();
590
+
591
+ return ret;
592
+}
593
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
134
index XXXXXXX..XXXXXXX 100644
594
index XXXXXXX..XXXXXXX 100644
135
--- a/docs/specs/index.rst
595
--- a/tests/qtest/meson.build
136
+++ b/docs/specs/index.rst
596
+++ b/tests/qtest/meson.build
137
@@ -XXX,XX +XXX,XX @@ Contents:
597
@@ -XXX,XX +XXX,XX @@ qtests_aspeed = \
138
ppc-spapr-xive
598
qtests_stm32l4x5 = \
139
acpi_hw_reduced_hotplug
599
['stm32l4x5_exti-test',
140
tpm
600
'stm32l4x5_syscfg-test',
141
+ acpi_hest_ghes
601
- 'stm32l4x5_rcc-test']
602
+ 'stm32l4x5_rcc-test',
603
+ 'stm32l4x5_gpio-test']
604
605
qtests_arm = \
606
(config_all_devices.has_key('CONFIG_MPS2') ? ['sse-timer-test'] : []) + \
142
--
607
--
143
2.20.1
608
2.34.1
144
609
145
610
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create vectorized versions of handle_shri_with_rndacc
3
While the 8-bit input elements are sequential in the input vector,
4
for shift+round and shift+round+accumulate. Add out-of-line
4
the 32-bit output elements are not sequential in the output matrix.
5
helpers in preparation for longer vector lengths from SVE.
5
Do not attempt to compute 2 32-bit outputs at the same time.
6
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Cc: qemu-stable@nongnu.org
8
Fixes: 23a5e3859f5 ("target/arm: Implement SME integer outer product")
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2083
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-3-richard.henderson@linaro.org
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Message-id: 20240305163931.242795-1-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
14
---
12
target/arm/helper.h | 20 ++
15
target/arm/tcg/sme_helper.c | 77 ++++++++++++++++++-------------
13
target/arm/translate.h | 9 +
16
tests/tcg/aarch64/sme-smopa-1.c | 47 +++++++++++++++++++
14
target/arm/translate-a64.c | 11 +-
17
tests/tcg/aarch64/sme-smopa-2.c | 54 ++++++++++++++++++++++
15
target/arm/translate.c | 463 +++++++++++++++++++++++++++++++++++--
18
tests/tcg/aarch64/Makefile.target | 2 +-
16
target/arm/vec_helper.c | 50 ++++
19
4 files changed, 147 insertions(+), 33 deletions(-)
17
5 files changed, 527 insertions(+), 26 deletions(-)
20
create mode 100644 tests/tcg/aarch64/sme-smopa-1.c
18
21
create mode 100644 tests/tcg/aarch64/sme-smopa-2.c
19
diff --git a/target/arm/helper.h b/target/arm/helper.h
22
23
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
20
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/helper.h
25
--- a/target/arm/tcg/sme_helper.c
22
+++ b/target/arm/helper.h
26
+++ b/target/arm/tcg/sme_helper.c
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
27
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
24
DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
26
27
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
31
+
32
+DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
+
37
+DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
41
+
42
+DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
43
+DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
44
+DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
45
+DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
46
+
47
#ifdef TARGET_AARCH64
48
#include "helper-a64.h"
49
#include "helper-sve.h"
50
diff --git a/target/arm/translate.h b/target/arm/translate.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/arm/translate.h
53
+++ b/target/arm/translate.h
54
@@ -XXX,XX +XXX,XX @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
55
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
56
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
57
58
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
59
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
60
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
61
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
62
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
64
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
66
+
67
/*
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
69
*/
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-a64.c
73
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
75
return;
76
77
case 0x04: /* SRSHR / URSHR (rounding) */
78
- break;
79
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
80
+ is_u ? gen_gvec_urshr : gen_gvec_srshr, size);
81
+ return;
82
+
83
case 0x06: /* SRSRA / URSRA (accum + rounding) */
84
- accumulate = true;
85
- break;
86
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
87
+ is_u ? gen_gvec_ursra : gen_gvec_srsra, size);
88
+ return;
89
+
90
default:
91
g_assert_not_reached();
92
}
93
diff --git a/target/arm/translate.c b/target/arm/translate.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/arm/translate.c
96
+++ b/target/arm/translate.c
97
@@ -XXX,XX +XXX,XX @@ void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
98
}
28
}
99
}
29
}
100
30
101
+/*
31
-typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
102
+ * Shift one less than the requested amount, and the low bit is
32
+typedef uint32_t IMOPFn32(uint32_t, uint32_t, uint32_t, uint8_t, bool);
103
+ * the rounding bit. For the 8 and 16-bit operations, because we
33
+static inline void do_imopa_s(uint32_t *za, uint32_t *zn, uint32_t *zm,
104
+ * mask the low bit, we can perform a normal integer shift instead
34
+ uint8_t *pn, uint8_t *pm,
105
+ * of a vector shift.
35
+ uint32_t desc, IMOPFn32 *fn)
106
+ */
107
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
108
+{
36
+{
109
+ TCGv_i64 t = tcg_temp_new_i64();
37
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
110
+
38
+ bool neg = simd_data(desc);
111
+ tcg_gen_shri_i64(t, a, sh - 1);
39
112
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
40
-static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
113
+ tcg_gen_vec_sar8i_i64(d, a, sh);
41
- uint8_t *pn, uint8_t *pm,
114
+ tcg_gen_vec_add8_i64(d, d, t);
42
- uint32_t desc, IMOPFn *fn)
115
+ tcg_temp_free_i64(t);
43
+ for (row = 0; row < oprsz; ++row) {
44
+ uint8_t pa = (pn[H1(row >> 1)] >> ((row & 1) * 4)) & 0xf;
45
+ uint32_t *za_row = &za[tile_vslice_index(row)];
46
+ uint32_t n = zn[H4(row)];
47
+
48
+ for (col = 0; col < oprsz; ++col) {
49
+ uint8_t pb = pm[H1(col >> 1)] >> ((col & 1) * 4);
50
+ uint32_t *a = &za_row[H4(col)];
51
+
52
+ *a = fn(n, zm[H4(col)], *a, pa & pb, neg);
53
+ }
54
+ }
116
+}
55
+}
117
+
56
+
118
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
57
+typedef uint64_t IMOPFn64(uint64_t, uint64_t, uint64_t, uint8_t, bool);
58
+static inline void do_imopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm,
59
+ uint8_t *pn, uint8_t *pm,
60
+ uint32_t desc, IMOPFn64 *fn)
61
{
62
intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
63
bool neg = simd_data(desc);
64
@@ -XXX,XX +XXX,XX @@ static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
65
}
66
67
#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
68
-static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
69
+static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \
70
{ \
71
- uint32_t sum0 = 0, sum1 = 0; \
72
+ uint32_t sum = 0; \
73
/* Apply P to N as a mask, making the inactive elements 0. */ \
74
n &= expand_pred_b(p); \
75
- sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
76
- sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
77
- sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
78
- sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
79
- sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
80
- sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
81
- sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
82
- sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
83
- if (neg) { \
84
- sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
85
- } else { \
86
- sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
87
- } \
88
- return ((uint64_t)sum1 << 32) | sum0; \
89
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
90
+ sum += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
91
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
92
+ sum += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
93
+ return neg ? a - sum : a + sum; \
94
}
95
96
#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
97
@@ -XXX,XX +XXX,XX @@ DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
98
DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
99
DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
100
101
-#define DEF_IMOPH(NAME) \
102
- void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
103
- void *vpm, uint32_t desc) \
104
- { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
105
+#define DEF_IMOPH(NAME, S) \
106
+ void HELPER(sme_##NAME##_##S)(void *vza, void *vzn, void *vzm, \
107
+ void *vpn, void *vpm, uint32_t desc) \
108
+ { do_imopa_##S(vza, vzn, vzm, vpn, vpm, desc, NAME##_##S); }
109
110
-DEF_IMOPH(smopa_s)
111
-DEF_IMOPH(umopa_s)
112
-DEF_IMOPH(sumopa_s)
113
-DEF_IMOPH(usmopa_s)
114
-DEF_IMOPH(smopa_d)
115
-DEF_IMOPH(umopa_d)
116
-DEF_IMOPH(sumopa_d)
117
-DEF_IMOPH(usmopa_d)
118
+DEF_IMOPH(smopa, s)
119
+DEF_IMOPH(umopa, s)
120
+DEF_IMOPH(sumopa, s)
121
+DEF_IMOPH(usmopa, s)
122
+
123
+DEF_IMOPH(smopa, d)
124
+DEF_IMOPH(umopa, d)
125
+DEF_IMOPH(sumopa, d)
126
+DEF_IMOPH(usmopa, d)
127
diff --git a/tests/tcg/aarch64/sme-smopa-1.c b/tests/tcg/aarch64/sme-smopa-1.c
128
new file mode 100644
129
index XXXXXXX..XXXXXXX
130
--- /dev/null
131
+++ b/tests/tcg/aarch64/sme-smopa-1.c
132
@@ -XXX,XX +XXX,XX @@
133
+#include <stdio.h>
134
+#include <string.h>
135
+
136
+int main()
119
+{
137
+{
120
+ TCGv_i64 t = tcg_temp_new_i64();
138
+ static const int cmp[4][4] = {
121
+
139
+ { 110, 134, 158, 182 },
122
+ tcg_gen_shri_i64(t, a, sh - 1);
140
+ { 390, 478, 566, 654 },
123
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
141
+ { 670, 822, 974, 1126 },
124
+ tcg_gen_vec_sar16i_i64(d, a, sh);
142
+ { 950, 1166, 1382, 1598 }
125
+ tcg_gen_vec_add16_i64(d, d, t);
143
+ };
126
+ tcg_temp_free_i64(t);
144
+ int dst[4][4];
145
+ int *tmp = &dst[0][0];
146
+
147
+ asm volatile(
148
+ ".arch armv8-r+sme\n\t"
149
+ "smstart\n\t"
150
+ "index z0.b, #0, #1\n\t"
151
+ "movprfx z1, z0\n\t"
152
+ "add z1.b, z1.b, #16\n\t"
153
+ "ptrue p0.b\n\t"
154
+ "smopa za0.s, p0/m, p0/m, z0.b, z1.b\n\t"
155
+ "ptrue p0.s, vl4\n\t"
156
+ "mov w12, #0\n\t"
157
+ "st1w { za0h.s[w12, #0] }, p0, [%0]\n\t"
158
+ "add %0, %0, #16\n\t"
159
+ "st1w { za0h.s[w12, #1] }, p0, [%0]\n\t"
160
+ "add %0, %0, #16\n\t"
161
+ "st1w { za0h.s[w12, #2] }, p0, [%0]\n\t"
162
+ "add %0, %0, #16\n\t"
163
+ "st1w { za0h.s[w12, #3] }, p0, [%0]\n\t"
164
+ "smstop"
165
+ : "+r"(tmp) : : "memory");
166
+
167
+ if (memcmp(cmp, dst, sizeof(dst)) == 0) {
168
+ return 0;
169
+ }
170
+
171
+ /* See above for correct results. */
172
+ for (int i = 0; i < 4; ++i) {
173
+ for (int j = 0; j < 4; ++j) {
174
+ printf("%6d", dst[i][j]);
175
+ }
176
+ printf("\n");
177
+ }
178
+ return 1;
127
+}
179
+}
128
+
180
diff --git a/tests/tcg/aarch64/sme-smopa-2.c b/tests/tcg/aarch64/sme-smopa-2.c
129
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
181
new file mode 100644
182
index XXXXXXX..XXXXXXX
183
--- /dev/null
184
+++ b/tests/tcg/aarch64/sme-smopa-2.c
185
@@ -XXX,XX +XXX,XX @@
186
+#include <stdio.h>
187
+#include <string.h>
188
+
189
+int main()
130
+{
190
+{
131
+ TCGv_i32 t = tcg_temp_new_i32();
191
+ static const long cmp[4][4] = {
132
+
192
+ { 110, 134, 158, 182 },
133
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
193
+ { 390, 478, 566, 654 },
134
+ tcg_gen_sari_i32(d, a, sh);
194
+ { 670, 822, 974, 1126 },
135
+ tcg_gen_add_i32(d, d, t);
195
+ { 950, 1166, 1382, 1598 }
136
+ tcg_temp_free_i32(t);
196
+ };
197
+ long dst[4][4];
198
+ long *tmp = &dst[0][0];
199
+ long svl;
200
+
201
+ /* Validate that we have a wide enough vector for 4 elements. */
202
+ asm(".arch armv8-r+sme-i64\n\trdsvl %0, #1" : "=r"(svl));
203
+ if (svl < 32) {
204
+ return 0;
205
+ }
206
+
207
+ asm volatile(
208
+ "smstart\n\t"
209
+ "index z0.h, #0, #1\n\t"
210
+ "movprfx z1, z0\n\t"
211
+ "add z1.h, z1.h, #16\n\t"
212
+ "ptrue p0.b\n\t"
213
+ "smopa za0.d, p0/m, p0/m, z0.h, z1.h\n\t"
214
+ "ptrue p0.d, vl4\n\t"
215
+ "mov w12, #0\n\t"
216
+ "st1d { za0h.d[w12, #0] }, p0, [%0]\n\t"
217
+ "add %0, %0, #32\n\t"
218
+ "st1d { za0h.d[w12, #1] }, p0, [%0]\n\t"
219
+ "mov w12, #2\n\t"
220
+ "add %0, %0, #32\n\t"
221
+ "st1d { za0h.d[w12, #0] }, p0, [%0]\n\t"
222
+ "add %0, %0, #32\n\t"
223
+ "st1d { za0h.d[w12, #1] }, p0, [%0]\n\t"
224
+ "smstop"
225
+ : "+r"(tmp) : : "memory");
226
+
227
+ if (memcmp(cmp, dst, sizeof(dst)) == 0) {
228
+ return 0;
229
+ }
230
+
231
+ /* See above for correct results. */
232
+ for (int i = 0; i < 4; ++i) {
233
+ for (int j = 0; j < 4; ++j) {
234
+ printf("%6ld", dst[i][j]);
235
+ }
236
+ printf("\n");
237
+ }
238
+ return 1;
137
+}
239
+}
138
+
240
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
139
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
140
+{
141
+ TCGv_i64 t = tcg_temp_new_i64();
142
+
143
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
144
+ tcg_gen_sari_i64(d, a, sh);
145
+ tcg_gen_add_i64(d, d, t);
146
+ tcg_temp_free_i64(t);
147
+}
148
+
149
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
150
+{
151
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
152
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
153
+
154
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
155
+ tcg_gen_dupi_vec(vece, ones, 1);
156
+ tcg_gen_and_vec(vece, t, t, ones);
157
+ tcg_gen_sari_vec(vece, d, a, sh);
158
+ tcg_gen_add_vec(vece, d, d, t);
159
+
160
+ tcg_temp_free_vec(t);
161
+ tcg_temp_free_vec(ones);
162
+}
163
+
164
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
165
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
166
+{
167
+ static const TCGOpcode vecop_list[] = {
168
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
169
+ };
170
+ static const GVecGen2i ops[4] = {
171
+ { .fni8 = gen_srshr8_i64,
172
+ .fniv = gen_srshr_vec,
173
+ .fno = gen_helper_gvec_srshr_b,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_8 },
176
+ { .fni8 = gen_srshr16_i64,
177
+ .fniv = gen_srshr_vec,
178
+ .fno = gen_helper_gvec_srshr_h,
179
+ .opt_opc = vecop_list,
180
+ .vece = MO_16 },
181
+ { .fni4 = gen_srshr32_i32,
182
+ .fniv = gen_srshr_vec,
183
+ .fno = gen_helper_gvec_srshr_s,
184
+ .opt_opc = vecop_list,
185
+ .vece = MO_32 },
186
+ { .fni8 = gen_srshr64_i64,
187
+ .fniv = gen_srshr_vec,
188
+ .fno = gen_helper_gvec_srshr_d,
189
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
190
+ .opt_opc = vecop_list,
191
+ .vece = MO_64 },
192
+ };
193
+
194
+ /* tszimm encoding produces immediates in the range [1..esize] */
195
+ tcg_debug_assert(shift > 0);
196
+ tcg_debug_assert(shift <= (8 << vece));
197
+
198
+ if (shift == (8 << vece)) {
199
+ /*
200
+ * Shifts larger than the element size are architecturally valid.
201
+ * Signed results in all sign bits. With rounding, this produces
202
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
203
+ * I.e. always zero.
204
+ */
205
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
206
+ } else {
207
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
208
+ }
209
+}
210
+
211
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
212
+{
213
+ TCGv_i64 t = tcg_temp_new_i64();
214
+
215
+ gen_srshr8_i64(t, a, sh);
216
+ tcg_gen_vec_add8_i64(d, d, t);
217
+ tcg_temp_free_i64(t);
218
+}
219
+
220
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
221
+{
222
+ TCGv_i64 t = tcg_temp_new_i64();
223
+
224
+ gen_srshr16_i64(t, a, sh);
225
+ tcg_gen_vec_add16_i64(d, d, t);
226
+ tcg_temp_free_i64(t);
227
+}
228
+
229
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
230
+{
231
+ TCGv_i32 t = tcg_temp_new_i32();
232
+
233
+ gen_srshr32_i32(t, a, sh);
234
+ tcg_gen_add_i32(d, d, t);
235
+ tcg_temp_free_i32(t);
236
+}
237
+
238
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
239
+{
240
+ TCGv_i64 t = tcg_temp_new_i64();
241
+
242
+ gen_srshr64_i64(t, a, sh);
243
+ tcg_gen_add_i64(d, d, t);
244
+ tcg_temp_free_i64(t);
245
+}
246
+
247
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
248
+{
249
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
250
+
251
+ gen_srshr_vec(vece, t, a, sh);
252
+ tcg_gen_add_vec(vece, d, d, t);
253
+ tcg_temp_free_vec(t);
254
+}
255
+
256
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
257
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
258
+{
259
+ static const TCGOpcode vecop_list[] = {
260
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
261
+ };
262
+ static const GVecGen2i ops[4] = {
263
+ { .fni8 = gen_srsra8_i64,
264
+ .fniv = gen_srsra_vec,
265
+ .fno = gen_helper_gvec_srsra_b,
266
+ .opt_opc = vecop_list,
267
+ .load_dest = true,
268
+ .vece = MO_8 },
269
+ { .fni8 = gen_srsra16_i64,
270
+ .fniv = gen_srsra_vec,
271
+ .fno = gen_helper_gvec_srsra_h,
272
+ .opt_opc = vecop_list,
273
+ .load_dest = true,
274
+ .vece = MO_16 },
275
+ { .fni4 = gen_srsra32_i32,
276
+ .fniv = gen_srsra_vec,
277
+ .fno = gen_helper_gvec_srsra_s,
278
+ .opt_opc = vecop_list,
279
+ .load_dest = true,
280
+ .vece = MO_32 },
281
+ { .fni8 = gen_srsra64_i64,
282
+ .fniv = gen_srsra_vec,
283
+ .fno = gen_helper_gvec_srsra_d,
284
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
285
+ .opt_opc = vecop_list,
286
+ .load_dest = true,
287
+ .vece = MO_64 },
288
+ };
289
+
290
+ /* tszimm encoding produces immediates in the range [1..esize] */
291
+ tcg_debug_assert(shift > 0);
292
+ tcg_debug_assert(shift <= (8 << vece));
293
+
294
+ /*
295
+ * Shifts larger than the element size are architecturally valid.
296
+ * Signed results in all sign bits. With rounding, this produces
297
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
298
+ * I.e. always zero. With accumulation, this leaves D unchanged.
299
+ */
300
+ if (shift == (8 << vece)) {
301
+ /* Nop, but we do need to clear the tail. */
302
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
303
+ } else {
304
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
305
+ }
306
+}
307
+
308
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
309
+{
310
+ TCGv_i64 t = tcg_temp_new_i64();
311
+
312
+ tcg_gen_shri_i64(t, a, sh - 1);
313
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
314
+ tcg_gen_vec_shr8i_i64(d, a, sh);
315
+ tcg_gen_vec_add8_i64(d, d, t);
316
+ tcg_temp_free_i64(t);
317
+}
318
+
319
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
320
+{
321
+ TCGv_i64 t = tcg_temp_new_i64();
322
+
323
+ tcg_gen_shri_i64(t, a, sh - 1);
324
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
325
+ tcg_gen_vec_shr16i_i64(d, a, sh);
326
+ tcg_gen_vec_add16_i64(d, d, t);
327
+ tcg_temp_free_i64(t);
328
+}
329
+
330
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
331
+{
332
+ TCGv_i32 t = tcg_temp_new_i32();
333
+
334
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
335
+ tcg_gen_shri_i32(d, a, sh);
336
+ tcg_gen_add_i32(d, d, t);
337
+ tcg_temp_free_i32(t);
338
+}
339
+
340
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
341
+{
342
+ TCGv_i64 t = tcg_temp_new_i64();
343
+
344
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
345
+ tcg_gen_shri_i64(d, a, sh);
346
+ tcg_gen_add_i64(d, d, t);
347
+ tcg_temp_free_i64(t);
348
+}
349
+
350
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
351
+{
352
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
353
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
354
+
355
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
356
+ tcg_gen_dupi_vec(vece, ones, 1);
357
+ tcg_gen_and_vec(vece, t, t, ones);
358
+ tcg_gen_shri_vec(vece, d, a, shift);
359
+ tcg_gen_add_vec(vece, d, d, t);
360
+
361
+ tcg_temp_free_vec(t);
362
+ tcg_temp_free_vec(ones);
363
+}
364
+
365
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
366
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
367
+{
368
+ static const TCGOpcode vecop_list[] = {
369
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
370
+ };
371
+ static const GVecGen2i ops[4] = {
372
+ { .fni8 = gen_urshr8_i64,
373
+ .fniv = gen_urshr_vec,
374
+ .fno = gen_helper_gvec_urshr_b,
375
+ .opt_opc = vecop_list,
376
+ .vece = MO_8 },
377
+ { .fni8 = gen_urshr16_i64,
378
+ .fniv = gen_urshr_vec,
379
+ .fno = gen_helper_gvec_urshr_h,
380
+ .opt_opc = vecop_list,
381
+ .vece = MO_16 },
382
+ { .fni4 = gen_urshr32_i32,
383
+ .fniv = gen_urshr_vec,
384
+ .fno = gen_helper_gvec_urshr_s,
385
+ .opt_opc = vecop_list,
386
+ .vece = MO_32 },
387
+ { .fni8 = gen_urshr64_i64,
388
+ .fniv = gen_urshr_vec,
389
+ .fno = gen_helper_gvec_urshr_d,
390
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
391
+ .opt_opc = vecop_list,
392
+ .vece = MO_64 },
393
+ };
394
+
395
+ /* tszimm encoding produces immediates in the range [1..esize] */
396
+ tcg_debug_assert(shift > 0);
397
+ tcg_debug_assert(shift <= (8 << vece));
398
+
399
+ if (shift == (8 << vece)) {
400
+ /*
401
+ * Shifts larger than the element size are architecturally valid.
402
+ * Unsigned results in zero. With rounding, this produces a
403
+ * copy of the most significant bit.
404
+ */
405
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
406
+ } else {
407
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
408
+ }
409
+}
410
+
411
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
412
+{
413
+ TCGv_i64 t = tcg_temp_new_i64();
414
+
415
+ if (sh == 8) {
416
+ tcg_gen_vec_shr8i_i64(t, a, 7);
417
+ } else {
418
+ gen_urshr8_i64(t, a, sh);
419
+ }
420
+ tcg_gen_vec_add8_i64(d, d, t);
421
+ tcg_temp_free_i64(t);
422
+}
423
+
424
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
425
+{
426
+ TCGv_i64 t = tcg_temp_new_i64();
427
+
428
+ if (sh == 16) {
429
+ tcg_gen_vec_shr16i_i64(t, a, 15);
430
+ } else {
431
+ gen_urshr16_i64(t, a, sh);
432
+ }
433
+ tcg_gen_vec_add16_i64(d, d, t);
434
+ tcg_temp_free_i64(t);
435
+}
436
+
437
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
438
+{
439
+ TCGv_i32 t = tcg_temp_new_i32();
440
+
441
+ if (sh == 32) {
442
+ tcg_gen_shri_i32(t, a, 31);
443
+ } else {
444
+ gen_urshr32_i32(t, a, sh);
445
+ }
446
+ tcg_gen_add_i32(d, d, t);
447
+ tcg_temp_free_i32(t);
448
+}
449
+
450
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
451
+{
452
+ TCGv_i64 t = tcg_temp_new_i64();
453
+
454
+ if (sh == 64) {
455
+ tcg_gen_shri_i64(t, a, 63);
456
+ } else {
457
+ gen_urshr64_i64(t, a, sh);
458
+ }
459
+ tcg_gen_add_i64(d, d, t);
460
+ tcg_temp_free_i64(t);
461
+}
462
+
463
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
464
+{
465
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
466
+
467
+ if (sh == (8 << vece)) {
468
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
469
+ } else {
470
+ gen_urshr_vec(vece, t, a, sh);
471
+ }
472
+ tcg_gen_add_vec(vece, d, d, t);
473
+ tcg_temp_free_vec(t);
474
+}
475
+
476
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
477
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
478
+{
479
+ static const TCGOpcode vecop_list[] = {
480
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
481
+ };
482
+ static const GVecGen2i ops[4] = {
483
+ { .fni8 = gen_ursra8_i64,
484
+ .fniv = gen_ursra_vec,
485
+ .fno = gen_helper_gvec_ursra_b,
486
+ .opt_opc = vecop_list,
487
+ .load_dest = true,
488
+ .vece = MO_8 },
489
+ { .fni8 = gen_ursra16_i64,
490
+ .fniv = gen_ursra_vec,
491
+ .fno = gen_helper_gvec_ursra_h,
492
+ .opt_opc = vecop_list,
493
+ .load_dest = true,
494
+ .vece = MO_16 },
495
+ { .fni4 = gen_ursra32_i32,
496
+ .fniv = gen_ursra_vec,
497
+ .fno = gen_helper_gvec_ursra_s,
498
+ .opt_opc = vecop_list,
499
+ .load_dest = true,
500
+ .vece = MO_32 },
501
+ { .fni8 = gen_ursra64_i64,
502
+ .fniv = gen_ursra_vec,
503
+ .fno = gen_helper_gvec_ursra_d,
504
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
505
+ .opt_opc = vecop_list,
506
+ .load_dest = true,
507
+ .vece = MO_64 },
508
+ };
509
+
510
+ /* tszimm encoding produces immediates in the range [1..esize] */
511
+ tcg_debug_assert(shift > 0);
512
+ tcg_debug_assert(shift <= (8 << vece));
513
+
514
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
515
+}
516
+
517
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
518
{
519
uint64_t mask = dup_const(MO_8, 0xff >> shift);
520
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
521
}
522
return 0;
523
524
+ case 2: /* VRSHR */
525
+ /* Right shift comes here negative. */
526
+ shift = -shift;
527
+ if (u) {
528
+ gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
529
+ vec_size, vec_size);
530
+ } else {
531
+ gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
532
+ vec_size, vec_size);
533
+ }
534
+ return 0;
535
+
536
+ case 3: /* VRSRA */
537
+ /* Right shift comes here negative. */
538
+ shift = -shift;
539
+ if (u) {
540
+ gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
541
+ vec_size, vec_size);
542
+ } else {
543
+ gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
544
+ vec_size, vec_size);
545
+ }
546
+ return 0;
547
+
548
case 4: /* VSRI */
549
if (!u) {
550
return 1;
551
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
552
neon_load_reg64(cpu_V0, rm + pass);
553
tcg_gen_movi_i64(cpu_V1, imm);
554
switch (op) {
555
- case 2: /* VRSHR */
556
- case 3: /* VRSRA */
557
- if (u)
558
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
559
- else
560
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
561
- break;
562
case 6: /* VQSHLU */
563
gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
564
cpu_V0, cpu_V1);
565
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
566
default:
567
g_assert_not_reached();
568
}
569
- if (op == 3) {
570
- /* Accumulate. */
571
- neon_load_reg64(cpu_V1, rd + pass);
572
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
573
- }
574
neon_store_reg64(cpu_V0, rd + pass);
575
} else { /* size < 3 */
576
/* Operands in T0 and T1. */
577
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
578
tmp2 = tcg_temp_new_i32();
579
tcg_gen_movi_i32(tmp2, imm);
580
switch (op) {
581
- case 2: /* VRSHR */
582
- case 3: /* VRSRA */
583
- GEN_NEON_INTEGER_OP(rshl);
584
- break;
585
case 6: /* VQSHLU */
586
switch (size) {
587
case 0:
588
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
589
g_assert_not_reached();
590
}
591
tcg_temp_free_i32(tmp2);
592
-
593
- if (op == 3) {
594
- /* Accumulate. */
595
- tmp2 = neon_load_reg(rd, pass);
596
- gen_neon_add(size, tmp, tmp2);
597
- tcg_temp_free_i32(tmp2);
598
- }
599
neon_store_reg(rd, pass, tmp);
600
}
601
} /* for pass */
602
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
603
index XXXXXXX..XXXXXXX 100644
241
index XXXXXXX..XXXXXXX 100644
604
--- a/target/arm/vec_helper.c
242
--- a/tests/tcg/aarch64/Makefile.target
605
+++ b/target/arm/vec_helper.c
243
+++ b/tests/tcg/aarch64/Makefile.target
606
@@ -XXX,XX +XXX,XX @@ DO_SRA(gvec_usra_d, uint64_t)
244
@@ -XXX,XX +XXX,XX @@ endif
607
245
608
#undef DO_SRA
246
# SME Tests
609
247
ifneq ($(CROSS_AS_HAS_ARMV9_SME),)
610
+#define DO_RSHR(NAME, TYPE) \
248
-AARCH64_TESTS += sme-outprod1
611
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
249
+AARCH64_TESTS += sme-outprod1 sme-smopa-1 sme-smopa-2
612
+{ \
250
endif
613
+ intptr_t i, oprsz = simd_oprsz(desc); \
251
614
+ int shift = simd_data(desc); \
252
# System Registers Tests
615
+ TYPE *d = vd, *n = vn; \
616
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
617
+ TYPE tmp = n[i] >> (shift - 1); \
618
+ d[i] = (tmp >> 1) + (tmp & 1); \
619
+ } \
620
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
621
+}
622
+
623
+DO_RSHR(gvec_srshr_b, int8_t)
624
+DO_RSHR(gvec_srshr_h, int16_t)
625
+DO_RSHR(gvec_srshr_s, int32_t)
626
+DO_RSHR(gvec_srshr_d, int64_t)
627
+
628
+DO_RSHR(gvec_urshr_b, uint8_t)
629
+DO_RSHR(gvec_urshr_h, uint16_t)
630
+DO_RSHR(gvec_urshr_s, uint32_t)
631
+DO_RSHR(gvec_urshr_d, uint64_t)
632
+
633
+#undef DO_RSHR
634
+
635
+#define DO_RSRA(NAME, TYPE) \
636
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
637
+{ \
638
+ intptr_t i, oprsz = simd_oprsz(desc); \
639
+ int shift = simd_data(desc); \
640
+ TYPE *d = vd, *n = vn; \
641
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
642
+ TYPE tmp = n[i] >> (shift - 1); \
643
+ d[i] += (tmp >> 1) + (tmp & 1); \
644
+ } \
645
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
646
+}
647
+
648
+DO_RSRA(gvec_srsra_b, int8_t)
649
+DO_RSRA(gvec_srsra_h, int16_t)
650
+DO_RSRA(gvec_srsra_s, int32_t)
651
+DO_RSRA(gvec_srsra_d, int64_t)
652
+
653
+DO_RSRA(gvec_ursra_b, uint8_t)
654
+DO_RSRA(gvec_ursra_h, uint16_t)
655
+DO_RSRA(gvec_ursra_s, uint32_t)
656
+DO_RSRA(gvec_ursra_d, uint64_t)
657
+
658
+#undef DO_RSRA
659
+
660
/*
661
* Convert float16 to float32, raising no exceptions and
662
* preserving exceptional values, including SNaN.
663
--
253
--
664
2.20.1
254
2.34.1
665
255
666
256
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
The functions eliminate duplication of the special cases for
4
this operation. They match up with the GVecGen2iFn typedef.
5
6
Add out-of-line helpers. We got away with only having inline
7
expanders because the neon vector size is only 16 bytes, and
8
we know that the inline expansion will always succeed.
9
When we reuse this for SVE, tcg-gvec-op may decide to use an
10
out-of-line helper due to longer vector lengths.
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20200513163245.17915-4-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
17
target/arm/helper.h | 10 ++
18
target/arm/translate.h | 7 +-
19
target/arm/translate-a64.c | 20 +---
20
target/arm/translate.c | 186 +++++++++++++++++++++----------------
21
target/arm/vec_helper.c | 38 ++++++++
22
5 files changed, 160 insertions(+), 101 deletions(-)
23
24
diff --git a/target/arm/helper.h b/target/arm/helper.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/helper.h
27
+++ b/target/arm/helper.h
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
31
32
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
36
+
37
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
41
+
42
#ifdef TARGET_AARCH64
43
#include "helper-a64.h"
44
#include "helper-sve.h"
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/translate.h
48
+++ b/target/arm/translate.h
49
@@ -XXX,XX +XXX,XX @@ extern const GVecGen3 mls_op[4];
50
extern const GVecGen3 cmtst_op[4];
51
extern const GVecGen3 sshl_op[4];
52
extern const GVecGen3 ushl_op[4];
53
-extern const GVecGen2i sri_op[4];
54
-extern const GVecGen2i sli_op[4];
55
extern const GVecGen4 uqadd_op[4];
56
extern const GVecGen4 sqadd_op[4];
57
extern const GVecGen4 uqsub_op[4];
58
@@ -XXX,XX +XXX,XX @@ void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
59
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
60
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
61
62
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
63
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
64
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
65
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
66
+
67
/*
68
* Forward to the isar_feature_* tests given a DisasContext pointer.
69
*/
70
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-a64.c
73
+++ b/target/arm/translate-a64.c
74
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
75
is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
76
}
77
78
-/* Expand a 2-operand + immediate AdvSIMD vector operation using
79
- * an op descriptor.
80
- */
81
-static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
82
- int rn, int64_t imm, const GVecGen2i *gvec_op)
83
-{
84
- tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
85
- is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
86
-}
87
-
88
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
89
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
90
int rn, int rm, const GVecGen3 *gvec_op)
91
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
92
gen_gvec_fn2i(s, is_q, rd, rn, shift,
93
is_u ? gen_gvec_usra : gen_gvec_ssra, size);
94
return;
95
+
96
case 0x08: /* SRI */
97
- /* Shift count same as element size is valid but does nothing. */
98
- if (shift == 8 << size) {
99
- goto done;
100
- }
101
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
102
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sri, size);
103
return;
104
105
case 0x00: /* SSHR / USHR */
106
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
107
}
108
tcg_temp_free_i64(tcg_round);
109
110
- done:
111
clear_vec_high(s, is_q, rd);
112
}
113
114
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
115
}
116
117
if (insert) {
118
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
119
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
120
} else {
121
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
122
}
123
diff --git a/target/arm/translate.c b/target/arm/translate.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/target/arm/translate.c
126
+++ b/target/arm/translate.c
127
@@ -XXX,XX +XXX,XX @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
128
129
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
130
{
131
- if (sh == 0) {
132
- tcg_gen_mov_vec(d, a);
133
- } else {
134
- TCGv_vec t = tcg_temp_new_vec_matching(d);
135
- TCGv_vec m = tcg_temp_new_vec_matching(d);
136
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
137
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
138
139
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
140
- tcg_gen_shri_vec(vece, t, a, sh);
141
- tcg_gen_and_vec(vece, d, d, m);
142
- tcg_gen_or_vec(vece, d, d, t);
143
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
144
+ tcg_gen_shri_vec(vece, t, a, sh);
145
+ tcg_gen_and_vec(vece, d, d, m);
146
+ tcg_gen_or_vec(vece, d, d, t);
147
148
- tcg_temp_free_vec(t);
149
- tcg_temp_free_vec(m);
150
- }
151
+ tcg_temp_free_vec(t);
152
+ tcg_temp_free_vec(m);
153
}
154
155
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
156
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
157
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
158
+{
159
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
160
+ const GVecGen2i ops[4] = {
161
+ { .fni8 = gen_shr8_ins_i64,
162
+ .fniv = gen_shr_ins_vec,
163
+ .fno = gen_helper_gvec_sri_b,
164
+ .load_dest = true,
165
+ .opt_opc = vecop_list,
166
+ .vece = MO_8 },
167
+ { .fni8 = gen_shr16_ins_i64,
168
+ .fniv = gen_shr_ins_vec,
169
+ .fno = gen_helper_gvec_sri_h,
170
+ .load_dest = true,
171
+ .opt_opc = vecop_list,
172
+ .vece = MO_16 },
173
+ { .fni4 = gen_shr32_ins_i32,
174
+ .fniv = gen_shr_ins_vec,
175
+ .fno = gen_helper_gvec_sri_s,
176
+ .load_dest = true,
177
+ .opt_opc = vecop_list,
178
+ .vece = MO_32 },
179
+ { .fni8 = gen_shr64_ins_i64,
180
+ .fniv = gen_shr_ins_vec,
181
+ .fno = gen_helper_gvec_sri_d,
182
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
183
+ .load_dest = true,
184
+ .opt_opc = vecop_list,
185
+ .vece = MO_64 },
186
+ };
187
188
-const GVecGen2i sri_op[4] = {
189
- { .fni8 = gen_shr8_ins_i64,
190
- .fniv = gen_shr_ins_vec,
191
- .load_dest = true,
192
- .opt_opc = vecop_list_sri,
193
- .vece = MO_8 },
194
- { .fni8 = gen_shr16_ins_i64,
195
- .fniv = gen_shr_ins_vec,
196
- .load_dest = true,
197
- .opt_opc = vecop_list_sri,
198
- .vece = MO_16 },
199
- { .fni4 = gen_shr32_ins_i32,
200
- .fniv = gen_shr_ins_vec,
201
- .load_dest = true,
202
- .opt_opc = vecop_list_sri,
203
- .vece = MO_32 },
204
- { .fni8 = gen_shr64_ins_i64,
205
- .fniv = gen_shr_ins_vec,
206
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
207
- .load_dest = true,
208
- .opt_opc = vecop_list_sri,
209
- .vece = MO_64 },
210
-};
211
+ /* tszimm encoding produces immediates in the range [1..esize]. */
212
+ tcg_debug_assert(shift > 0);
213
+ tcg_debug_assert(shift <= (8 << vece));
214
+
215
+ /* Shift of esize leaves destination unchanged. */
216
+ if (shift < (8 << vece)) {
217
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
218
+ } else {
219
+ /* Nop, but we do need to clear the tail. */
220
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
221
+ }
222
+}
223
224
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
225
{
226
@@ -XXX,XX +XXX,XX @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
227
228
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
229
{
230
- if (sh == 0) {
231
- tcg_gen_mov_vec(d, a);
232
- } else {
233
- TCGv_vec t = tcg_temp_new_vec_matching(d);
234
- TCGv_vec m = tcg_temp_new_vec_matching(d);
235
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
236
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
237
238
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
239
- tcg_gen_shli_vec(vece, t, a, sh);
240
- tcg_gen_and_vec(vece, d, d, m);
241
- tcg_gen_or_vec(vece, d, d, t);
242
+ tcg_gen_shli_vec(vece, t, a, sh);
243
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
244
+ tcg_gen_and_vec(vece, d, d, m);
245
+ tcg_gen_or_vec(vece, d, d, t);
246
247
- tcg_temp_free_vec(t);
248
- tcg_temp_free_vec(m);
249
- }
250
+ tcg_temp_free_vec(t);
251
+ tcg_temp_free_vec(m);
252
}
253
254
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
255
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
256
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
257
+{
258
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
259
+ const GVecGen2i ops[4] = {
260
+ { .fni8 = gen_shl8_ins_i64,
261
+ .fniv = gen_shl_ins_vec,
262
+ .fno = gen_helper_gvec_sli_b,
263
+ .load_dest = true,
264
+ .opt_opc = vecop_list,
265
+ .vece = MO_8 },
266
+ { .fni8 = gen_shl16_ins_i64,
267
+ .fniv = gen_shl_ins_vec,
268
+ .fno = gen_helper_gvec_sli_h,
269
+ .load_dest = true,
270
+ .opt_opc = vecop_list,
271
+ .vece = MO_16 },
272
+ { .fni4 = gen_shl32_ins_i32,
273
+ .fniv = gen_shl_ins_vec,
274
+ .fno = gen_helper_gvec_sli_s,
275
+ .load_dest = true,
276
+ .opt_opc = vecop_list,
277
+ .vece = MO_32 },
278
+ { .fni8 = gen_shl64_ins_i64,
279
+ .fniv = gen_shl_ins_vec,
280
+ .fno = gen_helper_gvec_sli_d,
281
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
282
+ .load_dest = true,
283
+ .opt_opc = vecop_list,
284
+ .vece = MO_64 },
285
+ };
286
287
-const GVecGen2i sli_op[4] = {
288
- { .fni8 = gen_shl8_ins_i64,
289
- .fniv = gen_shl_ins_vec,
290
- .load_dest = true,
291
- .opt_opc = vecop_list_sli,
292
- .vece = MO_8 },
293
- { .fni8 = gen_shl16_ins_i64,
294
- .fniv = gen_shl_ins_vec,
295
- .load_dest = true,
296
- .opt_opc = vecop_list_sli,
297
- .vece = MO_16 },
298
- { .fni4 = gen_shl32_ins_i32,
299
- .fniv = gen_shl_ins_vec,
300
- .load_dest = true,
301
- .opt_opc = vecop_list_sli,
302
- .vece = MO_32 },
303
- { .fni8 = gen_shl64_ins_i64,
304
- .fniv = gen_shl_ins_vec,
305
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
306
- .load_dest = true,
307
- .opt_opc = vecop_list_sli,
308
- .vece = MO_64 },
309
-};
310
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
311
+ tcg_debug_assert(shift >= 0);
312
+ tcg_debug_assert(shift < (8 << vece));
313
+
314
+ if (shift == 0) {
315
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
316
+ } else {
317
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
318
+ }
319
+}
320
321
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
322
{
323
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
324
}
325
/* Right shift comes here negative. */
326
shift = -shift;
327
- /* Shift out of range leaves destination unchanged. */
328
- if (shift < 8 << size) {
329
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
330
- shift, &sri_op[size]);
331
- }
332
+ gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
333
+ vec_size, vec_size);
334
return 0;
335
336
case 5: /* VSHL, VSLI */
337
if (u) { /* VSLI */
338
- /* Shift out of range leaves destination unchanged. */
339
- if (shift < 8 << size) {
340
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
341
- vec_size, shift, &sli_op[size]);
342
- }
343
+ gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
344
+ vec_size, vec_size);
345
} else { /* VSHL */
346
/* Shifts larger than the element size are
347
* architecturally valid and results in zero.
348
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
349
index XXXXXXX..XXXXXXX 100644
350
--- a/target/arm/vec_helper.c
351
+++ b/target/arm/vec_helper.c
352
@@ -XXX,XX +XXX,XX @@ DO_RSRA(gvec_ursra_d, uint64_t)
353
354
#undef DO_RSRA
355
356
+#define DO_SRI(NAME, TYPE) \
357
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
358
+{ \
359
+ intptr_t i, oprsz = simd_oprsz(desc); \
360
+ int shift = simd_data(desc); \
361
+ TYPE *d = vd, *n = vn; \
362
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
363
+ d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
364
+ } \
365
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
366
+}
367
+
368
+DO_SRI(gvec_sri_b, uint8_t)
369
+DO_SRI(gvec_sri_h, uint16_t)
370
+DO_SRI(gvec_sri_s, uint32_t)
371
+DO_SRI(gvec_sri_d, uint64_t)
372
+
373
+#undef DO_SRI
374
+
375
+#define DO_SLI(NAME, TYPE) \
376
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
377
+{ \
378
+ intptr_t i, oprsz = simd_oprsz(desc); \
379
+ int shift = simd_data(desc); \
380
+ TYPE *d = vd, *n = vn; \
381
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
382
+ d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
383
+ } \
384
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
385
+}
386
+
387
+DO_SLI(gvec_sli_b, uint8_t)
388
+DO_SLI(gvec_sli_h, uint16_t)
389
+DO_SLI(gvec_sli_s, uint32_t)
390
+DO_SLI(gvec_sli_d, uint64_t)
391
+
392
+#undef DO_SLI
393
+
394
/*
395
* Convert float16 to float32, raising no exceptions and
396
* preserving exceptional values, including SNaN.
397
--
398
2.20.1
399
400
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
In 1dc8425e551, while converting to gvec, I added an extra range check
4
against the shift count. This was unnecessary because the encoding of
5
the shift count produces 0 to the element size - 1.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-5-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate.c | 12 ++----------
13
1 file changed, 2 insertions(+), 10 deletions(-)
14
15
diff --git a/target/arm/translate.c b/target/arm/translate.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate.c
18
+++ b/target/arm/translate.c
19
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
20
gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
21
vec_size, vec_size);
22
} else { /* VSHL */
23
- /* Shifts larger than the element size are
24
- * architecturally valid and results in zero.
25
- */
26
- if (shift >= 8 << size) {
27
- tcg_gen_gvec_dup_imm(size, rd_ofs,
28
- vec_size, vec_size, 0);
29
- } else {
30
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
31
- vec_size, vec_size);
32
- }
33
+ tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
34
+ vec_size, vec_size);
35
}
36
return 0;
37
}
38
--
39
2.20.1
40
41
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Now that we've converted all cases to gvec, there is quite a bit
4
of dead code at the end of the function. Remove it.
5
6
Sink the call to gen_gvec_fn2i to the end, loading a function
7
pointer within the switch statement.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-6-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate-a64.c | 56 ++++++++++----------------------------
15
1 file changed, 14 insertions(+), 42 deletions(-)
16
17
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate-a64.c
20
+++ b/target/arm/translate-a64.c
21
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
22
int size = 32 - clz32(immh) - 1;
23
int immhb = immh << 3 | immb;
24
int shift = 2 * (8 << size) - immhb;
25
- bool accumulate = false;
26
- int dsize = is_q ? 128 : 64;
27
- int esize = 8 << size;
28
- int elements = dsize/esize;
29
- MemOp memop = size | (is_u ? 0 : MO_SIGN);
30
- TCGv_i64 tcg_rn = new_tmp_a64(s);
31
- TCGv_i64 tcg_rd = new_tmp_a64(s);
32
- TCGv_i64 tcg_round;
33
- uint64_t round_const;
34
- int i;
35
+ GVecGen2iFn *gvec_fn;
36
37
if (extract32(immh, 3, 1) && !is_q) {
38
unallocated_encoding(s);
39
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
40
41
switch (opcode) {
42
case 0x02: /* SSRA / USRA (accumulate) */
43
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
44
- is_u ? gen_gvec_usra : gen_gvec_ssra, size);
45
- return;
46
+ gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
47
+ break;
48
49
case 0x08: /* SRI */
50
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sri, size);
51
- return;
52
+ gvec_fn = gen_gvec_sri;
53
+ break;
54
55
case 0x00: /* SSHR / USHR */
56
if (is_u) {
57
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
58
/* Shift count the same size as element size produces zero. */
59
tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
60
is_q ? 16 : 8, vec_full_reg_size(s), 0);
61
- } else {
62
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
63
+ return;
64
}
65
+ gvec_fn = tcg_gen_gvec_shri;
66
} else {
67
/* Shift count the same size as element size produces all sign. */
68
if (shift == 8 << size) {
69
shift -= 1;
70
}
71
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
72
+ gvec_fn = tcg_gen_gvec_sari;
73
}
74
- return;
75
+ break;
76
77
case 0x04: /* SRSHR / URSHR (rounding) */
78
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
79
- is_u ? gen_gvec_urshr : gen_gvec_srshr, size);
80
- return;
81
+ gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
82
+ break;
83
84
case 0x06: /* SRSRA / URSRA (accum + rounding) */
85
- gen_gvec_fn2i(s, is_q, rd, rn, shift,
86
- is_u ? gen_gvec_ursra : gen_gvec_srsra, size);
87
- return;
88
+ gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
89
+ break;
90
91
default:
92
g_assert_not_reached();
93
}
94
95
- round_const = 1ULL << (shift - 1);
96
- tcg_round = tcg_const_i64(round_const);
97
-
98
- for (i = 0; i < elements; i++) {
99
- read_vec_element(s, tcg_rn, rn, i, memop);
100
- if (accumulate) {
101
- read_vec_element(s, tcg_rd, rd, i, memop);
102
- }
103
-
104
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
105
- accumulate, is_u, size, shift);
106
-
107
- write_vec_element(s, tcg_rd, rd, i, size);
108
- }
109
- tcg_temp_free_i64(tcg_round);
110
-
111
- clear_vec_high(s, is_q, rd);
112
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
113
}
114
115
/* SHL/SLI - Vector shift left */
116
--
117
2.20.1
118
119
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Provide a functional interface for the vector expansion.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Macro-ize the 5 nearly identical comparisons.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-7-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 16 ++-
15
target/arm/translate-a64.c | 22 ++--
16
target/arm/translate.c | 254 ++++++++-----------------------------
17
3 files changed, 74 insertions(+), 218 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
24
uint64_t vfp_expand_imm(int size, uint8_t imm8);
25
26
/* Vector operations shared between ARM and AArch64. */
27
-extern const GVecGen2 ceq0_op[4];
28
-extern const GVecGen2 clt0_op[4];
29
-extern const GVecGen2 cgt0_op[4];
30
-extern const GVecGen2 cle0_op[4];
31
-extern const GVecGen2 cge0_op[4];
32
+void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
33
+ uint32_t opr_sz, uint32_t max_sz);
34
+void gen_gvec_clt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
35
+ uint32_t opr_sz, uint32_t max_sz);
36
+void gen_gvec_cgt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
37
+ uint32_t opr_sz, uint32_t max_sz);
38
+void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
39
+ uint32_t opr_sz, uint32_t max_sz);
40
+void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
41
+ uint32_t opr_sz, uint32_t max_sz);
42
+
43
extern const GVecGen3 mla_op[4];
44
extern const GVecGen3 mls_op[4];
45
extern const GVecGen3 cmtst_op[4];
46
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-a64.c
49
+++ b/target/arm/translate-a64.c
50
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
51
is_q ? 16 : 8, vec_full_reg_size(s));
52
}
53
54
-/* Expand a 2-operand AdvSIMD vector operation using an op descriptor. */
55
-static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
56
- int rn, const GVecGen2 *gvec_op)
57
-{
58
- tcg_gen_gvec_2(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
59
- is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
60
-}
61
-
62
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
63
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
64
int rn, int rm, const GVecGen3 *gvec_op)
65
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
66
}
67
break;
68
case 0x8: /* CMGT, CMGE */
69
- gen_gvec_op2(s, is_q, rd, rn, u ? &cge0_op[size] : &cgt0_op[size]);
70
+ if (u) {
71
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
72
+ } else {
73
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
74
+ }
75
return;
76
case 0x9: /* CMEQ, CMLE */
77
- gen_gvec_op2(s, is_q, rd, rn, u ? &cle0_op[size] : &ceq0_op[size]);
78
+ if (u) {
79
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
80
+ } else {
81
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
82
+ }
83
return;
84
case 0xa: /* CMLT */
85
- gen_gvec_op2(s, is_q, rd, rn, &clt0_op[size]);
86
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
87
return;
88
case 0xb:
89
if (u) { /* ABS, NEG */
90
diff --git a/target/arm/translate.c b/target/arm/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/translate.c
93
+++ b/target/arm/translate.c
94
@@ -XXX,XX +XXX,XX @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
95
return 1;
96
}
97
98
-static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
99
-{
100
- tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
101
- tcg_gen_neg_i32(d, d);
102
-}
103
-
104
-static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
105
-{
106
- tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
107
- tcg_gen_neg_i64(d, d);
108
-}
109
-
110
-static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
111
-{
112
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
113
- tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
114
- tcg_temp_free_vec(zero);
115
-}
116
+#define GEN_CMP0(NAME, COND) \
117
+ static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
118
+ { \
119
+ tcg_gen_setcondi_i32(COND, d, a, 0); \
120
+ tcg_gen_neg_i32(d, d); \
121
+ } \
122
+ static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
123
+ { \
124
+ tcg_gen_setcondi_i64(COND, d, a, 0); \
125
+ tcg_gen_neg_i64(d, d); \
126
+ } \
127
+ static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
128
+ { \
129
+ TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
130
+ tcg_gen_cmp_vec(COND, vece, d, a, zero); \
131
+ tcg_temp_free_vec(zero); \
132
+ } \
133
+ void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
134
+ uint32_t opr_sz, uint32_t max_sz) \
135
+ { \
136
+ const GVecGen2 op[4] = { \
137
+ { .fno = gen_helper_gvec_##NAME##0_b, \
138
+ .fniv = gen_##NAME##0_vec, \
139
+ .opt_opc = vecop_list_cmp, \
140
+ .vece = MO_8 }, \
141
+ { .fno = gen_helper_gvec_##NAME##0_h, \
142
+ .fniv = gen_##NAME##0_vec, \
143
+ .opt_opc = vecop_list_cmp, \
144
+ .vece = MO_16 }, \
145
+ { .fni4 = gen_##NAME##0_i32, \
146
+ .fniv = gen_##NAME##0_vec, \
147
+ .opt_opc = vecop_list_cmp, \
148
+ .vece = MO_32 }, \
149
+ { .fni8 = gen_##NAME##0_i64, \
150
+ .fniv = gen_##NAME##0_vec, \
151
+ .opt_opc = vecop_list_cmp, \
152
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
153
+ .vece = MO_64 }, \
154
+ }; \
155
+ tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
156
+ }
157
158
static const TCGOpcode vecop_list_cmp[] = {
159
INDEX_op_cmp_vec, 0
160
};
161
162
-const GVecGen2 ceq0_op[4] = {
163
- { .fno = gen_helper_gvec_ceq0_b,
164
- .fniv = gen_ceq0_vec,
165
- .opt_opc = vecop_list_cmp,
166
- .vece = MO_8 },
167
- { .fno = gen_helper_gvec_ceq0_h,
168
- .fniv = gen_ceq0_vec,
169
- .opt_opc = vecop_list_cmp,
170
- .vece = MO_16 },
171
- { .fni4 = gen_ceq0_i32,
172
- .fniv = gen_ceq0_vec,
173
- .opt_opc = vecop_list_cmp,
174
- .vece = MO_32 },
175
- { .fni8 = gen_ceq0_i64,
176
- .fniv = gen_ceq0_vec,
177
- .opt_opc = vecop_list_cmp,
178
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
179
- .vece = MO_64 },
180
-};
181
+GEN_CMP0(ceq, TCG_COND_EQ)
182
+GEN_CMP0(cle, TCG_COND_LE)
183
+GEN_CMP0(cge, TCG_COND_GE)
184
+GEN_CMP0(clt, TCG_COND_LT)
185
+GEN_CMP0(cgt, TCG_COND_GT)
186
187
-static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
188
-{
189
- tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
190
- tcg_gen_neg_i32(d, d);
191
-}
192
-
193
-static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
194
-{
195
- tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
196
- tcg_gen_neg_i64(d, d);
197
-}
198
-
199
-static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
200
-{
201
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
202
- tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
203
- tcg_temp_free_vec(zero);
204
-}
205
-
206
-const GVecGen2 cle0_op[4] = {
207
- { .fno = gen_helper_gvec_cle0_b,
208
- .fniv = gen_cle0_vec,
209
- .opt_opc = vecop_list_cmp,
210
- .vece = MO_8 },
211
- { .fno = gen_helper_gvec_cle0_h,
212
- .fniv = gen_cle0_vec,
213
- .opt_opc = vecop_list_cmp,
214
- .vece = MO_16 },
215
- { .fni4 = gen_cle0_i32,
216
- .fniv = gen_cle0_vec,
217
- .opt_opc = vecop_list_cmp,
218
- .vece = MO_32 },
219
- { .fni8 = gen_cle0_i64,
220
- .fniv = gen_cle0_vec,
221
- .opt_opc = vecop_list_cmp,
222
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
223
- .vece = MO_64 },
224
-};
225
-
226
-static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
227
-{
228
- tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
229
- tcg_gen_neg_i32(d, d);
230
-}
231
-
232
-static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
233
-{
234
- tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
235
- tcg_gen_neg_i64(d, d);
236
-}
237
-
238
-static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
239
-{
240
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
241
- tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
242
- tcg_temp_free_vec(zero);
243
-}
244
-
245
-const GVecGen2 cge0_op[4] = {
246
- { .fno = gen_helper_gvec_cge0_b,
247
- .fniv = gen_cge0_vec,
248
- .opt_opc = vecop_list_cmp,
249
- .vece = MO_8 },
250
- { .fno = gen_helper_gvec_cge0_h,
251
- .fniv = gen_cge0_vec,
252
- .opt_opc = vecop_list_cmp,
253
- .vece = MO_16 },
254
- { .fni4 = gen_cge0_i32,
255
- .fniv = gen_cge0_vec,
256
- .opt_opc = vecop_list_cmp,
257
- .vece = MO_32 },
258
- { .fni8 = gen_cge0_i64,
259
- .fniv = gen_cge0_vec,
260
- .opt_opc = vecop_list_cmp,
261
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
262
- .vece = MO_64 },
263
-};
264
-
265
-static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
266
-{
267
- tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
268
- tcg_gen_neg_i32(d, d);
269
-}
270
-
271
-static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
272
-{
273
- tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
274
- tcg_gen_neg_i64(d, d);
275
-}
276
-
277
-static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
278
-{
279
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
280
- tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
281
- tcg_temp_free_vec(zero);
282
-}
283
-
284
-const GVecGen2 clt0_op[4] = {
285
- { .fno = gen_helper_gvec_clt0_b,
286
- .fniv = gen_clt0_vec,
287
- .opt_opc = vecop_list_cmp,
288
- .vece = MO_8 },
289
- { .fno = gen_helper_gvec_clt0_h,
290
- .fniv = gen_clt0_vec,
291
- .opt_opc = vecop_list_cmp,
292
- .vece = MO_16 },
293
- { .fni4 = gen_clt0_i32,
294
- .fniv = gen_clt0_vec,
295
- .opt_opc = vecop_list_cmp,
296
- .vece = MO_32 },
297
- { .fni8 = gen_clt0_i64,
298
- .fniv = gen_clt0_vec,
299
- .opt_opc = vecop_list_cmp,
300
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
301
- .vece = MO_64 },
302
-};
303
-
304
-static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
305
-{
306
- tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
307
- tcg_gen_neg_i32(d, d);
308
-}
309
-
310
-static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
311
-{
312
- tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
313
- tcg_gen_neg_i64(d, d);
314
-}
315
-
316
-static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
317
-{
318
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
319
- tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
320
- tcg_temp_free_vec(zero);
321
-}
322
-
323
-const GVecGen2 cgt0_op[4] = {
324
- { .fno = gen_helper_gvec_cgt0_b,
325
- .fniv = gen_cgt0_vec,
326
- .opt_opc = vecop_list_cmp,
327
- .vece = MO_8 },
328
- { .fno = gen_helper_gvec_cgt0_h,
329
- .fniv = gen_cgt0_vec,
330
- .opt_opc = vecop_list_cmp,
331
- .vece = MO_16 },
332
- { .fni4 = gen_cgt0_i32,
333
- .fniv = gen_cgt0_vec,
334
- .opt_opc = vecop_list_cmp,
335
- .vece = MO_32 },
336
- { .fni8 = gen_cgt0_i64,
337
- .fniv = gen_cgt0_vec,
338
- .opt_opc = vecop_list_cmp,
339
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
340
- .vece = MO_64 },
341
-};
342
+#undef GEN_CMP0
343
344
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
345
{
346
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
347
break;
348
349
case NEON_2RM_VCEQ0:
350
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
351
- vec_size, &ceq0_op[size]);
352
+ gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
353
break;
354
case NEON_2RM_VCGT0:
355
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
356
- vec_size, &cgt0_op[size]);
357
+ gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
358
break;
359
case NEON_2RM_VCLE0:
360
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
361
- vec_size, &cle0_op[size]);
362
+ gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
363
break;
364
case NEON_2RM_VCGE0:
365
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
366
- vec_size, &cge0_op[size]);
367
+ gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
368
break;
369
case NEON_2RM_VCLT0:
370
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
371
- vec_size, &clt0_op[size]);
372
+ gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
373
break;
374
375
default:
376
--
377
2.20.1
378
379
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Provide a functional interface for the vector expansion.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-8-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate.h | 7 +-
13
target/arm/translate-a64.c | 4 +-
14
target/arm/translate-neon.inc.c | 16 +----
15
target/arm/translate.c | 117 +++++++++++++++++---------------
16
4 files changed, 71 insertions(+), 73 deletions(-)
17
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
21
+++ b/target/arm/translate.h
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
23
void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
24
uint32_t opr_sz, uint32_t max_sz);
25
26
-extern const GVecGen3 mla_op[4];
27
-extern const GVecGen3 mls_op[4];
28
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
29
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
30
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
31
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
32
+
33
extern const GVecGen3 cmtst_op[4];
34
extern const GVecGen3 sshl_op[4];
35
extern const GVecGen3 ushl_op[4];
36
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-a64.c
39
+++ b/target/arm/translate-a64.c
40
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
41
return;
42
case 0x12: /* MLA, MLS */
43
if (u) {
44
- gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
45
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
46
} else {
47
- gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
48
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
49
}
50
return;
51
case 0x11:
52
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/translate-neon.inc.c
55
+++ b/target/arm/translate-neon.inc.c
56
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
57
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
58
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
59
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
60
+DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
61
+DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
62
63
#define DO_3SAME_CMP(INSN, COND) \
64
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
65
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
66
return do_3same(s, a, gen_VMUL_p_3s);
67
}
68
69
-#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \
70
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
71
- uint32_t rn_ofs, uint32_t rm_ofs, \
72
- uint32_t oprsz, uint32_t maxsz) \
73
- { \
74
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
75
- oprsz, maxsz, &OPARRAY[vece]); \
76
- } \
77
- DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
78
-
79
-
80
-DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
81
-DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
82
-
83
#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
84
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
85
uint32_t rn_ofs, uint32_t rm_ofs, \
86
diff --git a/target/arm/translate.c b/target/arm/translate.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/target/arm/translate.c
89
+++ b/target/arm/translate.c
90
@@ -XXX,XX +XXX,XX @@ static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
91
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
92
* these tables are shared with AArch64 which does support them.
93
*/
94
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
95
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
96
+{
97
+ static const TCGOpcode vecop_list[] = {
98
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
99
+ };
100
+ static const GVecGen3 ops[4] = {
101
+ { .fni4 = gen_mla8_i32,
102
+ .fniv = gen_mla_vec,
103
+ .load_dest = true,
104
+ .opt_opc = vecop_list,
105
+ .vece = MO_8 },
106
+ { .fni4 = gen_mla16_i32,
107
+ .fniv = gen_mla_vec,
108
+ .load_dest = true,
109
+ .opt_opc = vecop_list,
110
+ .vece = MO_16 },
111
+ { .fni4 = gen_mla32_i32,
112
+ .fniv = gen_mla_vec,
113
+ .load_dest = true,
114
+ .opt_opc = vecop_list,
115
+ .vece = MO_32 },
116
+ { .fni8 = gen_mla64_i64,
117
+ .fniv = gen_mla_vec,
118
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
119
+ .load_dest = true,
120
+ .opt_opc = vecop_list,
121
+ .vece = MO_64 },
122
+ };
123
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
124
+}
125
126
-static const TCGOpcode vecop_list_mla[] = {
127
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
128
-};
129
-
130
-static const TCGOpcode vecop_list_mls[] = {
131
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
132
-};
133
-
134
-const GVecGen3 mla_op[4] = {
135
- { .fni4 = gen_mla8_i32,
136
- .fniv = gen_mla_vec,
137
- .load_dest = true,
138
- .opt_opc = vecop_list_mla,
139
- .vece = MO_8 },
140
- { .fni4 = gen_mla16_i32,
141
- .fniv = gen_mla_vec,
142
- .load_dest = true,
143
- .opt_opc = vecop_list_mla,
144
- .vece = MO_16 },
145
- { .fni4 = gen_mla32_i32,
146
- .fniv = gen_mla_vec,
147
- .load_dest = true,
148
- .opt_opc = vecop_list_mla,
149
- .vece = MO_32 },
150
- { .fni8 = gen_mla64_i64,
151
- .fniv = gen_mla_vec,
152
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
153
- .load_dest = true,
154
- .opt_opc = vecop_list_mla,
155
- .vece = MO_64 },
156
-};
157
-
158
-const GVecGen3 mls_op[4] = {
159
- { .fni4 = gen_mls8_i32,
160
- .fniv = gen_mls_vec,
161
- .load_dest = true,
162
- .opt_opc = vecop_list_mls,
163
- .vece = MO_8 },
164
- { .fni4 = gen_mls16_i32,
165
- .fniv = gen_mls_vec,
166
- .load_dest = true,
167
- .opt_opc = vecop_list_mls,
168
- .vece = MO_16 },
169
- { .fni4 = gen_mls32_i32,
170
- .fniv = gen_mls_vec,
171
- .load_dest = true,
172
- .opt_opc = vecop_list_mls,
173
- .vece = MO_32 },
174
- { .fni8 = gen_mls64_i64,
175
- .fniv = gen_mls_vec,
176
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
177
- .load_dest = true,
178
- .opt_opc = vecop_list_mls,
179
- .vece = MO_64 },
180
-};
181
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
182
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
183
+{
184
+ static const TCGOpcode vecop_list[] = {
185
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
186
+ };
187
+ static const GVecGen3 ops[4] = {
188
+ { .fni4 = gen_mls8_i32,
189
+ .fniv = gen_mls_vec,
190
+ .load_dest = true,
191
+ .opt_opc = vecop_list,
192
+ .vece = MO_8 },
193
+ { .fni4 = gen_mls16_i32,
194
+ .fniv = gen_mls_vec,
195
+ .load_dest = true,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_16 },
198
+ { .fni4 = gen_mls32_i32,
199
+ .fniv = gen_mls_vec,
200
+ .load_dest = true,
201
+ .opt_opc = vecop_list,
202
+ .vece = MO_32 },
203
+ { .fni8 = gen_mls64_i64,
204
+ .fniv = gen_mls_vec,
205
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
206
+ .load_dest = true,
207
+ .opt_opc = vecop_list,
208
+ .vece = MO_64 },
209
+ };
210
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
211
+}
212
213
/* CMTST : test is "if (X & Y != 0)". */
214
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
215
--
216
2.20.1
217
218
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Rather than perform the argument swap during code generation,
4
perform it during decode. This means it doesn't have to be
5
special cased later, and we can share code with aarch64 code
6
generation. Hopefully the decode comment addresses any confusion
7
that might arise in between.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20200513163245.17915-9-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/neon-dp.decode | 17 +++++++++++++++--
15
target/arm/translate-neon.inc.c | 3 +--
16
2 files changed, 16 insertions(+), 4 deletions(-)
17
18
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/neon-dp.decode
21
+++ b/target/arm/neon-dp.decode
22
@@ -XXX,XX +XXX,XX @@ VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
23
VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
24
VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
25
26
-VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same
27
-VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same
28
+# The _rev suffix indicates that Vn and Vm are reversed. This is
29
+# the case for shifts. In the Arm ARM these insns are documented
30
+# with the Vm and Vn fields in their usual places, but in the
31
+# assembly the operands are listed "backwards", ie in the order
32
+# Dd, Dm, Dn where other insns use Dd, Dn, Dm. For QEMU we choose
33
+# to consider Vm and Vn as being in different fields in the insn,
34
+# which allows us to avoid special-casing shifts in the trans_
35
+# function code. We would otherwise need to manually swap the operands
36
+# over to call Neon helper functions that are shared with AArch64,
37
+# which does not have this odd reversed-operand situation.
38
+@3same_rev .... ... . . . size:2 .... .... .... . q:1 . . .... \
39
+ &3same vn=%vm_dp vm=%vn_dp vd=%vd_dp
40
+
41
+VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
42
+VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
43
44
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
45
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
51
uint32_t rn_ofs, uint32_t rm_ofs, \
52
uint32_t oprsz, uint32_t maxsz) \
53
{ \
54
- /* Note the operation is vshl vd,vm,vn */ \
55
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \
56
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
57
oprsz, maxsz, &OPARRAY[vece]); \
58
} \
59
DO_3SAME(INSN, gen_##INSN##_3s)
60
--
61
2.20.1
62
63
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Provide a functional interface for the vector expansion.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-10-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate.h | 10 ++-
13
target/arm/translate-a64.c | 18 ++--
14
target/arm/translate-neon.inc.c | 23 +----
15
target/arm/translate.c | 146 +++++++++++++++++---------------
16
4 files changed, 95 insertions(+), 102 deletions(-)
17
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
21
+++ b/target/arm/translate.h
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
23
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
25
26
-extern const GVecGen3 cmtst_op[4];
27
-extern const GVecGen3 sshl_op[4];
28
-extern const GVecGen3 ushl_op[4];
29
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
30
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
31
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
32
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
33
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
34
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
35
+
36
extern const GVecGen4 uqadd_op[4];
37
extern const GVecGen4 sqadd_op[4];
38
extern const GVecGen4 uqsub_op[4];
39
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-a64.c
42
+++ b/target/arm/translate-a64.c
43
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
44
is_q ? 16 : 8, vec_full_reg_size(s));
45
}
46
47
-/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
48
-static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
49
- int rn, int rm, const GVecGen3 *gvec_op)
50
-{
51
- tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
52
- vec_full_reg_offset(s, rm), is_q ? 16 : 8,
53
- vec_full_reg_size(s), gvec_op);
54
-}
55
-
56
/* Expand a 3-operand operation using an out-of-line helper. */
57
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
58
int rn, int rm, int data, gen_helper_gvec_3 *fn)
59
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
60
(u ? uqsub_op : sqsub_op) + size);
61
return;
62
case 0x08: /* SSHL, USHL */
63
- gen_gvec_op3(s, is_q, rd, rn, rm,
64
- u ? &ushl_op[size] : &sshl_op[size]);
65
+ if (u) {
66
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
67
+ } else {
68
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
69
+ }
70
return;
71
case 0x0c: /* SMAX, UMAX */
72
if (u) {
73
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
74
return;
75
case 0x11:
76
if (!u) { /* CMTST */
77
- gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
78
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
79
return;
80
}
81
/* else CMEQ */
82
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/target/arm/translate-neon.inc.c
85
+++ b/target/arm/translate-neon.inc.c
86
@@ -XXX,XX +XXX,XX @@ DO_3SAME(VBIC, tcg_gen_gvec_andc)
87
DO_3SAME(VORR, tcg_gen_gvec_or)
88
DO_3SAME(VORN, tcg_gen_gvec_orc)
89
DO_3SAME(VEOR, tcg_gen_gvec_xor)
90
+DO_3SAME(VSHL_S, gen_gvec_sshl)
91
+DO_3SAME(VSHL_U, gen_gvec_ushl)
92
93
/* These insns are all gvec_bitsel but with the inputs in various orders. */
94
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
95
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
96
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
97
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
98
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
99
+DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
100
101
#define DO_3SAME_CMP(INSN, COND) \
102
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
103
@@ -XXX,XX +XXX,XX @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
104
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
105
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
106
107
-static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
108
- uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
109
-{
110
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
111
-}
112
-DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
113
-
114
#define DO_3SAME_GVEC4(INSN, OPARRAY) \
115
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
116
uint32_t rn_ofs, uint32_t rm_ofs, \
117
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
118
}
119
return do_3same(s, a, gen_VMUL_p_3s);
120
}
121
-
122
-#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
123
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
124
- uint32_t rn_ofs, uint32_t rm_ofs, \
125
- uint32_t oprsz, uint32_t maxsz) \
126
- { \
127
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
128
- oprsz, maxsz, &OPARRAY[vece]); \
129
- } \
130
- DO_3SAME(INSN, gen_##INSN##_3s)
131
-
132
-DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
133
-DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
134
diff --git a/target/arm/translate.c b/target/arm/translate.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/translate.c
137
+++ b/target/arm/translate.c
138
@@ -XXX,XX +XXX,XX @@ static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
139
tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
140
}
141
142
-static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
143
-
144
-const GVecGen3 cmtst_op[4] = {
145
- { .fni4 = gen_helper_neon_tst_u8,
146
- .fniv = gen_cmtst_vec,
147
- .opt_opc = vecop_list_cmtst,
148
- .vece = MO_8 },
149
- { .fni4 = gen_helper_neon_tst_u16,
150
- .fniv = gen_cmtst_vec,
151
- .opt_opc = vecop_list_cmtst,
152
- .vece = MO_16 },
153
- { .fni4 = gen_cmtst_i32,
154
- .fniv = gen_cmtst_vec,
155
- .opt_opc = vecop_list_cmtst,
156
- .vece = MO_32 },
157
- { .fni8 = gen_cmtst_i64,
158
- .fniv = gen_cmtst_vec,
159
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
160
- .opt_opc = vecop_list_cmtst,
161
- .vece = MO_64 },
162
-};
163
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
164
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
165
+{
166
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
167
+ static const GVecGen3 ops[4] = {
168
+ { .fni4 = gen_helper_neon_tst_u8,
169
+ .fniv = gen_cmtst_vec,
170
+ .opt_opc = vecop_list,
171
+ .vece = MO_8 },
172
+ { .fni4 = gen_helper_neon_tst_u16,
173
+ .fniv = gen_cmtst_vec,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_16 },
176
+ { .fni4 = gen_cmtst_i32,
177
+ .fniv = gen_cmtst_vec,
178
+ .opt_opc = vecop_list,
179
+ .vece = MO_32 },
180
+ { .fni8 = gen_cmtst_i64,
181
+ .fniv = gen_cmtst_vec,
182
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
183
+ .opt_opc = vecop_list,
184
+ .vece = MO_64 },
185
+ };
186
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
187
+}
188
189
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
190
{
191
@@ -XXX,XX +XXX,XX @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
192
tcg_temp_free_vec(rsh);
193
}
194
195
-static const TCGOpcode ushl_list[] = {
196
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
197
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
198
-};
199
-
200
-const GVecGen3 ushl_op[4] = {
201
- { .fniv = gen_ushl_vec,
202
- .fno = gen_helper_gvec_ushl_b,
203
- .opt_opc = ushl_list,
204
- .vece = MO_8 },
205
- { .fniv = gen_ushl_vec,
206
- .fno = gen_helper_gvec_ushl_h,
207
- .opt_opc = ushl_list,
208
- .vece = MO_16 },
209
- { .fni4 = gen_ushl_i32,
210
- .fniv = gen_ushl_vec,
211
- .opt_opc = ushl_list,
212
- .vece = MO_32 },
213
- { .fni8 = gen_ushl_i64,
214
- .fniv = gen_ushl_vec,
215
- .opt_opc = ushl_list,
216
- .vece = MO_64 },
217
-};
218
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
219
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
220
+{
221
+ static const TCGOpcode vecop_list[] = {
222
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
223
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
224
+ };
225
+ static const GVecGen3 ops[4] = {
226
+ { .fniv = gen_ushl_vec,
227
+ .fno = gen_helper_gvec_ushl_b,
228
+ .opt_opc = vecop_list,
229
+ .vece = MO_8 },
230
+ { .fniv = gen_ushl_vec,
231
+ .fno = gen_helper_gvec_ushl_h,
232
+ .opt_opc = vecop_list,
233
+ .vece = MO_16 },
234
+ { .fni4 = gen_ushl_i32,
235
+ .fniv = gen_ushl_vec,
236
+ .opt_opc = vecop_list,
237
+ .vece = MO_32 },
238
+ { .fni8 = gen_ushl_i64,
239
+ .fniv = gen_ushl_vec,
240
+ .opt_opc = vecop_list,
241
+ .vece = MO_64 },
242
+ };
243
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
244
+}
245
246
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
247
{
248
@@ -XXX,XX +XXX,XX @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
249
tcg_temp_free_vec(tmp);
250
}
251
252
-static const TCGOpcode sshl_list[] = {
253
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
254
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
255
-};
256
-
257
-const GVecGen3 sshl_op[4] = {
258
- { .fniv = gen_sshl_vec,
259
- .fno = gen_helper_gvec_sshl_b,
260
- .opt_opc = sshl_list,
261
- .vece = MO_8 },
262
- { .fniv = gen_sshl_vec,
263
- .fno = gen_helper_gvec_sshl_h,
264
- .opt_opc = sshl_list,
265
- .vece = MO_16 },
266
- { .fni4 = gen_sshl_i32,
267
- .fniv = gen_sshl_vec,
268
- .opt_opc = sshl_list,
269
- .vece = MO_32 },
270
- { .fni8 = gen_sshl_i64,
271
- .fniv = gen_sshl_vec,
272
- .opt_opc = sshl_list,
273
- .vece = MO_64 },
274
-};
275
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
276
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
277
+{
278
+ static const TCGOpcode vecop_list[] = {
279
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
280
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
281
+ };
282
+ static const GVecGen3 ops[4] = {
283
+ { .fniv = gen_sshl_vec,
284
+ .fno = gen_helper_gvec_sshl_b,
285
+ .opt_opc = vecop_list,
286
+ .vece = MO_8 },
287
+ { .fniv = gen_sshl_vec,
288
+ .fno = gen_helper_gvec_sshl_h,
289
+ .opt_opc = vecop_list,
290
+ .vece = MO_16 },
291
+ { .fni4 = gen_sshl_i32,
292
+ .fniv = gen_sshl_vec,
293
+ .opt_opc = vecop_list,
294
+ .vece = MO_32 },
295
+ { .fni8 = gen_sshl_i64,
296
+ .fniv = gen_sshl_vec,
297
+ .opt_opc = vecop_list,
298
+ .vece = MO_64 },
299
+ };
300
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
301
+}
302
303
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
304
TCGv_vec a, TCGv_vec b)
305
--
306
2.20.1
307
308
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Provide a functional interface for the vector expansion.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-11-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate.h | 13 +-
13
target/arm/translate-a64.c | 22 ++-
14
target/arm/translate-neon.inc.c | 19 +--
15
target/arm/translate.c | 228 +++++++++++++++++---------------
16
4 files changed, 147 insertions(+), 135 deletions(-)
17
18
diff --git a/target/arm/translate.h b/target/arm/translate.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate.h
21
+++ b/target/arm/translate.h
22
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
23
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
25
26
-extern const GVecGen4 uqadd_op[4];
27
-extern const GVecGen4 sqadd_op[4];
28
-extern const GVecGen4 uqsub_op[4];
29
-extern const GVecGen4 sqsub_op[4];
30
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
31
void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
32
void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
33
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
34
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
35
36
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
37
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
38
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
39
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
40
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
41
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
42
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
43
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
44
+
45
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
46
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
47
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
48
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/translate-a64.c
51
+++ b/target/arm/translate-a64.c
52
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
53
54
switch (opcode) {
55
case 0x01: /* SQADD, UQADD */
56
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
57
- offsetof(CPUARMState, vfp.qc),
58
- vec_full_reg_offset(s, rn),
59
- vec_full_reg_offset(s, rm),
60
- is_q ? 16 : 8, vec_full_reg_size(s),
61
- (u ? uqadd_op : sqadd_op) + size);
62
+ if (u) {
63
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
64
+ } else {
65
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
66
+ }
67
return;
68
case 0x05: /* SQSUB, UQSUB */
69
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
70
- offsetof(CPUARMState, vfp.qc),
71
- vec_full_reg_offset(s, rn),
72
- vec_full_reg_offset(s, rm),
73
- is_q ? 16 : 8, vec_full_reg_size(s),
74
- (u ? uqsub_op : sqsub_op) + size);
75
+ if (u) {
76
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
77
+ } else {
78
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
79
+ }
80
return;
81
case 0x08: /* SSHL, USHL */
82
if (u) {
83
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/target/arm/translate-neon.inc.c
86
+++ b/target/arm/translate-neon.inc.c
87
@@ -XXX,XX +XXX,XX @@ DO_3SAME(VORN, tcg_gen_gvec_orc)
88
DO_3SAME(VEOR, tcg_gen_gvec_xor)
89
DO_3SAME(VSHL_S, gen_gvec_sshl)
90
DO_3SAME(VSHL_U, gen_gvec_ushl)
91
+DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
92
+DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
93
+DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
94
+DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
95
96
/* These insns are all gvec_bitsel but with the inputs in various orders. */
97
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
98
@@ -XXX,XX +XXX,XX @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
99
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
100
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
101
102
-#define DO_3SAME_GVEC4(INSN, OPARRAY) \
103
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
104
- uint32_t rn_ofs, uint32_t rm_ofs, \
105
- uint32_t oprsz, uint32_t maxsz) \
106
- { \
107
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \
108
- rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \
109
- } \
110
- DO_3SAME(INSN, gen_##INSN##_3s)
111
-
112
-DO_3SAME_GVEC4(VQADD_S, sqadd_op)
113
-DO_3SAME_GVEC4(VQADD_U, uqadd_op)
114
-DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
115
-DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
116
-
117
static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
118
uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
119
{
120
diff --git a/target/arm/translate.c b/target/arm/translate.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/translate.c
123
+++ b/target/arm/translate.c
124
@@ -XXX,XX +XXX,XX @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
125
tcg_temp_free_vec(x);
126
}
127
128
-static const TCGOpcode vecop_list_uqadd[] = {
129
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
130
-};
131
-
132
-const GVecGen4 uqadd_op[4] = {
133
- { .fniv = gen_uqadd_vec,
134
- .fno = gen_helper_gvec_uqadd_b,
135
- .write_aofs = true,
136
- .opt_opc = vecop_list_uqadd,
137
- .vece = MO_8 },
138
- { .fniv = gen_uqadd_vec,
139
- .fno = gen_helper_gvec_uqadd_h,
140
- .write_aofs = true,
141
- .opt_opc = vecop_list_uqadd,
142
- .vece = MO_16 },
143
- { .fniv = gen_uqadd_vec,
144
- .fno = gen_helper_gvec_uqadd_s,
145
- .write_aofs = true,
146
- .opt_opc = vecop_list_uqadd,
147
- .vece = MO_32 },
148
- { .fniv = gen_uqadd_vec,
149
- .fno = gen_helper_gvec_uqadd_d,
150
- .write_aofs = true,
151
- .opt_opc = vecop_list_uqadd,
152
- .vece = MO_64 },
153
-};
154
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
155
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
156
+{
157
+ static const TCGOpcode vecop_list[] = {
158
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
159
+ };
160
+ static const GVecGen4 ops[4] = {
161
+ { .fniv = gen_uqadd_vec,
162
+ .fno = gen_helper_gvec_uqadd_b,
163
+ .write_aofs = true,
164
+ .opt_opc = vecop_list,
165
+ .vece = MO_8 },
166
+ { .fniv = gen_uqadd_vec,
167
+ .fno = gen_helper_gvec_uqadd_h,
168
+ .write_aofs = true,
169
+ .opt_opc = vecop_list,
170
+ .vece = MO_16 },
171
+ { .fniv = gen_uqadd_vec,
172
+ .fno = gen_helper_gvec_uqadd_s,
173
+ .write_aofs = true,
174
+ .opt_opc = vecop_list,
175
+ .vece = MO_32 },
176
+ { .fniv = gen_uqadd_vec,
177
+ .fno = gen_helper_gvec_uqadd_d,
178
+ .write_aofs = true,
179
+ .opt_opc = vecop_list,
180
+ .vece = MO_64 },
181
+ };
182
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
183
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
184
+}
185
186
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
187
TCGv_vec a, TCGv_vec b)
188
@@ -XXX,XX +XXX,XX @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
189
tcg_temp_free_vec(x);
190
}
191
192
-static const TCGOpcode vecop_list_sqadd[] = {
193
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
194
-};
195
-
196
-const GVecGen4 sqadd_op[4] = {
197
- { .fniv = gen_sqadd_vec,
198
- .fno = gen_helper_gvec_sqadd_b,
199
- .opt_opc = vecop_list_sqadd,
200
- .write_aofs = true,
201
- .vece = MO_8 },
202
- { .fniv = gen_sqadd_vec,
203
- .fno = gen_helper_gvec_sqadd_h,
204
- .opt_opc = vecop_list_sqadd,
205
- .write_aofs = true,
206
- .vece = MO_16 },
207
- { .fniv = gen_sqadd_vec,
208
- .fno = gen_helper_gvec_sqadd_s,
209
- .opt_opc = vecop_list_sqadd,
210
- .write_aofs = true,
211
- .vece = MO_32 },
212
- { .fniv = gen_sqadd_vec,
213
- .fno = gen_helper_gvec_sqadd_d,
214
- .opt_opc = vecop_list_sqadd,
215
- .write_aofs = true,
216
- .vece = MO_64 },
217
-};
218
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
219
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
220
+{
221
+ static const TCGOpcode vecop_list[] = {
222
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
223
+ };
224
+ static const GVecGen4 ops[4] = {
225
+ { .fniv = gen_sqadd_vec,
226
+ .fno = gen_helper_gvec_sqadd_b,
227
+ .opt_opc = vecop_list,
228
+ .write_aofs = true,
229
+ .vece = MO_8 },
230
+ { .fniv = gen_sqadd_vec,
231
+ .fno = gen_helper_gvec_sqadd_h,
232
+ .opt_opc = vecop_list,
233
+ .write_aofs = true,
234
+ .vece = MO_16 },
235
+ { .fniv = gen_sqadd_vec,
236
+ .fno = gen_helper_gvec_sqadd_s,
237
+ .opt_opc = vecop_list,
238
+ .write_aofs = true,
239
+ .vece = MO_32 },
240
+ { .fniv = gen_sqadd_vec,
241
+ .fno = gen_helper_gvec_sqadd_d,
242
+ .opt_opc = vecop_list,
243
+ .write_aofs = true,
244
+ .vece = MO_64 },
245
+ };
246
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
247
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
248
+}
249
250
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
251
TCGv_vec a, TCGv_vec b)
252
@@ -XXX,XX +XXX,XX @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
253
tcg_temp_free_vec(x);
254
}
255
256
-static const TCGOpcode vecop_list_uqsub[] = {
257
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
258
-};
259
-
260
-const GVecGen4 uqsub_op[4] = {
261
- { .fniv = gen_uqsub_vec,
262
- .fno = gen_helper_gvec_uqsub_b,
263
- .opt_opc = vecop_list_uqsub,
264
- .write_aofs = true,
265
- .vece = MO_8 },
266
- { .fniv = gen_uqsub_vec,
267
- .fno = gen_helper_gvec_uqsub_h,
268
- .opt_opc = vecop_list_uqsub,
269
- .write_aofs = true,
270
- .vece = MO_16 },
271
- { .fniv = gen_uqsub_vec,
272
- .fno = gen_helper_gvec_uqsub_s,
273
- .opt_opc = vecop_list_uqsub,
274
- .write_aofs = true,
275
- .vece = MO_32 },
276
- { .fniv = gen_uqsub_vec,
277
- .fno = gen_helper_gvec_uqsub_d,
278
- .opt_opc = vecop_list_uqsub,
279
- .write_aofs = true,
280
- .vece = MO_64 },
281
-};
282
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
283
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
284
+{
285
+ static const TCGOpcode vecop_list[] = {
286
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
287
+ };
288
+ static const GVecGen4 ops[4] = {
289
+ { .fniv = gen_uqsub_vec,
290
+ .fno = gen_helper_gvec_uqsub_b,
291
+ .opt_opc = vecop_list,
292
+ .write_aofs = true,
293
+ .vece = MO_8 },
294
+ { .fniv = gen_uqsub_vec,
295
+ .fno = gen_helper_gvec_uqsub_h,
296
+ .opt_opc = vecop_list,
297
+ .write_aofs = true,
298
+ .vece = MO_16 },
299
+ { .fniv = gen_uqsub_vec,
300
+ .fno = gen_helper_gvec_uqsub_s,
301
+ .opt_opc = vecop_list,
302
+ .write_aofs = true,
303
+ .vece = MO_32 },
304
+ { .fniv = gen_uqsub_vec,
305
+ .fno = gen_helper_gvec_uqsub_d,
306
+ .opt_opc = vecop_list,
307
+ .write_aofs = true,
308
+ .vece = MO_64 },
309
+ };
310
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
311
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
312
+}
313
314
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
315
TCGv_vec a, TCGv_vec b)
316
@@ -XXX,XX +XXX,XX @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
317
tcg_temp_free_vec(x);
318
}
319
320
-static const TCGOpcode vecop_list_sqsub[] = {
321
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
322
-};
323
-
324
-const GVecGen4 sqsub_op[4] = {
325
- { .fniv = gen_sqsub_vec,
326
- .fno = gen_helper_gvec_sqsub_b,
327
- .opt_opc = vecop_list_sqsub,
328
- .write_aofs = true,
329
- .vece = MO_8 },
330
- { .fniv = gen_sqsub_vec,
331
- .fno = gen_helper_gvec_sqsub_h,
332
- .opt_opc = vecop_list_sqsub,
333
- .write_aofs = true,
334
- .vece = MO_16 },
335
- { .fniv = gen_sqsub_vec,
336
- .fno = gen_helper_gvec_sqsub_s,
337
- .opt_opc = vecop_list_sqsub,
338
- .write_aofs = true,
339
- .vece = MO_32 },
340
- { .fniv = gen_sqsub_vec,
341
- .fno = gen_helper_gvec_sqsub_d,
342
- .opt_opc = vecop_list_sqsub,
343
- .write_aofs = true,
344
- .vece = MO_64 },
345
-};
346
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
347
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
348
+{
349
+ static const TCGOpcode vecop_list[] = {
350
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
351
+ };
352
+ static const GVecGen4 ops[4] = {
353
+ { .fniv = gen_sqsub_vec,
354
+ .fno = gen_helper_gvec_sqsub_b,
355
+ .opt_opc = vecop_list,
356
+ .write_aofs = true,
357
+ .vece = MO_8 },
358
+ { .fniv = gen_sqsub_vec,
359
+ .fno = gen_helper_gvec_sqsub_h,
360
+ .opt_opc = vecop_list,
361
+ .write_aofs = true,
362
+ .vece = MO_16 },
363
+ { .fniv = gen_sqsub_vec,
364
+ .fno = gen_helper_gvec_sqsub_s,
365
+ .opt_opc = vecop_list,
366
+ .write_aofs = true,
367
+ .vece = MO_32 },
368
+ { .fniv = gen_sqsub_vec,
369
+ .fno = gen_helper_gvec_sqsub_d,
370
+ .opt_opc = vecop_list,
371
+ .write_aofs = true,
372
+ .vece = MO_64 },
373
+ };
374
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
375
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
376
+}
377
378
/* Translate a NEON data processing instruction. Return nonzero if the
379
instruction is invalid.
380
--
381
2.20.1
382
383
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The sun4v RTC device model added under commit a0e893039cf2ce0 in 2016
2
was unfortunately added with a license of GPL-v3-or-later, which is
3
not compatible with other QEMU code which has a GPL-v2-only license.
2
4
3
These operations do not touch fp_status.
5
Relicense the code in the .c and the .h file to GPL-v2-or-later,
6
to make it compatible with the rest of QEMU.
4
7
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Cc: qemu-stable@nongnu.org
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Message-id: 20200513163245.17915-12-richard.henderson@linaro.org
10
Signed-off-by: Paolo Bonzini (for Red Hat) <pbonzini@redhat.com>
11
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
12
Signed-off-by: Markus Armbruster <armbru@redhat.com>
13
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
14
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
16
Acked-by: Alex Bennée <alex.bennee@linaro.org>
17
Message-id: 20240223161300.938542-1-peter.maydell@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
19
---
10
target/arm/helper.h | 4 ++--
20
include/hw/rtc/sun4v-rtc.h | 2 +-
11
target/arm/translate-a64.c | 5 ++---
21
hw/rtc/sun4v-rtc.c | 2 +-
12
target/arm/translate.c | 12 ++----------
22
2 files changed, 2 insertions(+), 2 deletions(-)
13
target/arm/vfp_helper.c | 5 ++---
14
4 files changed, 8 insertions(+), 18 deletions(-)
15
23
16
diff --git a/target/arm/helper.h b/target/arm/helper.h
24
diff --git a/include/hw/rtc/sun4v-rtc.h b/include/hw/rtc/sun4v-rtc.h
17
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper.h
26
--- a/include/hw/rtc/sun4v-rtc.h
19
+++ b/target/arm/helper.h
27
+++ b/include/hw/rtc/sun4v-rtc.h
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
28
@@ -XXX,XX +XXX,XX @@
21
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
29
*
22
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
30
* Copyright (c) 2016 Artyom Tarasenko
23
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
31
*
24
-DEF_HELPER_2(recpe_u32, i32, i32, ptr)
32
- * This code is licensed under the GNU GPL v3 or (at your option) any later
25
-DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
33
+ * This code is licensed under the GNU GPL v2 or (at your option) any later
26
+DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
34
* version.
27
+DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
35
*/
28
DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32)
36
29
37
diff --git a/hw/rtc/sun4v-rtc.c b/hw/rtc/sun4v-rtc.c
30
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
31
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
32
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/translate-a64.c
39
--- a/hw/rtc/sun4v-rtc.c
34
+++ b/target/arm/translate-a64.c
40
+++ b/hw/rtc/sun4v-rtc.c
35
@@ -XXX,XX +XXX,XX @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
41
@@ -XXX,XX +XXX,XX @@
36
42
*
37
switch (opcode) {
43
* Copyright (c) 2016 Artyom Tarasenko
38
case 0x3c: /* URECPE */
44
*
39
- gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
45
- * This code is licensed under the GNU GPL v3 or (at your option) any later
40
+ gen_helper_recpe_u32(tcg_res, tcg_op);
46
+ * This code is licensed under the GNU GPL v2 or (at your option) any later
41
break;
47
* version.
42
case 0x3d: /* FRECPE */
48
*/
43
gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
44
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
45
unallocated_encoding(s);
46
return;
47
}
48
- need_fpstatus = true;
49
break;
50
case 0x1e: /* FRINT32Z */
51
case 0x1f: /* FRINT64Z */
52
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
53
gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
54
break;
55
case 0x7c: /* URSQRTE */
56
- gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
57
+ gen_helper_rsqrte_u32(tcg_res, tcg_op);
58
break;
59
case 0x1e: /* FRINT32Z */
60
case 0x5e: /* FRINT32X */
61
diff --git a/target/arm/translate.c b/target/arm/translate.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.c
64
+++ b/target/arm/translate.c
65
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
66
break;
67
}
68
case NEON_2RM_VRECPE:
69
- {
70
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
71
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
72
- tcg_temp_free_ptr(fpstatus);
73
+ gen_helper_recpe_u32(tmp, tmp);
74
break;
75
- }
76
case NEON_2RM_VRSQRTE:
77
- {
78
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
79
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
80
- tcg_temp_free_ptr(fpstatus);
81
+ gen_helper_rsqrte_u32(tmp, tmp);
82
break;
83
- }
84
case NEON_2RM_VRECPE_F:
85
{
86
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
87
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/vfp_helper.c
90
+++ b/target/arm/vfp_helper.c
91
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
92
return make_float64(val);
93
}
94
95
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
96
+uint32_t HELPER(recpe_u32)(uint32_t a)
97
{
98
- /* float_status *s = fpstp; */
99
int input, estimate;
100
101
if ((a & 0x80000000) == 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
103
return deposit32(0, (32 - 9), 9, estimate);
104
}
105
106
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
107
+uint32_t HELPER(rsqrte_u32)(uint32_t a)
108
{
109
int estimate;
110
49
111
--
50
--
112
2.20.1
51
2.34.1
113
52
114
53
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Provide a functional interface for the vector expansion.
4
This fits better with the existing set of helpers that
5
we provide for other operations.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-13-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate.h | 5 ++++
13
target/arm/translate-a64.c | 34 ++----------------------
14
target/arm/translate.c | 54 +++++++++++++++++++-------------------
15
3 files changed, 34 insertions(+), 59 deletions(-)
16
17
diff --git a/target/arm/translate.h b/target/arm/translate.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate.h
20
+++ b/target/arm/translate.h
21
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
22
void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
23
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
24
25
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
26
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
27
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
28
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
29
+
30
/*
31
* Forward to the isar_feature_* tests given a DisasContext pointer.
32
*/
33
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/translate-a64.c
36
+++ b/target/arm/translate-a64.c
37
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
38
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
39
}
40
41
-/* Expand a 3-operand + env pointer operation using
42
- * an out-of-line helper.
43
- */
44
-static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
45
- int rn, int rm, gen_helper_gvec_3_ptr *fn)
46
-{
47
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
48
- vec_full_reg_offset(s, rn),
49
- vec_full_reg_offset(s, rm), cpu_env,
50
- is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
51
-}
52
-
53
/* Expand a 3-operand + fpstatus pointer + simd data value operation using
54
* an out-of-line helper.
55
*/
56
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
57
58
switch (opcode) {
59
case 0x0: /* SQRDMLAH (vector) */
60
- switch (size) {
61
- case 1:
62
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
63
- break;
64
- case 2:
65
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
66
- break;
67
- default:
68
- g_assert_not_reached();
69
- }
70
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
71
return;
72
73
case 0x1: /* SQRDMLSH (vector) */
74
- switch (size) {
75
- case 1:
76
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
77
- break;
78
- case 2:
79
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
80
- break;
81
- default:
82
- g_assert_not_reached();
83
- }
84
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
85
return;
86
87
case 0x2: /* SDOT / UDOT */
88
diff --git a/target/arm/translate.c b/target/arm/translate.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/target/arm/translate.c
91
+++ b/target/arm/translate.c
92
@@ -XXX,XX +XXX,XX @@ static const uint8_t neon_2rm_sizes[] = {
93
[NEON_2RM_VCVT_UF] = 0x4,
94
};
95
96
-
97
-/* Expand v8.1 simd helper. */
98
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
99
- int q, int rd, int rn, int rm)
100
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
101
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
102
{
103
- if (dc_isar_feature(aa32_rdm, s)) {
104
- int opr_sz = (1 + q) * 8;
105
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
106
- vfp_reg_offset(1, rn),
107
- vfp_reg_offset(1, rm), cpu_env,
108
- opr_sz, opr_sz, 0, fn);
109
- return 0;
110
- }
111
- return 1;
112
+ static gen_helper_gvec_3_ptr * const fns[2] = {
113
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
114
+ };
115
+ tcg_debug_assert(vece >= 1 && vece <= 2);
116
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
117
+ opr_sz, max_sz, 0, fns[vece - 1]);
118
+}
119
+
120
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
121
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
122
+{
123
+ static gen_helper_gvec_3_ptr * const fns[2] = {
124
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
125
+ };
126
+ tcg_debug_assert(vece >= 1 && vece <= 2);
127
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
128
+ opr_sz, max_sz, 0, fns[vece - 1]);
129
}
130
131
#define GEN_CMP0(NAME, COND) \
132
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
133
break; /* VPADD */
134
}
135
/* VQRDMLAH */
136
- switch (size) {
137
- case 1:
138
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
139
- q, rd, rn, rm);
140
- case 2:
141
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
142
- q, rd, rn, rm);
143
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
144
+ gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
145
+ vec_size, vec_size);
146
+ return 0;
147
}
148
return 1;
149
150
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
151
break;
152
}
153
/* VQRDMLSH */
154
- switch (size) {
155
- case 1:
156
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
157
- q, rd, rn, rm);
158
- case 2:
159
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
160
- q, rd, rn, rm);
161
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
162
+ gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
163
+ vec_size, vec_size);
164
+ return 0;
165
}
166
return 1;
167
168
--
169
2.20.1
170
171
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
Pass a pointer directly to env->vfp.qc[0], rather than env.
3
Move the code to a separate file so that we do not have to compile
4
This will allow SVE2, which does not modify QC, to pass a
4
it anymore if CONFIG_ARM_V7M is not set.
5
pointer to dummy storage.
6
5
7
Change the return type of inl_qrdml.h_s16 to match the
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
sense of the operation: signed.
7
Message-id: 20240308141051.536599-2-thuth@redhat.com
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20200513163245.17915-14-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
10
---
15
target/arm/translate.c | 18 ++++++++---
11
target/arm/tcg/cpu-v7m.c | 290 +++++++++++++++++++++++++++++++++++++
16
target/arm/vec_helper.c | 70 +++++++++++++++++++++++------------------
12
target/arm/tcg/cpu32.c | 261 ---------------------------------
17
2 files changed, 54 insertions(+), 34 deletions(-)
13
target/arm/meson.build | 3 +
14
target/arm/tcg/meson.build | 3 +
15
4 files changed, 296 insertions(+), 261 deletions(-)
16
create mode 100644 target/arm/tcg/cpu-v7m.c
18
17
19
diff --git a/target/arm/translate.c b/target/arm/translate.c
18
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
19
new file mode 100644
20
index XXXXXXX..XXXXXXX
21
--- /dev/null
22
+++ b/target/arm/tcg/cpu-v7m.c
23
@@ -XXX,XX +XXX,XX @@
24
+/*
25
+ * QEMU ARMv7-M TCG-only CPUs.
26
+ *
27
+ * Copyright (c) 2012 SUSE LINUX Products GmbH
28
+ *
29
+ * This code is licensed under the GNU GPL v2 or later.
30
+ *
31
+ * SPDX-License-Identifier: GPL-2.0-or-later
32
+ */
33
+
34
+#include "qemu/osdep.h"
35
+#include "cpu.h"
36
+#include "hw/core/tcg-cpu-ops.h"
37
+#include "internals.h"
38
+
39
+#if !defined(CONFIG_USER_ONLY)
40
+
41
+#include "hw/intc/armv7m_nvic.h"
42
+
43
+static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
44
+{
45
+ CPUClass *cc = CPU_GET_CLASS(cs);
46
+ ARMCPU *cpu = ARM_CPU(cs);
47
+ CPUARMState *env = &cpu->env;
48
+ bool ret = false;
49
+
50
+ /*
51
+ * ARMv7-M interrupt masking works differently than -A or -R.
52
+ * There is no FIQ/IRQ distinction. Instead of I and F bits
53
+ * masking FIQ and IRQ interrupts, an exception is taken only
54
+ * if it is higher priority than the current execution priority
55
+ * (which depends on state like BASEPRI, FAULTMASK and the
56
+ * currently active exception).
57
+ */
58
+ if (interrupt_request & CPU_INTERRUPT_HARD
59
+ && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
60
+ cs->exception_index = EXCP_IRQ;
61
+ cc->tcg_ops->do_interrupt(cs);
62
+ ret = true;
63
+ }
64
+ return ret;
65
+}
66
+
67
+#endif /* !CONFIG_USER_ONLY */
68
+
69
+static void cortex_m0_initfn(Object *obj)
70
+{
71
+ ARMCPU *cpu = ARM_CPU(obj);
72
+ set_feature(&cpu->env, ARM_FEATURE_V6);
73
+ set_feature(&cpu->env, ARM_FEATURE_M);
74
+
75
+ cpu->midr = 0x410cc200;
76
+
77
+ /*
78
+ * These ID register values are not guest visible, because
79
+ * we do not implement the Main Extension. They must be set
80
+ * to values corresponding to the Cortex-M0's implemented
81
+ * features, because QEMU generally controls its emulation
82
+ * by looking at ID register fields. We use the same values as
83
+ * for the M3.
84
+ */
85
+ cpu->isar.id_pfr0 = 0x00000030;
86
+ cpu->isar.id_pfr1 = 0x00000200;
87
+ cpu->isar.id_dfr0 = 0x00100000;
88
+ cpu->id_afr0 = 0x00000000;
89
+ cpu->isar.id_mmfr0 = 0x00000030;
90
+ cpu->isar.id_mmfr1 = 0x00000000;
91
+ cpu->isar.id_mmfr2 = 0x00000000;
92
+ cpu->isar.id_mmfr3 = 0x00000000;
93
+ cpu->isar.id_isar0 = 0x01141110;
94
+ cpu->isar.id_isar1 = 0x02111000;
95
+ cpu->isar.id_isar2 = 0x21112231;
96
+ cpu->isar.id_isar3 = 0x01111110;
97
+ cpu->isar.id_isar4 = 0x01310102;
98
+ cpu->isar.id_isar5 = 0x00000000;
99
+ cpu->isar.id_isar6 = 0x00000000;
100
+}
101
+
102
+static void cortex_m3_initfn(Object *obj)
103
+{
104
+ ARMCPU *cpu = ARM_CPU(obj);
105
+ set_feature(&cpu->env, ARM_FEATURE_V7);
106
+ set_feature(&cpu->env, ARM_FEATURE_M);
107
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
108
+ cpu->midr = 0x410fc231;
109
+ cpu->pmsav7_dregion = 8;
110
+ cpu->isar.id_pfr0 = 0x00000030;
111
+ cpu->isar.id_pfr1 = 0x00000200;
112
+ cpu->isar.id_dfr0 = 0x00100000;
113
+ cpu->id_afr0 = 0x00000000;
114
+ cpu->isar.id_mmfr0 = 0x00000030;
115
+ cpu->isar.id_mmfr1 = 0x00000000;
116
+ cpu->isar.id_mmfr2 = 0x00000000;
117
+ cpu->isar.id_mmfr3 = 0x00000000;
118
+ cpu->isar.id_isar0 = 0x01141110;
119
+ cpu->isar.id_isar1 = 0x02111000;
120
+ cpu->isar.id_isar2 = 0x21112231;
121
+ cpu->isar.id_isar3 = 0x01111110;
122
+ cpu->isar.id_isar4 = 0x01310102;
123
+ cpu->isar.id_isar5 = 0x00000000;
124
+ cpu->isar.id_isar6 = 0x00000000;
125
+}
126
+
127
+static void cortex_m4_initfn(Object *obj)
128
+{
129
+ ARMCPU *cpu = ARM_CPU(obj);
130
+
131
+ set_feature(&cpu->env, ARM_FEATURE_V7);
132
+ set_feature(&cpu->env, ARM_FEATURE_M);
133
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
134
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
135
+ cpu->midr = 0x410fc240; /* r0p0 */
136
+ cpu->pmsav7_dregion = 8;
137
+ cpu->isar.mvfr0 = 0x10110021;
138
+ cpu->isar.mvfr1 = 0x11000011;
139
+ cpu->isar.mvfr2 = 0x00000000;
140
+ cpu->isar.id_pfr0 = 0x00000030;
141
+ cpu->isar.id_pfr1 = 0x00000200;
142
+ cpu->isar.id_dfr0 = 0x00100000;
143
+ cpu->id_afr0 = 0x00000000;
144
+ cpu->isar.id_mmfr0 = 0x00000030;
145
+ cpu->isar.id_mmfr1 = 0x00000000;
146
+ cpu->isar.id_mmfr2 = 0x00000000;
147
+ cpu->isar.id_mmfr3 = 0x00000000;
148
+ cpu->isar.id_isar0 = 0x01141110;
149
+ cpu->isar.id_isar1 = 0x02111000;
150
+ cpu->isar.id_isar2 = 0x21112231;
151
+ cpu->isar.id_isar3 = 0x01111110;
152
+ cpu->isar.id_isar4 = 0x01310102;
153
+ cpu->isar.id_isar5 = 0x00000000;
154
+ cpu->isar.id_isar6 = 0x00000000;
155
+}
156
+
157
+static void cortex_m7_initfn(Object *obj)
158
+{
159
+ ARMCPU *cpu = ARM_CPU(obj);
160
+
161
+ set_feature(&cpu->env, ARM_FEATURE_V7);
162
+ set_feature(&cpu->env, ARM_FEATURE_M);
163
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
164
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
165
+ cpu->midr = 0x411fc272; /* r1p2 */
166
+ cpu->pmsav7_dregion = 8;
167
+ cpu->isar.mvfr0 = 0x10110221;
168
+ cpu->isar.mvfr1 = 0x12000011;
169
+ cpu->isar.mvfr2 = 0x00000040;
170
+ cpu->isar.id_pfr0 = 0x00000030;
171
+ cpu->isar.id_pfr1 = 0x00000200;
172
+ cpu->isar.id_dfr0 = 0x00100000;
173
+ cpu->id_afr0 = 0x00000000;
174
+ cpu->isar.id_mmfr0 = 0x00100030;
175
+ cpu->isar.id_mmfr1 = 0x00000000;
176
+ cpu->isar.id_mmfr2 = 0x01000000;
177
+ cpu->isar.id_mmfr3 = 0x00000000;
178
+ cpu->isar.id_isar0 = 0x01101110;
179
+ cpu->isar.id_isar1 = 0x02112000;
180
+ cpu->isar.id_isar2 = 0x20232231;
181
+ cpu->isar.id_isar3 = 0x01111131;
182
+ cpu->isar.id_isar4 = 0x01310132;
183
+ cpu->isar.id_isar5 = 0x00000000;
184
+ cpu->isar.id_isar6 = 0x00000000;
185
+}
186
+
187
+static void cortex_m33_initfn(Object *obj)
188
+{
189
+ ARMCPU *cpu = ARM_CPU(obj);
190
+
191
+ set_feature(&cpu->env, ARM_FEATURE_V8);
192
+ set_feature(&cpu->env, ARM_FEATURE_M);
193
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
194
+ set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
195
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
196
+ cpu->midr = 0x410fd213; /* r0p3 */
197
+ cpu->pmsav7_dregion = 16;
198
+ cpu->sau_sregion = 8;
199
+ cpu->isar.mvfr0 = 0x10110021;
200
+ cpu->isar.mvfr1 = 0x11000011;
201
+ cpu->isar.mvfr2 = 0x00000040;
202
+ cpu->isar.id_pfr0 = 0x00000030;
203
+ cpu->isar.id_pfr1 = 0x00000210;
204
+ cpu->isar.id_dfr0 = 0x00200000;
205
+ cpu->id_afr0 = 0x00000000;
206
+ cpu->isar.id_mmfr0 = 0x00101F40;
207
+ cpu->isar.id_mmfr1 = 0x00000000;
208
+ cpu->isar.id_mmfr2 = 0x01000000;
209
+ cpu->isar.id_mmfr3 = 0x00000000;
210
+ cpu->isar.id_isar0 = 0x01101110;
211
+ cpu->isar.id_isar1 = 0x02212000;
212
+ cpu->isar.id_isar2 = 0x20232232;
213
+ cpu->isar.id_isar3 = 0x01111131;
214
+ cpu->isar.id_isar4 = 0x01310132;
215
+ cpu->isar.id_isar5 = 0x00000000;
216
+ cpu->isar.id_isar6 = 0x00000000;
217
+ cpu->clidr = 0x00000000;
218
+ cpu->ctr = 0x8000c000;
219
+}
220
+
221
+static void cortex_m55_initfn(Object *obj)
222
+{
223
+ ARMCPU *cpu = ARM_CPU(obj);
224
+
225
+ set_feature(&cpu->env, ARM_FEATURE_V8);
226
+ set_feature(&cpu->env, ARM_FEATURE_V8_1M);
227
+ set_feature(&cpu->env, ARM_FEATURE_M);
228
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
229
+ set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
230
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
231
+ cpu->midr = 0x410fd221; /* r0p1 */
232
+ cpu->revidr = 0;
233
+ cpu->pmsav7_dregion = 16;
234
+ cpu->sau_sregion = 8;
235
+ /* These are the MVFR* values for the FPU + full MVE configuration */
236
+ cpu->isar.mvfr0 = 0x10110221;
237
+ cpu->isar.mvfr1 = 0x12100211;
238
+ cpu->isar.mvfr2 = 0x00000040;
239
+ cpu->isar.id_pfr0 = 0x20000030;
240
+ cpu->isar.id_pfr1 = 0x00000230;
241
+ cpu->isar.id_dfr0 = 0x10200000;
242
+ cpu->id_afr0 = 0x00000000;
243
+ cpu->isar.id_mmfr0 = 0x00111040;
244
+ cpu->isar.id_mmfr1 = 0x00000000;
245
+ cpu->isar.id_mmfr2 = 0x01000000;
246
+ cpu->isar.id_mmfr3 = 0x00000011;
247
+ cpu->isar.id_isar0 = 0x01103110;
248
+ cpu->isar.id_isar1 = 0x02212000;
249
+ cpu->isar.id_isar2 = 0x20232232;
250
+ cpu->isar.id_isar3 = 0x01111131;
251
+ cpu->isar.id_isar4 = 0x01310132;
252
+ cpu->isar.id_isar5 = 0x00000000;
253
+ cpu->isar.id_isar6 = 0x00000000;
254
+ cpu->clidr = 0x00000000; /* caches not implemented */
255
+ cpu->ctr = 0x8303c003;
256
+}
257
+
258
+static const TCGCPUOps arm_v7m_tcg_ops = {
259
+ .initialize = arm_translate_init,
260
+ .synchronize_from_tb = arm_cpu_synchronize_from_tb,
261
+ .debug_excp_handler = arm_debug_excp_handler,
262
+ .restore_state_to_opc = arm_restore_state_to_opc,
263
+
264
+#ifdef CONFIG_USER_ONLY
265
+ .record_sigsegv = arm_cpu_record_sigsegv,
266
+ .record_sigbus = arm_cpu_record_sigbus,
267
+#else
268
+ .tlb_fill = arm_cpu_tlb_fill,
269
+ .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
270
+ .do_interrupt = arm_v7m_cpu_do_interrupt,
271
+ .do_transaction_failed = arm_cpu_do_transaction_failed,
272
+ .do_unaligned_access = arm_cpu_do_unaligned_access,
273
+ .adjust_watchpoint_address = arm_adjust_watchpoint_address,
274
+ .debug_check_watchpoint = arm_debug_check_watchpoint,
275
+ .debug_check_breakpoint = arm_debug_check_breakpoint,
276
+#endif /* !CONFIG_USER_ONLY */
277
+};
278
+
279
+static void arm_v7m_class_init(ObjectClass *oc, void *data)
280
+{
281
+ ARMCPUClass *acc = ARM_CPU_CLASS(oc);
282
+ CPUClass *cc = CPU_CLASS(oc);
283
+
284
+ acc->info = data;
285
+ cc->tcg_ops = &arm_v7m_tcg_ops;
286
+ cc->gdb_core_xml_file = "arm-m-profile.xml";
287
+}
288
+
289
+static const ARMCPUInfo arm_v7m_cpus[] = {
290
+ { .name = "cortex-m0", .initfn = cortex_m0_initfn,
291
+ .class_init = arm_v7m_class_init },
292
+ { .name = "cortex-m3", .initfn = cortex_m3_initfn,
293
+ .class_init = arm_v7m_class_init },
294
+ { .name = "cortex-m4", .initfn = cortex_m4_initfn,
295
+ .class_init = arm_v7m_class_init },
296
+ { .name = "cortex-m7", .initfn = cortex_m7_initfn,
297
+ .class_init = arm_v7m_class_init },
298
+ { .name = "cortex-m33", .initfn = cortex_m33_initfn,
299
+ .class_init = arm_v7m_class_init },
300
+ { .name = "cortex-m55", .initfn = cortex_m55_initfn,
301
+ .class_init = arm_v7m_class_init },
302
+};
303
+
304
+static void arm_v7m_cpu_register_types(void)
305
+{
306
+ size_t i;
307
+
308
+ for (i = 0; i < ARRAY_SIZE(arm_v7m_cpus); ++i) {
309
+ arm_cpu_register(&arm_v7m_cpus[i]);
310
+ }
311
+}
312
+
313
+type_init(arm_v7m_cpu_register_types)
314
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
20
index XXXXXXX..XXXXXXX 100644
315
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.c
316
--- a/target/arm/tcg/cpu32.c
22
+++ b/target/arm/translate.c
317
+++ b/target/arm/tcg/cpu32.c
23
@@ -XXX,XX +XXX,XX @@ static const uint8_t neon_2rm_sizes[] = {
318
@@ -XXX,XX +XXX,XX @@
24
[NEON_2RM_VCVT_UF] = 0x4,
319
#include "hw/boards.h"
25
};
320
#endif
26
321
#include "cpregs.h"
27
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
322
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
28
+ uint32_t opr_sz, uint32_t max_sz,
323
-#include "hw/intc/armv7m_nvic.h"
29
+ gen_helper_gvec_3_ptr *fn)
324
-#endif
30
+{
325
31
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
326
32
+
327
/* Share AArch32 -cpu max features with AArch64. */
33
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
328
@@ -XXX,XX +XXX,XX @@ void aa32_max_features(ARMCPU *cpu)
34
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
329
/* CPU models. These are not needed for the AArch64 linux-user build. */
35
+ opr_sz, max_sz, 0, fn);
330
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
36
+ tcg_temp_free_ptr(qc_ptr);
331
37
+}
332
-#if !defined(CONFIG_USER_ONLY)
38
+
333
-static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
39
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
334
-{
40
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
335
- CPUClass *cc = CPU_GET_CLASS(cs);
336
- ARMCPU *cpu = ARM_CPU(cs);
337
- CPUARMState *env = &cpu->env;
338
- bool ret = false;
339
-
340
- /*
341
- * ARMv7-M interrupt masking works differently than -A or -R.
342
- * There is no FIQ/IRQ distinction. Instead of I and F bits
343
- * masking FIQ and IRQ interrupts, an exception is taken only
344
- * if it is higher priority than the current execution priority
345
- * (which depends on state like BASEPRI, FAULTMASK and the
346
- * currently active exception).
347
- */
348
- if (interrupt_request & CPU_INTERRUPT_HARD
349
- && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
350
- cs->exception_index = EXCP_IRQ;
351
- cc->tcg_ops->do_interrupt(cs);
352
- ret = true;
353
- }
354
- return ret;
355
-}
356
-#endif /* !CONFIG_USER_ONLY */
357
-
358
static void arm926_initfn(Object *obj)
41
{
359
{
42
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
360
ARMCPU *cpu = ARM_CPU(obj);
43
gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
361
@@ -XXX,XX +XXX,XX @@ static void cortex_a15_initfn(Object *obj)
44
};
362
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
45
tcg_debug_assert(vece >= 1 && vece <= 2);
46
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
47
- opr_sz, max_sz, 0, fns[vece - 1]);
48
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
49
}
363
}
50
364
51
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
365
-static void cortex_m0_initfn(Object *obj)
52
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
366
-{
53
gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
367
- ARMCPU *cpu = ARM_CPU(obj);
54
};
368
- set_feature(&cpu->env, ARM_FEATURE_V6);
55
tcg_debug_assert(vece >= 1 && vece <= 2);
369
- set_feature(&cpu->env, ARM_FEATURE_M);
56
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
370
-
57
- opr_sz, max_sz, 0, fns[vece - 1]);
371
- cpu->midr = 0x410cc200;
58
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
372
-
373
- /*
374
- * These ID register values are not guest visible, because
375
- * we do not implement the Main Extension. They must be set
376
- * to values corresponding to the Cortex-M0's implemented
377
- * features, because QEMU generally controls its emulation
378
- * by looking at ID register fields. We use the same values as
379
- * for the M3.
380
- */
381
- cpu->isar.id_pfr0 = 0x00000030;
382
- cpu->isar.id_pfr1 = 0x00000200;
383
- cpu->isar.id_dfr0 = 0x00100000;
384
- cpu->id_afr0 = 0x00000000;
385
- cpu->isar.id_mmfr0 = 0x00000030;
386
- cpu->isar.id_mmfr1 = 0x00000000;
387
- cpu->isar.id_mmfr2 = 0x00000000;
388
- cpu->isar.id_mmfr3 = 0x00000000;
389
- cpu->isar.id_isar0 = 0x01141110;
390
- cpu->isar.id_isar1 = 0x02111000;
391
- cpu->isar.id_isar2 = 0x21112231;
392
- cpu->isar.id_isar3 = 0x01111110;
393
- cpu->isar.id_isar4 = 0x01310102;
394
- cpu->isar.id_isar5 = 0x00000000;
395
- cpu->isar.id_isar6 = 0x00000000;
396
-}
397
-
398
-static void cortex_m3_initfn(Object *obj)
399
-{
400
- ARMCPU *cpu = ARM_CPU(obj);
401
- set_feature(&cpu->env, ARM_FEATURE_V7);
402
- set_feature(&cpu->env, ARM_FEATURE_M);
403
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
404
- cpu->midr = 0x410fc231;
405
- cpu->pmsav7_dregion = 8;
406
- cpu->isar.id_pfr0 = 0x00000030;
407
- cpu->isar.id_pfr1 = 0x00000200;
408
- cpu->isar.id_dfr0 = 0x00100000;
409
- cpu->id_afr0 = 0x00000000;
410
- cpu->isar.id_mmfr0 = 0x00000030;
411
- cpu->isar.id_mmfr1 = 0x00000000;
412
- cpu->isar.id_mmfr2 = 0x00000000;
413
- cpu->isar.id_mmfr3 = 0x00000000;
414
- cpu->isar.id_isar0 = 0x01141110;
415
- cpu->isar.id_isar1 = 0x02111000;
416
- cpu->isar.id_isar2 = 0x21112231;
417
- cpu->isar.id_isar3 = 0x01111110;
418
- cpu->isar.id_isar4 = 0x01310102;
419
- cpu->isar.id_isar5 = 0x00000000;
420
- cpu->isar.id_isar6 = 0x00000000;
421
-}
422
-
423
-static void cortex_m4_initfn(Object *obj)
424
-{
425
- ARMCPU *cpu = ARM_CPU(obj);
426
-
427
- set_feature(&cpu->env, ARM_FEATURE_V7);
428
- set_feature(&cpu->env, ARM_FEATURE_M);
429
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
430
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
431
- cpu->midr = 0x410fc240; /* r0p0 */
432
- cpu->pmsav7_dregion = 8;
433
- cpu->isar.mvfr0 = 0x10110021;
434
- cpu->isar.mvfr1 = 0x11000011;
435
- cpu->isar.mvfr2 = 0x00000000;
436
- cpu->isar.id_pfr0 = 0x00000030;
437
- cpu->isar.id_pfr1 = 0x00000200;
438
- cpu->isar.id_dfr0 = 0x00100000;
439
- cpu->id_afr0 = 0x00000000;
440
- cpu->isar.id_mmfr0 = 0x00000030;
441
- cpu->isar.id_mmfr1 = 0x00000000;
442
- cpu->isar.id_mmfr2 = 0x00000000;
443
- cpu->isar.id_mmfr3 = 0x00000000;
444
- cpu->isar.id_isar0 = 0x01141110;
445
- cpu->isar.id_isar1 = 0x02111000;
446
- cpu->isar.id_isar2 = 0x21112231;
447
- cpu->isar.id_isar3 = 0x01111110;
448
- cpu->isar.id_isar4 = 0x01310102;
449
- cpu->isar.id_isar5 = 0x00000000;
450
- cpu->isar.id_isar6 = 0x00000000;
451
-}
452
-
453
-static void cortex_m7_initfn(Object *obj)
454
-{
455
- ARMCPU *cpu = ARM_CPU(obj);
456
-
457
- set_feature(&cpu->env, ARM_FEATURE_V7);
458
- set_feature(&cpu->env, ARM_FEATURE_M);
459
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
460
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
461
- cpu->midr = 0x411fc272; /* r1p2 */
462
- cpu->pmsav7_dregion = 8;
463
- cpu->isar.mvfr0 = 0x10110221;
464
- cpu->isar.mvfr1 = 0x12000011;
465
- cpu->isar.mvfr2 = 0x00000040;
466
- cpu->isar.id_pfr0 = 0x00000030;
467
- cpu->isar.id_pfr1 = 0x00000200;
468
- cpu->isar.id_dfr0 = 0x00100000;
469
- cpu->id_afr0 = 0x00000000;
470
- cpu->isar.id_mmfr0 = 0x00100030;
471
- cpu->isar.id_mmfr1 = 0x00000000;
472
- cpu->isar.id_mmfr2 = 0x01000000;
473
- cpu->isar.id_mmfr3 = 0x00000000;
474
- cpu->isar.id_isar0 = 0x01101110;
475
- cpu->isar.id_isar1 = 0x02112000;
476
- cpu->isar.id_isar2 = 0x20232231;
477
- cpu->isar.id_isar3 = 0x01111131;
478
- cpu->isar.id_isar4 = 0x01310132;
479
- cpu->isar.id_isar5 = 0x00000000;
480
- cpu->isar.id_isar6 = 0x00000000;
481
-}
482
-
483
-static void cortex_m33_initfn(Object *obj)
484
-{
485
- ARMCPU *cpu = ARM_CPU(obj);
486
-
487
- set_feature(&cpu->env, ARM_FEATURE_V8);
488
- set_feature(&cpu->env, ARM_FEATURE_M);
489
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
490
- set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
491
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
492
- cpu->midr = 0x410fd213; /* r0p3 */
493
- cpu->pmsav7_dregion = 16;
494
- cpu->sau_sregion = 8;
495
- cpu->isar.mvfr0 = 0x10110021;
496
- cpu->isar.mvfr1 = 0x11000011;
497
- cpu->isar.mvfr2 = 0x00000040;
498
- cpu->isar.id_pfr0 = 0x00000030;
499
- cpu->isar.id_pfr1 = 0x00000210;
500
- cpu->isar.id_dfr0 = 0x00200000;
501
- cpu->id_afr0 = 0x00000000;
502
- cpu->isar.id_mmfr0 = 0x00101F40;
503
- cpu->isar.id_mmfr1 = 0x00000000;
504
- cpu->isar.id_mmfr2 = 0x01000000;
505
- cpu->isar.id_mmfr3 = 0x00000000;
506
- cpu->isar.id_isar0 = 0x01101110;
507
- cpu->isar.id_isar1 = 0x02212000;
508
- cpu->isar.id_isar2 = 0x20232232;
509
- cpu->isar.id_isar3 = 0x01111131;
510
- cpu->isar.id_isar4 = 0x01310132;
511
- cpu->isar.id_isar5 = 0x00000000;
512
- cpu->isar.id_isar6 = 0x00000000;
513
- cpu->clidr = 0x00000000;
514
- cpu->ctr = 0x8000c000;
515
-}
516
-
517
-static void cortex_m55_initfn(Object *obj)
518
-{
519
- ARMCPU *cpu = ARM_CPU(obj);
520
-
521
- set_feature(&cpu->env, ARM_FEATURE_V8);
522
- set_feature(&cpu->env, ARM_FEATURE_V8_1M);
523
- set_feature(&cpu->env, ARM_FEATURE_M);
524
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
525
- set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
526
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
527
- cpu->midr = 0x410fd221; /* r0p1 */
528
- cpu->revidr = 0;
529
- cpu->pmsav7_dregion = 16;
530
- cpu->sau_sregion = 8;
531
- /* These are the MVFR* values for the FPU + full MVE configuration */
532
- cpu->isar.mvfr0 = 0x10110221;
533
- cpu->isar.mvfr1 = 0x12100211;
534
- cpu->isar.mvfr2 = 0x00000040;
535
- cpu->isar.id_pfr0 = 0x20000030;
536
- cpu->isar.id_pfr1 = 0x00000230;
537
- cpu->isar.id_dfr0 = 0x10200000;
538
- cpu->id_afr0 = 0x00000000;
539
- cpu->isar.id_mmfr0 = 0x00111040;
540
- cpu->isar.id_mmfr1 = 0x00000000;
541
- cpu->isar.id_mmfr2 = 0x01000000;
542
- cpu->isar.id_mmfr3 = 0x00000011;
543
- cpu->isar.id_isar0 = 0x01103110;
544
- cpu->isar.id_isar1 = 0x02212000;
545
- cpu->isar.id_isar2 = 0x20232232;
546
- cpu->isar.id_isar3 = 0x01111131;
547
- cpu->isar.id_isar4 = 0x01310132;
548
- cpu->isar.id_isar5 = 0x00000000;
549
- cpu->isar.id_isar6 = 0x00000000;
550
- cpu->clidr = 0x00000000; /* caches not implemented */
551
- cpu->ctr = 0x8303c003;
552
-}
553
-
554
static const ARMCPRegInfo cortexr5_cp_reginfo[] = {
555
/* Dummy the TCM region regs for the moment */
556
{ .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
557
@@ -XXX,XX +XXX,XX @@ static void pxa270c5_initfn(Object *obj)
558
cpu->reset_sctlr = 0x00000078;
59
}
559
}
60
560
61
#define GEN_CMP0(NAME, COND) \
561
-static const TCGCPUOps arm_v7m_tcg_ops = {
62
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
562
- .initialize = arm_translate_init,
563
- .synchronize_from_tb = arm_cpu_synchronize_from_tb,
564
- .debug_excp_handler = arm_debug_excp_handler,
565
- .restore_state_to_opc = arm_restore_state_to_opc,
566
-
567
-#ifdef CONFIG_USER_ONLY
568
- .record_sigsegv = arm_cpu_record_sigsegv,
569
- .record_sigbus = arm_cpu_record_sigbus,
570
-#else
571
- .tlb_fill = arm_cpu_tlb_fill,
572
- .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
573
- .do_interrupt = arm_v7m_cpu_do_interrupt,
574
- .do_transaction_failed = arm_cpu_do_transaction_failed,
575
- .do_unaligned_access = arm_cpu_do_unaligned_access,
576
- .adjust_watchpoint_address = arm_adjust_watchpoint_address,
577
- .debug_check_watchpoint = arm_debug_check_watchpoint,
578
- .debug_check_breakpoint = arm_debug_check_breakpoint,
579
-#endif /* !CONFIG_USER_ONLY */
580
-};
581
-
582
-static void arm_v7m_class_init(ObjectClass *oc, void *data)
583
-{
584
- ARMCPUClass *acc = ARM_CPU_CLASS(oc);
585
- CPUClass *cc = CPU_CLASS(oc);
586
-
587
- acc->info = data;
588
- cc->tcg_ops = &arm_v7m_tcg_ops;
589
- cc->gdb_core_xml_file = "arm-m-profile.xml";
590
-}
591
-
592
#ifndef TARGET_AARCH64
593
/*
594
* -cpu max: a CPU with as many features enabled as our emulation supports.
595
@@ -XXX,XX +XXX,XX @@ static const ARMCPUInfo arm_tcg_cpus[] = {
596
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
597
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
598
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
599
- { .name = "cortex-m0", .initfn = cortex_m0_initfn,
600
- .class_init = arm_v7m_class_init },
601
- { .name = "cortex-m3", .initfn = cortex_m3_initfn,
602
- .class_init = arm_v7m_class_init },
603
- { .name = "cortex-m4", .initfn = cortex_m4_initfn,
604
- .class_init = arm_v7m_class_init },
605
- { .name = "cortex-m7", .initfn = cortex_m7_initfn,
606
- .class_init = arm_v7m_class_init },
607
- { .name = "cortex-m33", .initfn = cortex_m33_initfn,
608
- .class_init = arm_v7m_class_init },
609
- { .name = "cortex-m55", .initfn = cortex_m55_initfn,
610
- .class_init = arm_v7m_class_init },
611
{ .name = "cortex-r5", .initfn = cortex_r5_initfn },
612
{ .name = "cortex-r5f", .initfn = cortex_r5f_initfn },
613
{ .name = "cortex-r52", .initfn = cortex_r52_initfn },
614
diff --git a/target/arm/meson.build b/target/arm/meson.build
63
index XXXXXXX..XXXXXXX 100644
615
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/vec_helper.c
616
--- a/target/arm/meson.build
65
+++ b/target/arm/vec_helper.c
617
+++ b/target/arm/meson.build
66
@@ -XXX,XX +XXX,XX @@
618
@@ -XXX,XX +XXX,XX @@ arm_system_ss.add(files(
67
#define H4(x) (x)
619
'ptw.c',
68
#endif
620
))
69
621
70
-#define SET_QC() env->vfp.qc[0] = 1
622
+arm_user_ss = ss.source_set()
71
-
623
+
72
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
624
subdir('hvf')
73
{
625
74
uint64_t *d = vd + opr_sz;
626
if 'CONFIG_TCG' in config_all_accel
75
@@ -XXX,XX +XXX,XX @@ static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
627
@@ -XXX,XX +XXX,XX @@ endif
76
}
628
77
629
target_arch += {'arm': arm_ss}
78
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
630
target_system_arch += {'arm': arm_system_ss}
79
-static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
631
+target_user_arch += {'arm': arm_user_ss}
80
- int16_t src2, int16_t src3)
632
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
81
+static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
633
index XXXXXXX..XXXXXXX 100644
82
+ int16_t src3, uint32_t *sat)
634
--- a/target/arm/tcg/meson.build
83
{
635
+++ b/target/arm/tcg/meson.build
84
/* Simplify:
636
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
85
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
637
arm_system_ss.add(files(
86
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
638
'psci.c',
87
ret = ((int32_t)src3 << 15) + ret + (1 << 14);
639
))
88
ret >>= 15;
640
+
89
if (ret != (int16_t)ret) {
641
+arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
90
- SET_QC();
642
+arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c'))
91
+ *sat = 1;
92
ret = (ret < 0 ? -0x8000 : 0x7fff);
93
}
94
return ret;
95
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
96
uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
97
uint32_t src2, uint32_t src3)
98
{
99
- uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
100
- uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
101
+ uint32_t *sat = &env->vfp.qc[0];
102
+ uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
103
+ uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
104
return deposit32(e1, 16, 16, e2);
105
}
106
107
void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
108
- void *ve, uint32_t desc)
109
+ void *vq, uint32_t desc)
110
{
111
uintptr_t opr_sz = simd_oprsz(desc);
112
int16_t *d = vd;
113
int16_t *n = vn;
114
int16_t *m = vm;
115
- CPUARMState *env = ve;
116
uintptr_t i;
117
118
for (i = 0; i < opr_sz / 2; ++i) {
119
- d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
120
+ d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
121
}
122
clear_tail(d, opr_sz, simd_maxsz(desc));
123
}
124
125
/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
126
-static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
127
- int16_t src2, int16_t src3)
128
+static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
129
+ int16_t src3, uint32_t *sat)
130
{
131
/* Similarly, using subtraction:
132
* = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
133
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
134
ret = ((int32_t)src3 << 15) - ret + (1 << 14);
135
ret >>= 15;
136
if (ret != (int16_t)ret) {
137
- SET_QC();
138
+ *sat = 1;
139
ret = (ret < 0 ? -0x8000 : 0x7fff);
140
}
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
143
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
144
uint32_t src2, uint32_t src3)
145
{
146
- uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
147
- uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
148
+ uint32_t *sat = &env->vfp.qc[0];
149
+ uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
150
+ uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
151
return deposit32(e1, 16, 16, e2);
152
}
153
154
void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
155
- void *ve, uint32_t desc)
156
+ void *vq, uint32_t desc)
157
{
158
uintptr_t opr_sz = simd_oprsz(desc);
159
int16_t *d = vd;
160
int16_t *n = vn;
161
int16_t *m = vm;
162
- CPUARMState *env = ve;
163
uintptr_t i;
164
165
for (i = 0; i < opr_sz / 2; ++i) {
166
- d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
167
+ d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
168
}
169
clear_tail(d, opr_sz, simd_maxsz(desc));
170
}
171
172
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
173
-uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
174
- int32_t src2, int32_t src3)
175
+static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
176
+ int32_t src3, uint32_t *sat)
177
{
178
/* Simplify similarly to int_qrdmlah_s16 above. */
179
int64_t ret = (int64_t)src1 * src2;
180
ret = ((int64_t)src3 << 31) + ret + (1 << 30);
181
ret >>= 31;
182
if (ret != (int32_t)ret) {
183
- SET_QC();
184
+ *sat = 1;
185
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
186
}
187
return ret;
188
}
189
190
+uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
191
+ int32_t src2, int32_t src3)
192
+{
193
+ uint32_t *sat = &env->vfp.qc[0];
194
+ return inl_qrdmlah_s32(src1, src2, src3, sat);
195
+}
196
+
197
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
198
- void *ve, uint32_t desc)
199
+ void *vq, uint32_t desc)
200
{
201
uintptr_t opr_sz = simd_oprsz(desc);
202
int32_t *d = vd;
203
int32_t *n = vn;
204
int32_t *m = vm;
205
- CPUARMState *env = ve;
206
uintptr_t i;
207
208
for (i = 0; i < opr_sz / 4; ++i) {
209
- d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
210
+ d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
211
}
212
clear_tail(d, opr_sz, simd_maxsz(desc));
213
}
214
215
/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
216
-uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
217
- int32_t src2, int32_t src3)
218
+static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
219
+ int32_t src3, uint32_t *sat)
220
{
221
/* Simplify similarly to int_qrdmlsh_s16 above. */
222
int64_t ret = (int64_t)src1 * src2;
223
ret = ((int64_t)src3 << 31) - ret + (1 << 30);
224
ret >>= 31;
225
if (ret != (int32_t)ret) {
226
- SET_QC();
227
+ *sat = 1;
228
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
229
}
230
return ret;
231
}
232
233
+uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
234
+ int32_t src2, int32_t src3)
235
+{
236
+ uint32_t *sat = &env->vfp.qc[0];
237
+ return inl_qrdmlsh_s32(src1, src2, src3, sat);
238
+}
239
+
240
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
241
- void *ve, uint32_t desc)
242
+ void *vq, uint32_t desc)
243
{
244
uintptr_t opr_sz = simd_oprsz(desc);
245
int32_t *d = vd;
246
int32_t *n = vn;
247
int32_t *m = vm;
248
- CPUARMState *env = ve;
249
uintptr_t i;
250
251
for (i = 0; i < opr_sz / 4; ++i) {
252
- d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
253
+ d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
254
}
255
clear_tail(d, opr_sz, simd_maxsz(desc));
256
}
257
--
643
--
258
2.20.1
644
2.34.1
259
260
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Must clear the tail for AdvSIMD when SVE is enabled.
4
5
Fixes: ca40a6e6e39
6
Cc: qemu-stable@nongnu.org
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200513163245.17915-15-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/vec_helper.c | 2 ++
13
1 file changed, 2 insertions(+)
14
15
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/vec_helper.c
18
+++ b/target/arm/vec_helper.c
19
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
20
d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
21
} \
22
} \
23
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
24
}
25
26
DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
27
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
28
mm, a[i + j], 0, stat); \
29
} \
30
} \
31
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
32
}
33
34
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
35
--
36
2.20.1
37
38
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Include 64-bit element size in preparation for SVE2.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200513163245.17915-16-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/helper.h | 10 +++
11
target/arm/translate.h | 5 ++
12
target/arm/translate-a64.c | 8 ++-
13
target/arm/translate.c | 133 ++++++++++++++++++++++++++++++++++++-
14
target/arm/vec_helper.c | 24 +++++++
15
5 files changed, 176 insertions(+), 4 deletions(-)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
23
DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
24
25
+DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
+
30
+DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+
35
#ifdef TARGET_AARCH64
36
#include "helper-a64.h"
37
#include "helper-sve.h"
38
diff --git a/target/arm/translate.h b/target/arm/translate.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate.h
41
+++ b/target/arm/translate.h
42
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
43
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
44
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
45
46
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
47
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
48
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
49
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
50
+
51
/*
52
* Forward to the isar_feature_* tests given a DisasContext pointer.
53
*/
54
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/arm/translate-a64.c
57
+++ b/target/arm/translate-a64.c
58
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
59
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
60
}
61
return;
62
+ case 0xe: /* SABD, UABD */
63
+ if (u) {
64
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
65
+ } else {
66
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
67
+ }
68
+ return;
69
case 0x10: /* ADD, SUB */
70
if (u) {
71
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
72
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
73
genenvfn = fns[size][u];
74
break;
75
}
76
- case 0xe: /* SABD, UABD */
77
case 0xf: /* SABA, UABA */
78
{
79
static NeonGenTwoOpFn * const fns[3][2] = {
80
diff --git a/target/arm/translate.c b/target/arm/translate.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/arm/translate.c
83
+++ b/target/arm/translate.c
84
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
85
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
86
}
87
88
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
89
+{
90
+ TCGv_i32 t = tcg_temp_new_i32();
91
+
92
+ tcg_gen_sub_i32(t, a, b);
93
+ tcg_gen_sub_i32(d, b, a);
94
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
95
+ tcg_temp_free_i32(t);
96
+}
97
+
98
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
99
+{
100
+ TCGv_i64 t = tcg_temp_new_i64();
101
+
102
+ tcg_gen_sub_i64(t, a, b);
103
+ tcg_gen_sub_i64(d, b, a);
104
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
105
+ tcg_temp_free_i64(t);
106
+}
107
+
108
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
109
+{
110
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
111
+
112
+ tcg_gen_smin_vec(vece, t, a, b);
113
+ tcg_gen_smax_vec(vece, d, a, b);
114
+ tcg_gen_sub_vec(vece, d, d, t);
115
+ tcg_temp_free_vec(t);
116
+}
117
+
118
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
119
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
120
+{
121
+ static const TCGOpcode vecop_list[] = {
122
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
123
+ };
124
+ static const GVecGen3 ops[4] = {
125
+ { .fniv = gen_sabd_vec,
126
+ .fno = gen_helper_gvec_sabd_b,
127
+ .opt_opc = vecop_list,
128
+ .vece = MO_8 },
129
+ { .fniv = gen_sabd_vec,
130
+ .fno = gen_helper_gvec_sabd_h,
131
+ .opt_opc = vecop_list,
132
+ .vece = MO_16 },
133
+ { .fni4 = gen_sabd_i32,
134
+ .fniv = gen_sabd_vec,
135
+ .fno = gen_helper_gvec_sabd_s,
136
+ .opt_opc = vecop_list,
137
+ .vece = MO_32 },
138
+ { .fni8 = gen_sabd_i64,
139
+ .fniv = gen_sabd_vec,
140
+ .fno = gen_helper_gvec_sabd_d,
141
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
142
+ .opt_opc = vecop_list,
143
+ .vece = MO_64 },
144
+ };
145
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
146
+}
147
+
148
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
149
+{
150
+ TCGv_i32 t = tcg_temp_new_i32();
151
+
152
+ tcg_gen_sub_i32(t, a, b);
153
+ tcg_gen_sub_i32(d, b, a);
154
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
155
+ tcg_temp_free_i32(t);
156
+}
157
+
158
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
159
+{
160
+ TCGv_i64 t = tcg_temp_new_i64();
161
+
162
+ tcg_gen_sub_i64(t, a, b);
163
+ tcg_gen_sub_i64(d, b, a);
164
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
165
+ tcg_temp_free_i64(t);
166
+}
167
+
168
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
169
+{
170
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
171
+
172
+ tcg_gen_umin_vec(vece, t, a, b);
173
+ tcg_gen_umax_vec(vece, d, a, b);
174
+ tcg_gen_sub_vec(vece, d, d, t);
175
+ tcg_temp_free_vec(t);
176
+}
177
+
178
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
179
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
180
+{
181
+ static const TCGOpcode vecop_list[] = {
182
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
183
+ };
184
+ static const GVecGen3 ops[4] = {
185
+ { .fniv = gen_uabd_vec,
186
+ .fno = gen_helper_gvec_uabd_b,
187
+ .opt_opc = vecop_list,
188
+ .vece = MO_8 },
189
+ { .fniv = gen_uabd_vec,
190
+ .fno = gen_helper_gvec_uabd_h,
191
+ .opt_opc = vecop_list,
192
+ .vece = MO_16 },
193
+ { .fni4 = gen_uabd_i32,
194
+ .fniv = gen_uabd_vec,
195
+ .fno = gen_helper_gvec_uabd_s,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_32 },
198
+ { .fni8 = gen_uabd_i64,
199
+ .fniv = gen_uabd_vec,
200
+ .fno = gen_helper_gvec_uabd_d,
201
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
202
+ .opt_opc = vecop_list,
203
+ .vece = MO_64 },
204
+ };
205
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
206
+}
207
+
208
/* Translate a NEON data processing instruction. Return nonzero if the
209
instruction is invalid.
210
We process data in a mixture of 32-bit and 64-bit chunks.
211
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
212
}
213
return 1;
214
215
+ case NEON_3R_VABD:
216
+ if (u) {
217
+ gen_gvec_uabd(size, rd_ofs, rn_ofs, rm_ofs,
218
+ vec_size, vec_size);
219
+ } else {
220
+ gen_gvec_sabd(size, rd_ofs, rn_ofs, rm_ofs,
221
+ vec_size, vec_size);
222
+ }
223
+ return 0;
224
+
225
case NEON_3R_VADD_VSUB:
226
case NEON_3R_LOGIC:
227
case NEON_3R_VMAX:
228
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
229
case NEON_3R_VQRSHL:
230
GEN_NEON_INTEGER_OP_ENV(qrshl);
231
break;
232
- case NEON_3R_VABD:
233
- GEN_NEON_INTEGER_OP(abd);
234
- break;
235
case NEON_3R_VABA:
236
GEN_NEON_INTEGER_OP(abd);
237
tcg_temp_free_i32(tmp2);
238
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/arm/vec_helper.c
241
+++ b/target/arm/vec_helper.c
242
@@ -XXX,XX +XXX,XX @@ DO_CMP0(gvec_cgt0_h, int16_t, >)
243
DO_CMP0(gvec_cge0_h, int16_t, >=)
244
245
#undef DO_CMP0
246
+
247
+#define DO_ABD(NAME, TYPE) \
248
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
249
+{ \
250
+ intptr_t i, opr_sz = simd_oprsz(desc); \
251
+ TYPE *d = vd, *n = vn, *m = vm; \
252
+ \
253
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
254
+ d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
255
+ } \
256
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
257
+}
258
+
259
+DO_ABD(gvec_sabd_b, int8_t)
260
+DO_ABD(gvec_sabd_h, int16_t)
261
+DO_ABD(gvec_sabd_s, int32_t)
262
+DO_ABD(gvec_sabd_d, int64_t)
263
+
264
+DO_ABD(gvec_uabd_b, uint8_t)
265
+DO_ABD(gvec_uabd_h, uint16_t)
266
+DO_ABD(gvec_uabd_s, uint32_t)
267
+DO_ABD(gvec_uabd_d, uint64_t)
268
+
269
+#undef DO_ABD
270
--
271
2.20.1
272
273
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Include 64-bit element size in preparation for SVE2.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200513163245.17915-17-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/helper.h | 17 +++--
11
target/arm/translate.h | 5 ++
12
target/arm/neon_helper.c | 10 ---
13
target/arm/translate-a64.c | 17 ++---
14
target/arm/translate.c | 134 +++++++++++++++++++++++++++++++++++--
15
target/arm/vec_helper.c | 24 +++++++
16
6 files changed, 174 insertions(+), 33 deletions(-)
17
18
diff --git a/target/arm/helper.h b/target/arm/helper.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper.h
21
+++ b/target/arm/helper.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
23
DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
24
DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
25
26
-DEF_HELPER_2(neon_abd_u8, i32, i32, i32)
27
-DEF_HELPER_2(neon_abd_s8, i32, i32, i32)
28
-DEF_HELPER_2(neon_abd_u16, i32, i32, i32)
29
-DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
30
-DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
31
-DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
32
-
33
DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
34
DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
35
DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
36
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
40
+DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
+DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
43
+DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
44
+
45
+DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
+DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
47
+DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
48
+DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
+
50
#ifdef TARGET_AARCH64
51
#include "helper-a64.h"
52
#include "helper-sve.h"
53
diff --git a/target/arm/translate.h b/target/arm/translate.h
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/translate.h
56
+++ b/target/arm/translate.h
57
@@ -XXX,XX +XXX,XX @@ void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
58
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
59
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
60
61
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
62
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
63
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
64
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
65
+
66
/*
67
* Forward to the isar_feature_* tests given a DisasContext pointer.
68
*/
69
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/neon_helper.c
72
+++ b/target/arm/neon_helper.c
73
@@ -XXX,XX +XXX,XX @@ NEON_POP(pmax_s16, neon_s16, 2)
74
NEON_POP(pmax_u16, neon_u16, 2)
75
#undef NEON_FN
76
77
-#define NEON_FN(dest, src1, src2) \
78
- dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
79
-NEON_VOP(abd_s8, neon_s8, 4)
80
-NEON_VOP(abd_u8, neon_u8, 4)
81
-NEON_VOP(abd_s16, neon_s16, 2)
82
-NEON_VOP(abd_u16, neon_u16, 2)
83
-NEON_VOP(abd_s32, neon_s32, 1)
84
-NEON_VOP(abd_u32, neon_u32, 1)
85
-#undef NEON_FN
86
-
87
#define NEON_FN(dest, src1, src2) do { \
88
int8_t tmp; \
89
tmp = (int8_t)src2; \
90
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/translate-a64.c
93
+++ b/target/arm/translate-a64.c
94
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
95
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
96
}
97
return;
98
+ case 0xf: /* SABA, UABA */
99
+ if (u) {
100
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
101
+ } else {
102
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
103
+ }
104
+ return;
105
case 0x10: /* ADD, SUB */
106
if (u) {
107
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
108
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
109
genenvfn = fns[size][u];
110
break;
111
}
112
- case 0xf: /* SABA, UABA */
113
- {
114
- static NeonGenTwoOpFn * const fns[3][2] = {
115
- { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
116
- { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
117
- { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
118
- };
119
- genfn = fns[size][u];
120
- break;
121
- }
122
case 0x16: /* SQDMULH, SQRDMULH */
123
{
124
static NeonGenTwoOpEnvFn * const fns[2][2] = {
125
diff --git a/target/arm/translate.c b/target/arm/translate.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/translate.c
128
+++ b/target/arm/translate.c
129
@@ -XXX,XX +XXX,XX @@ void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
130
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
131
}
132
133
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
134
+{
135
+ TCGv_i32 t = tcg_temp_new_i32();
136
+ gen_sabd_i32(t, a, b);
137
+ tcg_gen_add_i32(d, d, t);
138
+ tcg_temp_free_i32(t);
139
+}
140
+
141
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
142
+{
143
+ TCGv_i64 t = tcg_temp_new_i64();
144
+ gen_sabd_i64(t, a, b);
145
+ tcg_gen_add_i64(d, d, t);
146
+ tcg_temp_free_i64(t);
147
+}
148
+
149
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
150
+{
151
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
152
+ gen_sabd_vec(vece, t, a, b);
153
+ tcg_gen_add_vec(vece, d, d, t);
154
+ tcg_temp_free_vec(t);
155
+}
156
+
157
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
158
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
159
+{
160
+ static const TCGOpcode vecop_list[] = {
161
+ INDEX_op_sub_vec, INDEX_op_add_vec,
162
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
163
+ };
164
+ static const GVecGen3 ops[4] = {
165
+ { .fniv = gen_saba_vec,
166
+ .fno = gen_helper_gvec_saba_b,
167
+ .opt_opc = vecop_list,
168
+ .load_dest = true,
169
+ .vece = MO_8 },
170
+ { .fniv = gen_saba_vec,
171
+ .fno = gen_helper_gvec_saba_h,
172
+ .opt_opc = vecop_list,
173
+ .load_dest = true,
174
+ .vece = MO_16 },
175
+ { .fni4 = gen_saba_i32,
176
+ .fniv = gen_saba_vec,
177
+ .fno = gen_helper_gvec_saba_s,
178
+ .opt_opc = vecop_list,
179
+ .load_dest = true,
180
+ .vece = MO_32 },
181
+ { .fni8 = gen_saba_i64,
182
+ .fniv = gen_saba_vec,
183
+ .fno = gen_helper_gvec_saba_d,
184
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
185
+ .opt_opc = vecop_list,
186
+ .load_dest = true,
187
+ .vece = MO_64 },
188
+ };
189
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
190
+}
191
+
192
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
193
+{
194
+ TCGv_i32 t = tcg_temp_new_i32();
195
+ gen_uabd_i32(t, a, b);
196
+ tcg_gen_add_i32(d, d, t);
197
+ tcg_temp_free_i32(t);
198
+}
199
+
200
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
201
+{
202
+ TCGv_i64 t = tcg_temp_new_i64();
203
+ gen_uabd_i64(t, a, b);
204
+ tcg_gen_add_i64(d, d, t);
205
+ tcg_temp_free_i64(t);
206
+}
207
+
208
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
209
+{
210
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
211
+ gen_uabd_vec(vece, t, a, b);
212
+ tcg_gen_add_vec(vece, d, d, t);
213
+ tcg_temp_free_vec(t);
214
+}
215
+
216
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
217
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
218
+{
219
+ static const TCGOpcode vecop_list[] = {
220
+ INDEX_op_sub_vec, INDEX_op_add_vec,
221
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
222
+ };
223
+ static const GVecGen3 ops[4] = {
224
+ { .fniv = gen_uaba_vec,
225
+ .fno = gen_helper_gvec_uaba_b,
226
+ .opt_opc = vecop_list,
227
+ .load_dest = true,
228
+ .vece = MO_8 },
229
+ { .fniv = gen_uaba_vec,
230
+ .fno = gen_helper_gvec_uaba_h,
231
+ .opt_opc = vecop_list,
232
+ .load_dest = true,
233
+ .vece = MO_16 },
234
+ { .fni4 = gen_uaba_i32,
235
+ .fniv = gen_uaba_vec,
236
+ .fno = gen_helper_gvec_uaba_s,
237
+ .opt_opc = vecop_list,
238
+ .load_dest = true,
239
+ .vece = MO_32 },
240
+ { .fni8 = gen_uaba_i64,
241
+ .fniv = gen_uaba_vec,
242
+ .fno = gen_helper_gvec_uaba_d,
243
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
244
+ .opt_opc = vecop_list,
245
+ .load_dest = true,
246
+ .vece = MO_64 },
247
+ };
248
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
249
+}
250
+
251
/* Translate a NEON data processing instruction. Return nonzero if the
252
instruction is invalid.
253
We process data in a mixture of 32-bit and 64-bit chunks.
254
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
255
}
256
return 0;
257
258
+ case NEON_3R_VABA:
259
+ if (u) {
260
+ gen_gvec_uaba(size, rd_ofs, rn_ofs, rm_ofs,
261
+ vec_size, vec_size);
262
+ } else {
263
+ gen_gvec_saba(size, rd_ofs, rn_ofs, rm_ofs,
264
+ vec_size, vec_size);
265
+ }
266
+ return 0;
267
+
268
case NEON_3R_VADD_VSUB:
269
case NEON_3R_LOGIC:
270
case NEON_3R_VMAX:
271
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
272
case NEON_3R_VQRSHL:
273
GEN_NEON_INTEGER_OP_ENV(qrshl);
274
break;
275
- case NEON_3R_VABA:
276
- GEN_NEON_INTEGER_OP(abd);
277
- tcg_temp_free_i32(tmp2);
278
- tmp2 = neon_load_reg(rd, pass);
279
- gen_neon_add(size, tmp, tmp2);
280
- break;
281
case NEON_3R_VPMAX:
282
GEN_NEON_INTEGER_OP(pmax);
283
break;
284
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
285
index XXXXXXX..XXXXXXX 100644
286
--- a/target/arm/vec_helper.c
287
+++ b/target/arm/vec_helper.c
288
@@ -XXX,XX +XXX,XX @@ DO_ABD(gvec_uabd_s, uint32_t)
289
DO_ABD(gvec_uabd_d, uint64_t)
290
291
#undef DO_ABD
292
+
293
+#define DO_ABA(NAME, TYPE) \
294
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
295
+{ \
296
+ intptr_t i, opr_sz = simd_oprsz(desc); \
297
+ TYPE *d = vd, *n = vn, *m = vm; \
298
+ \
299
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
300
+ d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
301
+ } \
302
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
303
+}
304
+
305
+DO_ABA(gvec_saba_b, int8_t)
306
+DO_ABA(gvec_saba_h, int16_t)
307
+DO_ABA(gvec_saba_s, int32_t)
308
+DO_ABA(gvec_saba_d, int64_t)
309
+
310
+DO_ABA(gvec_uaba_b, uint8_t)
311
+DO_ABA(gvec_uaba_h, uint16_t)
312
+DO_ABA(gvec_uaba_s, uint32_t)
313
+DO_ABA(gvec_uaba_d, uint64_t)
314
+
315
+#undef DO_ABA
316
--
317
2.20.1
318
319
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Williams <patrick@stwcx.xyz>
2
1
3
Sonora Pass is a 2 socket x86 motherboard designed by Facebook
4
and supported by OpenBMC. Strapping configuration was obtained
5
from hardware and i2c configuration is based on dts found at:
6
7
https://github.com/facebook/openbmc-linux/blob/1633c87b8ba7c162095787c988979b748ba65dc8/arch/arm/boot/dts/aspeed-bmc-facebook-sonorapass.dts
8
9
Booted a test image of http://github.com/facebook/openbmc to login
10
prompt.
11
12
Signed-off-by: Patrick Williams <patrick@stwcx.xyz>
13
Reviewed-by: Amithash Prasad <amithash@fb.com>
14
Reviewed-by: Cédric Le Goater <clg@kaod.org>
15
[PMM: fixed block comment style nit]
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
hw/arm/aspeed.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++
19
1 file changed, 78 insertions(+)
20
21
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/arm/aspeed.c
24
+++ b/hw/arm/aspeed.c
25
@@ -XXX,XX +XXX,XX @@ struct AspeedBoardState {
26
SCU_AST2500_HW_STRAP_ACPI_ENABLE | \
27
SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER))
28
29
+/* Sonorapass hardware value: 0xF100D216 */
30
+#define SONORAPASS_BMC_HW_STRAP1 ( \
31
+ SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \
32
+ SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \
33
+ SCU_AST2500_HW_STRAP_UART_DEBUG | \
34
+ SCU_AST2500_HW_STRAP_RESERVED28 | \
35
+ SCU_AST2500_HW_STRAP_DDR4_ENABLE | \
36
+ SCU_HW_STRAP_VGA_CLASS_CODE | \
37
+ SCU_HW_STRAP_LPC_RESET_PIN | \
38
+ SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER) | \
39
+ SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \
40
+ SCU_HW_STRAP_VGA_BIOS_ROM | \
41
+ SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \
42
+ SCU_AST2500_HW_STRAP_RESERVED1)
43
+
44
/* Swift hardware value: 0xF11AD206 */
45
#define SWIFT_BMC_HW_STRAP1 ( \
46
AST2500_HW_STRAP1_DEFAULTS | \
47
@@ -XXX,XX +XXX,XX @@ static void swift_bmc_i2c_init(AspeedBoardState *bmc)
48
i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 12), "tmp105", 0x4a);
49
}
50
51
+static void sonorapass_bmc_i2c_init(AspeedBoardState *bmc)
52
+{
53
+ AspeedSoCState *soc = &bmc->soc;
54
+
55
+ /* bus 2 : */
56
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x48);
57
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x49);
58
+ /* bus 2 : pca9546 @ 0x73 */
59
+
60
+ /* bus 3 : pca9548 @ 0x70 */
61
+
62
+ /* bus 4 : */
63
+ uint8_t *eeprom4_54 = g_malloc0(8 * 1024);
64
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), 0x54,
65
+ eeprom4_54);
66
+ /* PCA9539 @ 0x76, but PCA9552 is compatible */
67
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x76);
68
+ /* PCA9539 @ 0x77, but PCA9552 is compatible */
69
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x77);
70
+
71
+ /* bus 6 : */
72
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x48);
73
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x49);
74
+ /* bus 6 : pca9546 @ 0x73 */
75
+
76
+ /* bus 8 : */
77
+ uint8_t *eeprom8_56 = g_malloc0(8 * 1024);
78
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), 0x56,
79
+ eeprom8_56);
80
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x60);
81
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x61);
82
+ /* bus 8 : adc128d818 @ 0x1d */
83
+ /* bus 8 : adc128d818 @ 0x1f */
84
+
85
+ /*
86
+ * bus 13 : pca9548 @ 0x71
87
+ * - channel 3:
88
+ * - tmm421 @ 0x4c
89
+ * - tmp421 @ 0x4e
90
+ * - tmp421 @ 0x4f
91
+ */
92
+
93
+}
94
+
95
static void witherspoon_bmc_i2c_init(AspeedBoardState *bmc)
96
{
97
AspeedSoCState *soc = &bmc->soc;
98
@@ -XXX,XX +XXX,XX @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data)
99
mc->default_ram_size = 512 * MiB;
100
};
101
102
+static void aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data)
103
+{
104
+ MachineClass *mc = MACHINE_CLASS(oc);
105
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
106
+
107
+ mc->desc = "OCP SonoraPass BMC (ARM1176)";
108
+ amc->soc_name = "ast2500-a1";
109
+ amc->hw_strap1 = SONORAPASS_BMC_HW_STRAP1;
110
+ amc->fmc_model = "mx66l1g45g";
111
+ amc->spi_model = "mx66l1g45g";
112
+ amc->num_cs = 2;
113
+ amc->i2c_init = sonorapass_bmc_i2c_init;
114
+ mc->default_ram_size = 512 * MiB;
115
+};
116
+
117
static void aspeed_machine_swift_class_init(ObjectClass *oc, void *data)
118
{
119
MachineClass *mc = MACHINE_CLASS(oc);
120
@@ -XXX,XX +XXX,XX @@ static const TypeInfo aspeed_machine_types[] = {
121
.name = MACHINE_TYPE_NAME("swift-bmc"),
122
.parent = TYPE_ASPEED_MACHINE,
123
.class_init = aspeed_machine_swift_class_init,
124
+ }, {
125
+ .name = MACHINE_TYPE_NAME("sonorapass-bmc"),
126
+ .parent = TYPE_ASPEED_MACHINE,
127
+ .class_init = aspeed_machine_sonorapass_class_init,
128
}, {
129
.name = MACHINE_TYPE_NAME("witherspoon-bmc"),
130
.parent = TYPE_ASPEED_MACHINE,
131
--
132
2.20.1
133
134
diff view generated by jsdifflib
Deleted patch
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
2
1
3
The little end UUID is used in many places, so make
4
NVDIMM_UUID_LE to a common macro to convert the UUID
5
to a little end array.
6
7
Reviewed-by: Xiang Zheng <zhengxiang9@huawei.com>
8
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
9
Message-id: 20200512030609.19593-2-gengdongjiu@huawei.com
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
include/qemu/uuid.h | 27 +++++++++++++++++++++++++++
14
hw/acpi/nvdimm.c | 10 +++-------
15
2 files changed, 30 insertions(+), 7 deletions(-)
16
17
diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/qemu/uuid.h
20
+++ b/include/qemu/uuid.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct {
22
};
23
} QemuUUID;
24
25
+/**
26
+ * UUID_LE - converts the fields of UUID to little-endian array,
27
+ * each of parameters is the filed of UUID.
28
+ *
29
+ * @time_low: The low field of the timestamp
30
+ * @time_mid: The middle field of the timestamp
31
+ * @time_hi_and_version: The high field of the timestamp
32
+ * multiplexed with the version number
33
+ * @clock_seq_hi_and_reserved: The high field of the clock
34
+ * sequence multiplexed with the variant
35
+ * @clock_seq_low: The low field of the clock sequence
36
+ * @node0: The spatially unique node0 identifier
37
+ * @node1: The spatially unique node1 identifier
38
+ * @node2: The spatially unique node2 identifier
39
+ * @node3: The spatially unique node3 identifier
40
+ * @node4: The spatially unique node4 identifier
41
+ * @node5: The spatially unique node5 identifier
42
+ */
43
+#define UUID_LE(time_low, time_mid, time_hi_and_version, \
44
+ clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2, \
45
+ node3, node4, node5) \
46
+ { (time_low) & 0xff, ((time_low) >> 8) & 0xff, ((time_low) >> 16) & 0xff, \
47
+ ((time_low) >> 24) & 0xff, (time_mid) & 0xff, ((time_mid) >> 8) & 0xff, \
48
+ (time_hi_and_version) & 0xff, ((time_hi_and_version) >> 8) & 0xff, \
49
+ (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\
50
+ (node3), (node4), (node5) }
51
+
52
#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \
53
"%02hhx%02hhx-%02hhx%02hhx-" \
54
"%02hhx%02hhx-" \
55
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/hw/acpi/nvdimm.c
58
+++ b/hw/acpi/nvdimm.c
59
@@ -XXX,XX +XXX,XX @@
60
*/
61
62
#include "qemu/osdep.h"
63
+#include "qemu/uuid.h"
64
#include "hw/acpi/acpi.h"
65
#include "hw/acpi/aml-build.h"
66
#include "hw/acpi/bios-linker-loader.h"
67
@@ -XXX,XX +XXX,XX @@
68
#include "hw/mem/nvdimm.h"
69
#include "qemu/nvdimm-utils.h"
70
71
-#define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
72
- { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
73
- (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \
74
- (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }
75
-
76
/*
77
* define Byte Addressable Persistent Memory (PM) Region according to
78
* ACPI 6.0: 5.2.25.1 System Physical Address Range Structure.
79
*/
80
static const uint8_t nvdimm_nfit_spa_uuid[] =
81
- NVDIMM_UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
82
- 0x18, 0xb7, 0x8c, 0xdb);
83
+ UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
84
+ 0x18, 0xb7, 0x8c, 0xdb);
85
86
/*
87
* NVDIMM Firmware Interface Table
88
--
89
2.20.1
90
91
diff view generated by jsdifflib
Deleted patch
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
2
1
3
RAS Virtualization feature is not supported now, so
4
add a RAS machine option and disable it by default.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
8
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
9
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
10
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
11
Message-id: 20200512030609.19593-3-gengdongjiu@huawei.com
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
include/hw/arm/virt.h | 1 +
15
hw/arm/virt.c | 23 +++++++++++++++++++++++
16
2 files changed, 24 insertions(+)
17
18
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/arm/virt.h
21
+++ b/include/hw/arm/virt.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct {
23
bool highmem_ecam;
24
bool its;
25
bool virt;
26
+ bool ras;
27
OnOffAuto acpi;
28
VirtGICType gic_version;
29
VirtIOMMUType iommu;
30
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/arm/virt.c
33
+++ b/hw/arm/virt.c
34
@@ -XXX,XX +XXX,XX @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name,
35
visit_type_OnOffAuto(v, name, &vms->acpi, errp);
36
}
37
38
+static bool virt_get_ras(Object *obj, Error **errp)
39
+{
40
+ VirtMachineState *vms = VIRT_MACHINE(obj);
41
+
42
+ return vms->ras;
43
+}
44
+
45
+static void virt_set_ras(Object *obj, bool value, Error **errp)
46
+{
47
+ VirtMachineState *vms = VIRT_MACHINE(obj);
48
+
49
+ vms->ras = value;
50
+}
51
+
52
static char *virt_get_gic_version(Object *obj, Error **errp)
53
{
54
VirtMachineState *vms = VIRT_MACHINE(obj);
55
@@ -XXX,XX +XXX,XX @@ static void virt_instance_init(Object *obj)
56
"Valid values are none and smmuv3",
57
NULL);
58
59
+ /* Default disallows RAS instantiation */
60
+ vms->ras = false;
61
+ object_property_add_bool(obj, "ras", virt_get_ras,
62
+ virt_set_ras, NULL);
63
+ object_property_set_description(obj, "ras",
64
+ "Set on/off to enable/disable reporting host memory errors "
65
+ "to a KVM guest using ACPI and guest external abort exceptions",
66
+ NULL);
67
+
68
vms->irqmap = a15irqmap;
69
70
virt_flash_create(vms);
71
--
72
2.20.1
73
74
diff view generated by jsdifflib
Deleted patch
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
2
1
3
kvm_hwpoison_page_add() and kvm_unpoison_all() will both
4
be used by X86 and ARM platforms, so moving them into
5
"accel/kvm/kvm-all.c" to avoid duplicate code.
6
7
For architectures that don't use the poison-list functionality
8
the reset handler will harmlessly do nothing, so let's register
9
the kvm_unpoison_all() function in the generic kvm_init() function.
10
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
13
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
14
Acked-by: Xiang Zheng <zhengxiang9@huawei.com>
15
Message-id: 20200512030609.19593-8-gengdongjiu@huawei.com
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
include/sysemu/kvm_int.h | 12 ++++++++++++
19
accel/kvm/kvm-all.c | 36 ++++++++++++++++++++++++++++++++++++
20
target/i386/kvm.c | 36 ------------------------------------
21
3 files changed, 48 insertions(+), 36 deletions(-)
22
23
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/include/sysemu/kvm_int.h
26
+++ b/include/sysemu/kvm_int.h
27
@@ -XXX,XX +XXX,XX @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
28
AddressSpace *as, int as_id);
29
30
void kvm_set_max_memslot_size(hwaddr max_slot_size);
31
+
32
+/**
33
+ * kvm_hwpoison_page_add:
34
+ *
35
+ * Parameters:
36
+ * @ram_addr: the address in the RAM for the poisoned page
37
+ *
38
+ * Add a poisoned page to the list
39
+ *
40
+ * Return: None.
41
+ */
42
+void kvm_hwpoison_page_add(ram_addr_t ram_addr);
43
#endif
44
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/accel/kvm/kvm-all.c
47
+++ b/accel/kvm/kvm-all.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "qapi/visitor.h"
50
#include "qapi/qapi-types-common.h"
51
#include "qapi/qapi-visit-common.h"
52
+#include "sysemu/reset.h"
53
54
#include "hw/boards.h"
55
56
@@ -XXX,XX +XXX,XX @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
57
return ret;
58
}
59
60
+typedef struct HWPoisonPage {
61
+ ram_addr_t ram_addr;
62
+ QLIST_ENTRY(HWPoisonPage) list;
63
+} HWPoisonPage;
64
+
65
+static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
66
+ QLIST_HEAD_INITIALIZER(hwpoison_page_list);
67
+
68
+static void kvm_unpoison_all(void *param)
69
+{
70
+ HWPoisonPage *page, *next_page;
71
+
72
+ QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
73
+ QLIST_REMOVE(page, list);
74
+ qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
75
+ g_free(page);
76
+ }
77
+}
78
+
79
+void kvm_hwpoison_page_add(ram_addr_t ram_addr)
80
+{
81
+ HWPoisonPage *page;
82
+
83
+ QLIST_FOREACH(page, &hwpoison_page_list, list) {
84
+ if (page->ram_addr == ram_addr) {
85
+ return;
86
+ }
87
+ }
88
+ page = g_new(HWPoisonPage, 1);
89
+ page->ram_addr = ram_addr;
90
+ QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
91
+}
92
+
93
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
94
{
95
#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
96
@@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms)
97
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
98
}
99
100
+ qemu_register_reset(kvm_unpoison_all, NULL);
101
+
102
if (s->kernel_irqchip_allowed) {
103
kvm_irqchip_create(s);
104
}
105
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
106
index XXXXXXX..XXXXXXX 100644
107
--- a/target/i386/kvm.c
108
+++ b/target/i386/kvm.c
109
@@ -XXX,XX +XXX,XX @@
110
#include "sysemu/sysemu.h"
111
#include "sysemu/hw_accel.h"
112
#include "sysemu/kvm_int.h"
113
-#include "sysemu/reset.h"
114
#include "sysemu/runstate.h"
115
#include "kvm_i386.h"
116
#include "hyperv.h"
117
@@ -XXX,XX +XXX,XX @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
118
}
119
}
120
121
-
122
-typedef struct HWPoisonPage {
123
- ram_addr_t ram_addr;
124
- QLIST_ENTRY(HWPoisonPage) list;
125
-} HWPoisonPage;
126
-
127
-static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
128
- QLIST_HEAD_INITIALIZER(hwpoison_page_list);
129
-
130
-static void kvm_unpoison_all(void *param)
131
-{
132
- HWPoisonPage *page, *next_page;
133
-
134
- QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
135
- QLIST_REMOVE(page, list);
136
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
137
- g_free(page);
138
- }
139
-}
140
-
141
-static void kvm_hwpoison_page_add(ram_addr_t ram_addr)
142
-{
143
- HWPoisonPage *page;
144
-
145
- QLIST_FOREACH(page, &hwpoison_page_list, list) {
146
- if (page->ram_addr == ram_addr) {
147
- return;
148
- }
149
- }
150
- page = g_new(HWPoisonPage, 1);
151
- page->ram_addr = ram_addr;
152
- QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
153
-}
154
-
155
static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
156
int *max_banks)
157
{
158
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s)
159
fprintf(stderr, "e820_add_entry() table is full\n");
160
return ret;
161
}
162
- qemu_register_reset(kvm_unpoison_all, NULL);
163
164
shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort);
165
if (shadow_mem != -1) {
166
--
167
2.20.1
168
169
diff view generated by jsdifflib
Deleted patch
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
2
1
3
kvm_arch_on_sigbus_vcpu() error injection uses source_id as
4
index in etc/hardware_errors to find out Error Status Data
5
Block entry corresponding to error source. So supported source_id
6
values should be assigned here and not be changed afterwards to
7
make sure that guest will write error into expected Error Status
8
Data Block.
9
10
Before QEMU writes a new error to ACPI table, it will check whether
11
previous error has been acknowledged. If not acknowledged, the new
12
errors will be ignored and not be recorded. For the errors section
13
type, QEMU simulate it to memory section error.
14
15
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
16
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
17
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
18
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
19
Message-id: 20200512030609.19593-9-gengdongjiu@huawei.com
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
---
22
include/hw/acpi/ghes.h | 1 +
23
hw/acpi/ghes.c | 219 +++++++++++++++++++++++++++++++++++++++++
24
2 files changed, 220 insertions(+)
25
26
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/acpi/ghes.h
29
+++ b/include/hw/acpi/ghes.h
30
@@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
31
void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
32
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
33
GArray *hardware_errors);
34
+int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
35
#endif
36
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/hw/acpi/ghes.c
39
+++ b/hw/acpi/ghes.c
40
@@ -XXX,XX +XXX,XX @@
41
#include "qemu/error-report.h"
42
#include "hw/acpi/generic_event_device.h"
43
#include "hw/nvram/fw_cfg.h"
44
+#include "qemu/uuid.h"
45
46
#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
47
#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
48
@@ -XXX,XX +XXX,XX @@
49
/* Address offset in Generic Address Structure(GAS) */
50
#define GAS_ADDR_OFFSET 4
51
52
+/*
53
+ * The total size of Generic Error Data Entry
54
+ * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
55
+ * Table 18-343 Generic Error Data Entry
56
+ */
57
+#define ACPI_GHES_DATA_LENGTH 72
58
+
59
+/* The memory section CPER size, UEFI 2.6: N.2.5 Memory Error Section */
60
+#define ACPI_GHES_MEM_CPER_LENGTH 80
61
+
62
+/* Masks for block_status flags */
63
+#define ACPI_GEBS_UNCORRECTABLE 1
64
+
65
+/*
66
+ * Total size for Generic Error Status Block except Generic Error Data Entries
67
+ * ACPI 6.2: 18.3.2.7.1 Generic Error Data,
68
+ * Table 18-380 Generic Error Status Block
69
+ */
70
+#define ACPI_GHES_GESB_SIZE 20
71
+
72
+/*
73
+ * Values for error_severity field
74
+ */
75
+enum AcpiGenericErrorSeverity {
76
+ ACPI_CPER_SEV_RECOVERABLE = 0,
77
+ ACPI_CPER_SEV_FATAL = 1,
78
+ ACPI_CPER_SEV_CORRECTED = 2,
79
+ ACPI_CPER_SEV_NONE = 3,
80
+};
81
+
82
/*
83
* Hardware Error Notification
84
* ACPI 4.0: 17.3.2.7 Hardware Error Notification
85
@@ -XXX,XX +XXX,XX @@ static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
86
build_append_int_noprefix(table, 0, 4);
87
}
88
89
+/*
90
+ * Generic Error Data Entry
91
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
92
+ */
93
+static void acpi_ghes_generic_error_data(GArray *table,
94
+ const uint8_t *section_type, uint32_t error_severity,
95
+ uint8_t validation_bits, uint8_t flags,
96
+ uint32_t error_data_length, QemuUUID fru_id,
97
+ uint64_t time_stamp)
98
+{
99
+ const uint8_t fru_text[20] = {0};
100
+
101
+ /* Section Type */
102
+ g_array_append_vals(table, section_type, 16);
103
+
104
+ /* Error Severity */
105
+ build_append_int_noprefix(table, error_severity, 4);
106
+ /* Revision */
107
+ build_append_int_noprefix(table, 0x300, 2);
108
+ /* Validation Bits */
109
+ build_append_int_noprefix(table, validation_bits, 1);
110
+ /* Flags */
111
+ build_append_int_noprefix(table, flags, 1);
112
+ /* Error Data Length */
113
+ build_append_int_noprefix(table, error_data_length, 4);
114
+
115
+ /* FRU Id */
116
+ g_array_append_vals(table, fru_id.data, ARRAY_SIZE(fru_id.data));
117
+
118
+ /* FRU Text */
119
+ g_array_append_vals(table, fru_text, sizeof(fru_text));
120
+
121
+ /* Timestamp */
122
+ build_append_int_noprefix(table, time_stamp, 8);
123
+}
124
+
125
+/*
126
+ * Generic Error Status Block
127
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
128
+ */
129
+static void acpi_ghes_generic_error_status(GArray *table, uint32_t block_status,
130
+ uint32_t raw_data_offset, uint32_t raw_data_length,
131
+ uint32_t data_length, uint32_t error_severity)
132
+{
133
+ /* Block Status */
134
+ build_append_int_noprefix(table, block_status, 4);
135
+ /* Raw Data Offset */
136
+ build_append_int_noprefix(table, raw_data_offset, 4);
137
+ /* Raw Data Length */
138
+ build_append_int_noprefix(table, raw_data_length, 4);
139
+ /* Data Length */
140
+ build_append_int_noprefix(table, data_length, 4);
141
+ /* Error Severity */
142
+ build_append_int_noprefix(table, error_severity, 4);
143
+}
144
+
145
+/* UEFI 2.6: N.2.5 Memory Error Section */
146
+static void acpi_ghes_build_append_mem_cper(GArray *table,
147
+ uint64_t error_physical_addr)
148
+{
149
+ /*
150
+ * Memory Error Record
151
+ */
152
+
153
+ /* Validation Bits */
154
+ build_append_int_noprefix(table,
155
+ (1ULL << 14) | /* Type Valid */
156
+ (1ULL << 1) /* Physical Address Valid */,
157
+ 8);
158
+ /* Error Status */
159
+ build_append_int_noprefix(table, 0, 8);
160
+ /* Physical Address */
161
+ build_append_int_noprefix(table, error_physical_addr, 8);
162
+ /* Skip all the detailed information normally found in such a record */
163
+ build_append_int_noprefix(table, 0, 48);
164
+ /* Memory Error Type */
165
+ build_append_int_noprefix(table, 0 /* Unknown error */, 1);
166
+ /* Skip all the detailed information normally found in such a record */
167
+ build_append_int_noprefix(table, 0, 7);
168
+}
169
+
170
+static int acpi_ghes_record_mem_error(uint64_t error_block_address,
171
+ uint64_t error_physical_addr)
172
+{
173
+ GArray *block;
174
+
175
+ /* Memory Error Section Type */
176
+ const uint8_t uefi_cper_mem_sec[] =
177
+ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
178
+ 0xED, 0x7C, 0x83, 0xB1);
179
+
180
+ /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
181
+ * Table 17-13 Generic Error Data Entry
182
+ */
183
+ QemuUUID fru_id = {};
184
+ uint32_t data_length;
185
+
186
+ block = g_array_new(false, true /* clear */, 1);
187
+
188
+ /* This is the length if adding a new generic error data entry*/
189
+ data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
190
+
191
+ /*
192
+ * Check whether it will run out of the preallocated memory if adding a new
193
+ * generic error data entry
194
+ */
195
+ if ((data_length + ACPI_GHES_GESB_SIZE) > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
196
+ error_report("Not enough memory to record new CPER!!!");
197
+ g_array_free(block, true);
198
+ return -1;
199
+ }
200
+
201
+ /* Build the new generic error status block header */
202
+ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
203
+ 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
204
+
205
+ /* Build this new generic error data entry header */
206
+ acpi_ghes_generic_error_data(block, uefi_cper_mem_sec,
207
+ ACPI_CPER_SEV_RECOVERABLE, 0, 0,
208
+ ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0);
209
+
210
+ /* Build the memory section CPER for above new generic error data entry */
211
+ acpi_ghes_build_append_mem_cper(block, error_physical_addr);
212
+
213
+ /* Write the generic error data entry into guest memory */
214
+ cpu_physical_memory_write(error_block_address, block->data, block->len);
215
+
216
+ g_array_free(block, true);
217
+
218
+ return 0;
219
+}
220
+
221
/*
222
* Build table for the hardware error fw_cfg blob.
223
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
224
@@ -XXX,XX +XXX,XX @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
225
fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
226
NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
227
}
228
+
229
+int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
230
+{
231
+ uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0;
232
+ uint64_t start_addr;
233
+ bool ret = -1;
234
+ AcpiGedState *acpi_ged_state;
235
+ AcpiGhesState *ags;
236
+
237
+ assert(source_id < ACPI_HEST_SRC_ID_RESERVED);
238
+
239
+ acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
240
+ NULL));
241
+ g_assert(acpi_ged_state);
242
+ ags = &acpi_ged_state->ghes_state;
243
+
244
+ start_addr = le64_to_cpu(ags->ghes_addr_le);
245
+
246
+ if (physical_address) {
247
+
248
+ if (source_id < ACPI_HEST_SRC_ID_RESERVED) {
249
+ start_addr += source_id * sizeof(uint64_t);
250
+ }
251
+
252
+ cpu_physical_memory_read(start_addr, &error_block_addr,
253
+ sizeof(error_block_addr));
254
+
255
+ error_block_addr = le64_to_cpu(error_block_addr);
256
+
257
+ read_ack_register_addr = start_addr +
258
+ ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t);
259
+
260
+ cpu_physical_memory_read(read_ack_register_addr,
261
+ &read_ack_register, sizeof(read_ack_register));
262
+
263
+ /* zero means OSPM does not acknowledge the error */
264
+ if (!read_ack_register) {
265
+ error_report("OSPM does not acknowledge previous error,"
266
+ " so can not record CPER for current error anymore");
267
+ } else if (error_block_addr) {
268
+ read_ack_register = cpu_to_le64(0);
269
+ /*
270
+ * Clear the Read Ack Register, OSPM will write it to 1 when
271
+ * it acknowledges this error.
272
+ */
273
+ cpu_physical_memory_write(read_ack_register_addr,
274
+ &read_ack_register, sizeof(uint64_t));
275
+
276
+ ret = acpi_ghes_record_mem_error(error_block_addr,
277
+ physical_address);
278
+ } else
279
+ error_report("can not find Generic Error Status Block");
280
+ }
281
+
282
+ return ret;
283
+}
284
--
285
2.20.1
286
287
diff view generated by jsdifflib
Deleted patch
1
From: Dongjiu Geng <gengdongjiu@huawei.com>
2
1
3
I and Xiang are willing to review the APEI-related patches and
4
volunteer as the reviewers for the HEST/GHES part.
5
6
Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
7
Signed-off-by: Xiang Zheng <zhengxiang9@huawei.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Message-id: 20200512030609.19593-11-gengdongjiu@huawei.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
MAINTAINERS | 9 +++++++++
14
1 file changed, 9 insertions(+)
15
16
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
19
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/bios-tables-test.c
21
F: tests/qtest/acpi-utils.[hc]
22
F: tests/data/acpi/
23
24
+ACPI/HEST/GHES
25
+R: Dongjiu Geng <gengdongjiu@huawei.com>
26
+R: Xiang Zheng <zhengxiang9@huawei.com>
27
+L: qemu-arm@nongnu.org
28
+S: Maintained
29
+F: hw/acpi/ghes.c
30
+F: include/hw/acpi/ghes.h
31
+F: docs/specs/acpi_hest_ghes.rst
32
+
33
ppc4xx
34
M: David Gibson <david@gibson.dropbear.id.au>
35
L: qemu-ppc@nongnu.org
36
--
37
2.20.1
38
39
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VQRDMLAH and VQRDMLSH insns in the 3-reg-same group
2
to decodetree. These don't use do_3same() because they want to
3
operate on VFP double registers, whose offsets are different from the
4
neon_reg_offset() calculations do_3same does.
5
1
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20200512163904.10918-2-peter.maydell@linaro.org
9
---
10
target/arm/neon-dp.decode | 3 +++
11
target/arm/translate-neon.inc.c | 15 +++++++++++++++
12
target/arm/translate.c | 14 ++------------
13
3 files changed, 20 insertions(+), 12 deletions(-)
14
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
18
+++ b/target/arm/neon-dp.decode
19
@@ -XXX,XX +XXX,XX @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
20
21
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
22
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
23
+
24
+VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
25
+VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
26
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-neon.inc.c
29
+++ b/target/arm/translate-neon.inc.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
31
}
32
return do_3same(s, a, gen_VMUL_p_3s);
33
}
34
+
35
+#define DO_VQRDMLAH(INSN, FUNC) \
36
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
37
+ { \
38
+ if (!dc_isar_feature(aa32_rdm, s)) { \
39
+ return false; \
40
+ } \
41
+ if (a->size != 1 && a->size != 2) { \
42
+ return false; \
43
+ } \
44
+ return do_3same(s, a, FUNC); \
45
+ }
46
+
47
+DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
48
+DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
49
diff --git a/target/arm/translate.c b/target/arm/translate.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/translate.c
52
+++ b/target/arm/translate.c
53
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
54
if (!u) {
55
break; /* VPADD */
56
}
57
- /* VQRDMLAH */
58
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
59
- gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
60
- vec_size, vec_size);
61
- return 0;
62
- }
63
+ /* VQRDMLAH : handled by decodetree */
64
return 1;
65
66
case NEON_3R_VFM_VQRDMLSH:
67
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
68
}
69
break;
70
}
71
- /* VQRDMLSH */
72
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
73
- gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
74
- vec_size, vec_size);
75
- return 0;
76
- }
77
+ /* VQRDMLSH : handled by decodetree */
78
return 1;
79
80
case NEON_3R_VABD:
81
--
82
2.20.1
83
84
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon SHA instructions in the 3-reg-same group
2
to decodetree.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-3-peter.maydell@linaro.org
7
---
8
target/arm/neon-dp.decode | 10 +++
9
target/arm/translate-neon.inc.c | 139 ++++++++++++++++++++++++++++++++
10
target/arm/translate.c | 46 +----------
11
3 files changed, 151 insertions(+), 44 deletions(-)
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
18
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
19
20
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
21
+
22
+SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
24
+SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \
25
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
26
+SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
27
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
28
+SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
29
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
30
+
31
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
32
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate-neon.inc.c
35
+++ b/target/arm/translate-neon.inc.c
36
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
37
38
DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
39
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
40
+
41
+static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a)
42
+{
43
+ TCGv_ptr ptr1, ptr2, ptr3;
44
+ TCGv_i32 tmp;
45
+
46
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
47
+ !dc_isar_feature(aa32_sha1, s)) {
48
+ return false;
49
+ }
50
+
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
52
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
53
+ ((a->vd | a->vn | a->vm) & 0x10)) {
54
+ return false;
55
+ }
56
+
57
+ if ((a->vn | a->vm | a->vd) & 1) {
58
+ return false;
59
+ }
60
+
61
+ if (!vfp_access_check(s)) {
62
+ return true;
63
+ }
64
+
65
+ ptr1 = vfp_reg_ptr(true, a->vd);
66
+ ptr2 = vfp_reg_ptr(true, a->vn);
67
+ ptr3 = vfp_reg_ptr(true, a->vm);
68
+ tmp = tcg_const_i32(a->optype);
69
+ gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp);
70
+ tcg_temp_free_i32(tmp);
71
+ tcg_temp_free_ptr(ptr1);
72
+ tcg_temp_free_ptr(ptr2);
73
+ tcg_temp_free_ptr(ptr3);
74
+
75
+ return true;
76
+}
77
+
78
+static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a)
79
+{
80
+ TCGv_ptr ptr1, ptr2, ptr3;
81
+
82
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
83
+ !dc_isar_feature(aa32_sha2, s)) {
84
+ return false;
85
+ }
86
+
87
+ /* UNDEF accesses to D16-D31 if they don't exist. */
88
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
89
+ ((a->vd | a->vn | a->vm) & 0x10)) {
90
+ return false;
91
+ }
92
+
93
+ if ((a->vn | a->vm | a->vd) & 1) {
94
+ return false;
95
+ }
96
+
97
+ if (!vfp_access_check(s)) {
98
+ return true;
99
+ }
100
+
101
+ ptr1 = vfp_reg_ptr(true, a->vd);
102
+ ptr2 = vfp_reg_ptr(true, a->vn);
103
+ ptr3 = vfp_reg_ptr(true, a->vm);
104
+ gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
105
+ tcg_temp_free_ptr(ptr1);
106
+ tcg_temp_free_ptr(ptr2);
107
+ tcg_temp_free_ptr(ptr3);
108
+
109
+ return true;
110
+}
111
+
112
+static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a)
113
+{
114
+ TCGv_ptr ptr1, ptr2, ptr3;
115
+
116
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
117
+ !dc_isar_feature(aa32_sha2, s)) {
118
+ return false;
119
+ }
120
+
121
+ /* UNDEF accesses to D16-D31 if they don't exist. */
122
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
123
+ ((a->vd | a->vn | a->vm) & 0x10)) {
124
+ return false;
125
+ }
126
+
127
+ if ((a->vn | a->vm | a->vd) & 1) {
128
+ return false;
129
+ }
130
+
131
+ if (!vfp_access_check(s)) {
132
+ return true;
133
+ }
134
+
135
+ ptr1 = vfp_reg_ptr(true, a->vd);
136
+ ptr2 = vfp_reg_ptr(true, a->vn);
137
+ ptr3 = vfp_reg_ptr(true, a->vm);
138
+ gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
139
+ tcg_temp_free_ptr(ptr1);
140
+ tcg_temp_free_ptr(ptr2);
141
+ tcg_temp_free_ptr(ptr3);
142
+
143
+ return true;
144
+}
145
+
146
+static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
147
+{
148
+ TCGv_ptr ptr1, ptr2, ptr3;
149
+
150
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
151
+ !dc_isar_feature(aa32_sha2, s)) {
152
+ return false;
153
+ }
154
+
155
+ /* UNDEF accesses to D16-D31 if they don't exist. */
156
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
157
+ ((a->vd | a->vn | a->vm) & 0x10)) {
158
+ return false;
159
+ }
160
+
161
+ if ((a->vn | a->vm | a->vd) & 1) {
162
+ return false;
163
+ }
164
+
165
+ if (!vfp_access_check(s)) {
166
+ return true;
167
+ }
168
+
169
+ ptr1 = vfp_reg_ptr(true, a->vd);
170
+ ptr2 = vfp_reg_ptr(true, a->vn);
171
+ ptr3 = vfp_reg_ptr(true, a->vm);
172
+ gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
173
+ tcg_temp_free_ptr(ptr1);
174
+ tcg_temp_free_ptr(ptr2);
175
+ tcg_temp_free_ptr(ptr3);
176
+
177
+ return true;
178
+}
179
diff --git a/target/arm/translate.c b/target/arm/translate.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/target/arm/translate.c
182
+++ b/target/arm/translate.c
183
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
184
int vec_size;
185
uint32_t imm;
186
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
187
- TCGv_ptr ptr1, ptr2, ptr3;
188
+ TCGv_ptr ptr1, ptr2;
189
TCGv_i64 tmp64;
190
191
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
192
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
193
return 1;
194
}
195
switch (op) {
196
- case NEON_3R_SHA:
197
- /* The SHA-1/SHA-256 3-register instructions require special
198
- * treatment here, as their size field is overloaded as an
199
- * op type selector, and they all consume their input in a
200
- * single pass.
201
- */
202
- if (!q) {
203
- return 1;
204
- }
205
- if (!u) { /* SHA-1 */
206
- if (!dc_isar_feature(aa32_sha1, s)) {
207
- return 1;
208
- }
209
- ptr1 = vfp_reg_ptr(true, rd);
210
- ptr2 = vfp_reg_ptr(true, rn);
211
- ptr3 = vfp_reg_ptr(true, rm);
212
- tmp4 = tcg_const_i32(size);
213
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
214
- tcg_temp_free_i32(tmp4);
215
- } else { /* SHA-256 */
216
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
217
- return 1;
218
- }
219
- ptr1 = vfp_reg_ptr(true, rd);
220
- ptr2 = vfp_reg_ptr(true, rn);
221
- ptr3 = vfp_reg_ptr(true, rm);
222
- switch (size) {
223
- case 0:
224
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
225
- break;
226
- case 1:
227
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
228
- break;
229
- case 2:
230
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
231
- break;
232
- }
233
- }
234
- tcg_temp_free_ptr(ptr1);
235
- tcg_temp_free_ptr(ptr2);
236
- tcg_temp_free_ptr(ptr3);
237
- return 0;
238
-
239
case NEON_3R_VPADD_VQRDMLAH:
240
if (!u) {
241
break; /* VPADD */
242
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
243
case NEON_3R_VMUL:
244
case NEON_3R_VML:
245
case NEON_3R_VSHL:
246
+ case NEON_3R_SHA:
247
/* Already handled by decodetree */
248
return 1;
249
}
250
--
251
2.20.1
252
253
diff view generated by jsdifflib
Deleted patch
1
Convert the 64-bit element insns in the 3-reg-same group
2
to decodetree. This covers VQSHL, VRSHL and VQRSHL where
3
size==0b11.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-4-peter.maydell@linaro.org
8
---
9
target/arm/neon-dp.decode | 13 +++++++++++
10
target/arm/translate-neon.inc.c | 24 +++++++++++++++++++++
11
target/arm/translate.c | 38 ++-------------------------------
12
3 files changed, 39 insertions(+), 36 deletions(-)
13
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
19
VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
20
VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
21
22
+# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
23
+# The _rev suffix indicates that Vn and Vm are reversed (as explained
24
+# by the comment for the @3same_rev format).
25
+@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
26
+ &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
27
+
28
+VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
29
+VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
30
+VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
31
+VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
32
+VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
33
+VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
34
+
35
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
36
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
37
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
38
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate-neon.inc.c
41
+++ b/target/arm/translate-neon.inc.c
42
@@ -XXX,XX +XXX,XX @@ static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
43
44
return true;
45
}
46
+
47
+#define DO_3SAME_64(INSN, FUNC) \
48
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
49
+ uint32_t rn_ofs, uint32_t rm_ofs, \
50
+ uint32_t oprsz, uint32_t maxsz) \
51
+ { \
52
+ static const GVecGen3 op = { .fni8 = FUNC }; \
53
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
54
+ } \
55
+ DO_3SAME(INSN, gen_##INSN##_3s)
56
+
57
+#define DO_3SAME_64_ENV(INSN, FUNC) \
58
+ static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
59
+ { \
60
+ FUNC(d, cpu_env, n, m); \
61
+ } \
62
+ DO_3SAME_64(INSN, gen_##INSN##_elt)
63
+
64
+DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
65
+DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
66
+DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
67
+DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
68
+DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
69
+DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
70
diff --git a/target/arm/translate.c b/target/arm/translate.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate.c
73
+++ b/target/arm/translate.c
74
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
75
}
76
77
if (size == 3) {
78
- /* 64-bit element instructions. */
79
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
80
- neon_load_reg64(cpu_V0, rn + pass);
81
- neon_load_reg64(cpu_V1, rm + pass);
82
- switch (op) {
83
- case NEON_3R_VQSHL:
84
- if (u) {
85
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
86
- cpu_V1, cpu_V0);
87
- } else {
88
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
89
- cpu_V1, cpu_V0);
90
- }
91
- break;
92
- case NEON_3R_VRSHL:
93
- if (u) {
94
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
95
- } else {
96
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
97
- }
98
- break;
99
- case NEON_3R_VQRSHL:
100
- if (u) {
101
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
102
- cpu_V1, cpu_V0);
103
- } else {
104
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
105
- cpu_V1, cpu_V0);
106
- }
107
- break;
108
- default:
109
- abort();
110
- }
111
- neon_store_reg64(cpu_V0, rd + pass);
112
- }
113
- return 0;
114
+ /* 64-bit element instructions: handled by decodetree */
115
+ return 1;
116
}
117
pairwise = 0;
118
switch (op) {
119
--
120
2.20.1
121
122
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VHADD insns in the 3-reg-same group to decodetree.
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20200512163904.10918-5-peter.maydell@linaro.org
6
---
7
target/arm/neon-dp.decode | 2 ++
8
target/arm/translate-neon.inc.c | 24 ++++++++++++++++++++++++
9
target/arm/translate.c | 4 +---
10
3 files changed, 27 insertions(+), 3 deletions(-)
11
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/neon-dp.decode
15
+++ b/target/arm/neon-dp.decode
16
@@ -XXX,XX +XXX,XX @@
17
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
18
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
19
20
+VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
21
+VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
22
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
23
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
24
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-neon.inc.c
28
+++ b/target/arm/translate-neon.inc.c
29
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
30
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
31
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
32
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
33
+
34
+#define DO_3SAME_32(INSN, FUNC) \
35
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
36
+ uint32_t rn_ofs, uint32_t rm_ofs, \
37
+ uint32_t oprsz, uint32_t maxsz) \
38
+ { \
39
+ static const GVecGen3 ops[4] = { \
40
+ { .fni4 = gen_helper_neon_##FUNC##8 }, \
41
+ { .fni4 = gen_helper_neon_##FUNC##16 }, \
42
+ { .fni4 = gen_helper_neon_##FUNC##32 }, \
43
+ { 0 }, \
44
+ }; \
45
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
46
+ } \
47
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
48
+ { \
49
+ if (a->size > 2) { \
50
+ return false; \
51
+ } \
52
+ return do_3same(s, a, gen_##INSN##_3s); \
53
+ }
54
+
55
+DO_3SAME_32(VHADD_S, hadd_s)
56
+DO_3SAME_32(VHADD_U, hadd_u)
57
diff --git a/target/arm/translate.c b/target/arm/translate.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate.c
60
+++ b/target/arm/translate.c
61
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
62
case NEON_3R_VML:
63
case NEON_3R_VSHL:
64
case NEON_3R_SHA:
65
+ case NEON_3R_VHADD:
66
/* Already handled by decodetree */
67
return 1;
68
}
69
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
70
tmp2 = neon_load_reg(rm, pass);
71
}
72
switch (op) {
73
- case NEON_3R_VHADD:
74
- GEN_NEON_INTEGER_OP(hadd);
75
- break;
76
case NEON_3R_VRHADD:
77
GEN_NEON_INTEGER_OP(rhadd);
78
break;
79
--
80
2.20.1
81
82
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VABA and VABD insns in the 3-reg-same group to
2
decodetree.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-6-peter.maydell@linaro.org
7
---
8
target/arm/neon-dp.decode | 6 ++++++
9
target/arm/translate-neon.inc.c | 4 ++++
10
target/arm/translate.c | 22 ++--------------------
11
3 files changed, 12 insertions(+), 20 deletions(-)
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
18
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
19
VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
20
21
+VABD_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 0 .... @3same
22
+VABD_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 0 .... @3same
23
+
24
+VABA_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 1 .... @3same
25
+VABA_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 1 .... @3same
26
+
27
VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
28
VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
29
30
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-neon.inc.c
33
+++ b/target/arm/translate-neon.inc.c
34
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
35
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
36
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
37
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
38
+DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
39
+DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
40
+DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
41
+DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
42
43
#define DO_3SAME_CMP(INSN, COND) \
44
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
45
diff --git a/target/arm/translate.c b/target/arm/translate.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/translate.c
48
+++ b/target/arm/translate.c
49
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
50
/* VQRDMLSH : handled by decodetree */
51
return 1;
52
53
- case NEON_3R_VABD:
54
- if (u) {
55
- gen_gvec_uabd(size, rd_ofs, rn_ofs, rm_ofs,
56
- vec_size, vec_size);
57
- } else {
58
- gen_gvec_sabd(size, rd_ofs, rn_ofs, rm_ofs,
59
- vec_size, vec_size);
60
- }
61
- return 0;
62
-
63
- case NEON_3R_VABA:
64
- if (u) {
65
- gen_gvec_uaba(size, rd_ofs, rn_ofs, rm_ofs,
66
- vec_size, vec_size);
67
- } else {
68
- gen_gvec_saba(size, rd_ofs, rn_ofs, rm_ofs,
69
- vec_size, vec_size);
70
- }
71
- return 0;
72
-
73
case NEON_3R_VADD_VSUB:
74
case NEON_3R_LOGIC:
75
case NEON_3R_VMAX:
76
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
77
case NEON_3R_VSHL:
78
case NEON_3R_SHA:
79
case NEON_3R_VHADD:
80
+ case NEON_3R_VABD:
81
+ case NEON_3R_VABA:
82
/* Already handled by decodetree */
83
return 1;
84
}
85
--
86
2.20.1
87
88
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VRHADD and VHSUB 3-reg-same insns to decodetree.
2
(These are all the other insns in 3-reg-same which were using
3
GEN_NEON_INTEGER_OP() and which are not pairwise or
4
reversed-operands.)
5
1
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20200512163904.10918-7-peter.maydell@linaro.org
9
---
10
target/arm/neon-dp.decode | 6 ++++++
11
target/arm/translate-neon.inc.c | 4 ++++
12
target/arm/translate.c | 8 ++------
13
3 files changed, 12 insertions(+), 6 deletions(-)
14
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
18
+++ b/target/arm/neon-dp.decode
19
@@ -XXX,XX +XXX,XX @@ VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
20
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
21
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
22
23
+VRHADD_S_3s 1111 001 0 0 . .. .... .... 0001 . . . 0 .... @3same
24
+VRHADD_U_3s 1111 001 1 0 . .. .... .... 0001 . . . 0 .... @3same
25
+
26
@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
27
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
28
29
@@ -XXX,XX +XXX,XX @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
30
VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
31
VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
32
33
+VHSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 0 .... @3same
34
+VHSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 0 .... @3same
35
+
36
VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
37
VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
38
39
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-neon.inc.c
42
+++ b/target/arm/translate-neon.inc.c
43
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
44
45
DO_3SAME_32(VHADD_S, hadd_s)
46
DO_3SAME_32(VHADD_U, hadd_u)
47
+DO_3SAME_32(VHSUB_S, hsub_s)
48
+DO_3SAME_32(VHSUB_U, hsub_u)
49
+DO_3SAME_32(VRHADD_S, rhadd_s)
50
+DO_3SAME_32(VRHADD_U, rhadd_u)
51
diff --git a/target/arm/translate.c b/target/arm/translate.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/translate.c
54
+++ b/target/arm/translate.c
55
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
56
case NEON_3R_VSHL:
57
case NEON_3R_SHA:
58
case NEON_3R_VHADD:
59
+ case NEON_3R_VRHADD:
60
+ case NEON_3R_VHSUB:
61
case NEON_3R_VABD:
62
case NEON_3R_VABA:
63
/* Already handled by decodetree */
64
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
65
tmp2 = neon_load_reg(rm, pass);
66
}
67
switch (op) {
68
- case NEON_3R_VRHADD:
69
- GEN_NEON_INTEGER_OP(rhadd);
70
- break;
71
- case NEON_3R_VHSUB:
72
- GEN_NEON_INTEGER_OP(hsub);
73
- break;
74
case NEON_3R_VQSHL:
75
GEN_NEON_INTEGER_OP_ENV(qshl);
76
break;
77
--
78
2.20.1
79
80
diff view generated by jsdifflib
Deleted patch
1
Convert the VQSHL, VRSHL and VQRSHL insns in the 3-reg-same
2
group to decodetree. We have already implemented the size==0b11
3
case of these insns; this commit handles the remaining sizes.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-8-peter.maydell@linaro.org
8
---
9
target/arm/neon-dp.decode | 30 ++++++++++++++++++-----
10
target/arm/translate-neon.inc.c | 43 +++++++++++++++++++++++++++++++++
11
target/arm/translate.c | 22 +++--------------
12
3 files changed, 70 insertions(+), 25 deletions(-)
13
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@ VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
19
@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
20
&3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
21
22
-VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
23
-VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
24
-VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
25
-VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
26
-VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
27
-VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
28
+{
29
+ VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
30
+ VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
31
+}
32
+{
33
+ VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
34
+ VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
35
+}
36
+{
37
+ VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
38
+ VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
39
+}
40
+{
41
+ VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
42
+ VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
43
+}
44
+{
45
+ VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
46
+ VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
47
+}
48
+{
49
+ VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
50
+ VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
51
+}
52
53
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
54
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
55
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-neon.inc.c
58
+++ b/target/arm/translate-neon.inc.c
59
@@ -XXX,XX +XXX,XX @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
60
return do_3same(s, a, gen_##INSN##_3s); \
61
}
62
63
+/*
64
+ * Some helper functions need to be passed the cpu_env. In order
65
+ * to use those with the gvec APIs like tcg_gen_gvec_3() we need
66
+ * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
67
+ * and which call a NeonGenTwoOpEnvFn().
68
+ */
69
+#define WRAP_ENV_FN(WRAPNAME, FUNC) \
70
+ static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
71
+ { \
72
+ FUNC(d, cpu_env, n, m); \
73
+ }
74
+
75
+#define DO_3SAME_32_ENV(INSN, FUNC) \
76
+ WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
77
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
78
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
79
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
80
+ uint32_t rn_ofs, uint32_t rm_ofs, \
81
+ uint32_t oprsz, uint32_t maxsz) \
82
+ { \
83
+ static const GVecGen3 ops[4] = { \
84
+ { .fni4 = gen_##INSN##_tramp8 }, \
85
+ { .fni4 = gen_##INSN##_tramp16 }, \
86
+ { .fni4 = gen_##INSN##_tramp32 }, \
87
+ { 0 }, \
88
+ }; \
89
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
90
+ } \
91
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
92
+ { \
93
+ if (a->size > 2) { \
94
+ return false; \
95
+ } \
96
+ return do_3same(s, a, gen_##INSN##_3s); \
97
+ }
98
+
99
DO_3SAME_32(VHADD_S, hadd_s)
100
DO_3SAME_32(VHADD_U, hadd_u)
101
DO_3SAME_32(VHSUB_S, hsub_s)
102
DO_3SAME_32(VHSUB_U, hsub_u)
103
DO_3SAME_32(VRHADD_S, rhadd_s)
104
DO_3SAME_32(VRHADD_U, rhadd_u)
105
+DO_3SAME_32(VRSHL_S, rshl_s)
106
+DO_3SAME_32(VRSHL_U, rshl_u)
107
+
108
+DO_3SAME_32_ENV(VQSHL_S, qshl_s)
109
+DO_3SAME_32_ENV(VQSHL_U, qshl_u)
110
+DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
111
+DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
112
diff --git a/target/arm/translate.c b/target/arm/translate.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/target/arm/translate.c
115
+++ b/target/arm/translate.c
116
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
117
case NEON_3R_VHSUB:
118
case NEON_3R_VABD:
119
case NEON_3R_VABA:
120
+ case NEON_3R_VQSHL:
121
+ case NEON_3R_VRSHL:
122
+ case NEON_3R_VQRSHL:
123
/* Already handled by decodetree */
124
return 1;
125
}
126
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
127
}
128
pairwise = 0;
129
switch (op) {
130
- case NEON_3R_VQSHL:
131
- case NEON_3R_VRSHL:
132
- case NEON_3R_VQRSHL:
133
- {
134
- int rtmp;
135
- /* Shift instruction operands are reversed. */
136
- rtmp = rn;
137
- rn = rm;
138
- rm = rtmp;
139
- }
140
- break;
141
case NEON_3R_VPADD_VQRDMLAH:
142
case NEON_3R_VPMAX:
143
case NEON_3R_VPMIN:
144
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
145
tmp2 = neon_load_reg(rm, pass);
146
}
147
switch (op) {
148
- case NEON_3R_VQSHL:
149
- GEN_NEON_INTEGER_OP_ENV(qshl);
150
- break;
151
- case NEON_3R_VRSHL:
152
- GEN_NEON_INTEGER_OP(rshl);
153
- break;
154
- case NEON_3R_VQRSHL:
155
- GEN_NEON_INTEGER_OP_ENV(qrshl);
156
break;
157
case NEON_3R_VPMAX:
158
GEN_NEON_INTEGER_OP(pmax);
159
--
160
2.20.1
161
162
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon integer VPMAX and VPMIN 3-reg-same insns to
2
decodetree. These are 'pairwise' operations.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200512163904.10918-9-peter.maydell@linaro.org
7
---
8
target/arm/neon-dp.decode | 9 +++++
9
target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++++
10
target/arm/translate.c | 17 +-------
11
3 files changed, 82 insertions(+), 15 deletions(-)
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@
18
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
19
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
21
+@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
22
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
23
+
24
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
25
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
26
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
27
@@ -XXX,XX +XXX,XX @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
28
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
29
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
30
31
+VPMAX_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
32
+VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
33
+
34
+VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
35
+VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
36
+
37
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
38
39
SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
40
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/translate-neon.inc.c
43
+++ b/target/arm/translate-neon.inc.c
44
@@ -XXX,XX +XXX,XX @@ DO_3SAME_32_ENV(VQSHL_S, qshl_s)
45
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
46
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
47
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
48
+
49
+static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
50
+{
51
+ /* Operations handled pairwise 32 bits at a time */
52
+ TCGv_i32 tmp, tmp2, tmp3;
53
+
54
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
55
+ return false;
56
+ }
57
+
58
+ /* UNDEF accesses to D16-D31 if they don't exist. */
59
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
60
+ ((a->vd | a->vn | a->vm) & 0x10)) {
61
+ return false;
62
+ }
63
+
64
+ if (a->size == 3) {
65
+ return false;
66
+ }
67
+
68
+ if (!vfp_access_check(s)) {
69
+ return true;
70
+ }
71
+
72
+ assert(a->q == 0); /* enforced by decode patterns */
73
+
74
+ /*
75
+ * Note that we have to be careful not to clobber the source operands
76
+ * in the "vm == vd" case by storing the result of the first pass too
77
+ * early. Since Q is 0 there are always just two passes, so instead
78
+ * of a complicated loop over each pass we just unroll.
79
+ */
80
+ tmp = neon_load_reg(a->vn, 0);
81
+ tmp2 = neon_load_reg(a->vn, 1);
82
+ fn(tmp, tmp, tmp2);
83
+ tcg_temp_free_i32(tmp2);
84
+
85
+ tmp3 = neon_load_reg(a->vm, 0);
86
+ tmp2 = neon_load_reg(a->vm, 1);
87
+ fn(tmp3, tmp3, tmp2);
88
+ tcg_temp_free_i32(tmp2);
89
+
90
+ neon_store_reg(a->vd, 0, tmp);
91
+ neon_store_reg(a->vd, 1, tmp3);
92
+ return true;
93
+}
94
+
95
+#define DO_3SAME_PAIR(INSN, func) \
96
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
97
+ { \
98
+ static NeonGenTwoOpFn * const fns[] = { \
99
+ gen_helper_neon_##func##8, \
100
+ gen_helper_neon_##func##16, \
101
+ gen_helper_neon_##func##32, \
102
+ }; \
103
+ if (a->size > 2) { \
104
+ return false; \
105
+ } \
106
+ return do_3same_pair(s, a, fns[a->size]); \
107
+ }
108
+
109
+/* 32-bit pairwise ops end up the same as the elementwise versions. */
110
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
111
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
112
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
113
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
114
+
115
+DO_3SAME_PAIR(VPMAX_S, pmax_s)
116
+DO_3SAME_PAIR(VPMIN_S, pmin_s)
117
+DO_3SAME_PAIR(VPMAX_U, pmax_u)
118
+DO_3SAME_PAIR(VPMIN_U, pmin_u)
119
diff --git a/target/arm/translate.c b/target/arm/translate.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/translate.c
122
+++ b/target/arm/translate.c
123
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
124
}
125
}
126
127
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
128
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
129
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
130
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
131
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
132
-
133
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
134
switch ((size << 1) | u) { \
135
case 0: \
136
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
137
case NEON_3R_VQSHL:
138
case NEON_3R_VRSHL:
139
case NEON_3R_VQRSHL:
140
+ case NEON_3R_VPMAX:
141
+ case NEON_3R_VPMIN:
142
/* Already handled by decodetree */
143
return 1;
144
}
145
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
146
pairwise = 0;
147
switch (op) {
148
case NEON_3R_VPADD_VQRDMLAH:
149
- case NEON_3R_VPMAX:
150
- case NEON_3R_VPMIN:
151
pairwise = 1;
152
break;
153
case NEON_3R_FLOAT_ARITH:
154
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
155
tmp2 = neon_load_reg(rm, pass);
156
}
157
switch (op) {
158
- break;
159
- case NEON_3R_VPMAX:
160
- GEN_NEON_INTEGER_OP(pmax);
161
- break;
162
- case NEON_3R_VPMIN:
163
- GEN_NEON_INTEGER_OP(pmin);
164
- break;
165
case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
166
if (!u) { /* VQDMULH */
167
switch (size) {
168
--
169
2.20.1
170
171
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon integer VPADD 3-reg-same insns to decodetree. These
2
are 'pairwise' operations. (Note that VQRDMLAH, which shares the
3
same primary opcode but has U=1, has already been converted.)
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-10-peter.maydell@linaro.org
8
---
9
target/arm/neon-dp.decode | 2 ++
10
target/arm/translate-neon.inc.c | 2 ++
11
target/arm/translate.c | 19 +------------------
12
3 files changed, 5 insertions(+), 18 deletions(-)
13
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@ VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
19
VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
20
VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
21
22
+VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
23
+
24
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
25
26
SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
27
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-neon.inc.c
30
+++ b/target/arm/translate-neon.inc.c
31
@@ -XXX,XX +XXX,XX @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
32
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
33
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
34
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
35
+#define gen_helper_neon_padd_u32 tcg_gen_add_i32
36
37
DO_3SAME_PAIR(VPMAX_S, pmax_s)
38
DO_3SAME_PAIR(VPMIN_S, pmin_s)
39
DO_3SAME_PAIR(VPMAX_U, pmax_u)
40
DO_3SAME_PAIR(VPMIN_U, pmin_u)
41
+DO_3SAME_PAIR(VPADD, padd_u)
42
diff --git a/target/arm/translate.c b/target/arm/translate.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/translate.c
45
+++ b/target/arm/translate.c
46
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
47
return 1;
48
}
49
switch (op) {
50
- case NEON_3R_VPADD_VQRDMLAH:
51
- if (!u) {
52
- break; /* VPADD */
53
- }
54
- /* VQRDMLAH : handled by decodetree */
55
- return 1;
56
-
57
case NEON_3R_VFM_VQRDMLSH:
58
if (!u) {
59
/* VFM, VFMS */
60
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
61
case NEON_3R_VQRSHL:
62
case NEON_3R_VPMAX:
63
case NEON_3R_VPMIN:
64
+ case NEON_3R_VPADD_VQRDMLAH:
65
/* Already handled by decodetree */
66
return 1;
67
}
68
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
69
}
70
pairwise = 0;
71
switch (op) {
72
- case NEON_3R_VPADD_VQRDMLAH:
73
- pairwise = 1;
74
- break;
75
case NEON_3R_FLOAT_ARITH:
76
pairwise = (u && size < 2); /* if VPADD (float) */
77
break;
78
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
79
}
80
}
81
break;
82
- case NEON_3R_VPADD_VQRDMLAH:
83
- switch (size) {
84
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
85
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
86
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
87
- default: abort();
88
- }
89
- break;
90
case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
91
{
92
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
93
--
94
2.20.1
95
96
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VQDMULH and VQRDMULH 3-reg-same insns to
2
decodetree. These are the last integer operations in the
3
3-reg-same group.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-11-peter.maydell@linaro.org
8
---
9
target/arm/neon-dp.decode | 3 +++
10
target/arm/translate-neon.inc.c | 24 ++++++++++++++++++++++++
11
target/arm/translate.c | 24 +-----------------------
12
3 files changed, 28 insertions(+), 23 deletions(-)
13
14
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/neon-dp.decode
17
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@ VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
19
VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
20
VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
21
22
+VQDMULH_3s 1111 001 0 0 . .. .... .... 1011 . . . 0 .... @3same
23
+VQRDMULH_3s 1111 001 1 0 . .. .... .... 1011 . . . 0 .... @3same
24
+
25
VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
26
27
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
28
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-neon.inc.c
31
+++ b/target/arm/translate-neon.inc.c
32
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPMIN_S, pmin_s)
33
DO_3SAME_PAIR(VPMAX_U, pmax_u)
34
DO_3SAME_PAIR(VPMIN_U, pmin_u)
35
DO_3SAME_PAIR(VPADD, padd_u)
36
+
37
+#define DO_3SAME_VQDMULH(INSN, FUNC) \
38
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
39
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
40
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
41
+ uint32_t rn_ofs, uint32_t rm_ofs, \
42
+ uint32_t oprsz, uint32_t maxsz) \
43
+ { \
44
+ static const GVecGen3 ops[2] = { \
45
+ { .fni4 = gen_##INSN##_tramp16 }, \
46
+ { .fni4 = gen_##INSN##_tramp32 }, \
47
+ }; \
48
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
49
+ } \
50
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
51
+ { \
52
+ if (a->size != 1 && a->size != 2) { \
53
+ return false; \
54
+ } \
55
+ return do_3same(s, a, gen_##INSN##_3s); \
56
+ }
57
+
58
+DO_3SAME_VQDMULH(VQDMULH, qdmulh)
59
+DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
60
diff --git a/target/arm/translate.c b/target/arm/translate.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/translate.c
63
+++ b/target/arm/translate.c
64
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
65
case NEON_3R_VPMAX:
66
case NEON_3R_VPMIN:
67
case NEON_3R_VPADD_VQRDMLAH:
68
+ case NEON_3R_VQDMULH_VQRDMULH:
69
/* Already handled by decodetree */
70
return 1;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
73
tmp2 = neon_load_reg(rm, pass);
74
}
75
switch (op) {
76
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
77
- if (!u) { /* VQDMULH */
78
- switch (size) {
79
- case 1:
80
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
81
- break;
82
- case 2:
83
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
84
- break;
85
- default: abort();
86
- }
87
- } else { /* VQRDMULH */
88
- switch (size) {
89
- case 1:
90
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
91
- break;
92
- case 2:
93
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
94
- break;
95
- default: abort();
96
- }
97
- }
98
- break;
99
case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
100
{
101
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
102
--
103
2.20.1
104
105
diff view generated by jsdifflib
Deleted patch
1
Convert the Neon VADD, VSUB, VABD 3-reg-same insns to decodetree.
2
We already have gvec helpers for addition and subtraction, but must
3
add one for fabd.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20200512163904.10918-12-peter.maydell@linaro.org
8
---
9
target/arm/helper.h | 3 ++-
10
target/arm/neon-dp.decode | 8 ++++++++
11
target/arm/neon_helper.c | 7 -------
12
target/arm/translate-neon.inc.c | 28 ++++++++++++++++++++++++++++
13
target/arm/translate.c | 10 +++-------
14
target/arm/vec_helper.c | 7 +++++++
15
6 files changed, 48 insertions(+), 15 deletions(-)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
22
DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
23
DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
24
25
-DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
26
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
27
DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
28
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
29
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
32
33
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
34
+
35
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
36
void, ptr, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
38
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/neon-dp.decode
41
+++ b/target/arm/neon-dp.decode
42
@@ -XXX,XX +XXX,XX @@
43
@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
44
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
45
46
+# For FP insns the high bit of 'size' is used as part of opcode decode
47
+@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
48
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
49
+
50
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
51
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
52
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
53
@@ -XXX,XX +XXX,XX @@ SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
54
vm=%vm_dp vn=%vn_dp vd=%vd_dp
55
56
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
57
+
58
+VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
59
+VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
60
+VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
61
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/neon_helper.c
64
+++ b/target/arm/neon_helper.c
65
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
66
}
67
68
/* NEON Float helpers. */
69
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
70
-{
71
- float_status *fpst = fpstp;
72
- float32 f0 = make_float32(a);
73
- float32 f1 = make_float32(b);
74
- return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
75
-}
76
77
/* Floating point comparisons produce an integer result.
78
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
79
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/target/arm/translate-neon.inc.c
82
+++ b/target/arm/translate-neon.inc.c
83
@@ -XXX,XX +XXX,XX @@ DO_3SAME_PAIR(VPADD, padd_u)
84
85
DO_3SAME_VQDMULH(VQDMULH, qdmulh)
86
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
87
+
88
+/*
89
+ * For all the functions using this macro, size == 1 means fp16,
90
+ * which is an architecture extension we don't implement yet.
91
+ */
92
+#define DO_3S_FP_GVEC(INSN,FUNC) \
93
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
94
+ uint32_t rn_ofs, uint32_t rm_ofs, \
95
+ uint32_t oprsz, uint32_t maxsz) \
96
+ { \
97
+ TCGv_ptr fpst = get_fpstatus_ptr(1); \
98
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
99
+ oprsz, maxsz, 0, FUNC); \
100
+ tcg_temp_free_ptr(fpst); \
101
+ } \
102
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
103
+ { \
104
+ if (a->size != 0) { \
105
+ /* TODO fp16 support */ \
106
+ return false; \
107
+ } \
108
+ return do_3same(s, a, gen_##INSN##_3s); \
109
+ }
110
+
111
+
112
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
113
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
114
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
115
diff --git a/target/arm/translate.c b/target/arm/translate.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/target/arm/translate.c
118
+++ b/target/arm/translate.c
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
switch (op) {
121
case NEON_3R_FLOAT_ARITH:
122
pairwise = (u && size < 2); /* if VPADD (float) */
123
+ if (!pairwise) {
124
+ return 1; /* handled by decodetree */
125
+ }
126
break;
127
case NEON_3R_FLOAT_MINMAX:
128
pairwise = u; /* if VPMIN/VPMAX (float) */
129
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
130
{
131
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
132
switch ((u << 2) | size) {
133
- case 0: /* VADD */
134
case 4: /* VPADD */
135
gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
136
break;
137
- case 2: /* VSUB */
138
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
139
- break;
140
- case 6: /* VABD */
141
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
142
- break;
143
default:
144
abort();
145
}
146
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/target/arm/vec_helper.c
149
+++ b/target/arm/vec_helper.c
150
@@ -XXX,XX +XXX,XX @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
151
return result;
152
}
153
154
+static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
155
+{
156
+ return float32_abs(float32_sub(op1, op2, stat));
157
+}
158
+
159
#define DO_3OP(NAME, FUNC, TYPE) \
160
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
161
{ \
162
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
163
DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
164
DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
165
166
+DO_3OP(gvec_fabd_s, float32_abd, float32)
167
+
168
#ifdef TARGET_AARCH64
169
170
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
171
--
172
2.20.1
173
174
diff view generated by jsdifflib