The following changes since commit 5767815218efd3cbfd409505ed824d5f356044ae:

  Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging (2024-02-14 15:45:52 +0000)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240215

for you to fetch changes up to f780e63fe731b058fe52d43653600d8729a1b5f2:

  docs: Add documentation for the mps3-an536 board (2024-02-15 14:32:39 +0000)

----------------------------------------------------------------
target-arm queue:
 * hw/arm/xilinx_zynq: Wire FIQ between CPU <> GIC
 * linux-user/aarch64: Choose SYNC as the preferred MTE mode
 * Fix some errors in SVE/SME handling of MTE tags
 * hw/pci-host/raven.c: Mark raven_io_ops as implementing unaligned accesses
 * hw/block/tc58128: Don't emit deprecation warning under qtest
 * tests/qtest: Fix handling of npcm7xx and GMAC tests
 * hw/arm/virt: Wire up non-secure EL2 virtual timer IRQ
 * tests/qtest/npcm7xx_emc-test: Connect all NICs to a backend
 * Don't get MDCR_EL2 in pmu_counter_enabled() before checking ARM_FEATURE_PMU
 * hw/arm/smmuv3: add support for stage 1 access fault
 * hw/arm/stellaris: QOM cleanups
 * Use new CBAR encoding for all v8 CPUs, not all aarch64 CPUs
 * Improve Cortex-R52 IMPDEF sysreg modelling
 * Allow access to SPSR_hyp from hyp mode
 * New board model mps3-an536 (Cortex-R52)

----------------------------------------------------------------
Luc Michel (1):
      hw/arm/smmuv3: add support for stage 1 access fault

Nabih Estefan (1):
      tests/qtest: Fix GMAC test to run on a machine in upstream QEMU

Peter Maydell (22):
      hw/pci-host/raven.c: Mark raven_io_ops as implementing unaligned accesses
      hw/block/tc58128: Don't emit deprecation warning under qtest
      tests/qtest/meson.build: Don't include qtests_npcm7xx in qtests_aarch64
      tests/qtest/bios-tables-test: Allow changes to virt GTDT
      hw/arm/virt: Wire up non-secure EL2 virtual timer IRQ
      tests/qtest/bios-tables-tests: Update virt golden reference
      hw/arm/npcm7xx: Call qemu_configure_nic_device() for GMAC modules
      tests/qtest/npcm7xx_emc-test: Connect all NICs to a backend
      target/arm: Don't get MDCR_EL2 in pmu_counter_enabled() before checking ARM_FEATURE_PMU
      target/arm: Use new CBAR encoding for all v8 CPUs, not all aarch64 CPUs
      target/arm: The Cortex-R52 has a read-only CBAR
      target/arm: Add Cortex-R52 IMPDEF sysregs
      target/arm: Allow access to SPSR_hyp from hyp mode
      hw/misc/mps2-scc: Fix condition for CFG3 register
      hw/misc/mps2-scc: Factor out which-board conditionals
      hw/misc/mps2-scc: Make changes needed for AN536 FPGA image
      hw/arm/mps3r: Initial skeleton for mps3-an536 board
      hw/arm/mps3r: Add CPUs, GIC, and per-CPU RAM
      hw/arm/mps3r: Add UARTs
      hw/arm/mps3r: Add GPIO, watchdog, dual-timer, I2C devices
      hw/arm/mps3r: Add remaining devices
      docs: Add documentation for the mps3-an536 board

Philippe Mathieu-Daudé (5):
      hw/arm/xilinx_zynq: Wire FIQ between CPU <> GIC
      hw/arm/stellaris: Convert ADC controller to Resettable interface
      hw/arm/stellaris: Convert I2C controller to Resettable interface
      hw/arm/stellaris: Add missing QOM 'machine' parent
      hw/arm/stellaris: Add missing QOM 'SoC' parent

Richard Henderson (6):
      linux-user/aarch64: Choose SYNC as the preferred MTE mode
      target/arm: Fix nregs computation in do_{ld,st}_zpa
      target/arm: Adjust and validate mtedesc sizem1
      target/arm: Split out make_svemte_desc
      target/arm: Handle mte in do_ldrq, do_ldro
      target/arm: Fix SVE/SME gross MTE suppression checks

 MAINTAINERS | 3 +-
 docs/system/arm/mps2.rst | 37 +-
 configs/devices/arm-softmmu/default.mak | 1 +
 hw/arm/smmuv3-internal.h | 1 +
 include/hw/arm/smmu-common.h | 1 +
 include/hw/arm/virt.h | 2 +
 include/hw/misc/mps2-scc.h | 1 +
 linux-user/aarch64/target_prctl.h | 29 +-
 target/arm/internals.h | 2 +-
 target/arm/tcg/translate-a64.h | 2 +
 hw/arm/mps3r.c | 640 ++++++++++++++++++++++++++++++++
 hw/arm/npcm7xx.c | 1 +
 hw/arm/smmu-common.c | 11 +
 hw/arm/smmuv3.c | 1 +
 hw/arm/stellaris.c | 47 ++-
 hw/arm/virt-acpi-build.c | 20 +-
 hw/arm/virt.c | 60 ++-
 hw/arm/xilinx_zynq.c | 2 +
 hw/block/tc58128.c | 4 +-
 hw/misc/mps2-scc.c | 138 ++++++-
 hw/pci-host/raven.c | 1 +
 target/arm/helper.c | 14 +-
 target/arm/tcg/cpu32.c | 109 ++++++
 target/arm/tcg/op_helper.c | 43 ++-
 target/arm/tcg/sme_helper.c | 8 +-
 target/arm/tcg/sve_helper.c | 12 +-
 target/arm/tcg/translate-sme.c | 15 +-
 target/arm/tcg/translate-sve.c | 83 +++--
 target/arm/tcg/translate.c | 19 +-
 tests/qtest/npcm7xx_emc-test.c | 5 +-
 tests/qtest/npcm_gmac-test.c | 84 +----
 hw/arm/Kconfig | 5 +
 hw/arm/meson.build | 1 +
 tests/data/acpi/virt/FACP | Bin 276 -> 276 bytes
 tests/data/acpi/virt/GTDT | Bin 96 -> 104 bytes
 tests/qtest/meson.build | 4 +-
 36 files changed, 1184 insertions(+), 222 deletions(-)
 create mode 100644 hw/arm/mps3r.c
Deleted patch
Generic code in target/arm wants to call acpi_ghes_record_errors();
provide a stub version so that we don't fail to link when
CONFIG_ACPI_APEI is not set. This requires us to add a new
ghes-stub.c file to contain it and the meson.build mechanics
to use it when appropriate.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
Message-id: 20210603171259.27962-2-peter.maydell@linaro.org
---
 hw/acpi/ghes-stub.c | 17 +++++++++++++++++
 hw/acpi/meson.build | 6 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 hw/acpi/ghes-stub.c

diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/acpi/ghes-stub.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Support for generating APEI tables and recording CPER for Guests:
+ * stub functions.
+ *
+ * Copyright (c) 2021 Linaro, Ltd
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/acpi/ghes.h"
+
+int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
+{
+    return -1;
+}
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -XXX,XX +XXX,XX @@ acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c'))
-acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'))
+acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_X86', if_true: files('core.c', 'piix4.c', 'pcihp.c'), if_false: files('acpi-stub.c'))
 acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c'))
 acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c'))
 acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c'))
 acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c'))
-softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c'))
+softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c'))
 softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
 softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c',
-                                                  'acpi-x86-stub.c', 'ipmi-stub.c'))
+                                                  'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c'))
--
2.20.1
Deleted patch
Allow code elsewhere in the system to check whether the ACPI GHES
table is present, so it can determine whether it is OK to try to
record an error by calling acpi_ghes_record_errors().

(We don't need to migrate the new 'present' field in AcpiGhesState,
because it is set once at system initialization and doesn't change.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
Message-id: 20210603171259.27962-3-peter.maydell@linaro.org
---
 include/hw/acpi/ghes.h | 9 +++++++++
 hw/acpi/ghes-stub.c | 5 +++++
 hw/acpi/ghes.c | 17 +++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -XXX,XX +XXX,XX @@ enum {
 
 typedef struct AcpiGhesState {
     uint64_t ghes_addr_le;
+    bool present; /* True if GHES is present at all on this board */
 } AcpiGhesState;
 
 void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
@@ -XXX,XX +XXX,XX @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
 void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
                           GArray *hardware_errors);
 int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
+
+/**
+ * acpi_ghes_present: Report whether ACPI GHES table is present
+ *
+ * Returns: true if the system has an ACPI GHES table and it is
+ * safe to call acpi_ghes_record_errors() to record a memory error.
+ */
+bool acpi_ghes_present(void);
 #endif
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
 {
     return -1;
 }
+
+bool acpi_ghes_present(void)
+{
+    return false;
+}
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -XXX,XX +XXX,XX @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
     /* Create a read-write fw_cfg file for Address */
     fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
         NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
+
+    ags->present = true;
 }
 
 int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
@@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
 
     return ret;
 }
+
+bool acpi_ghes_present(void)
+{
+    AcpiGedState *acpi_ged_state;
+    AcpiGhesState *ags;
+
+    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
+                                                       NULL));
+
+    if (!acpi_ged_state) {
+        return false;
+    }
+    ags = &acpi_ged_state->ghes_state;
+    return ags->present;
+}
--
2.20.1
Deleted patch
The virt_is_acpi_enabled() function is specific to the virt board, as
is the check for its 'ras' property. Use the new acpi_ghes_present()
function to check whether we should report memory errors via
acpi_ghes_record_errors().

This avoids a link error if QEMU was built without support for the
virt board, and provides a mechanism that can be used by any future
board models that want to add ACPI memory error reporting support
(they only need to call acpi_ghes_add_fw_cfg()).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
Message-id: 20210603171259.27962-4-peter.maydell@linaro.org
---
 target/arm/kvm64.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -XXX,XX +XXX,XX @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 {
     ram_addr_t ram_addr;
     hwaddr paddr;
-    Object *obj = qdev_get_machine();
-    VirtMachineState *vms = VIRT_MACHINE(obj);
-    bool acpi_enabled = virt_is_acpi_enabled(vms);
 
     assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
 
-    if (acpi_enabled && addr &&
-        object_property_get_bool(obj, "ras", NULL)) {
+    if (acpi_ghes_present() && addr) {
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
--
2.20.1
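Taken together, the three patches above replace a virt-only check with a
generic query. As a usage illustration only, here is a hedged sketch of the
calling pattern they enable; the wrapper function, its name and its
source_id parameter are hypothetical, while acpi_ghes_present() and
acpi_ghes_record_errors() are the real functions from these patches.

/*
 * Hedged sketch, not QEMU code: record a guest memory error at 'paddr'
 * only if the board actually created a GHES table.  With the stub in
 * place this also links when CONFIG_ACPI_APEI is not built in.
 */
#include "qemu/osdep.h"
#include "hw/acpi/ghes.h"

static bool try_report_memory_error(uint8_t source_id, uint64_t paddr)
{
    if (!acpi_ghes_present()) {
        /* No GHES table on this board; nothing to record. */
        return false;
    }
    /* Returns 0 on success, negative otherwise (including from the stub). */
    return acpi_ghes_record_errors(source_id, paddr) == 0;
}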
From: Philippe Mathieu-Daudé <philmd@linaro.org>

Similarly to commits dadbb58f59..5ae79fe825 for other ARM boards,
connect FIQ output of the GIC CPU interfaces to the CPU.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240130152548.17855-1-philmd@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/xilinx_zynq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -XXX,XX +XXX,XX @@ static void zynq_init(MachineState *machine)
     sysbus_mmio_map(busdev, 0, MPCORE_PERIPHBASE);
     sysbus_connect_irq(busdev, 0,
                        qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ));
+    sysbus_connect_irq(busdev, 1,
+                       qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_FIQ));
 
     for (n = 0; n < 64; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
--
2.34.1
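For context, the wiring pattern referenced in the commit message generalises
to multi-CPU boards roughly as sketched below. This is an illustrative
sketch, not code from this patch: the function, its arguments and the
output-numbering assumption (IRQ outputs for all CPUs first, then FIQ
outputs, as in hw/arm/virt.c) are assumptions, while sysbus_connect_irq(),
qdev_get_gpio_in(), ARM_CPU_IRQ and ARM_CPU_FIQ are the QEMU APIs used above.

/*
 * Illustrative sketch: connect a GIC's per-CPU IRQ and FIQ outputs to
 * each CPU's input lines on a hypothetical board.  Assumes the common
 * arm_gic sysbus output layout: outputs [0, num_cpus) are IRQ, then
 * [num_cpus, 2 * num_cpus) are FIQ.
 */
#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "hw/core/cpu.h"
#include "cpu.h"            /* for ARM_CPU_IRQ / ARM_CPU_FIQ */

static void wire_gic_to_cpus(SysBusDevice *gicbusdev, int num_cpus)
{
    for (int i = 0; i < num_cpus; i++) {
        DeviceState *cpudev = DEVICE(qemu_get_cpu(i));

        sysbus_connect_irq(gicbusdev, i,
                           qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
        sysbus_connect_irq(gicbusdev, i + num_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
    }
}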
From: Richard Henderson <richard.henderson@linaro.org>

The API does not generate an error for setting ASYNC | SYNC; that merely
constrains the selection vs the per-cpu default. For qemu linux-user,
choose SYNC as the default.

Cc: qemu-stable@nongnu.org
Reported-by: Gustavo Romero <gustavo.romero@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
Message-id: 20240207025210.8837-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 linux-user/aarch64/target_prctl.h | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/aarch64/target_prctl.h
+++ b/linux-user/aarch64/target_prctl.h
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_tagged_addr_ctrl(CPUArchState *env, abi_long arg2)
     env->tagged_addr_enable = arg2 & PR_TAGGED_ADDR_ENABLE;
 
     if (cpu_isar_feature(aa64_mte, cpu)) {
-        switch (arg2 & PR_MTE_TCF_MASK) {
-        case PR_MTE_TCF_NONE:
-        case PR_MTE_TCF_SYNC:
-        case PR_MTE_TCF_ASYNC:
-            break;
-        default:
-            return -EINVAL;
-        }
-
         /*
          * Write PR_MTE_TCF to SCTLR_EL1[TCF0].
-         * Note that the syscall values are consistent with hw.
+         *
+         * The kernel has a per-cpu configuration for the sysadmin,
+         * /sys/devices/system/cpu/cpu<N>/mte_tcf_preferred,
+         * which qemu does not implement.
+         *
+         * Because there is no performance difference between the modes, and
+         * because SYNC is most useful for debugging MTE errors, choose SYNC
+         * as the preferred mode.  With this preference, and the way the API
+         * uses only two bits, there is no way for the program to select
+         * ASYMM mode.
          */
-        env->cp15.sctlr_el[1] =
-            deposit64(env->cp15.sctlr_el[1], 38, 2, arg2 >> PR_MTE_TCF_SHIFT);
+        unsigned tcf = 0;
+        if (arg2 & PR_MTE_TCF_SYNC) {
+            tcf = 1;
+        } else if (arg2 & PR_MTE_TCF_ASYNC) {
+            tcf = 2;
+        }
+        env->cp15.sctlr_el[1] = deposit64(env->cp15.sctlr_el[1], 38, 2, tcf);
 
         /*
          * Write PR_MTE_TAG to GCR_EL1[Exclude].
--
2.34.1
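Viewed from the guest side, nothing about the prctl API changes; only QEMU's
choice when a program permits more than one mode does. The following is a
hedged, self-contained userspace example (not part of this patch) that
requests "SYNC or ASYNC" tag checking and, under qemu-aarch64 linux-user,
now ends up with SYNC; the PR_* constants are the standard Linux ones,
restated as fallbacks for older headers.

/*
 * Hedged userspace example: enable tagged addresses and request
 * "SYNC or ASYNC" MTE tag-check faulting via the Linux prctl API.
 */
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TAGGED_ADDR_CTRL
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
#define PR_TAGGED_ADDR_ENABLE   (1UL << 0)
#endif
#ifndef PR_MTE_TCF_SHIFT
#define PR_MTE_TCF_SHIFT 1
#define PR_MTE_TCF_SYNC  (1UL << PR_MTE_TCF_SHIFT)
#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
#endif

int main(void)
{
    unsigned long ctrl = PR_TAGGED_ADDR_ENABLE
                       | PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC;

    if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0) != 0) {
        perror("PR_SET_TAGGED_ADDR_CTRL");
        return 1;
    }
    /* The kernel (or QEMU linux-user) picks the preferred mode from the set. */
    printf("tagged-addr ctrl now: 0x%lx\n",
           (unsigned long)prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
    return 0;
}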
From: Richard Henderson <richard.henderson@linaro.org>

The field is encoded as [0-3], which is convenient for
indexing our array of function pointers, but the true
value is [1-4]. Adjust before calling do_mem_zpa.

Add an assert, and move the comment re passing ZT to
the helper back next to the relevant code.

Cc: qemu-stable@nongnu.org
Fixes: 206adacfb8d ("target/arm: Add mte helpers for sve scalar + int loads")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
Message-id: 20240207025210.8837-3-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-sve.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
     TCGv_ptr t_pg;
     int desc = 0;
 
-    /*
-     * For e.g. LD4, there are not enough arguments to pass all 4
-     * registers as pointers, so encode the regno into the data field.
-     * For consistency, do this even for LD1.
-     */
+    assert(mte_n >= 1 && mte_n <= 4);
     if (s->mte_active[0]) {
         int msz = dtype_msz(dtype);
 
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
         addr = clean_data_tbi(s, addr);
     }
 
+    /*
+     * For e.g. LD4, there are not enough arguments to pass all 4
+     * registers as pointers, so encode the regno into the data field.
+     * For consistency, do this even for LD1.
+     */
     desc = simd_desc(vsz, vsz, zt | desc);
     t_pg = tcg_temp_new_ptr();
 
@@ -XXX,XX +XXX,XX @@ static void do_ld_zpa(DisasContext *s, int zt, int pg,
      * accessible via the instruction encoding.
      */
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
+    do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
 }
 
 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
@@ -XXX,XX +XXX,XX @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
     if (nreg == 0) {
         /* ST1 */
         fn = fn_single[s->mte_active[0]][be][msz][esz];
-        nreg = 1;
     } else {
         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
         assert(msz == esz);
         fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
     }
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
+    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn);
 }
 
 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
--
2.34.1
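The heart of the fix is an off-by-one convention; here is a hedged
restatement of it as standalone C. The names are illustrative, only the
0..3 versus 1..4 relationship and the assert come from the commit.

/*
 * Illustrative only: the SVE LD/ST "nreg" decode field is 0..3 so it can
 * index per-insn helper arrays (LD1..LD4), but the memory code now wants
 * the true register count 1..4, mirroring the new assert above.
 */
#include <assert.h>

static inline unsigned true_register_count(unsigned nreg_field)
{
    assert(nreg_field <= 3);   /* LD1/ST1 .. LD4/ST4 */
    return nreg_field + 1;     /* value passed on to the memory helper */
}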
From: Richard Henderson <richard.henderson@linaro.org>

When we added SVE_MTEDESC_SHIFT, we effectively limited the
maximum size of MTEDESC. Adjust SIZEM1 to consume the remaining
bits (32 - 10 - 5 - 12 == 5). Assert that the data to be stored
fits within the field (expecting 8 * 4 - 1 == 31, exact fit).

Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
Message-id: 20240207025210.8837-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/internals.h | 2 +-
 target/arm/tcg/translate-sve.c | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -XXX,XX +XXX,XX @@ FIELD(MTEDESC, TBI, 4, 2)
 FIELD(MTEDESC, TCMA, 6, 2)
 FIELD(MTEDESC, WRITE, 8, 1)
 FIELD(MTEDESC, ALIGN, 9, 3)
-FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - 12)  /* size - 1 */
+FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12)  /* size - 1 */
 
 bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr);
 uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
 {
     unsigned vsz = vec_full_reg_size(s);
     TCGv_ptr t_pg;
+    uint32_t sizem1;
     int desc = 0;
 
     assert(mte_n >= 1 && mte_n <= 4);
+    sizem1 = (mte_n << dtype_msz(dtype)) - 1;
+    assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
     if (s->mte_active[0]) {
-        int msz = dtype_msz(dtype);
-
         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
-        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
+        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
         desc <<= SVE_MTEDESC_SHIFT;
     } else {
         addr = clean_data_tbi(s, addr);
--
2.34.1
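The bit budget quoted in the commit message can be sanity-checked in
isolation. The snippet below restates the numbers as local constants purely
for illustration (they are copied from the commit text, not taken from the
QEMU headers) and verifies that the largest value ever stored, 8 * 4 - 1,
exactly fills the remaining 5-bit field.

/*
 * Illustrative arithmetic check of the SIZEM1 bit budget described above.
 * All constants here are local restatements of the commit message.
 */
#include <assert.h>

enum {
    DESC_BITS        = 32,  /* total descriptor width */
    MTEDESC_SHIFT    = 10,  /* SVE_MTEDESC_SHIFT: low bits left for simd data */
    SIMD_SIZE_BITS   = 5,   /* oprsz/maxsz encoding of simd_desc */
    MTEDESC_LOW_BITS = 12,  /* MIDX + TBI + TCMA + WRITE + ALIGN */
    SIZEM1_WIDTH     = DESC_BITS - MTEDESC_SHIFT
                       - SIMD_SIZE_BITS - MTEDESC_LOW_BITS,
};

static_assert(SIZEM1_WIDTH == 5, "32 - 10 - 5 - 12 == 5 bits remain for SIZEM1");
static_assert((8 * 4) - 1 == 31 && 31 == (1 << SIZEM1_WIDTH) - 1,
              "4 registers of 8-byte elements fit the field exactly");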
1
Implement the MVE VADC and VSBC insns. These perform an
1
From: Richard Henderson <richard.henderson@linaro.org>
2
add-with-carry or subtract-with-carry of the 32-bit elements in each
3
lane of the input vectors, where the carry-out of each add is the
4
carry-in of the next. The initial carry input is either 1 or is from
5
FPSCR.C; the carry out at the end is written back to FPSCR.C.
6
2
3
Share code that creates mtedesc and embeds within simd_desc.
4
5
Cc: qemu-stable@nongnu.org
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
9
Message-id: 20240207025210.8837-5-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210617121628.20116-41-peter.maydell@linaro.org
10
---
11
---
11
target/arm/helper-mve.h | 5 ++++
12
target/arm/tcg/translate-a64.h | 2 ++
12
target/arm/mve.decode | 5 ++++
13
target/arm/tcg/translate-sme.c | 15 +++--------
13
target/arm/mve_helper.c | 52 ++++++++++++++++++++++++++++++++++++++
14
target/arm/tcg/translate-sve.c | 47 ++++++++++++++++++----------------
14
target/arm/translate-mve.c | 37 +++++++++++++++++++++++++++
15
3 files changed, 31 insertions(+), 33 deletions(-)
15
4 files changed, 99 insertions(+)
16
16
17
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
17
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-mve.h
19
--- a/target/arm/tcg/translate-a64.h
20
+++ b/target/arm/helper-mve.h
20
+++ b/target/arm/tcg/translate-a64.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vrhaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
@@ -XXX,XX +XXX,XX @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
22
DEF_HELPER_FLAGS_4(mve_vrhadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
bool sve_access_check(DisasContext *s);
23
DEF_HELPER_FLAGS_4(mve_vrhadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
bool sme_enabled_check(DisasContext *s);
24
24
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
25
+DEF_HELPER_FLAGS_4(mve_vadc, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
26
+DEF_HELPER_FLAGS_4(mve_vadci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+ uint32_t msz, bool is_write, uint32_t data);
27
+DEF_HELPER_FLAGS_4(mve_vsbc, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
28
+DEF_HELPER_FLAGS_4(mve_vsbci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
/* This function corresponds to CheckStreamingSVEEnabled. */
29
static inline bool sme_sm_enabled_check(DisasContext *s)
30
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/tcg/translate-sme.c
33
+++ b/target/arm/tcg/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
36
TCGv_ptr t_za, t_pg;
37
TCGv_i64 addr;
38
- int svl, desc = 0;
39
+ uint32_t desc;
40
bool be = s->be_data == MO_BE;
41
bool mte = s->mte_active[0];
42
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
44
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
45
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
46
47
- if (mte) {
48
- desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
49
- desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
50
- desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
51
- desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
52
- desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
53
- desc <<= SVE_MTEDESC_SHIFT;
54
- } else {
55
+ if (!mte) {
56
addr = clean_data_tbi(s, addr);
57
}
58
- svl = streaming_vec_reg_size(s);
59
- desc = simd_desc(svl, svl, desc);
29
+
60
+
30
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
61
+ desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0);
31
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
62
32
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
63
fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr,
33
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
64
tcg_constant_i32(desc));
65
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
34
index XXXXXXX..XXXXXXX 100644
66
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/mve.decode
67
--- a/target/arm/tcg/translate-sve.c
36
+++ b/target/arm/mve.decode
68
+++ b/target/arm/tcg/translate-sve.c
37
@@ -XXX,XX +XXX,XX @@ VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28
69
@@ -XXX,XX +XXX,XX @@ static const uint8_t dtype_esz[16] = {
38
VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
70
3, 2, 1, 3
39
VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
71
};
40
72
41
+VADC 1110 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
73
-static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
42
+VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
74
- int dtype, uint32_t mte_n, bool is_write,
43
+VADCI 1110 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
75
- gen_helper_gvec_mem *fn)
44
+VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
76
+uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
77
+ uint32_t msz, bool is_write, uint32_t data)
78
{
79
- unsigned vsz = vec_full_reg_size(s);
80
- TCGv_ptr t_pg;
81
uint32_t sizem1;
82
- int desc = 0;
83
+ uint32_t desc = 0;
84
85
- assert(mte_n >= 1 && mte_n <= 4);
86
- sizem1 = (mte_n << dtype_msz(dtype)) - 1;
87
+ /* Assert all of the data fits, with or without MTE enabled. */
88
+ assert(nregs >= 1 && nregs <= 4);
89
+ sizem1 = (nregs << msz) - 1;
90
assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
91
+ assert(data < 1u << SVE_MTEDESC_SHIFT);
45
+
92
+
46
# Vector miscellaneous
93
if (s->mte_active[0]) {
47
94
desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
48
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
95
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
49
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
96
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
50
index XXXXXXX..XXXXXXX 100644
97
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
51
--- a/target/arm/mve_helper.c
98
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
52
+++ b/target/arm/mve_helper.c
99
desc <<= SVE_MTEDESC_SHIFT;
53
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vrshlu, DO_VRSHLU)
100
- } else {
54
DO_2OP_S(vrhadds, DO_RHADD_S)
55
DO_2OP_U(vrhaddu, DO_RHADD_U)
56
57
+static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m,
58
+ uint32_t inv, uint32_t carry_in, bool update_flags)
59
+{
60
+ uint16_t mask = mve_element_mask(env);
61
+ unsigned e;
62
+
63
+ /* If any additions trigger, we will update flags. */
64
+ if (mask & 0x1111) {
65
+ update_flags = true;
66
+ }
101
+ }
67
+
102
+ return simd_desc(vsz, vsz, desc | data);
68
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
69
+ uint64_t r = carry_in;
70
+ r += n[H4(e)];
71
+ r += m[H4(e)] ^ inv;
72
+ if (mask & 1) {
73
+ carry_in = r >> 32;
74
+ }
75
+ mergemask(&d[H4(e)], r, mask);
76
+ }
77
+
78
+ if (update_flags) {
79
+ /* Store C, clear NZV. */
80
+ env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
81
+ env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
82
+ }
83
+ mve_advance_vpt(env);
84
+}
103
+}
85
+
104
+
86
+void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
105
+static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
106
+ int dtype, uint32_t nregs, bool is_write,
107
+ gen_helper_gvec_mem *fn)
87
+{
108
+{
88
+ bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
109
+ TCGv_ptr t_pg;
89
+ do_vadc(env, vd, vn, vm, 0, carry_in, false);
110
+ uint32_t desc;
90
+}
91
+
111
+
92
+void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
112
+ if (!s->mte_active[0]) {
93
+{
113
addr = clean_data_tbi(s, addr);
94
+ bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
114
}
95
+ do_vadc(env, vd, vn, vm, -1, carry_in, false);
115
96
+}
116
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
117
* registers as pointers, so encode the regno into the data field.
118
* For consistency, do this even for LD1.
119
*/
120
- desc = simd_desc(vsz, vsz, zt | desc);
121
+ desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
122
+ dtype_msz(dtype), is_write, zt);
123
t_pg = tcg_temp_new_ptr();
124
125
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
126
@@ -XXX,XX +XXX,XX @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
127
int scale, TCGv_i64 scalar, int msz, bool is_write,
128
gen_helper_gvec_mem_scatter *fn)
129
{
130
- unsigned vsz = vec_full_reg_size(s);
131
TCGv_ptr t_zm = tcg_temp_new_ptr();
132
TCGv_ptr t_pg = tcg_temp_new_ptr();
133
TCGv_ptr t_zt = tcg_temp_new_ptr();
134
- int desc = 0;
135
-
136
- if (s->mte_active[0]) {
137
- desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
138
- desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
139
- desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
140
- desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
141
- desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
142
- desc <<= SVE_MTEDESC_SHIFT;
143
- }
144
- desc = simd_desc(vsz, vsz, desc | scale);
145
+ uint32_t desc;
146
147
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
148
tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm));
149
tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt));
97
+
150
+
98
+
151
+ desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
99
+void HELPER(mve_vadci)(CPUARMState *env, void *vd, void *vn, void *vm)
152
fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
100
+{
101
+ do_vadc(env, vd, vn, vm, 0, 0, true);
102
+}
103
+
104
+void HELPER(mve_vsbci)(CPUARMState *env, void *vd, void *vn, void *vm)
105
+{
106
+ do_vadc(env, vd, vn, vm, -1, 1, true);
107
+}
108
+
109
static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
110
{
111
if (val > max) {
112
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/target/arm/translate-mve.c
115
+++ b/target/arm/translate-mve.c
116
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
117
return do_2op(s, a, fns[a->size]);
118
}
153
}
119
154
120
+/*
121
+ * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
122
+ * of the 32-bit elements in each lane of the input vectors, where the
123
+ * carry-out of each add is the carry-in of the next. The initial carry
124
+ * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
125
+ * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
126
+ * These insns are subject to beat-wise execution. Partial execution
127
+ * of an I=1 (initial carry input fixed) insn which does not
128
+ * execute the first beat must start with the current FPSCR.NZCV
129
+ * value, not the fixed constant input.
130
+ */
131
+static bool trans_VADC(DisasContext *s, arg_2op *a)
132
+{
133
+ return do_2op(s, a, gen_helper_mve_vadc);
134
+}
135
+
136
+static bool trans_VADCI(DisasContext *s, arg_2op *a)
137
+{
138
+ if (mve_skip_first_beat(s)) {
139
+ return trans_VADC(s, a);
140
+ }
141
+ return do_2op(s, a, gen_helper_mve_vadci);
142
+}
143
+
144
+static bool trans_VSBC(DisasContext *s, arg_2op *a)
145
+{
146
+ return do_2op(s, a, gen_helper_mve_vsbc);
147
+}
148
+
149
+static bool trans_VSBCI(DisasContext *s, arg_2op *a)
150
+{
151
+ if (mve_skip_first_beat(s)) {
152
+ return trans_VSBC(s, a);
153
+ }
154
+ return do_2op(s, a, gen_helper_mve_vsbci);
155
+}
156
+
157
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
158
MVEGenTwoOpScalarFn fn)
159
{
160
--
155
--
161
2.20.1
156
2.34.1
162
163
1
Implement the MVE VCADD insn, which performs a complex add with
1
From: Richard Henderson <richard.henderson@linaro.org>
2
rotate. Note that the size=0b11 encoding is VSBC.
3
2
4
The architecture grants some leeway for the "destination and Vm
3
These functions "use the standard load helpers", but
5
source overlap" case for the size MO_32 case, but we choose not to
4
fail to clean_data_tbi or populate mtedesc.
6
make use of it, instead always calculating all 16 bytes worth of
7
results before setting the destination register.
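
As a rough illustration of the complex add with rotate described above, here is a hedged scalar sketch of one real/imaginary lane pair for the rotate-90 case (vcadd90_pair is a hypothetical name; the patch's real implementation is the DO_VCADD macro in mve_helper.c). Arithmetic wraps modulo 2^32, as for the non-saturating insn:

  #include <stdint.h>

  /* even lane: n[even] - m[odd]; odd lane: n[odd] + m[even]
   * (matches DO_VCADD_ALL(vcadd90, DO_SUB, DO_ADD)) */
  static void vcadd90_pair(int32_t d[2], const int32_t n[2], const int32_t m[2])
  {
      d[0] = (int32_t)((uint32_t)n[0] - (uint32_t)m[1]);
      d[1] = (int32_t)((uint32_t)n[1] + (uint32_t)m[0]);
  }
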
8
5
6
Cc: qemu-stable@nongnu.org
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
10
Message-id: 20240207025210.8837-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20210617121628.20116-42-peter.maydell@linaro.org
12
---
12
---
13
target/arm/helper-mve.h | 8 ++++++++
13
target/arm/tcg/translate-sve.c | 15 +++++++++++++--
14
target/arm/mve.decode | 9 +++++++--
14
1 file changed, 13 insertions(+), 2 deletions(-)
15
target/arm/mve_helper.c | 29 +++++++++++++++++++++++++++++
16
target/arm/translate-mve.c | 7 +++++++
17
4 files changed, 51 insertions(+), 2 deletions(-)
18
15
19
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
16
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
20
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/helper-mve.h
18
--- a/target/arm/tcg/translate-sve.c
22
+++ b/target/arm/helper-mve.h
19
+++ b/target/arm/tcg/translate-sve.c
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vadci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
20
@@ -XXX,XX +XXX,XX @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
24
DEF_HELPER_FLAGS_4(mve_vsbc, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
unsigned vsz = vec_full_reg_size(s);
25
DEF_HELPER_FLAGS_4(mve_vsbci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
TCGv_ptr t_pg;
26
23
int poff;
27
+DEF_HELPER_FLAGS_4(mve_vcadd90b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+ uint32_t desc;
28
+DEF_HELPER_FLAGS_4(mve_vcadd90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
29
+DEF_HELPER_FLAGS_4(mve_vcadd90w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
/* Load the first quadword using the normal predicated load helpers. */
30
+
27
+ if (!s->mte_active[0]) {
31
+DEF_HELPER_FLAGS_4(mve_vcadd270b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+ addr = clean_data_tbi(s, addr);
32
+DEF_HELPER_FLAGS_4(mve_vcadd270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
33
+DEF_HELPER_FLAGS_4(mve_vcadd270w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+
35
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/mve.decode
41
+++ b/target/arm/mve.decode
42
@@ -XXX,XX +XXX,XX @@ VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
43
VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
44
45
VADC 1110 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
46
-VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
47
VADCI 1110 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
48
-VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
49
+
50
+{
51
+ VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
52
+ VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
53
+ VCADD90 1111 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op
54
+ VCADD270 1111 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op
55
+}
56
57
# Vector miscellaneous
58
59
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/mve_helper.c
62
+++ b/target/arm/mve_helper.c
63
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vsbci)(CPUARMState *env, void *vd, void *vn, void *vm)
64
do_vadc(env, vd, vn, vm, -1, 1, true);
65
}
66
67
+#define DO_VCADD(OP, ESIZE, TYPE, FN0, FN1) \
68
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
69
+ { \
70
+ TYPE *d = vd, *n = vn, *m = vm; \
71
+ uint16_t mask = mve_element_mask(env); \
72
+ unsigned e; \
73
+ TYPE r[16 / ESIZE]; \
74
+ /* Calculate all results first to avoid overwriting inputs */ \
75
+ for (e = 0; e < 16 / ESIZE; e++) { \
76
+ if (!(e & 1)) { \
77
+ r[e] = FN0(n[H##ESIZE(e)], m[H##ESIZE(e + 1)]); \
78
+ } else { \
79
+ r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)]); \
80
+ } \
81
+ } \
82
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
83
+ mergemask(&d[H##ESIZE(e)], r[e], mask); \
84
+ } \
85
+ mve_advance_vpt(env); \
86
+ }
29
+ }
87
+
30
+
88
+#define DO_VCADD_ALL(OP, FN0, FN1) \
31
poff = pred_full_reg_offset(s, pg);
89
+ DO_VCADD(OP##b, 1, int8_t, FN0, FN1) \
32
if (vsz > 16) {
90
+ DO_VCADD(OP##h, 2, int16_t, FN0, FN1) \
33
/*
91
+ DO_VCADD(OP##w, 4, int32_t, FN0, FN1)
34
@@ -XXX,XX +XXX,XX @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
92
+
35
93
+DO_VCADD_ALL(vcadd90, DO_SUB, DO_ADD)
36
gen_helper_gvec_mem *fn
94
+DO_VCADD_ALL(vcadd270, DO_ADD, DO_SUB)
37
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
95
+
38
- fn(tcg_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
96
static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
39
+ desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
97
{
40
+ fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
98
if (val > max) {
41
99
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
42
/* Replicate that first quadword. */
100
index XXXXXXX..XXXXXXX 100644
43
if (vsz > 16) {
101
--- a/target/arm/translate-mve.c
44
@@ -XXX,XX +XXX,XX @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
102
+++ b/target/arm/translate-mve.c
45
unsigned vsz_r32;
103
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQRDMLSDH, vqrdmlsdh)
46
TCGv_ptr t_pg;
104
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
47
int poff, doff;
105
DO_2OP(VRHADD_S, vrhadds)
48
+ uint32_t desc;
106
DO_2OP(VRHADD_U, vrhaddu)
49
107
+/*
50
if (vsz < 32) {
108
+ * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
51
/*
109
+ * so we can reuse the DO_2OP macro. (Our implementation calculates the
52
@@ -XXX,XX +XXX,XX @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
110
+ * "expected" results in this case.)
53
}
111
+ */
54
112
+DO_2OP(VCADD90, vcadd90)
55
/* Load the first octaword using the normal predicated load helpers. */
113
+DO_2OP(VCADD270, vcadd270)
56
+ if (!s->mte_active[0]) {
114
57
+ addr = clean_data_tbi(s, addr);
115
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
58
+ }
116
{
59
60
poff = pred_full_reg_offset(s, pg);
61
if (vsz > 32) {
62
@@ -XXX,XX +XXX,XX @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
63
64
gen_helper_gvec_mem *fn
65
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
66
- fn(tcg_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
67
+ desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
68
+ fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
69
70
/*
71
* Replicate that first octaword.
117
--
72
--
118
2.20.1
73
2.34.1
119
120
1
vfp_access_check and its helper routine full_vfp_access_check() has
1
From: Richard Henderson <richard.henderson@linaro.org>
2
gradually grown and is now an awkward mix of A-profile only and
3
M-profile only pieces. Refactor it into an A-profile only and an
4
M-profile only version, taking advantage of the fact that now the
5
only direct call to full_vfp_access_check() is in A-profile-only
6
code.
7
2
3
The TBI and TCMA bits are located within mtedesc, not desc.
4
5
Cc: qemu-stable@nongnu.org
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Tested-by: Gustavo Romero <gustavo.romero@linaro.org>
9
Message-id: 20240207025210.8837-7-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20210618141019.10671-7-peter.maydell@linaro.org
11
---
11
---
12
target/arm/translate-vfp.c | 79 +++++++++++++++++++++++---------------
12
target/arm/tcg/sme_helper.c | 8 ++++----
13
1 file changed, 48 insertions(+), 31 deletions(-)
13
target/arm/tcg/sve_helper.c | 12 ++++++------
14
2 files changed, 10 insertions(+), 10 deletions(-)
14
15
15
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
16
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-vfp.c
18
--- a/target/arm/tcg/sme_helper.c
18
+++ b/target/arm/translate-vfp.c
19
+++ b/target/arm/tcg/sme_helper.c
19
@@ -XXX,XX +XXX,XX @@ static void gen_update_fp_context(DisasContext *s)
20
@@ -XXX,XX +XXX,XX @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
20
}
21
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
21
22
22
/*
23
/* Perform gross MTE suppression early. */
23
- * Check that VFP access is enabled. If it is, do the necessary
24
- if (!tbi_check(desc, bit55) ||
24
- * M-profile lazy-FP handling and then return true.
25
- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
25
- * If not, emit code to generate an appropriate exception and
26
+ if (!tbi_check(mtedesc, bit55) ||
26
- * return false.
27
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
27
+ * Check that VFP access is enabled, A-profile specific version.
28
mtedesc = 0;
28
+ *
29
+ * If VFP is enabled, return true. If not, emit code to generate an
30
+ * appropriate exception and return false.
31
* The ignore_vfp_enabled argument specifies that we should ignore
32
- * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
33
+ * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
34
* accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
35
*/
36
-static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
37
+static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
38
{
39
if (s->fp_excp_el) {
40
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
41
- /*
42
- * M-profile mostly catches the "FPU disabled" case early, in
43
- * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
44
- * which do coprocessor-checks are outside the large ranges of
45
- * the encoding space handled by the patterns in m-nocp.decode,
46
- * and for them we may need to raise NOCP here.
47
- */
48
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
49
- syn_uncategorized(), s->fp_excp_el);
50
- } else {
51
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
52
- syn_fp_access_trap(1, 0xe, false),
53
- s->fp_excp_el);
54
- }
55
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
56
+ syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
57
return false;
58
}
29
}
59
30
60
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
31
@@ -XXX,XX +XXX,XX @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
61
unallocated_encoding(s);
32
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
62
return false;
33
34
/* Perform gross MTE suppression early. */
35
- if (!tbi_check(desc, bit55) ||
36
- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
37
+ if (!tbi_check(mtedesc, bit55) ||
38
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
39
mtedesc = 0;
63
}
40
}
64
+ return true;
41
65
+}
42
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
66
43
index XXXXXXX..XXXXXXX 100644
67
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
44
--- a/target/arm/tcg/sve_helper.c
68
- /* Handle M-profile lazy FP state mechanics */
45
+++ b/target/arm/tcg/sve_helper.c
69
-
46
@@ -XXX,XX +XXX,XX @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
70
- /* Trigger lazy-state preservation if necessary */
47
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
71
- gen_preserve_fp_state(s);
48
72
-
49
/* Perform gross MTE suppression early. */
73
- /* Update ownership of FP context and create new FP context if needed */
50
- if (!tbi_check(desc, bit55) ||
74
- gen_update_fp_context(s);
51
- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
75
+/*
52
+ if (!tbi_check(mtedesc, bit55) ||
76
+ * Check that VFP access is enabled, M-profile specific version.
53
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
77
+ *
54
mtedesc = 0;
78
+ * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
79
+ * return true. If not, emit code to generate an appropriate exception and
80
+ * return false.
81
+ */
82
+static bool vfp_access_check_m(DisasContext *s)
83
+{
84
+ if (s->fp_excp_el) {
85
+ /*
86
+ * M-profile mostly catches the "FPU disabled" case early, in
87
+ * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
88
+ * which do coprocessor-checks are outside the large ranges of
89
+ * the encoding space handled by the patterns in m-nocp.decode,
90
+ * and for them we may need to raise NOCP here.
91
+ */
92
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
93
+ syn_uncategorized(), s->fp_excp_el);
94
+ return false;
95
}
55
}
96
56
97
+ /* Handle M-profile lazy FP state mechanics */
57
@@ -XXX,XX +XXX,XX @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr,
98
+
58
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
99
+ /* Trigger lazy-state preservation if necessary */
59
100
+ gen_preserve_fp_state(s);
60
/* Perform gross MTE suppression early. */
101
+
61
- if (!tbi_check(desc, bit55) ||
102
+ /* Update ownership of FP context and create new FP context if needed */
62
- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
103
+ gen_update_fp_context(s);
63
+ if (!tbi_check(mtedesc, bit55) ||
104
+
64
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
105
return true;
65
mtedesc = 0;
106
}
107
108
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
109
*/
110
bool vfp_access_check(DisasContext *s)
111
{
112
- return full_vfp_access_check(s, false);
113
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
114
+ return vfp_access_check_m(s);
115
+ } else {
116
+ return vfp_access_check_a(s, false);
117
+ }
118
}
119
120
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
121
@@ -XXX,XX +XXX,XX @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
122
return false;
123
}
66
}
124
67
125
- if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
68
@@ -XXX,XX +XXX,XX @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
126
+ /*
69
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
127
+ * Call vfp_access_check_a() directly, because we need to tell
70
128
+ * it to ignore FPEXC.EN for some register accesses.
71
/* Perform gross MTE suppression early. */
129
+ */
72
- if (!tbi_check(desc, bit55) ||
130
+ if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
73
- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
131
return true;
74
+ if (!tbi_check(mtedesc, bit55) ||
75
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
76
mtedesc = 0;
132
}
77
}
133
78
134
--
79
--
135
2.20.1
80
2.34.1
136
137
1
These days the Arm architecture has a wide range of fine-grained
1
The raven_io_ops MemoryRegionOps is the only one in the source tree
2
optional extra architectural features. We implement quite a lot
2
which sets .valid.unaligned to indicate that it should support
3
of these but by no means all of them. Document what we do implement,
3
unaligned accesses and which does not also set .impl.unaligned to
4
so that users can find out without having to dig through back-issues
4
indicate that its read and write functions can do the unaligned
5
of our Changelog on the wiki.
5
handling themselves. This is a problem, because at the moment the
6
core memory system does not implement the support for handling
7
unaligned accesses by doing a series of aligned accesses and
8
combining them (system/memory.c:access_with_adjusted_size() has a
9
TODO comment noting this).
6
10
11
Fortunately raven_io_read() and raven_io_write() will correctly deal
12
with the case of being passed an unaligned address, so we can fix the
13
missing unaligned access support by setting .impl.unaligned in the
14
MemoryRegionOps struct.
15
16
Fixes: 9a1839164c9c8f06 ("raven: Implement non-contiguous I/O region")
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
18
Tested-by: Cédric Le Goater <clg@redhat.com>
9
Message-id: 20210617140328.28622-1-peter.maydell@linaro.org
19
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Message-id: 20240112134640.1775041-1-peter.maydell@linaro.org
11
---
21
---
12
docs/system/arm/emulation.rst | 102 ++++++++++++++++++++++++++++++++++
22
hw/pci-host/raven.c | 1 +
13
docs/system/target-arm.rst | 6 ++
23
1 file changed, 1 insertion(+)
14
2 files changed, 108 insertions(+)
15
create mode 100644 docs/system/arm/emulation.rst
16
24
17
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
25
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/docs/system/arm/emulation.rst
22
@@ -XXX,XX +XXX,XX @@
23
+A-profile CPU architecture support
24
+==================================
25
+
26
+QEMU's TCG emulation includes support for the Armv5, Armv6, Armv7 and
27
+Armv8 versions of the A-profile architecture. It also has support for
28
+the following architecture extensions:
29
+
30
+- FEAT_AA32BF16 (AArch32 BFloat16 instructions)
31
+- FEAT_AA32HPD (AArch32 hierarchical permission disables)
32
+- FEAT_AA32I8MM (AArch32 Int8 matrix multiplication instructions)
33
+- FEAT_AES (AESD and AESE instructions)
34
+- FEAT_BF16 (AArch64 BFloat16 instructions)
35
+- FEAT_BTI (Branch Target Identification)
36
+- FEAT_DIT (Data Independent Timing instructions)
37
+- FEAT_DPB (DC CVAP instruction)
38
+- FEAT_DotProd (Advanced SIMD dot product instructions)
39
+- FEAT_FCMA (Floating-point complex number instructions)
40
+- FEAT_FHM (Floating-point half-precision multiplication instructions)
41
+- FEAT_FP16 (Half-precision floating-point data processing)
42
+- FEAT_FRINTTS (Floating-point to integer instructions)
43
+- FEAT_FlagM (Flag manipulation instructions v2)
44
+- FEAT_FlagM2 (Enhancements to flag manipulation instructions)
45
+- FEAT_HPDS (Hierarchical permission disables)
46
+- FEAT_I8MM (AArch64 Int8 matrix multiplication instructions)
47
+- FEAT_JSCVT (JavaScript conversion instructions)
48
+- FEAT_LOR (Limited ordering regions)
49
+- FEAT_LRCPC (Load-acquire RCpc instructions)
50
+- FEAT_LRCPC2 (Load-acquire RCpc instructions v2)
51
+- FEAT_LSE (Large System Extensions)
52
+- FEAT_MTE (Memory Tagging Extension)
53
+- FEAT_MTE2 (Memory Tagging Extension)
54
+- FEAT_PAN (Privileged access never)
55
+- FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN)
56
+- FEAT_PAuth (Pointer authentication)
57
+- FEAT_PMULL (PMULL, PMULL2 instructions)
58
+- FEAT_PMUv3p1 (PMU Extensions v3.1)
59
+- FEAT_PMUv3p4 (PMU Extensions v3.4)
60
+- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
61
+- FEAT_RNG (Random number generator)
62
+- FEAT_SB (Speculation Barrier)
63
+- FEAT_SEL2 (Secure EL2)
64
+- FEAT_SHA1 (SHA1 instructions)
65
+- FEAT_SHA256 (SHA256 instructions)
66
+- FEAT_SHA3 (Advanced SIMD SHA3 instructions)
67
+- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
68
+- FEAT_SM3 (Advanced SIMD SM3 instructions)
69
+- FEAT_SM4 (Advanced SIMD SM4 instructions)
70
+- FEAT_SPECRES (Speculation restriction instructions)
71
+- FEAT_SSBS (Speculative Store Bypass Safe)
72
+- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
73
+- FEAT_TLBIRANGE (TLB invalidate range instructions)
74
+- FEAT_TTCNP (Translation table Common not private translations)
75
+- FEAT_TTST (Small translation tables)
76
+- FEAT_UAO (Unprivileged Access Override control)
77
+- FEAT_VHE (Virtualization Host Extensions)
78
+- FEAT_VMID16 (16-bit VMID)
79
+- FEAT_XNX (Translation table stage 2 Unprivileged Execute-never)
80
+- SVE (The Scalable Vector Extension)
81
+- SVE2 (The Scalable Vector Extension v2)
82
+
83
+For information on the specifics of these extensions, please refer
84
+to the `Armv8-A Arm Architecture Reference Manual
85
+<https://developer.arm.com/documentation/ddi0487/latest>`_.
86
+
87
+When a specific named CPU is being emulated, only those features which
88
+are present in hardware for that CPU are emulated. (If a feature is
89
+not in the list above then it is not supported, even if the real
90
+hardware should have it.) The ``max`` CPU enables all features.
91
+
92
+R-profile CPU architecture support
93
+==================================
94
+
95
+QEMU's TCG emulation support for R-profile CPUs is currently limited.
96
+We emulate only the Cortex-R5 and Cortex-R5F CPUs.
97
+
98
+M-profile CPU architecture support
99
+==================================
100
+
101
+QEMU's TCG emulation includes support for Armv6-M, Armv7-M, Armv8-M, and
102
+Armv8.1-M versions of the M-profile architecture. It also has support
103
+for the following architecture extensions:
104
+
105
+- FP (Floating-point Extension)
106
+- FPCXT (FPCXT access instructions)
107
+- HP (Half-precision floating-point instructions)
108
+- LOB (Low Overhead loops and Branch future)
109
+- M (Main Extension)
110
+- MPU (Memory Protection Unit Extension)
111
+- PXN (Privileged Execute Never)
112
+- RAS (Reliability, Availability and Serviceability): "minimum RAS Extension" only
113
+- S (Security Extension)
114
+- ST (System Timer Extension)
115
+
116
+For information on the specifics of these extensions, please refer
117
+to the `Armv8-M Arm Architecture Reference Manual
118
+<https://developer.arm.com/documentation/ddi0553/latest>`_.
119
+
120
+When a specific named CPU is being emulated, only those features which
121
+are present in hardware for that CPU are emulated. (If a feature is
122
+not in the list above then it is not supported, even if the real
123
+hardware should have it.) There is no equivalent of the ``max`` CPU for
124
+M-profile.
125
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
126
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
127
--- a/docs/system/target-arm.rst
27
--- a/hw/pci-host/raven.c
128
+++ b/docs/system/target-arm.rst
28
+++ b/hw/pci-host/raven.c
129
@@ -XXX,XX +XXX,XX @@ undocumented; you can get a complete list by running
29
@@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps raven_io_ops = {
130
arm/virt
30
.write = raven_io_write,
131
arm/xlnx-versal-virt
31
.endianness = DEVICE_LITTLE_ENDIAN,
132
32
.impl.max_access_size = 4,
133
+Emulated CPU architecture support
33
+ .impl.unaligned = true,
134
+=================================
34
.valid.unaligned = true,
135
+
35
};
136
+.. toctree::
137
+ arm/emulation
138
+
139
Arm CPU features
140
================
141
36
142
--
37
--
143
2.20.1
38
2.34.1
144
39
145
40
1
Implement the MVE VRHADD insn, which performs a rounded halving
1
Suppress the deprecation warning when we're running under qtest,
2
addition.
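
For reference, a minimal sketch of the rounded halving addition, evaluated in a wider type so the intermediate sum cannot overflow; this mirrors the DO_RHADD_S/DO_RHADD_U macros the patch adds (rhadd_s32 is a hypothetical name):

  #include <stdint.h>

  /* rounded halving add: (a + b + 1) >> 1, computed in 64 bits */
  static int32_t rhadd_s32(int32_t a, int32_t b)
  {
      return (int32_t)(((int64_t)a + b + 1) >> 1);
  }
  /* e.g. rhadd_s32(7, 4) == 6, rhadd_s32(-3, 2) == 0 */
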
2
to avoid "make check" including warning messages in its output.
3
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Message-id: 20210617121628.20116-40-peter.maydell@linaro.org
6
Message-id: 20240206154151.155620-1-peter.maydell@linaro.org
7
---
7
---
8
target/arm/helper-mve.h | 8 ++++++++
8
hw/block/tc58128.c | 4 +++-
9
target/arm/mve.decode | 3 +++
9
1 file changed, 3 insertions(+), 1 deletion(-)
10
target/arm/mve_helper.c | 6 ++++++
11
target/arm/translate-mve.c | 2 ++
12
4 files changed, 19 insertions(+)
13
10
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
11
diff --git a/hw/block/tc58128.c b/hw/block/tc58128.c
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
13
--- a/hw/block/tc58128.c
17
+++ b/target/arm/helper-mve.h
14
+++ b/hw/block/tc58128.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqdmullbw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
15
@@ -XXX,XX +XXX,XX @@ static sh7750_io_device tc58128 = {
19
DEF_HELPER_FLAGS_4(mve_vqdmullth, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
16
20
DEF_HELPER_FLAGS_4(mve_vqdmulltw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
17
int tc58128_init(struct SH7750State *s, const char *zone1, const char *zone2)
21
22
+DEF_HELPER_FLAGS_4(mve_vrhaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
+DEF_HELPER_FLAGS_4(mve_vrhaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+DEF_HELPER_FLAGS_4(mve_vrhaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+
26
+DEF_HELPER_FLAGS_4(mve_vrhaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vrhadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+DEF_HELPER_FLAGS_4(mve_vrhadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
29
+
30
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/mve.decode
36
+++ b/target/arm/mve.decode
37
@@ -XXX,XX +XXX,XX @@ VQRDMLSDHX 1111 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
38
VQDMULLB 111 . 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 1 @2op_sz28
39
VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28
40
41
+VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
42
+VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
43
+
44
# Vector miscellaneous
45
46
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
47
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/mve_helper.c
50
+++ b/target/arm/mve_helper.c
51
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vshlu, DO_VSHLU)
52
DO_2OP_S(vrshls, DO_VRSHLS)
53
DO_2OP_U(vrshlu, DO_VRSHLU)
54
55
+#define DO_RHADD_S(N, M) (((int64_t)(N) + (M) + 1) >> 1)
56
+#define DO_RHADD_U(N, M) (((uint64_t)(N) + (M) + 1) >> 1)
57
+
58
+DO_2OP_S(vrhadds, DO_RHADD_S)
59
+DO_2OP_U(vrhaddu, DO_RHADD_U)
60
+
61
static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
62
{
18
{
63
if (val > max) {
19
- warn_report_once("The TC58128 flash device is deprecated");
64
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
20
+ if (!qtest_enabled()) {
65
index XXXXXXX..XXXXXXX 100644
21
+ warn_report_once("The TC58128 flash device is deprecated");
66
--- a/target/arm/translate-mve.c
22
+ }
67
+++ b/target/arm/translate-mve.c
23
init_dev(&tc58128_devs[0], zone1);
68
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQDMLSDH, vqdmlsdh)
24
init_dev(&tc58128_devs[1], zone2);
69
DO_2OP(VQDMLSDHX, vqdmlsdhx)
25
return sh7750_register_io_device(s, &tc58128);
70
DO_2OP(VQRDMLSDH, vqrdmlsdh)
71
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
72
+DO_2OP(VRHADD_S, vrhadds)
73
+DO_2OP(VRHADD_U, vrhaddu)
74
75
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
76
{
77
--
26
--
78
2.20.1
27
2.34.1
79
28
80
29
1
Implement the vector form of the MVE VQDMULL insn.
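
A hedged sketch of the scalar operation underneath (a saturating doubling multiply into a double-width result, as in the do_qdmullh/do_qdmullw helpers the patch reuses); qdmull_h16 is a hypothetical name:

  #include <stdint.h>
  #include <stdbool.h>

  /* saturating doubling multiply, 16x16 -> 32 bits */
  static int32_t qdmull_h16(int16_t a, int16_t b, bool *sat)
  {
      int64_t r = 2 * (int64_t)a * b;
      if (r > INT32_MAX) {
          *sat = true;
          return INT32_MAX;
      }
      if (r < INT32_MIN) {  /* cannot happen for 16-bit inputs, kept for symmetry */
          *sat = true;
          return INT32_MIN;
      }
      return (int32_t)r;
  }
  /* only a == b == INT16_MIN saturates: 2 * 32768 * 32768 == 2^31 > INT32_MAX */
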
1
We deliberately don't include qtests_npcm7xx in qtests_aarch64,
2
because we already get the coverage of those tests via qtests_arm,
3
and we don't want to use extra CI minutes testing them twice.
2
4
5
In commit 327b680877b79c4b we added it to qtests_aarch64; revert
6
that change.
7
8
Fixes: 327b680877b79c4b ("tests/qtest: Creating qtest for GMAC Module")
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-id: 20210617121628.20116-39-peter.maydell@linaro.org
11
Message-id: 20240206163043.315535-1-peter.maydell@linaro.org
6
---
12
---
7
target/arm/helper-mve.h | 5 +++++
13
tests/qtest/meson.build | 1 -
8
target/arm/mve.decode | 5 +++++
14
1 file changed, 1 deletion(-)
9
target/arm/mve_helper.c | 30 ++++++++++++++++++++++++++++++
10
target/arm/translate-mve.c | 30 ++++++++++++++++++++++++++++++
11
4 files changed, 70 insertions(+)
12
15
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
16
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
14
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
18
--- a/tests/qtest/meson.build
16
+++ b/target/arm/helper-mve.h
19
+++ b/tests/qtest/meson.build
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
20
@@ -XXX,XX +XXX,XX @@ qtests_aarch64 = \
18
DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
(config_all_devices.has_key('CONFIG_RASPI') ? ['bcm2835-dma-test'] : []) + \
19
DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
(config_all_accel.has_key('CONFIG_TCG') and \
20
23
config_all_devices.has_key('CONFIG_TPM_TIS_I2C') ? ['tpm-tis-i2c-test'] : []) + \
21
+DEF_HELPER_FLAGS_4(mve_vqdmullbh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
- (config_all_devices.has_key('CONFIG_NPCM7XX') ? qtests_npcm7xx : []) + \
22
+DEF_HELPER_FLAGS_4(mve_vqdmullbw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
['arm-cpu-features',
23
+DEF_HELPER_FLAGS_4(mve_vqdmullth, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
'numa-test',
24
+DEF_HELPER_FLAGS_4(mve_vqdmulltw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
'boot-serial-test',
25
+
26
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/mve.decode
32
+++ b/target/arm/mve.decode
33
@@ -XXX,XX +XXX,XX @@
34
@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
35
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
36
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
37
+@2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \
38
+ size=%size_28
39
40
# The _rev suffix indicates that Vn and Vm are reversed. This is
41
# the case for shifts. In the Arm ARM these insns are documented
42
@@ -XXX,XX +XXX,XX @@ VQDMLSDHX 1111 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
43
VQRDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
44
VQRDMLSDHX 1111 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
45
46
+VQDMULLB 111 . 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 1 @2op_sz28
47
+VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28
48
+
49
# Vector miscellaneous
50
51
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
52
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/mve_helper.c
55
+++ b/target/arm/mve_helper.c
56
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT_SCALAR_L(vqdmullt_scalarh, 1, 2, int16_t, 4, int32_t, \
57
DO_2OP_SAT_SCALAR_L(vqdmullt_scalarw, 1, 4, int32_t, 8, int64_t, \
58
do_qdmullw, SATMASK32)
59
60
+/*
61
+ * Long saturating ops
62
+ */
63
+#define DO_2OP_SAT_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN, SATMASK) \
64
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
65
+ void *vm) \
66
+ { \
67
+ LTYPE *d = vd; \
68
+ TYPE *n = vn, *m = vm; \
69
+ uint16_t mask = mve_element_mask(env); \
70
+ unsigned le; \
71
+ bool qc = false; \
72
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
73
+ bool sat = false; \
74
+ LTYPE op1 = n[H##ESIZE(le * 2 + TOP)]; \
75
+ LTYPE op2 = m[H##ESIZE(le * 2 + TOP)]; \
76
+ mergemask(&d[H##LESIZE(le)], FN(op1, op2, &sat), mask); \
77
+ qc |= sat && (mask & SATMASK); \
78
+ } \
79
+ if (qc) { \
80
+ env->vfp.qc[0] = qc; \
81
+ } \
82
+ mve_advance_vpt(env); \
83
+ }
84
+
85
+DO_2OP_SAT_L(vqdmullbh, 0, 2, int16_t, 4, int32_t, do_qdmullh, SATMASK16B)
86
+DO_2OP_SAT_L(vqdmullbw, 0, 4, int32_t, 8, int64_t, do_qdmullw, SATMASK32)
87
+DO_2OP_SAT_L(vqdmullth, 1, 2, int16_t, 4, int32_t, do_qdmullh, SATMASK16T)
88
+DO_2OP_SAT_L(vqdmulltw, 1, 4, int32_t, 8, int64_t, do_qdmullw, SATMASK32)
89
+
90
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
91
{
92
m &= 0xff;
93
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/arm/translate-mve.c
96
+++ b/target/arm/translate-mve.c
97
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQDMLSDHX, vqdmlsdhx)
98
DO_2OP(VQRDMLSDH, vqrdmlsdh)
99
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
100
101
+static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
102
+{
103
+ static MVEGenTwoOpFn * const fns[] = {
104
+ NULL,
105
+ gen_helper_mve_vqdmullbh,
106
+ gen_helper_mve_vqdmullbw,
107
+ NULL,
108
+ };
109
+ if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
110
+ /* UNPREDICTABLE; we choose to undef */
111
+ return false;
112
+ }
113
+ return do_2op(s, a, fns[a->size]);
114
+}
115
+
116
+static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
117
+{
118
+ static MVEGenTwoOpFn * const fns[] = {
119
+ NULL,
120
+ gen_helper_mve_vqdmullth,
121
+ gen_helper_mve_vqdmulltw,
122
+ NULL,
123
+ };
124
+ if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
125
+ /* UNPREDICTABLE; we choose to undef */
126
+ return false;
127
+ }
128
+ return do_2op(s, a, fns[a->size]);
129
+}
130
+
131
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
132
MVEGenTwoOpScalarFn fn)
133
{
134
--
28
--
135
2.20.1
29
2.34.1
136
30
137
31
1
Implement the MVE VQDMLSDH and VQRDMLSDH insns, which are
1
Allow changes to the virt GTDT -- we are going to add the IRQ
2
like VQDMLADH and VQRDMLADH except that products are subtracted
2
entry for a new timer to it.
3
rather than added.
4
3
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
7
Message-id: 20210617121628.20116-38-peter.maydell@linaro.org
6
Message-id: 20240122143537.233498-2-peter.maydell@linaro.org
8
---
7
---
9
target/arm/helper-mve.h | 16 ++++++++++++++
8
tests/qtest/bios-tables-test-allowed-diff.h | 2 ++
10
target/arm/mve.decode | 5 +++++
9
1 file changed, 2 insertions(+)
11
target/arm/mve_helper.c | 44 ++++++++++++++++++++++++++++++++++++++
12
target/arm/translate-mve.c | 4 ++++
13
4 files changed, 69 insertions(+)
14
10
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
11
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper-mve.h
13
--- a/tests/qtest/bios-tables-test-allowed-diff.h
18
+++ b/target/arm/helper-mve.h
14
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrdmladhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
15
@@ -1 +1,3 @@
20
DEF_HELPER_FLAGS_4(mve_vqrdmladhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
16
/* List of comma-separated changed AML files to ignore */
21
DEF_HELPER_FLAGS_4(mve_vqrdmladhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
17
+"tests/data/acpi/virt/FACP",
22
18
+"tests/data/acpi/virt/GTDT",
23
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+
27
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
29
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
30
+
31
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
33
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+
35
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
37
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
38
+
39
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
41
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
42
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/mve.decode
45
+++ b/target/arm/mve.decode
46
@@ -XXX,XX +XXX,XX @@ VQDMLADHX 1110 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
47
VQRDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
48
VQRDMLADHX 1110 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
49
50
+VQDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
51
+VQDMLSDHX 1111 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
52
+VQRDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
53
+VQRDMLSDHX 1111 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
54
+
55
# Vector miscellaneous
56
57
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
58
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/mve_helper.c
61
+++ b/target/arm/mve_helper.c
62
@@ -XXX,XX +XXX,XX @@ static int32_t do_vqdmladh_w(int32_t a, int32_t b, int32_t c, int32_t d,
63
return r >> 32;
64
}
65
66
+static int8_t do_vqdmlsdh_b(int8_t a, int8_t b, int8_t c, int8_t d,
67
+ int round, bool *sat)
68
+{
69
+ int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7);
70
+ return do_sat_bhw(r, INT16_MIN, INT16_MAX, sat) >> 8;
71
+}
72
+
73
+static int16_t do_vqdmlsdh_h(int16_t a, int16_t b, int16_t c, int16_t d,
74
+ int round, bool *sat)
75
+{
76
+ int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 15);
77
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat) >> 16;
78
+}
79
+
80
+static int32_t do_vqdmlsdh_w(int32_t a, int32_t b, int32_t c, int32_t d,
81
+ int round, bool *sat)
82
+{
83
+ int64_t m1 = (int64_t)a * b;
84
+ int64_t m2 = (int64_t)c * d;
85
+ int64_t r;
86
+ /* The same ordering issue as in do_vqdmladh_w applies here too */
87
+ if (ssub64_overflow(m1, m2, &r) ||
88
+ sadd64_overflow(r, (round << 30), &r) ||
89
+ sadd64_overflow(r, r, &r)) {
90
+ *sat = true;
91
+ return r < 0 ? INT32_MAX : INT32_MIN;
92
+ }
93
+ return r >> 32;
94
+}
95
+
96
DO_VQDMLADH_OP(vqdmladhb, 1, int8_t, 0, 0, do_vqdmladh_b)
97
DO_VQDMLADH_OP(vqdmladhh, 2, int16_t, 0, 0, do_vqdmladh_h)
98
DO_VQDMLADH_OP(vqdmladhw, 4, int32_t, 0, 0, do_vqdmladh_w)
99
@@ -XXX,XX +XXX,XX @@ DO_VQDMLADH_OP(vqrdmladhxb, 1, int8_t, 1, 1, do_vqdmladh_b)
100
DO_VQDMLADH_OP(vqrdmladhxh, 2, int16_t, 1, 1, do_vqdmladh_h)
101
DO_VQDMLADH_OP(vqrdmladhxw, 4, int32_t, 1, 1, do_vqdmladh_w)
102
103
+DO_VQDMLADH_OP(vqdmlsdhb, 1, int8_t, 0, 0, do_vqdmlsdh_b)
104
+DO_VQDMLADH_OP(vqdmlsdhh, 2, int16_t, 0, 0, do_vqdmlsdh_h)
105
+DO_VQDMLADH_OP(vqdmlsdhw, 4, int32_t, 0, 0, do_vqdmlsdh_w)
106
+DO_VQDMLADH_OP(vqdmlsdhxb, 1, int8_t, 1, 0, do_vqdmlsdh_b)
107
+DO_VQDMLADH_OP(vqdmlsdhxh, 2, int16_t, 1, 0, do_vqdmlsdh_h)
108
+DO_VQDMLADH_OP(vqdmlsdhxw, 4, int32_t, 1, 0, do_vqdmlsdh_w)
109
+
110
+DO_VQDMLADH_OP(vqrdmlsdhb, 1, int8_t, 0, 1, do_vqdmlsdh_b)
111
+DO_VQDMLADH_OP(vqrdmlsdhh, 2, int16_t, 0, 1, do_vqdmlsdh_h)
112
+DO_VQDMLADH_OP(vqrdmlsdhw, 4, int32_t, 0, 1, do_vqdmlsdh_w)
113
+DO_VQDMLADH_OP(vqrdmlsdhxb, 1, int8_t, 1, 1, do_vqdmlsdh_b)
114
+DO_VQDMLADH_OP(vqrdmlsdhxh, 2, int16_t, 1, 1, do_vqdmlsdh_h)
115
+DO_VQDMLADH_OP(vqrdmlsdhxw, 4, int32_t, 1, 1, do_vqdmlsdh_w)
116
+
117
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
118
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
119
uint32_t rm) \
120
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/translate-mve.c
123
+++ b/target/arm/translate-mve.c
124
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQDMLADH, vqdmladh)
125
DO_2OP(VQDMLADHX, vqdmladhx)
126
DO_2OP(VQRDMLADH, vqrdmladh)
127
DO_2OP(VQRDMLADHX, vqrdmladhx)
128
+DO_2OP(VQDMLSDH, vqdmlsdh)
129
+DO_2OP(VQDMLSDHX, vqdmlsdhx)
130
+DO_2OP(VQRDMLSDH, vqrdmlsdh)
131
+DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
132
133
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
134
MVEGenTwoOpScalarFn fn)
135
--
19
--
136
2.20.1
20
2.34.1
137
138
1
Implement the MVE VQDMLADH and VQRDMLADH insns. These multiply
1
Armv8.1+ CPUs have the Virtual Host Extension (VHE) which adds a
2
elements, and then add pairs of products, double, possibly round,
2
non-secure EL2 virtual timer. We implemented the timer itself in the
3
saturate and return the high half of the result.
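
A hedged scalar sketch of one lane of the rounding form at halfword size, mirroring what the patch's do_vqdmladh_h computes (qrdmladh_h is a hypothetical name):

  #include <stdint.h>
  #include <stdbool.h>

  /* (a*b + c*d) * 2 + rounding constant, saturated to 32 bits, high half returned */
  static int16_t qrdmladh_h(int16_t a, int16_t b, int16_t c, int16_t d, bool *sat)
  {
      int64_t r = ((int64_t)a * b + (int64_t)c * d) * 2 + (1 << 15);
      if (r > INT32_MAX) {
          *sat = true;
          r = INT32_MAX;
      } else if (r < INT32_MIN) {
          *sat = true;
          r = INT32_MIN;
      }
      return (int16_t)(r >> 16);
  }
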
3
CPU model, but never wired up its IRQ line to the GIC.
4
5
Wire up the IRQ line (this is always safe whether the CPU has the
6
interrupt or not, since it always creates the outbound IRQ line).
7
Report it to the guest via dtb and ACPI if the CPU has the feature.
8
9
The DTB binding is documented in the kernel's
10
Documentation/devicetree/bindings/timer/arm\,arch_timer.yaml
11
and the ACPI table entries are documented in the ACPI specification
12
version 6.3 or later.
13
14
Because the IRQ line ACPI binding is new in 6.3, we need to bump the
15
FADT table rev to show that we might be using 6.3 features.
16
17
Note that exposing this IRQ in the DTB will trigger a bug in EDK2
18
versions prior to edk2-stable202311, for users who use the virt board
19
with 'virtualization=on' to enable EL2 emulation and are booting an
20
EDK2 guest BIOS, if that EDK2 has assertions enabled. The effect is
21
that EDK2 will assert on bootup:
22
23
ASSERT [ArmTimerDxe] /home/kraxel/projects/qemu/roms/edk2/ArmVirtPkg/Library/ArmVirtTimerFdtClientLib/ArmVirtTimerFdtClientLib.c(72): PropSize == 36 || PropSize == 48
24
25
If you see that assertion you should do one of:
26
* update your EDK2 binaries to edk2-stable202311 or newer
27
* use the 'virt-8.2' versioned machine type
28
* not use 'virtualization=on'
29
30
(The versions shipped with QEMU itself have the fix.)
4
31
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
33
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
7
Message-id: 20210617121628.20116-37-peter.maydell@linaro.org
34
Message-id: 20240122143537.233498-3-peter.maydell@linaro.org
8
---
35
---
9
target/arm/helper-mve.h | 16 +++++++
36
include/hw/arm/virt.h | 2 ++
10
target/arm/mve.decode | 5 +++
37
hw/arm/virt-acpi-build.c | 20 ++++++++++----
11
target/arm/mve_helper.c | 89 ++++++++++++++++++++++++++++++++++++++
38
hw/arm/virt.c | 60 ++++++++++++++++++++++++++++++++++------
12
target/arm/translate-mve.c | 4 ++
39
3 files changed, 67 insertions(+), 15 deletions(-)
13
4 files changed, 114 insertions(+)
40
14
41
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
16
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper-mve.h
43
--- a/include/hw/arm/virt.h
18
+++ b/target/arm/helper-mve.h
44
+++ b/include/hw/arm/virt.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
45
@@ -XXX,XX +XXX,XX @@ struct VirtMachineClass {
20
DEF_HELPER_FLAGS_4(mve_vqrshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
46
/* Machines < 6.2 have no support for describing cpu topology to guest */
21
DEF_HELPER_FLAGS_4(mve_vqrshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
47
bool no_cpu_topology;
22
48
bool no_tcg_lpa2;
23
+DEF_HELPER_FLAGS_4(mve_vqdmladhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
49
+ bool no_ns_el2_virt_timer_irq;
24
+DEF_HELPER_FLAGS_4(mve_vqdmladhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
50
};
25
+DEF_HELPER_FLAGS_4(mve_vqdmladhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
51
26
+
52
struct VirtMachineState {
27
+DEF_HELPER_FLAGS_4(mve_vqdmladhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
53
@@ -XXX,XX +XXX,XX @@ struct VirtMachineState {
28
+DEF_HELPER_FLAGS_4(mve_vqdmladhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
54
PCIBus *bus;
29
+DEF_HELPER_FLAGS_4(mve_vqdmladhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
55
char *oem_id;
30
+
56
char *oem_table_id;
31
+DEF_HELPER_FLAGS_4(mve_vqrdmladhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
57
+ bool ns_el2_virt_timer_irq;
32
+DEF_HELPER_FLAGS_4(mve_vqrdmladhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
58
};
33
+DEF_HELPER_FLAGS_4(mve_vqrdmladhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
59
34
+
60
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
35
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
61
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
36
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
37
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
38
+
39
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
41
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
42
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
43
index XXXXXXX..XXXXXXX 100644
62
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/mve.decode
63
--- a/hw/arm/virt-acpi-build.c
45
+++ b/target/arm/mve.decode
64
+++ b/hw/arm/virt-acpi-build.c
46
@@ -XXX,XX +XXX,XX @@ VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
65
@@ -XXX,XX +XXX,XX @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
47
VQRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
66
}
48
VQRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
67
49
68
/*
50
+VQDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
69
- * ACPI spec, Revision 5.1
51
+VQDMLADHX 1110 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
70
- * 5.2.24 Generic Timer Description Table (GTDT)
52
+VQRDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
71
+ * ACPI spec, Revision 6.5
53
+VQRDMLADHX 1110 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
72
+ * 5.2.25 Generic Timer Description Table (GTDT)
54
+
73
*/
55
# Vector miscellaneous
74
static void
56
75
build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
57
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
76
@@ -XXX,XX +XXX,XX @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
58
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
77
uint32_t irqflags = vmc->claim_edge_triggered_timers ?
78
1 : /* Interrupt is Edge triggered */
79
0; /* Interrupt is Level triggered */
80
- AcpiTable table = { .sig = "GTDT", .rev = 2, .oem_id = vms->oem_id,
81
+ AcpiTable table = { .sig = "GTDT", .rev = 3, .oem_id = vms->oem_id,
82
.oem_table_id = vms->oem_table_id };
83
84
acpi_table_begin(&table, table_data);
85
@@ -XXX,XX +XXX,XX @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
86
build_append_int_noprefix(table_data, 0, 4);
87
/* Platform Timer Offset */
88
build_append_int_noprefix(table_data, 0, 4);
89
-
90
+ if (vms->ns_el2_virt_timer_irq) {
91
+ /* Virtual EL2 Timer GSIV */
92
+ build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_VIRT_IRQ, 4);
93
+ /* Virtual EL2 Timer Flags */
94
+ build_append_int_noprefix(table_data, irqflags, 4);
95
+ } else {
96
+ build_append_int_noprefix(table_data, 0, 4);
97
+ build_append_int_noprefix(table_data, 0, 4);
98
+ }
99
acpi_table_end(linker, &table);
100
}
101
102
@@ -XXX,XX +XXX,XX @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
103
static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker,
104
VirtMachineState *vms, unsigned dsdt_tbl_offset)
105
{
106
- /* ACPI v6.0 */
107
+ /* ACPI v6.3 */
108
AcpiFadtData fadt = {
109
.rev = 6,
110
- .minor_ver = 0,
111
+ .minor_ver = 3,
112
.flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
113
.xdsdt_tbl_offset = &dsdt_tbl_offset,
114
};
115
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
59
index XXXXXXX..XXXXXXX 100644
116
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/mve_helper.c
117
--- a/hw/arm/virt.c
61
+++ b/target/arm/mve_helper.c
118
+++ b/hw/arm/virt.c
62
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP)
119
@@ -XXX,XX +XXX,XX @@ static void create_randomness(MachineState *ms, const char *node)
63
DO_2OP_SAT_S(vqrshls, DO_SQRSHL_OP)
120
qemu_fdt_setprop(ms->fdt, node, "rng-seed", seed.rng, sizeof(seed.rng));
64
DO_2OP_SAT_U(vqrshlu, DO_UQRSHL_OP)
121
}
65
122
66
+/*
123
+/*
67
+ * Multiply add dual returning high half
124
+ * The CPU object always exposes the NS EL2 virt timer IRQ line,
68
+ * The 'FN' here takes four inputs A, B, C, D, a 0/1 indicator of
125
+ * but we don't want to advertise it to the guest in the dtb or ACPI
69
+ * whether to add the rounding constant, and the pointer to the
126
+ * table unless it's really going to do something.
70
+ * saturation flag, and should do "(A * B + C * D) * 2 + rounding constant",
71
+ * saturate to twice the input size and return the high half; or
72
+ * (A * B - C * D) etc for VQDMLSDH.
73
+ */
127
+ */
74
+#define DO_VQDMLADH_OP(OP, ESIZE, TYPE, XCHG, ROUND, FN) \
128
+static bool ns_el2_virt_timer_present(void)
75
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
129
+{
76
+ void *vm) \
130
+ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(0));
77
+ { \
131
+ CPUARMState *env = &cpu->env;
78
+ TYPE *d = vd, *n = vn, *m = vm; \
132
+
79
+ uint16_t mask = mve_element_mask(env); \
133
+ return arm_feature(env, ARM_FEATURE_AARCH64) &&
80
+ unsigned e; \
134
+ arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu);
81
+ bool qc = false; \
135
+}
82
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
136
+
83
+ bool sat = false; \
137
static void create_fdt(VirtMachineState *vms)
84
+ if ((e & 1) == XCHG) { \
138
{
85
+ TYPE r = FN(n[H##ESIZE(e)], \
139
MachineState *ms = MACHINE(vms);
86
+ m[H##ESIZE(e - XCHG)], \
140
@@ -XXX,XX +XXX,XX @@ static void fdt_add_timer_nodes(const VirtMachineState *vms)
87
+ n[H##ESIZE(e + (1 - 2 * XCHG))], \
141
"arm,armv7-timer");
88
+ m[H##ESIZE(e + (1 - XCHG))], \
142
}
89
+ ROUND, &sat); \
143
qemu_fdt_setprop(ms->fdt, "/timer", "always-on", NULL, 0);
90
+ mergemask(&d[H##ESIZE(e)], r, mask); \
144
- qemu_fdt_setprop_cells(ms->fdt, "/timer", "interrupts",
91
+ qc |= sat & mask & 1; \
145
- GIC_FDT_IRQ_TYPE_PPI,
92
+ } \
146
- INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ), irqflags,
93
+ } \
147
- GIC_FDT_IRQ_TYPE_PPI,
94
+ if (qc) { \
148
- INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ), irqflags,
95
+ env->vfp.qc[0] = qc; \
149
- GIC_FDT_IRQ_TYPE_PPI,
96
+ } \
150
- INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ), irqflags,
97
+ mve_advance_vpt(env); \
151
- GIC_FDT_IRQ_TYPE_PPI,
152
- INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags);
153
+ if (vms->ns_el2_virt_timer_irq) {
154
+ qemu_fdt_setprop_cells(ms->fdt, "/timer", "interrupts",
155
+ GIC_FDT_IRQ_TYPE_PPI,
156
+ INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ), irqflags,
157
+ GIC_FDT_IRQ_TYPE_PPI,
158
+ INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ), irqflags,
159
+ GIC_FDT_IRQ_TYPE_PPI,
160
+ INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ), irqflags,
161
+ GIC_FDT_IRQ_TYPE_PPI,
162
+ INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags,
163
+ GIC_FDT_IRQ_TYPE_PPI,
164
+ INTID_TO_PPI(ARCH_TIMER_NS_EL2_VIRT_IRQ), irqflags);
165
+ } else {
166
+ qemu_fdt_setprop_cells(ms->fdt, "/timer", "interrupts",
167
+ GIC_FDT_IRQ_TYPE_PPI,
168
+ INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ), irqflags,
169
+ GIC_FDT_IRQ_TYPE_PPI,
170
+ INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ), irqflags,
171
+ GIC_FDT_IRQ_TYPE_PPI,
172
+ INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ), irqflags,
173
+ GIC_FDT_IRQ_TYPE_PPI,
174
+ INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags);
98
+ }
175
+ }
99
+
176
}
100
+static int8_t do_vqdmladh_b(int8_t a, int8_t b, int8_t c, int8_t d,
177
101
+ int round, bool *sat)
178
static void fdt_add_cpu_nodes(const VirtMachineState *vms)
102
+{
179
@@ -XXX,XX +XXX,XX @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
103
+ int64_t r = ((int64_t)a * b + (int64_t)c * d) * 2 + (round << 7);
180
[GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
104
+ return do_sat_bhw(r, INT16_MIN, INT16_MAX, sat) >> 8;
181
[GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ,
105
+}
182
[GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ,
106
+
183
+ [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ,
107
+static int16_t do_vqdmladh_h(int16_t a, int16_t b, int16_t c, int16_t d,
184
};
108
+ int round, bool *sat)
185
109
+{
186
for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
110
+ int64_t r = ((int64_t)a * b + (int64_t)c * d) * 2 + (round << 15);
187
@@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine)
111
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat) >> 16;
188
qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
112
+}
189
object_unref(cpuobj);
113
+
190
}
114
+static int32_t do_vqdmladh_w(int32_t a, int32_t b, int32_t c, int32_t d,
191
+
115
+ int round, bool *sat)
192
+ /* Now we've created the CPUs we can see if they have the hypvirt timer */
116
+{
193
+ vms->ns_el2_virt_timer_irq = ns_el2_virt_timer_present() &&
117
+ int64_t m1 = (int64_t)a * b;
194
+ !vmc->no_ns_el2_virt_timer_irq;
118
+ int64_t m2 = (int64_t)c * d;
195
+
119
+ int64_t r;
196
fdt_add_timer_nodes(vms);
197
fdt_add_cpu_nodes(vms);
198
199
@@ -XXX,XX +XXX,XX @@ DEFINE_VIRT_MACHINE_AS_LATEST(9, 0)
200
201
static void virt_machine_8_2_options(MachineClass *mc)
202
{
203
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
204
+
205
virt_machine_9_0_options(mc);
206
compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len);
120
+ /*
207
+ /*
121
+ * Architecturally we should do the entire add, double, round
208
+ * Don't expose NS_EL2_VIRT timer IRQ in DTB or ACPI on 8.2 and
122
+ * and then check for saturation. We do three saturating adds,
209
+ * earlier machines. (Exposing it tickles a bug in older EDK2
123
+ * but we need to be careful about the order. If the first
210
+ * guest BIOS binaries.)
124
+ * m1 + m2 saturates then it's impossible for the *2+rc to
125
+ * bring it back into the non-saturated range. However, if
126
+ * m1 + m2 is negative then it's possible that doing the doubling
127
+ * would take the intermediate result below INT64_MIN and the
128
+ * addition of the rounding constant then brings it back in range.
129
+ * So we add half the rounding constant before doubling rather
130
+ * than adding the rounding constant after the doubling.
131
+ */
211
+ */
132
+ if (sadd64_overflow(m1, m2, &r) ||
212
+ vmc->no_ns_el2_virt_timer_irq = true;
133
+ sadd64_overflow(r, (round << 30), &r) ||
213
}
134
+ sadd64_overflow(r, r, &r)) {
214
DEFINE_VIRT_MACHINE(8, 2)
135
+ *sat = true;
215
136
+ return r < 0 ? INT32_MAX : INT32_MIN;
137
+ }
138
+ return r >> 32;
139
+}
140
+
141
+DO_VQDMLADH_OP(vqdmladhb, 1, int8_t, 0, 0, do_vqdmladh_b)
142
+DO_VQDMLADH_OP(vqdmladhh, 2, int16_t, 0, 0, do_vqdmladh_h)
143
+DO_VQDMLADH_OP(vqdmladhw, 4, int32_t, 0, 0, do_vqdmladh_w)
144
+DO_VQDMLADH_OP(vqdmladhxb, 1, int8_t, 1, 0, do_vqdmladh_b)
145
+DO_VQDMLADH_OP(vqdmladhxh, 2, int16_t, 1, 0, do_vqdmladh_h)
146
+DO_VQDMLADH_OP(vqdmladhxw, 4, int32_t, 1, 0, do_vqdmladh_w)
147
+
148
+DO_VQDMLADH_OP(vqrdmladhb, 1, int8_t, 0, 1, do_vqdmladh_b)
149
+DO_VQDMLADH_OP(vqrdmladhh, 2, int16_t, 0, 1, do_vqdmladh_h)
150
+DO_VQDMLADH_OP(vqrdmladhw, 4, int32_t, 0, 1, do_vqdmladh_w)
151
+DO_VQDMLADH_OP(vqrdmladhxb, 1, int8_t, 1, 1, do_vqdmladh_b)
152
+DO_VQDMLADH_OP(vqrdmladhxh, 2, int16_t, 1, 1, do_vqdmladh_h)
153
+DO_VQDMLADH_OP(vqrdmladhxw, 4, int32_t, 1, 1, do_vqdmladh_w)
154
+
155
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
156
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
157
uint32_t rm) \
158
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
159
index XXXXXXX..XXXXXXX 100644
160
--- a/target/arm/translate-mve.c
161
+++ b/target/arm/translate-mve.c
162
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQSHL_S, vqshls)
163
DO_2OP(VQSHL_U, vqshlu)
164
DO_2OP(VQRSHL_S, vqrshls)
165
DO_2OP(VQRSHL_U, vqrshlu)
166
+DO_2OP(VQDMLADH, vqdmladh)
167
+DO_2OP(VQDMLADHX, vqdmladhx)
168
+DO_2OP(VQRDMLADH, vqrdmladh)
169
+DO_2OP(VQRDMLADHX, vqrdmladhx)
170
171
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
172
MVEGenTwoOpScalarFn fn)
173
--
216
--
174
2.20.1
217
2.34.1
175
176
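Note (illustration only, not part of either patch above): the ordering concern described in the do_vqdmladh_w comment can be reproduced with a small standalone program. The operand values are made up to land just inside the problematic range, and __builtin_add_overflow stands in for QEMU's sadd64_overflow; this sketches the round == 1 (VQRDMLADH) word case, where the rounding constant is 1 << 31.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    int64_t m1 = -(1LL << 62);  /* stand-in for the product a * b */
    int64_t m2 = -(1LL << 20);  /* stand-in for the product c * d */
    int64_t rc = 1LL << 31;     /* rounding constant for the word case */
    int64_t r;

    /* naive order: (m1 + m2) * 2 already overflows int64_t ... */
    bool naive_sat = __builtin_add_overflow(m1, m2, &r) ||
                     __builtin_add_overflow(r, r, &r) ||
                     __builtin_add_overflow(r, rc, &r);

    /* ... but adding half the rounding constant first keeps it in range */
    bool fixed_sat = __builtin_add_overflow(m1, m2, &r) ||
                     __builtin_add_overflow(r, rc / 2, &r) ||
                     __builtin_add_overflow(r, r, &r);

    /* prints "naive saturates: 1, reordered saturates: 0" */
    printf("naive saturates: %d, reordered saturates: %d\n",
           naive_sat, fixed_sat);
    return 0;
}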
1
Implement the MVE VRSHL insn (vector form).
1
Update the virt golden reference files to say that the FACP is ACPI
2
v6.3, and the GTDT table is a revision 3 table with space for the
3
virtual EL2 timer.
4
5
Diffs from iasl:
6
7
@@ -XXX,XX +XXX,XX @@
8
/*
9
* Intel ACPI Component Architecture
10
* AML/ASL+ Disassembler version 20200925 (64-bit version)
11
* Copyright (c) 2000 - 2020 Intel Corporation
12
*
13
- * Disassembly of tests/data/acpi/virt/FACP, Mon Jan 22 13:48:40 2024
14
+ * Disassembly of /tmp/aml-W8RZH2, Mon Jan 22 13:48:40 2024
15
*
16
* ACPI Data Table [FACP]
17
*
18
* Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
19
*/
20
21
[000h 0000 4] Signature : "FACP" [Fixed ACPI Description Table (FADT)]
22
[004h 0004 4] Table Length : 00000114
23
[008h 0008 1] Revision : 06
24
-[009h 0009 1] Checksum : 15
25
+[009h 0009 1] Checksum : 12
26
[00Ah 0010 6] Oem ID : "BOCHS "
27
[010h 0016 8] Oem Table ID : "BXPC "
28
[018h 0024 4] Oem Revision : 00000001
29
[01Ch 0028 4] Asl Compiler ID : "BXPC"
30
[020h 0032 4] Asl Compiler Revision : 00000001
31
32
[024h 0036 4] FACS Address : 00000000
33
[028h 0040 4] DSDT Address : 00000000
34
[02Ch 0044 1] Model : 00
35
[02Dh 0045 1] PM Profile : 00 [Unspecified]
36
[02Eh 0046 2] SCI Interrupt : 0000
37
[030h 0048 4] SMI Command Port : 00000000
38
[034h 0052 1] ACPI Enable Value : 00
39
[035h 0053 1] ACPI Disable Value : 00
40
[036h 0054 1] S4BIOS Command : 00
41
[037h 0055 1] P-State Control : 00
42
@@ -XXX,XX +XXX,XX @@
43
Use APIC Physical Destination Mode (V4) : 0
44
Hardware Reduced (V5) : 1
45
Low Power S0 Idle (V5) : 0
46
47
[074h 0116 12] Reset Register : [Generic Address Structure]
48
[074h 0116 1] Space ID : 00 [SystemMemory]
49
[075h 0117 1] Bit Width : 00
50
[076h 0118 1] Bit Offset : 00
51
[077h 0119 1] Encoded Access Width : 00 [Undefined/Legacy]
52
[078h 0120 8] Address : 0000000000000000
53
54
[080h 0128 1] Value to cause reset : 00
55
[081h 0129 2] ARM Flags (decoded below) : 0003
56
PSCI Compliant : 1
57
Must use HVC for PSCI : 1
58
59
-[083h 0131 1] FADT Minor Revision : 00
60
+[083h 0131 1] FADT Minor Revision : 03
61
[084h 0132 8] FACS Address : 0000000000000000
62
[08Ch 0140 8] DSDT Address : 0000000000000000
63
[094h 0148 12] PM1A Event Block : [Generic Address Structure]
64
[094h 0148 1] Space ID : 00 [SystemMemory]
65
[095h 0149 1] Bit Width : 00
66
[096h 0150 1] Bit Offset : 00
67
[097h 0151 1] Encoded Access Width : 00 [Undefined/Legacy]
68
[098h 0152 8] Address : 0000000000000000
69
70
[0A0h 0160 12] PM1B Event Block : [Generic Address Structure]
71
[0A0h 0160 1] Space ID : 00 [SystemMemory]
72
[0A1h 0161 1] Bit Width : 00
73
[0A2h 0162 1] Bit Offset : 00
74
[0A3h 0163 1] Encoded Access Width : 00 [Undefined/Legacy]
75
[0A4h 0164 8] Address : 0000000000000000
76
77
@@ -XXX,XX +XXX,XX @@
78
[0F5h 0245 1] Bit Width : 00
79
[0F6h 0246 1] Bit Offset : 00
80
[0F7h 0247 1] Encoded Access Width : 00 [Undefined/Legacy]
81
[0F8h 0248 8] Address : 0000000000000000
82
83
[100h 0256 12] Sleep Status Register : [Generic Address Structure]
84
[100h 0256 1] Space ID : 00 [SystemMemory]
85
[101h 0257 1] Bit Width : 00
86
[102h 0258 1] Bit Offset : 00
87
[103h 0259 1] Encoded Access Width : 00 [Undefined/Legacy]
88
[104h 0260 8] Address : 0000000000000000
89
90
[10Ch 0268 8] Hypervisor ID : 00000000554D4551
91
92
Raw Table Data: Length 276 (0x114)
93
94
- 0000: 46 41 43 50 14 01 00 00 06 15 42 4F 43 48 53 20 // FACP......BOCHS
95
+ 0000: 46 41 43 50 14 01 00 00 06 12 42 4F 43 48 53 20 // FACP......BOCHS
96
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC
97
0020: 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
98
0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
99
0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
100
0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
101
0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
102
0070: 00 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
103
- 0080: 00 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
104
+ 0080: 00 03 00 03 00 00 00 00 00 00 00 00 00 00 00 00 // ................
105
0090: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
106
00A0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
107
00B0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
108
00C0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
109
00D0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
110
00E0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
111
00F0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
112
0100: 00 00 00 00 00 00 00 00 00 00 00 00 51 45 4D 55 // ............QEMU
113
0110: 00 00 00 00 // ....
114
115
@@ -XXX,XX +XXX,XX @@
116
/*
117
* Intel ACPI Component Architecture
118
* AML/ASL+ Disassembler version 20200925 (64-bit version)
119
* Copyright (c) 2000 - 2020 Intel Corporation
120
*
121
- * Disassembly of tests/data/acpi/virt/GTDT, Mon Jan 22 13:48:40 2024
122
+ * Disassembly of /tmp/aml-XDSZH2, Mon Jan 22 13:48:40 2024
123
*
124
* ACPI Data Table [GTDT]
125
*
126
* Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
127
*/
128
129
[000h 0000 4] Signature : "GTDT" [Generic Timer Description Table]
130
-[004h 0004 4] Table Length : 00000060
131
-[008h 0008 1] Revision : 02
132
-[009h 0009 1] Checksum : 9C
133
+[004h 0004 4] Table Length : 00000068
134
+[008h 0008 1] Revision : 03
135
+[009h 0009 1] Checksum : 93
136
[00Ah 0010 6] Oem ID : "BOCHS "
137
[010h 0016 8] Oem Table ID : "BXPC "
138
[018h 0024 4] Oem Revision : 00000001
139
[01Ch 0028 4] Asl Compiler ID : "BXPC"
140
[020h 0032 4] Asl Compiler Revision : 00000001
141
142
[024h 0036 8] Counter Block Address : FFFFFFFFFFFFFFFF
143
[02Ch 0044 4] Reserved : 00000000
144
145
[030h 0048 4] Secure EL1 Interrupt : 0000001D
146
[034h 0052 4] EL1 Flags (decoded below) : 00000000
147
Trigger Mode : 0
148
Polarity : 0
149
Always On : 0
150
151
[038h 0056 4] Non-Secure EL1 Interrupt : 0000001E
152
@@ -XXX,XX +XXX,XX @@
153
154
[040h 0064 4] Virtual Timer Interrupt : 0000001B
155
[044h 0068 4] VT Flags (decoded below) : 00000000
156
Trigger Mode : 0
157
Polarity : 0
158
Always On : 0
159
160
[048h 0072 4] Non-Secure EL2 Interrupt : 0000001A
161
[04Ch 0076 4] NEL2 Flags (decoded below) : 00000000
162
Trigger Mode : 0
163
Polarity : 0
164
Always On : 0
165
[050h 0080 8] Counter Read Block Address : FFFFFFFFFFFFFFFF
166
167
[058h 0088 4] Platform Timer Count : 00000000
168
[05Ch 0092 4] Platform Timer Offset : 00000000
169
+[060h 0096 4] Virtual EL2 Timer GSIV : 00000000
170
+[064h 0100 4] Virtual EL2 Timer Flags : 00000000
171
172
-Raw Table Data: Length 96 (0x60)
173
+Raw Table Data: Length 104 (0x68)
174
175
- 0000: 47 54 44 54 60 00 00 00 02 9C 42 4F 43 48 53 20 // GTDT`.....BOCHS
176
+ 0000: 47 54 44 54 68 00 00 00 03 93 42 4F 43 48 53 20 // GTDTh.....BOCHS
177
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC
178
0020: 01 00 00 00 FF FF FF FF FF FF FF FF 00 00 00 00 // ................
179
0030: 1D 00 00 00 00 00 00 00 1E 00 00 00 04 00 00 00 // ................
180
0040: 1B 00 00 00 00 00 00 00 1A 00 00 00 00 00 00 00 // ................
181
0050: FF FF FF FF FF FF FF FF 00 00 00 00 00 00 00 00 // ................
182
+ 0060: 00 00 00 00 00 00 00 00 // ........
2
183
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
184
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
185
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
5
Message-id: 20210617121628.20116-36-peter.maydell@linaro.org
186
Message-id: 20240122143537.233498-4-peter.maydell@linaro.org
6
---
187
---
7
target/arm/helper-mve.h | 8 ++++++++
188
tests/qtest/bios-tables-test-allowed-diff.h | 2 --
8
target/arm/mve.decode | 3 +++
189
tests/data/acpi/virt/FACP | Bin 276 -> 276 bytes
9
target/arm/mve_helper.c | 4 ++++
190
tests/data/acpi/virt/GTDT | Bin 96 -> 104 bytes
10
target/arm/translate-mve.c | 2 ++
191
3 files changed, 2 deletions(-)
11
4 files changed, 17 insertions(+)
192
12
193
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
194
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
195
--- a/tests/qtest/bios-tables-test-allowed-diff.h
16
+++ b/target/arm/helper-mve.h
196
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
197
@@ -1,3 +1 @@
18
DEF_HELPER_FLAGS_4(mve_vshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
198
/* List of comma-separated changed AML files to ignore */
19
DEF_HELPER_FLAGS_4(mve_vshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
199
-"tests/data/acpi/virt/FACP",
20
200
-"tests/data/acpi/virt/GTDT",
21
+DEF_HELPER_FLAGS_4(mve_vrshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
201
diff --git a/tests/data/acpi/virt/FACP b/tests/data/acpi/virt/FACP
22
+DEF_HELPER_FLAGS_4(mve_vrshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
+DEF_HELPER_FLAGS_4(mve_vrshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+
25
+DEF_HELPER_FLAGS_4(mve_vrshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vrshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vrshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+
29
DEF_HELPER_FLAGS_4(mve_vqshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
30
DEF_HELPER_FLAGS_4(mve_vqshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
DEF_HELPER_FLAGS_4(mve_vqshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
33
index XXXXXXX..XXXXXXX 100644
202
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/mve.decode
203
GIT binary patch
35
+++ b/target/arm/mve.decode
204
delta 25
36
@@ -XXX,XX +XXX,XX @@ VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
205
gcmbQjG=+)F&CxkPgpq-PO=u!l<;2F$$vli407<0<)c^nh
37
VSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
206
38
VSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
207
delta 28
39
208
kcmbQjG=+)F&CxkPgpq-PO>`nx<-|!<6Akz$^DuG%0AAS!ssI20
40
+VRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
209
41
+VRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
210
diff --git a/tests/data/acpi/virt/GTDT b/tests/data/acpi/virt/GTDT
42
+
43
VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
44
VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
45
46
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
47
index XXXXXXX..XXXXXXX 100644
211
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/mve_helper.c
212
GIT binary patch
49
+++ b/target/arm/mve_helper.c
213
delta 25
50
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhsubu, do_vhsub_u)
214
bcmYeu;BpUf3CUn!U|^m+kt>V?$N&QXMtB4L
51
215
52
#define DO_VSHLS(N, M) do_sqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
216
delta 16
53
#define DO_VSHLU(N, M) do_uqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
217
Xcmc~u;BpUf2}xjJU|^avkt+-UB60)u
54
+#define DO_VRSHLS(N, M) do_sqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, true, NULL)
218
55
+#define DO_VRSHLU(N, M) do_uqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, true, NULL)
56
57
DO_2OP_S(vshls, DO_VSHLS)
58
DO_2OP_U(vshlu, DO_VSHLU)
59
+DO_2OP_S(vrshls, DO_VRSHLS)
60
+DO_2OP_U(vrshlu, DO_VRSHLU)
61
62
static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
63
{
64
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/arm/translate-mve.c
67
+++ b/target/arm/translate-mve.c
68
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQSUB_S, vqsubs)
69
DO_2OP(VQSUB_U, vqsubu)
70
DO_2OP(VSHL_S, vshls)
71
DO_2OP(VSHL_U, vshlu)
72
+DO_2OP(VRSHL_S, vrshls)
73
+DO_2OP(VRSHL_U, vrshlu)
74
DO_2OP(VQSHL_S, vqshls)
75
DO_2OP(VQSHL_U, vqshlu)
76
DO_2OP(VQRSHL_S, vqrshls)
77
--
219
--
78
2.20.1
220
2.34.1
79
80
1
Implement the MVE VSHL insn (vector form).
1
The patchset adding the GMAC ethernet to this SoC crossed in the
2
mail with the patchset cleaning up the NIC handling. When we
3
create the GMAC modules we must call qemu_configure_nic_device()
4
so that the user has the opportunity to use the -nic commandline
5
option to create a network backend and connect it to the GMACs.
2
6
7
Add the missing call.
8
9
Fixes: 21e5326a7c ("hw/arm: Add GMAC devices to NPCM7XX SoC")
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
5
Message-id: 20210617121628.20116-35-peter.maydell@linaro.org
12
Message-id: 20240206171231.396392-2-peter.maydell@linaro.org
6
---
13
---
7
target/arm/helper-mve.h | 8 ++++++++
14
hw/arm/npcm7xx.c | 1 +
8
target/arm/mve.decode | 3 +++
15
1 file changed, 1 insertion(+)
9
target/arm/mve_helper.c | 6 ++++++
10
target/arm/translate-mve.c | 2 ++
11
4 files changed, 19 insertions(+)
12
16
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
17
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
19
--- a/hw/arm/npcm7xx.c
16
+++ b/target/arm/helper-mve.h
20
+++ b/hw/arm/npcm7xx.c
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
@@ -XXX,XX +XXX,XX @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
18
DEF_HELPER_FLAGS_4(mve_vqsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
19
DEF_HELPER_FLAGS_4(mve_vqsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]);
20
24
21
+DEF_HELPER_FLAGS_4(mve_vshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+ qemu_configure_nic_device(DEVICE(sbd), false, NULL);
22
+DEF_HELPER_FLAGS_4(mve_vshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
/*
23
+DEF_HELPER_FLAGS_4(mve_vshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
* The device exists regardless of whether it's connected to a QEMU
24
+
28
* netdev backend. So always instantiate it even if there is no
25
+DEF_HELPER_FLAGS_4(mve_vshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+
29
DEF_HELPER_FLAGS_4(mve_vqshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
30
DEF_HELPER_FLAGS_4(mve_vqshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
DEF_HELPER_FLAGS_4(mve_vqshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/mve.decode
35
+++ b/target/arm/mve.decode
36
@@ -XXX,XX +XXX,XX @@ VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
37
VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
38
VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
39
40
+VSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
41
+VSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
42
+
43
VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
44
VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
45
46
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/mve_helper.c
49
+++ b/target/arm/mve_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhaddu, do_vhadd_u)
51
DO_2OP_S(vhsubs, do_vhsub_s)
52
DO_2OP_U(vhsubu, do_vhsub_u)
53
54
+#define DO_VSHLS(N, M) do_sqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
55
+#define DO_VSHLU(N, M) do_uqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
56
+
57
+DO_2OP_S(vshls, DO_VSHLS)
58
+DO_2OP_U(vshlu, DO_VSHLU)
59
+
60
static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
61
{
62
if (val > max) {
63
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/translate-mve.c
66
+++ b/target/arm/translate-mve.c
67
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQADD_S, vqadds)
68
DO_2OP(VQADD_U, vqaddu)
69
DO_2OP(VQSUB_S, vqsubs)
70
DO_2OP(VQSUB_U, vqsubu)
71
+DO_2OP(VSHL_S, vshls)
72
+DO_2OP(VSHL_U, vshlu)
73
DO_2OP(VQSHL_S, vqshls)
74
DO_2OP(VQSHL_U, vqshlu)
75
DO_2OP(VQRSHL_S, vqrshls)
76
--
29
--
77
2.20.1
30
2.34.1
78
79
1
Implement the MVE VQRSHL (vector) insn. Again, the code to perform
1
Currently QEMU will warn if there is a NIC on the board that
2
the actual shifts is borrowed from neon_helper.c.
2
is not connected to a backend. By default the '-nic user' will
3
get used for all NICs, but if you manually connect a specific
4
NIC to a specific backend, then the other NICs on the board
5
have no backend and will be warned about:
6
7
qemu-system-arm: warning: nic npcm7xx-emc.1 has no peer
8
qemu-system-arm: warning: nic npcm-gmac.0 has no peer
9
qemu-system-arm: warning: nic npcm-gmac.1 has no peer
10
11
So suppress those warnings by manually connecting every NIC
12
on the board to some backend.
3
13
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
6
Message-id: 20210617121628.20116-34-peter.maydell@linaro.org
16
Reviewed-by: Thomas Huth <thuth@redhat.com>
17
Message-id: 20240206171231.396392-3-peter.maydell@linaro.org
7
---
18
---
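Note (sketch only, not the neon_helper.c code these patches reuse): the rounding, saturating shift that the VQRSHL helpers apply to each lane can be written standalone roughly as below, assuming an 8-bit lane and a shift count already limited to the range -8..7. A negative count shifts right and rounds to nearest; a positive count shifts left and saturates.

#include <stdint.h>
#include <stdbool.h>

static int8_t qrshl_s8(int8_t val, int shift, bool *sat)
{
    int64_t v = val;

    if (shift < 0) {
        int rshift = -shift;
        /* round to nearest: add half the weight of the last bit shifted out */
        v = (v + (1LL << (rshift - 1))) >> rshift;
    } else {
        v <<= shift;
    }
    if (v > INT8_MAX) {
        *sat = true;
        return INT8_MAX;
    }
    if (v < INT8_MIN) {
        *sat = true;
        return INT8_MIN;
    }
    return (int8_t)v;
}

For example, a count of -1 applied to 3 gives 2 here, where the non-rounding VQSHL form of the shift would give 1.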
8
target/arm/helper-mve.h | 8 ++++++++
19
tests/qtest/npcm7xx_emc-test.c | 5 ++++-
9
target/arm/mve.decode | 3 +++
20
1 file changed, 4 insertions(+), 1 deletion(-)
10
target/arm/mve_helper.c | 6 ++++++
11
target/arm/translate-mve.c | 2 ++
12
4 files changed, 19 insertions(+)
13
21
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
22
diff --git a/tests/qtest/npcm7xx_emc-test.c b/tests/qtest/npcm7xx_emc-test.c
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
24
--- a/tests/qtest/npcm7xx_emc-test.c
17
+++ b/target/arm/helper-mve.h
25
+++ b/tests/qtest/npcm7xx_emc-test.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
@@ -XXX,XX +XXX,XX @@ static int *packet_test_init(int module_num, GString *cmd_line)
19
DEF_HELPER_FLAGS_4(mve_vqshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
* KISS and use -nic. The driver accepts 'emc0' and 'emc1' as aliases
20
DEF_HELPER_FLAGS_4(mve_vqshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
* in the 'model' field to specify the device to match.
21
29
*/
22
+DEF_HELPER_FLAGS_4(mve_vqrshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
30
- g_string_append_printf(cmd_line, " -nic socket,fd=%d,model=emc%d ",
23
+DEF_HELPER_FLAGS_4(mve_vqrshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
+ g_string_append_printf(cmd_line, " -nic socket,fd=%d,model=emc%d "
24
+DEF_HELPER_FLAGS_4(mve_vqrshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
+ "-nic user,model=npcm7xx-emc "
25
+
33
+ "-nic user,model=npcm-gmac "
26
+DEF_HELPER_FLAGS_4(mve_vqrshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+ "-nic user,model=npcm-gmac",
27
+DEF_HELPER_FLAGS_4(mve_vqrshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
35
test_sockets[1], module_num);
28
+DEF_HELPER_FLAGS_4(mve_vqrshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
29
+
37
g_test_queue_destroy(packet_test_clear, test_sockets);
30
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/mve.decode
36
+++ b/target/arm/mve.decode
37
@@ -XXX,XX +XXX,XX @@ VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
38
VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
39
VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
40
41
+VQRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
42
+VQRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
43
+
44
# Vector miscellaneous
45
46
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
47
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/mve_helper.c
50
+++ b/target/arm/mve_helper.c
51
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W)
52
WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, false, satp)
53
#define DO_UQSHL_OP(N, M, satp) \
54
WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, false, satp)
55
+#define DO_SQRSHL_OP(N, M, satp) \
56
+ WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, true, satp)
57
+#define DO_UQRSHL_OP(N, M, satp) \
58
+ WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, true, satp)
59
60
DO_2OP_SAT_S(vqshls, DO_SQSHL_OP)
61
DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP)
62
+DO_2OP_SAT_S(vqrshls, DO_SQRSHL_OP)
63
+DO_2OP_SAT_U(vqrshlu, DO_UQRSHL_OP)
64
65
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
66
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
67
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/arm/translate-mve.c
70
+++ b/target/arm/translate-mve.c
71
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQSUB_S, vqsubs)
72
DO_2OP(VQSUB_U, vqsubu)
73
DO_2OP(VQSHL_S, vqshls)
74
DO_2OP(VQSHL_U, vqshlu)
75
+DO_2OP(VQRSHL_S, vqrshls)
76
+DO_2OP(VQRSHL_U, vqrshlu)
77
78
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
79
MVEGenTwoOpScalarFn fn)
80
--
38
--
81
2.20.1
39
2.34.1
82
83
1
In a CPU with MVE, the VMOV (vector lane to general-purpose register)
1
It doesn't make sense to read the value of MDCR_EL2 on a non-A-profile
2
and VMOV (general-purpose register to vector lane) insns are not
2
CPU, and in fact if you try to do it we will assert:
3
predicated, but they are subject to beatwise execution if they
4
are not in an IT block.
5
3
6
Since our implementation always executes all 4 beats in one tick,
4
#6 0x00007ffff4b95e96 in __GI___assert_fail
7
this means only that we need to handle PSR.ECI:
5
(assertion=0x5555565a8c70 "!arm_feature(env, ARM_FEATURE_M)", file=0x5555565a6e5c "../../target/arm/helper.c", line=12600, function=0x5555565a9560 <__PRETTY_FUNCTION__.0> "arm_security_space_below_el3") at ./assert/assert.c:101
8
* we must do the usual check for bad ECI state
6
#7 0x0000555555ebf412 in arm_security_space_below_el3 (env=0x555557bc8190) at ../../target/arm/helper.c:12600
9
* we must advance ECI state if the insn succeeds
7
#8 0x0000555555ea6f89 in arm_is_el2_enabled (env=0x555557bc8190) at ../../target/arm/cpu.h:2595
10
* if ECI says we should not be executing the beat corresponding
8
#9 0x0000555555ea942f in arm_mdcr_el2_eff (env=0x555557bc8190) at ../../target/arm/internals.h:1512
11
to the lane of the vector register being accessed then we
12
should skip performing the move
13
9
14
Note that if PSR.ECI is non-zero then we cannot be in an IT block.
10
We might call pmu_counter_enabled() on an M-profile CPU (for example
11
from the migration pre/post hooks in machine.c); this should always
12
return false because these CPUs don't set ARM_FEATURE_PMU.
15
13
14
Avoid the assertion by not calling arm_mdcr_el2_eff() before we
15
have done the early return for "PMU not present".
16
17
This fixes an assertion failure if you try to do a loadvm or
18
savevm for an M-profile board.
19
20
Cc: qemu-stable@nongnu.org
21
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2155
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Message-id: 20210617121628.20116-45-peter.maydell@linaro.org
25
Message-id: 20240208153346.970021-1-peter.maydell@linaro.org
19
---
26
---
20
target/arm/translate-a32.h | 2 +
27
target/arm/helper.c | 12 ++++++++++--
21
target/arm/translate-mve.c | 4 +-
28
1 file changed, 10 insertions(+), 2 deletions(-)
22
target/arm/translate-vfp.c | 77 +++++++++++++++++++++++++++++++++++---
23
3 files changed, 75 insertions(+), 8 deletions(-)
24
29
25
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
30
diff --git a/target/arm/helper.c b/target/arm/helper.c
26
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-a32.h
32
--- a/target/arm/helper.c
28
+++ b/target/arm/translate-a32.h
33
+++ b/target/arm/helper.c
29
@@ -XXX,XX +XXX,XX @@ long neon_full_reg_offset(unsigned reg);
34
@@ -XXX,XX +XXX,XX @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
30
long neon_element_offset(int reg, int element, MemOp memop);
35
bool enabled, prohibited = false, filtered;
31
void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
36
bool secure = arm_is_secure(env);
32
void clear_eci_state(DisasContext *s);
37
int el = arm_current_el(env);
33
+bool mve_eci_check(DisasContext *s);
38
- uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
34
+void mve_update_and_store_eci(DisasContext *s);
39
- uint8_t hpmn = mdcr_el2 & MDCR_HPMN;
35
40
+ uint64_t mdcr_el2;
36
static inline TCGv_i32 load_cpu_offset(int offset)
41
+ uint8_t hpmn;
37
{
42
38
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate-mve.c
41
+++ b/target/arm/translate-mve.c
42
@@ -XXX,XX +XXX,XX @@ static bool mve_check_qreg_bank(DisasContext *s, int qmask)
43
return qmask < 8;
44
}
45
46
-static bool mve_eci_check(DisasContext *s)
47
+bool mve_eci_check(DisasContext *s)
48
{
49
/*
50
* This is a beatwise insn: check that ECI is valid (not a
51
@@ -XXX,XX +XXX,XX @@ static void mve_update_eci(DisasContext *s)
52
}
53
}
54
55
-static void mve_update_and_store_eci(DisasContext *s)
56
+void mve_update_and_store_eci(DisasContext *s)
57
{
58
/*
59
* For insns which don't call a helper function that will call
60
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/translate-vfp.c
63
+++ b/target/arm/translate-vfp.c
64
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
65
return true;
66
}
67
68
+static bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
69
+{
70
+ /*
43
+ /*
71
+ * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
44
+ * We might be called for M-profile cores where MDCR_EL2 doesn't
72
+ * and VMOV (general-purpose register to vector lane) insns are not
45
+ * exist and arm_mdcr_el2_eff() will assert, so this early-exit check
73
+ * predicated, but they are subject to beatwise execution if they are
46
+ * must be before we read that value.
74
+ * not in an IT block.
75
+ *
76
+ * Since our implementation always executes all 4 beats in one tick,
77
+ * this means only that if PSR.ECI says we should not be executing
78
+ * the beat corresponding to the lane of the vector register being
79
+ * accessed then we should skip performing the move, and that we need
80
+ * to do the usual check for bad ECI state and advance of ECI state.
81
+ *
82
+ * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
83
+ *
84
+ * Return true if this VMOV scalar <-> gpreg should be skipped because
85
+ * the MVE PSR.ECI state says we skip the beat where the store happens.
86
+ */
47
+ */
87
+
48
if (!arm_feature(env, ARM_FEATURE_PMU)) {
88
+ /* Calculate the byte offset into Qn which we're going to access */
89
+ int ofs = (index << size) + ((vn & 1) * 8);
90
+
91
+ if (!dc_isar_feature(aa32_mve, s)) {
92
+ return false;
93
+ }
94
+
95
+ switch (s->eci) {
96
+ case ECI_NONE:
97
+ return false;
98
+ case ECI_A0:
99
+ return ofs < 4;
100
+ case ECI_A0A1:
101
+ return ofs < 8;
102
+ case ECI_A0A1A2:
103
+ case ECI_A0A1A2B0:
104
+ return ofs < 12;
105
+ default:
106
+ g_assert_not_reached();
107
+ }
108
+}
109
+
110
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
111
{
112
/* VMOV scalar to general purpose register */
113
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
114
return false;
49
return false;
115
}
50
}
116
51
117
+ if (dc_isar_feature(aa32_mve, s)) {
52
+ mdcr_el2 = arm_mdcr_el2_eff(env);
118
+ if (!mve_eci_check(s)) {
53
+ hpmn = mdcr_el2 & MDCR_HPMN;
119
+ return true;
120
+ }
121
+ }
122
+
54
+
123
if (!vfp_access_check(s)) {
55
if (!arm_feature(env, ARM_FEATURE_EL2) ||
124
return true;
56
(counter < hpmn || counter == 31)) {
125
}
57
e = env->cp15.c9_pmcr & PMCRE;
126
127
- tmp = tcg_temp_new_i32();
128
- read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
129
- store_reg(s, a->rt, tmp);
130
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
131
+ tmp = tcg_temp_new_i32();
132
+ read_neon_element32(tmp, a->vn, a->index,
133
+ a->size | (a->u ? 0 : MO_SIGN));
134
+ store_reg(s, a->rt, tmp);
135
+ }
136
137
+ if (dc_isar_feature(aa32_mve, s)) {
138
+ mve_update_and_store_eci(s);
139
+ }
140
return true;
141
}
142
143
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
144
return false;
145
}
146
147
+ if (dc_isar_feature(aa32_mve, s)) {
148
+ if (!mve_eci_check(s)) {
149
+ return true;
150
+ }
151
+ }
152
+
153
if (!vfp_access_check(s)) {
154
return true;
155
}
156
157
- tmp = load_reg(s, a->rt);
158
- write_neon_element32(tmp, a->vn, a->index, a->size);
159
- tcg_temp_free_i32(tmp);
160
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
161
+ tmp = load_reg(s, a->rt);
162
+ write_neon_element32(tmp, a->vn, a->index, a->size);
163
+ tcg_temp_free_i32(tmp);
164
+ }
165
166
+ if (dc_isar_feature(aa32_mve, s)) {
167
+ mve_update_and_store_eci(s);
168
+ }
169
return true;
170
}
171
172
--
58
--
173
2.20.1
59
2.34.1
174
60
175
61
1
In the code for handling VFP system register accesses there is some
1
From: Nabih Estefan <nabihestefan@google.com>
2
stray whitespace after a unary '-' operator, and also some incorrect
3
indent in a couple of function prototypes. We're about to move this
4
code to another file, so fix the code style issues first so
5
checkpatch doesn't complain about the code-movement patch.
6
2
7
Cc: qemu-stable@nongnu.org
3
Fix the npcm_gmac-test.c file to run on a Nuvoton 7xx machine instead
4
of 8xx. Also fix comments referencing this and values expecting 8xx.
5
6
Change-Id: Iabd0fba14910c3f1e883c4a9521350f3db9ffab8
7
Signed-off-by: Nabih Estefan <nabihestefan@google.com>
8
Reviewed-by: Tyrone Ting <kfting@nuvoton.com>
9
Message-id: 20240208194759.2858582-2-nabihestefan@google.com
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
[PMM: commit message tweaks]
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20210618141019.10671-2-peter.maydell@linaro.org
11
---
13
---
12
target/arm/translate-vfp.c | 11 +++++------
14
tests/qtest/npcm_gmac-test.c | 84 +-----------------------------------
13
1 file changed, 5 insertions(+), 6 deletions(-)
15
tests/qtest/meson.build | 3 +-
16
2 files changed, 4 insertions(+), 83 deletions(-)
14
17
15
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
18
diff --git a/tests/qtest/npcm_gmac-test.c b/tests/qtest/npcm_gmac-test.c
16
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-vfp.c
20
--- a/tests/qtest/npcm_gmac-test.c
18
+++ b/target/arm/translate-vfp.c
21
+++ b/tests/qtest/npcm_gmac-test.c
19
@@ -XXX,XX +XXX,XX @@ static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
22
@@ -XXX,XX +XXX,XX @@ typedef struct TestData {
23
const GMACModule *module;
24
} TestData;
25
26
-/* Values extracted from hw/arm/npcm8xx.c */
27
+/* Values extracted from hw/arm/npcm7xx.c */
28
static const GMACModule gmac_module_list[] = {
29
{
30
.irq = 14,
31
@@ -XXX,XX +XXX,XX @@ static const GMACModule gmac_module_list[] = {
32
.irq = 15,
33
.base_addr = 0xf0804000
34
},
35
- {
36
- .irq = 16,
37
- .base_addr = 0xf0806000
38
- },
39
- {
40
- .irq = 17,
41
- .base_addr = 0xf0808000
42
- }
43
};
44
45
/* Returns the index of the GMAC module. */
46
@@ -XXX,XX +XXX,XX @@ static uint32_t gmac_read(QTestState *qts, const GMACModule *mod,
47
return qtest_readl(qts, mod->base_addr + regno);
20
}
48
}
21
49
22
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
50
-static uint16_t pcs_read(QTestState *qts, const GMACModule *mod,
51
- NPCMRegister regno)
52
-{
53
- uint32_t write_value = (regno & 0x3ffe00) >> 9;
54
- qtest_writel(qts, PCS_BASE_ADDRESS + NPCM_PCS_IND_AC_BA, write_value);
55
- uint32_t read_offset = regno & 0x1ff;
56
- return qtest_readl(qts, PCS_BASE_ADDRESS + read_offset);
57
-}
23
-
58
-
24
fp_sysreg_loadfn *loadfn,
59
/* Check that GMAC registers are reset to default value */
25
- void *opaque)
60
static void test_init(gconstpointer test_data)
26
+ void *opaque)
27
{
61
{
28
/* Do a write to an M-profile floating point system register */
62
const TestData *td = test_data;
29
TCGv_i32 tmp;
63
const GMACModule *mod = td->module;
30
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
64
- QTestState *qts = qtest_init("-machine npcm845-evb");
65
+ QTestState *qts = qtest_init("-machine npcm750-evb");
66
67
#define CHECK_REG32(regno, value) \
68
do { \
69
g_assert_cmphex(gmac_read(qts, mod, (regno)), ==, (value)); \
70
} while (0)
71
72
-#define CHECK_REG_PCS(regno, value) \
73
- do { \
74
- g_assert_cmphex(pcs_read(qts, mod, (regno)), ==, (value)); \
75
- } while (0)
76
-
77
CHECK_REG32(NPCM_DMA_BUS_MODE, 0x00020100);
78
CHECK_REG32(NPCM_DMA_XMT_POLL_DEMAND, 0);
79
CHECK_REG32(NPCM_DMA_RCV_POLL_DEMAND, 0);
80
@@ -XXX,XX +XXX,XX @@ static void test_init(gconstpointer test_data)
81
CHECK_REG32(NPCM_GMAC_PTP_TAR, 0);
82
CHECK_REG32(NPCM_GMAC_PTP_TTSR, 0);
83
84
- /* TODO Add registers PCS */
85
- if (mod->base_addr == 0xf0802000) {
86
- CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID1, 0x699e);
87
- CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID2, 0);
88
- CHECK_REG_PCS(NPCM_PCS_SR_CTL_STS, 0x8000);
89
-
90
- CHECK_REG_PCS(NPCM_PCS_SR_MII_CTRL, 0x1140);
91
- CHECK_REG_PCS(NPCM_PCS_SR_MII_STS, 0x0109);
92
- CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID1, 0x699e);
93
- CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID2, 0x0ced0);
94
- CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_ADV, 0x0020);
95
- CHECK_REG_PCS(NPCM_PCS_SR_MII_LP_BABL, 0);
96
- CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_EXPN, 0);
97
- CHECK_REG_PCS(NPCM_PCS_SR_MII_EXT_STS, 0xc000);
98
-
99
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_ABL, 0x0003);
100
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR, 0x0038);
101
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_UPR, 0);
102
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR, 0x0038);
103
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_UPR, 0);
104
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR, 0x0058);
105
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_UPR, 0);
106
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR, 0x0048);
107
- CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_UPR, 0);
108
-
109
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MMD_DIG_CTRL1, 0x2400);
110
- CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_CTRL, 0);
111
- CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_INTR_STS, 0x000a);
112
- CHECK_REG_PCS(NPCM_PCS_VR_MII_TC, 0);
113
- CHECK_REG_PCS(NPCM_PCS_VR_MII_DBG_CTRL, 0);
114
- CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL0, 0x899c);
115
- CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_TXTIMER, 0);
116
- CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_RXTIMER, 0);
117
- CHECK_REG_PCS(NPCM_PCS_VR_MII_LINK_TIMER_CTRL, 0);
118
- CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL1, 0);
119
- CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_STS, 0x0010);
120
- CHECK_REG_PCS(NPCM_PCS_VR_MII_ICG_ERRCNT1, 0);
121
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MISC_STS, 0);
122
- CHECK_REG_PCS(NPCM_PCS_VR_MII_RX_LSTS, 0);
123
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_BSTCTRL0, 0x00a);
124
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_LVLCTRL0, 0x007f);
125
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL0, 0x0001);
126
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL1, 0);
127
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_STS, 0);
128
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL0, 0x0100);
129
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL1, 0x1100);
130
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0, 0x000e);
131
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL0, 0x0100);
132
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL1, 0x0032);
133
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_STS, 0x0001);
134
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL2, 0);
135
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_LVL_CTRL, 0x0019);
136
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL0, 0);
137
- CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL1, 0);
138
- CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_CTRL2, 0);
139
- CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_ERRCNT_SEL, 0);
140
- }
141
-
142
qtest_quit(qts);
31
}
143
}
32
144
33
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
145
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
34
- fp_sysreg_storefn *storefn,
146
index XXXXXXX..XXXXXXX 100644
35
- void *opaque)
147
--- a/tests/qtest/meson.build
36
+ fp_sysreg_storefn *storefn,
148
+++ b/tests/qtest/meson.build
37
+ void *opaque)
149
@@ -XXX,XX +XXX,XX @@ qtests_npcm7xx = \
38
{
150
'npcm7xx_sdhci-test',
39
/* Do a read from an M-profile floating point system register */
151
'npcm7xx_smbus-test',
40
TCGv_i32 tmp;
152
'npcm7xx_timer-test',
41
@@ -XXX,XX +XXX,XX @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
153
- 'npcm7xx_watchdog_timer-test'] + \
42
TCGv_i32 addr;
154
+ 'npcm7xx_watchdog_timer-test',
43
155
+ 'npcm_gmac-test'] + \
44
if (!a->a) {
156
(slirp.found() ? ['npcm7xx_emc-test'] : [])
45
- offset = - offset;
157
qtests_aspeed = \
46
+ offset = -offset;
158
['aspeed_hace-test',
47
}
48
49
addr = load_reg(s, a->rn);
50
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
51
TCGv_i32 value = tcg_temp_new_i32();
52
53
if (!a->a) {
54
- offset = - offset;
55
+ offset = -offset;
56
}
57
58
addr = load_reg(s, a->rn);
59
--
159
--
60
2.20.1
160
2.34.1
61
62
1
Implement the scalar forms of the MVE VSUB and VMUL insns.
1
From: Luc Michel <luc.michel@amd.com>
2
2
3
An access fault is raised when the Access Flag is not set in the
4
looked-up PTE and the AFFD field is not set in the corresponding context
5
descriptor. This was already implemented for stage 2. Implement it for
6
stage 1 as well.
7
8
Signed-off-by: Luc Michel <luc.michel@amd.com>
9
Reviewed-by: Mostafa Saleh <smostafa@google.com>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Tested-by: Mostafa Saleh <smostafa@google.com>
12
Message-id: 20240213082211.3330400-1-luc.michel@amd.com
13
[PMM: tweaked comment text]
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-24-peter.maydell@linaro.org
6
---
15
---
7
target/arm/helper-mve.h | 8 ++++++++
16
hw/arm/smmuv3-internal.h | 1 +
8
target/arm/mve.decode | 2 ++
17
include/hw/arm/smmu-common.h | 1 +
9
target/arm/mve_helper.c | 2 ++
18
hw/arm/smmu-common.c | 11 +++++++++++
10
target/arm/translate-mve.c | 2 ++
19
hw/arm/smmuv3.c | 1 +
11
4 files changed, 14 insertions(+)
20
4 files changed, 14 insertions(+)
12
21
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
22
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
14
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
24
--- a/hw/arm/smmuv3-internal.h
16
+++ b/target/arm/helper-mve.h
25
+++ b/hw/arm/smmuv3-internal.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
26
@@ -XXX,XX +XXX,XX @@ static inline int pa_range(STE *ste)
18
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
#define CD_EPD(x, sel) extract32((x)->word[0], (16 * (sel)) + 14, 1)
19
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
#define CD_ENDI(x) extract32((x)->word[0], 15, 1)
20
29
#define CD_IPS(x) extract32((x)->word[1], 0 , 3)
21
+DEF_HELPER_FLAGS_4(mve_vsub_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
+#define CD_AFFD(x) extract32((x)->word[1], 3 , 1)
22
+DEF_HELPER_FLAGS_4(mve_vsub_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
#define CD_TBI(x) extract32((x)->word[1], 6 , 2)
23
+DEF_HELPER_FLAGS_4(mve_vsub_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
#define CD_HD(x) extract32((x)->word[1], 10 , 1)
33
#define CD_HA(x) extract32((x)->word[1], 11 , 1)
34
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/hw/arm/smmu-common.h
37
+++ b/include/hw/arm/smmu-common.h
38
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUTransCfg {
39
bool disabled; /* smmu is disabled */
40
bool bypassed; /* translation is bypassed */
41
bool aborted; /* translation is aborted */
42
+ bool affd; /* AF fault disable */
43
uint32_t iotlb_hits; /* counts IOTLB hits */
44
uint32_t iotlb_misses; /* counts IOTLB misses*/
45
/* Used by stage-1 only. */
46
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/hw/arm/smmu-common.c
49
+++ b/hw/arm/smmu-common.c
50
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
51
pte_addr, pte, iova, gpa,
52
block_size >> 20);
53
}
24
+
54
+
25
+DEF_HELPER_FLAGS_4(mve_vmul_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
55
+ /*
26
+DEF_HELPER_FLAGS_4(mve_vmul_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
56
+ * QEMU does not currently implement HTTU, so if AFFD and PTE.AF
27
+DEF_HELPER_FLAGS_4(mve_vmul_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
57
+ * are 0 we take an Access flag fault. (5.4. Context Descriptor)
58
+ * An Access flag fault takes priority over a Permission fault.
59
+ */
60
+ if (!PTE_AF(pte) && !cfg->affd) {
61
+ info->type = SMMU_PTW_ERR_ACCESS;
62
+ goto error;
63
+ }
28
+
64
+
29
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
65
ap = PTE_AP(pte);
30
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
66
if (is_permission_fault(ap, perm)) {
31
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
67
info->type = SMMU_PTW_ERR_PERMISSION;
32
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
68
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
33
index XXXXXXX..XXXXXXX 100644
69
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/mve.decode
70
--- a/hw/arm/smmuv3.c
35
+++ b/target/arm/mve.decode
71
+++ b/hw/arm/smmuv3.c
36
@@ -XXX,XX +XXX,XX @@ VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_no
72
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
37
# Scalar operations
73
cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
38
74
cfg->tbi = CD_TBI(cd);
39
VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
75
cfg->asid = CD_ASID(cd);
40
+VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
76
+ cfg->affd = CD_AFFD(cd);
41
+VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
77
42
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
78
trace_smmuv3_decode_cd(cfg->oas);
43
index XXXXXXX..XXXXXXX 100644
79
44
--- a/target/arm/mve_helper.c
45
+++ b/target/arm/mve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhsubu, do_vhsub_u)
47
DO_2OP_SCALAR(OP##w, 4, uint32_t, FN)
48
49
DO_2OP_SCALAR_U(vadd_scalar, DO_ADD)
50
+DO_2OP_SCALAR_U(vsub_scalar, DO_SUB)
51
+DO_2OP_SCALAR_U(vmul_scalar, DO_MUL)
52
53
/*
54
* Multiply add long dual accumulate ops.
55
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-mve.c
58
+++ b/target/arm/translate-mve.c
59
@@ -XXX,XX +XXX,XX @@ static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
60
}
61
62
DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
63
+DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
64
+DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
65
66
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
67
MVEGenDualAccOpFn *fn)
68
--
80
--
69
2.20.1
81
2.34.1
70
71
1
A few subcases of VLDR/VSTR sysreg succeed but do not perform a
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
memory access:
3
* VSTR of VPR when unprivileged
4
* VLDR to VPR when unprivileged
5
* VLDR to FPCXT_NS when fpInactive
6
2
7
In these cases, even though we don't do the memory access we should
3
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
still update the base register and perform the stack limit check if
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
the insn's addressing mode specifies writeback. Our implementation
5
Message-id: 20240213155214.13619-2-philmd@linaro.org
10
failed to do this, because we handle these side-effects inside the
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
memory_to_fp_sysreg() and fp_sysreg_to_memory() callback functions,
7
---
12
which are only called if there's something to load or store.
8
hw/arm/stellaris.c | 6 ++++--
9
1 file changed, 4 insertions(+), 2 deletions(-)
13
10
14
Fix this by adding an extra argument to the callbacks which is set to
11
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
15
true to actually perform the access and false to only do side effects
16
like writeback, and calling the callback with do_access = false
17
for the three cases listed above.
18
19
This produces slightly suboptimal code for the case of a write
20
to FPCXT_NS when the FPU is inactive and the insn didn't have
21
side effects (ie no writeback, or via VMSR), in which case we'll
22
generate a conditional branch over an unconditional branch.
23
But this doesn't seem to be important enough to merit requiring
24
the callback to report back whether it generated any code or not.
25
26
Cc: qemu-stable@nongnu.org
27
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
28
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
29
Message-id: 20210618141019.10671-5-peter.maydell@linaro.org
30
---
31
target/arm/translate-m-nocp.c | 102 ++++++++++++++++++++++++----------
32
1 file changed, 72 insertions(+), 30 deletions(-)
33
34
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
35
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-m-nocp.c
13
--- a/hw/arm/stellaris.c
37
+++ b/target/arm/translate-m-nocp.c
14
+++ b/hw/arm/stellaris.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
15
@@ -XXX,XX +XXX,XX @@ static void stellaris_adc_trigger(void *opaque, int irq, int level)
39
40
/*
41
* Emit code to store the sysreg to its final destination; frees the
42
- * TCG temp 'value' it is passed.
43
+ * TCG temp 'value' it is passed. do_access is true to do the store,
44
+ * and false to skip it and only perform side-effects like base
45
+ * register writeback.
46
*/
47
-typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
48
+typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value,
49
+ bool do_access);
50
/*
51
* Emit code to load the value to be copied to the sysreg; returns
52
- * a new TCG temporary
53
+ * a new TCG temporary. do_access is true to do the store,
54
+ * and false to skip it and only perform side-effects like base
55
+ * register writeback.
56
*/
57
-typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
58
+typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque,
59
+ bool do_access);
60
61
/* Common decode/access checks for fp sysreg read/write */
62
typedef enum FPSysRegCheckResult {
63
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
64
65
switch (regno) {
66
case ARM_VFP_FPSCR:
67
- tmp = loadfn(s, opaque);
68
+ tmp = loadfn(s, opaque, true);
69
gen_helper_vfp_set_fpscr(cpu_env, tmp);
70
tcg_temp_free_i32(tmp);
71
gen_lookup_tb(s);
72
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
73
case ARM_VFP_FPSCR_NZCVQC:
74
{
75
TCGv_i32 fpscr;
76
- tmp = loadfn(s, opaque);
77
+ tmp = loadfn(s, opaque, true);
78
if (dc_isar_feature(aa32_mve, s)) {
79
/* QC is only present for MVE; otherwise RES0 */
80
TCGv_i32 qc = tcg_temp_new_i32();
81
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
82
break;
83
}
84
case ARM_VFP_FPCXT_NS:
85
+ {
86
+ TCGLabel *lab_active = gen_new_label();
87
+
88
lab_end = gen_new_label();
89
- /* fpInactive case: write is a NOP, so branch to end */
90
- gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
91
+ gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
92
+ /*
93
+ * fpInactive case: write is a NOP, so only do side effects
94
+ * like register writeback before we branch to end
95
+ */
96
+ loadfn(s, opaque, false);
97
+ tcg_gen_br(lab_end);
98
+
99
+ gen_set_label(lab_active);
100
/*
101
* !fpInactive: if FPU disabled, take NOCP exception;
102
* otherwise PreserveFPState(), and then FPCXT_NS writes
103
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
104
break;
105
}
106
gen_preserve_fp_state(s);
107
- /* fall through */
108
+ }
109
+ /* fall through */
110
case ARM_VFP_FPCXT_S:
111
{
112
TCGv_i32 sfpa, control;
113
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
114
* Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
115
* bits [27:0] from value and zeroes bits [31:28].
116
*/
117
- tmp = loadfn(s, opaque);
118
+ tmp = loadfn(s, opaque, true);
119
sfpa = tcg_temp_new_i32();
120
tcg_gen_shri_i32(sfpa, tmp, 31);
121
control = load_cpu_field(v7m.control[M_REG_S]);
122
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
123
case ARM_VFP_VPR:
124
/* Behaves as NOP if not privileged */
125
if (IS_USER(s)) {
126
+ loadfn(s, opaque, false);
127
break;
128
}
129
- tmp = loadfn(s, opaque);
130
+ tmp = loadfn(s, opaque, true);
131
store_cpu_field(tmp, v7m.vpr);
132
break;
133
case ARM_VFP_P0:
134
{
135
TCGv_i32 vpr;
136
- tmp = loadfn(s, opaque);
137
+ tmp = loadfn(s, opaque, true);
138
vpr = load_cpu_field(v7m.vpr);
139
tcg_gen_deposit_i32(vpr, vpr, tmp,
140
R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
141
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
142
case ARM_VFP_FPSCR:
143
tmp = tcg_temp_new_i32();
144
gen_helper_vfp_get_fpscr(tmp, cpu_env);
145
- storefn(s, opaque, tmp);
146
+ storefn(s, opaque, tmp, true);
147
break;
148
case ARM_VFP_FPSCR_NZCVQC:
149
tmp = tcg_temp_new_i32();
150
gen_helper_vfp_get_fpscr(tmp, cpu_env);
151
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
152
- storefn(s, opaque, tmp);
153
+ storefn(s, opaque, tmp, true);
154
break;
155
case QEMU_VFP_FPSCR_NZCV:
156
/*
157
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
158
*/
159
tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
160
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
161
- storefn(s, opaque, tmp);
162
+ storefn(s, opaque, tmp, true);
163
break;
164
case ARM_VFP_FPCXT_S:
165
{
166
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
167
* Store result before updating FPSCR etc, in case
168
* it is a memory write which causes an exception.
169
*/
170
- storefn(s, opaque, tmp);
171
+ storefn(s, opaque, tmp, true);
172
/*
173
* Now we must reset FPSCR from FPDSCR_NS, and clear
174
* CONTROL.SFPA; so we'll end the TB here.
175
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
176
gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
177
/* fpInactive case: reads as FPDSCR_NS */
178
TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
179
- storefn(s, opaque, tmp);
180
+ storefn(s, opaque, tmp, true);
181
lab_end = gen_new_label();
182
tcg_gen_br(lab_end);
183
184
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
185
tcg_gen_or_i32(tmp, tmp, sfpa);
186
tcg_temp_free_i32(control);
187
/* Store result before updating FPSCR, in case it faults */
188
- storefn(s, opaque, tmp);
189
+ storefn(s, opaque, tmp, true);
190
/* If SFPA is zero then set FPSCR from FPDSCR_NS */
191
fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
192
zero = tcg_const_i32(0);
193
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
194
case ARM_VFP_VPR:
195
/* Behaves as NOP if not privileged */
196
if (IS_USER(s)) {
197
+ storefn(s, opaque, NULL, false);
198
break;
199
}
200
tmp = load_cpu_field(v7m.vpr);
201
- storefn(s, opaque, tmp);
202
+ storefn(s, opaque, tmp, true);
203
break;
204
case ARM_VFP_P0:
205
tmp = load_cpu_field(v7m.vpr);
206
tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
207
- storefn(s, opaque, tmp);
208
+ storefn(s, opaque, tmp, true);
209
break;
210
default:
211
g_assert_not_reached();
212
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
213
return true;
214
}
215
216
-static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
217
+static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value,
218
+ bool do_access)
219
{
220
arg_VMSR_VMRS *a = opaque;
221
222
+ if (!do_access) {
223
+ return;
224
+ }
225
+
226
if (a->rt == 15) {
227
/* Set the 4 flag bits in the CPSR */
228
gen_set_nzcv(value);
229
@@ -XXX,XX +XXX,XX @@ static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
230
}
16
}
231
}
17
}
232
18
233
-static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
19
-static void stellaris_adc_reset(StellarisADCState *s)
234
+static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque, bool do_access)
20
+static void stellaris_adc_reset_hold(Object *obj)
235
{
21
{
236
arg_VMSR_VMRS *a = opaque;
22
+ StellarisADCState *s = STELLARIS_ADC(obj);
237
23
int n;
238
+ if (!do_access) {
24
239
+ return NULL;
25
for (n = 0; n < 4; n++) {
240
+ }
26
@@ -XXX,XX +XXX,XX @@ static void stellaris_adc_init(Object *obj)
241
return load_reg(s, a->rt);
27
memory_region_init_io(&s->iomem, obj, &stellaris_adc_ops, s,
28
"adc", 0x1000);
29
sysbus_init_mmio(sbd, &s->iomem);
30
- stellaris_adc_reset(s);
31
qdev_init_gpio_in(dev, stellaris_adc_trigger, 1);
242
}
32
}
243
33
244
@@ -XXX,XX +XXX,XX @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
34
@@ -XXX,XX +XXX,XX @@ static const TypeInfo stellaris_i2c_info = {
245
}
35
static void stellaris_adc_class_init(ObjectClass *klass, void *data)
36
{
37
DeviceClass *dc = DEVICE_CLASS(klass);
38
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
39
40
+ rc->phases.hold = stellaris_adc_reset_hold;
41
dc->vmsd = &vmstate_stellaris_adc;
246
}
42
}
247
43
248
-static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
249
+static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value,
250
+ bool do_access)
251
{
252
arg_vldr_sysreg *a = opaque;
253
uint32_t offset = a->imm;
254
@@ -XXX,XX +XXX,XX @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
255
offset = -offset;
256
}
257
258
+ if (!do_access && !a->w) {
259
+ return;
260
+ }
261
+
262
addr = load_reg(s, a->rn);
263
if (a->p) {
264
tcg_gen_addi_i32(addr, addr, offset);
265
@@ -XXX,XX +XXX,XX @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
266
gen_helper_v8m_stackcheck(cpu_env, addr);
267
}
268
269
- gen_aa32_st_i32(s, value, addr, get_mem_index(s),
270
- MO_UL | MO_ALIGN | s->be_data);
271
- tcg_temp_free_i32(value);
272
+ if (do_access) {
273
+ gen_aa32_st_i32(s, value, addr, get_mem_index(s),
274
+ MO_UL | MO_ALIGN | s->be_data);
275
+ tcg_temp_free_i32(value);
276
+ }
277
278
if (a->w) {
279
/* writeback */
280
@@ -XXX,XX +XXX,XX @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
281
}
282
}
283
284
-static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
285
+static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque,
286
+ bool do_access)
287
{
288
arg_vldr_sysreg *a = opaque;
289
uint32_t offset = a->imm;
290
TCGv_i32 addr;
291
- TCGv_i32 value = tcg_temp_new_i32();
292
+ TCGv_i32 value = NULL;
293
294
if (!a->a) {
295
offset = -offset;
296
}
297
298
+ if (!do_access && !a->w) {
299
+ return NULL;
300
+ }
301
+
302
addr = load_reg(s, a->rn);
303
if (a->p) {
304
tcg_gen_addi_i32(addr, addr, offset);
305
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
306
gen_helper_v8m_stackcheck(cpu_env, addr);
307
}
308
309
- gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
310
- MO_UL | MO_ALIGN | s->be_data);
311
+ if (do_access) {
312
+ value = tcg_temp_new_i32();
313
+ gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
314
+ MO_UL | MO_ALIGN | s->be_data);
315
+ }
316
317
if (a->w) {
318
/* writeback */
319
--
2.20.1

--
2.34.1
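The refactored M-profile sysreg accessors above always invoke the load/store callback, passing do_access=false when the access itself behaves as a NOP, so that side effects such as address writeback are never skipped. A rough standalone sketch of that convention (plain C, invented names, not the QEMU code itself):

#include <stdbool.h>
#include <stdio.h>

typedef void StoreFn(void *opaque, int value, bool do_access);

/* Store callback: only touches the data when do_access is true, but its
 * writeback-style side effect happens on every call. */
static void store_with_writeback(void *opaque, int value, bool do_access)
{
    int *writebacks = opaque;

    if (do_access) {
        printf("stored %d\n", value);
    }
    (*writebacks)++;
}

/* Register read that behaves as a NOP when unprivileged: the callback is
 * still invoked so the writeback side effect is not lost. */
static void sysreg_read(StoreFn *storefn, void *opaque, bool privileged)
{
    if (!privileged) {
        storefn(opaque, 0, false);
        return;
    }
    storefn(opaque, 42, true);
}

int main(void)
{
    int writebacks = 0;

    sysreg_read(store_with_writeback, &writebacks, false);
    sysreg_read(store_with_writeback, &writebacks, true);
    printf("writebacks: %d\n", writebacks);
    return 0;
}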
1
From: Peter Collingbourne <pcc@google.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
MTE3 introduces an asymmetric tag checking mode, in which loads are
3
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
4
checked synchronously and stores are checked asynchronously. Add
4
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
support for it.
5
Message-id: 20240213155214.13619-3-philmd@linaro.org
6
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Peter Collingbourne <pcc@google.com>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210616195614.11785-1-pcc@google.com
10
[PMM: Add line to emulation.rst]
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
8
---
13
docs/system/arm/emulation.rst | 1 +
9
hw/arm/stellaris.c | 26 ++++++++++++++++++++++----
14
target/arm/cpu64.c | 2 +-
10
1 file changed, 22 insertions(+), 4 deletions(-)
15
target/arm/mte_helper.c | 82 ++++++++++++++++++++++-------------
16
3 files changed, 53 insertions(+), 32 deletions(-)
17
11
18
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
12
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
19
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
20
--- a/docs/system/arm/emulation.rst
14
--- a/hw/arm/stellaris.c
21
+++ b/docs/system/arm/emulation.rst
15
+++ b/hw/arm/stellaris.c
22
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
16
@@ -XXX,XX +XXX,XX @@ static void stellaris_sys_instance_init(Object *obj)
23
- FEAT_LSE (Large System Extensions)
17
s->sysclk = qdev_init_clock_out(DEVICE(s), "SYSCLK");
24
- FEAT_MTE (Memory Tagging Extension)
25
- FEAT_MTE2 (Memory Tagging Extension)
26
+- FEAT_MTE3 (MTE Asymmetric Fault Handling)
27
- FEAT_PAN (Privileged access never)
28
- FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN)
29
- FEAT_PAuth (Pointer authentication)
30
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/cpu64.c
33
+++ b/target/arm/cpu64.c
34
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
35
* during realize if the board provides no tag memory, much like
36
* we do for EL2 with the virtualization=on property.
37
*/
38
- t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
39
+ t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3);
40
cpu->isar.id_aa64pfr1 = t;
41
42
t = cpu->isar.id_aa64mmfr0;
43
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/mte_helper.c
46
+++ b/target/arm/mte_helper.c
47
@@ -XXX,XX +XXX,XX @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
48
}
49
}
18
}
50
19
51
+static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
20
-/* I2C controller. */
52
+ uint64_t dirty_ptr, uintptr_t ra)
21
+/*
53
+{
22
+ * I2C controller.
54
+ int is_write, syn;
23
+ * ??? For now we only implement the master interface.
24
+ */
25
26
#define TYPE_STELLARIS_I2C "stellaris-i2c"
27
OBJECT_DECLARE_SIMPLE_TYPE(stellaris_i2c_state, STELLARIS_I2C)
28
@@ -XXX,XX +XXX,XX @@ static void stellaris_i2c_write(void *opaque, hwaddr offset,
29
stellaris_i2c_update(s);
30
}
31
32
-static void stellaris_i2c_reset(stellaris_i2c_state *s)
33
+static void stellaris_i2c_reset_enter(Object *obj, ResetType type)
34
{
35
+ stellaris_i2c_state *s = STELLARIS_I2C(obj);
55
+
36
+
56
+ env->exception.vaddress = dirty_ptr;
37
if (s->mcs & STELLARIS_I2C_MCS_BUSBSY)
57
+
38
i2c_end_transfer(s->bus);
58
+ is_write = FIELD_EX32(desc, MTEDESC, WRITE);
59
+ syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
60
+ 0x11);
61
+ raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
62
+ g_assert_not_reached();
63
+}
39
+}
64
+
40
+
65
+static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
41
+static void stellaris_i2c_reset_hold(Object *obj)
66
+ uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
67
+{
42
+{
68
+ int select;
43
+ stellaris_i2c_state *s = STELLARIS_I2C(obj);
69
+
44
70
+ if (regime_has_2_ranges(arm_mmu_idx)) {
45
s->msa = 0;
71
+ select = extract64(dirty_ptr, 55, 1);
46
s->mcs = 0;
72
+ } else {
47
@@ -XXX,XX +XXX,XX @@ static void stellaris_i2c_reset(stellaris_i2c_state *s)
73
+ select = 0;
48
s->mimr = 0;
74
+ }
49
s->mris = 0;
75
+ env->cp15.tfsr_el[el] |= 1 << select;
50
s->mcr = 0;
76
+#ifdef CONFIG_USER_ONLY
77
+ /*
78
+ * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
79
+ * which then sends a SIGSEGV when the thread is next scheduled.
80
+ * This cpu will return to the main loop at the end of the TB,
81
+ * which is rather sooner than "normal". But the alternative
82
+ * is waiting until the next syscall.
83
+ */
84
+ qemu_cpu_kick(env_cpu(env));
85
+#endif
86
+}
51
+}
87
+
52
+
88
/* Record a tag check failure. */
53
+static void stellaris_i2c_reset_exit(Object *obj)
89
static void mte_check_fail(CPUARMState *env, uint32_t desc,
54
+{
90
uint64_t dirty_ptr, uintptr_t ra)
55
+ stellaris_i2c_state *s = STELLARIS_I2C(obj);
56
+
57
stellaris_i2c_update(s);
58
}
59
60
@@ -XXX,XX +XXX,XX @@ static void stellaris_i2c_init(Object *obj)
61
memory_region_init_io(&s->iomem, obj, &stellaris_i2c_ops, s,
62
"i2c", 0x1000);
63
sysbus_init_mmio(sbd, &s->iomem);
64
- /* ??? For now we only implement the master interface. */
65
- stellaris_i2c_reset(s);
66
}
67
68
/* Analogue to Digital Converter. This is only partially implemented,
69
@@ -XXX,XX +XXX,XX @@ type_init(stellaris_machine_init)
70
static void stellaris_i2c_class_init(ObjectClass *klass, void *data)
91
{
71
{
92
int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
72
DeviceClass *dc = DEVICE_CLASS(klass);
93
ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
73
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
94
- int el, reg_el, tcf, select, is_write, syn;
74
95
+ int el, reg_el, tcf;
75
+ rc->phases.enter = stellaris_i2c_reset_enter;
96
uint64_t sctlr;
76
+ rc->phases.hold = stellaris_i2c_reset_hold;
97
77
+ rc->phases.exit = stellaris_i2c_reset_exit;
98
reg_el = regime_el(env, arm_mmu_idx);
78
dc->vmsd = &vmstate_stellaris_i2c;
99
@@ -XXX,XX +XXX,XX @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
100
switch (tcf) {
101
case 1:
102
/* Tag check fail causes a synchronous exception. */
103
- env->exception.vaddress = dirty_ptr;
104
-
105
- is_write = FIELD_EX32(desc, MTEDESC, WRITE);
106
- syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
107
- is_write, 0x11);
108
- raise_exception_ra(env, EXCP_DATA_ABORT, syn,
109
- exception_target_el(env), ra);
110
- /* noreturn, but fall through to the assert anyway */
111
+ mte_sync_check_fail(env, desc, dirty_ptr, ra);
112
+ break;
113
114
case 0:
115
/*
116
@@ -XXX,XX +XXX,XX @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
117
118
case 2:
119
/* Tag check fail causes asynchronous flag set. */
120
- if (regime_has_2_ranges(arm_mmu_idx)) {
121
- select = extract64(dirty_ptr, 55, 1);
122
- } else {
123
- select = 0;
124
- }
125
- env->cp15.tfsr_el[el] |= 1 << select;
126
-#ifdef CONFIG_USER_ONLY
127
- /*
128
- * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
129
- * which then sends a SIGSEGV when the thread is next scheduled.
130
- * This cpu will return to the main loop at the end of the TB,
131
- * which is rather sooner than "normal". But the alternative
132
- * is waiting until the next syscall.
133
- */
134
- qemu_cpu_kick(env_cpu(env));
135
-#endif
136
+ mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
137
break;
138
139
- default:
140
- /* Case 3: Reserved. */
141
- qemu_log_mask(LOG_GUEST_ERROR,
142
- "Tag check failure with SCTLR_EL%d.TCF%s "
143
- "set to reserved value %d\n",
144
- reg_el, el ? "" : "0", tcf);
145
+ case 3:
146
+ /*
147
+ * Tag check fail causes asynchronous flag set for stores, or
148
+ * a synchronous exception for loads.
149
+ */
150
+ if (FIELD_EX32(desc, MTEDESC, WRITE)) {
151
+ mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
152
+ } else {
153
+ mte_sync_check_fail(env, desc, dirty_ptr, ra);
154
+ }
155
break;
156
}
157
}
79
}
80
158
--
2.20.1

--
2.34.1
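A minimal standalone illustration (not QEMU code; names invented for the example) of how a tag-check failure is reported for each SCTLR_ELx.TCF setting, including the asymmetric MTE3 mode added above, where loads fault synchronously and stores only set the asynchronous flag:

#include <stdbool.h>
#include <stdio.h>

enum { TCF_IGNORE = 0, TCF_SYNC = 1, TCF_ASYNC = 2, TCF_ASYM = 3 };

/* Describe what a tag check failure does for a given TCF value and
 * access direction. */
static const char *tag_check_fail_action(int tcf, bool is_write)
{
    switch (tcf) {
    case TCF_IGNORE:
        return "ignored";
    case TCF_SYNC:
        return "synchronous data abort";
    case TCF_ASYNC:
        return "asynchronous TFSR_ELx flag set";
    case TCF_ASYM:
        /* MTE3 asymmetric mode: stores async, loads sync */
        return is_write ? "asynchronous TFSR_ELx flag set"
                        : "synchronous data abort";
    default:
        return "reserved";
    }
}

int main(void)
{
    printf("TCF=3 load:  %s\n", tag_check_fail_action(TCF_ASYM, false));
    printf("TCF=3 store: %s\n", tag_check_fail_action(TCF_ASYM, true));
    return 0;
}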
1
From: Alexandre Iooss <erdnaxe@crans.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
This adds the target guide for BBC Micro:bit.
3
QDev objects created with qdev_new() need to manually add
4
their parent relationship with object_property_add_child().
4
5
5
Information is taken from https://wiki.qemu.org/Features/MicroBit
6
This commit plugs the devices which aren't part of the SoC;
6
and from hw/arm/nrf51_soc.c.
7
they will be plugged into a SoC container in the next one.
7
8
8
Signed-off-by: Alexandre Iooss <erdnaxe@crans.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Joel Stanley <joel@jms.id.au>
11
Message-id: 20240213155214.13619-4-philmd@linaro.org
11
Message-id: 20210621075625.540471-1-erdnaxe@crans.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
13
---
14
docs/system/arm/nrf.rst | 51 ++++++++++++++++++++++++++++++++++++++
14
hw/arm/stellaris.c | 4 ++++
15
docs/system/target-arm.rst | 1 +
15
1 file changed, 4 insertions(+)
16
MAINTAINERS | 1 +
17
3 files changed, 53 insertions(+)
18
create mode 100644 docs/system/arm/nrf.rst
19
16
20
diff --git a/docs/system/arm/nrf.rst b/docs/system/arm/nrf.rst
17
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
21
new file mode 100644
22
index XXXXXXX..XXXXXXX
23
--- /dev/null
24
+++ b/docs/system/arm/nrf.rst
25
@@ -XXX,XX +XXX,XX @@
26
+Nordic nRF boards (``microbit``)
27
+================================
28
+
29
+The `Nordic nRF`_ chips are a family of ARM-based System-on-Chip that
30
+are designed to be used for low-power and short-range wireless solutions.
31
+
32
+.. _Nordic nRF: https://www.nordicsemi.com/Products
33
+
34
+The nRF51 series is the first series for short range wireless applications.
35
+It is superseded by the nRF52 series.
36
+The following machines are based on this chip :
37
+
38
+- ``microbit`` BBC micro:bit board with nRF51822 SoC
39
+
40
+There are other series such as nRF52, nRF53 and nRF91 which are currently not
41
+supported by QEMU.
42
+
43
+Supported devices
44
+-----------------
45
+
46
+ * ARM Cortex-M0 (ARMv6-M)
47
+ * Serial ports (UART)
48
+ * Clock controller
49
+ * Timers
50
+ * Random Number Generator (RNG)
51
+ * GPIO controller
52
+ * NVMC
53
+ * SWI
54
+
55
+Missing devices
56
+---------------
57
+
58
+ * Watchdog
59
+ * Real-Time Clock (RTC) controller
60
+ * TWI (i2c)
61
+ * SPI controller
62
+ * Analog to Digital Converter (ADC)
63
+ * Quadrature decoder
64
+ * Radio
65
+
66
+Boot options
67
+------------
68
+
69
+The Micro:bit machine can be started using the ``-device`` option to load a
70
+firmware in `ihex format`_. Example:
71
+
72
+.. _ihex format: https://en.wikipedia.org/wiki/Intel_HEX
73
+
74
+.. code-block:: bash
75
+
76
+ $ qemu-system-arm -M microbit -device loader,file=test.hex
77
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
78
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
79
--- a/docs/system/target-arm.rst
19
--- a/hw/arm/stellaris.c
80
+++ b/docs/system/target-arm.rst
20
+++ b/hw/arm/stellaris.c
81
@@ -XXX,XX +XXX,XX @@ undocumented; you can get a complete list by running
21
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
82
arm/digic
22
&error_fatal);
83
arm/musicpal
23
84
arm/gumstix
24
ssddev = qdev_new("ssd0323");
85
+ arm/nrf
25
+ object_property_add_child(OBJECT(ms), "oled", OBJECT(ssddev));
86
arm/nseries
26
qdev_prop_set_uint8(ssddev, "cs", 1);
87
arm/nuvoton
27
qdev_realize_and_unref(ssddev, bus, &error_fatal);
88
arm/orangepi
28
89
diff --git a/MAINTAINERS b/MAINTAINERS
29
gpio_d_splitter = qdev_new(TYPE_SPLIT_IRQ);
90
index XXXXXXX..XXXXXXX 100644
30
+ object_property_add_child(OBJECT(ms), "splitter",
91
--- a/MAINTAINERS
31
+ OBJECT(gpio_d_splitter));
92
+++ b/MAINTAINERS
32
qdev_prop_set_uint32(gpio_d_splitter, "num-lines", 2);
93
@@ -XXX,XX +XXX,XX @@ F: hw/*/microbit*.c
33
qdev_realize_and_unref(gpio_d_splitter, NULL, &error_fatal);
94
F: include/hw/*/nrf51*.h
34
qdev_connect_gpio_out(
95
F: include/hw/*/microbit*.h
35
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
96
F: tests/qtest/microbit-test.c
36
DeviceState *gpad;
97
+F: docs/system/arm/nrf.rst
37
98
38
gpad = qdev_new(TYPE_STELLARIS_GAMEPAD);
99
AVR Machines
39
+ object_property_add_child(OBJECT(ms), "gamepad", OBJECT(gpad));
100
-------------
40
for (i = 0; i < ARRAY_SIZE(gpad_keycode); i++) {
41
qlist_append_int(gpad_keycode_list, gpad_keycode[i]);
42
}
101
--
2.20.1

--
2.34.1
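The qdev_new() / object_property_add_child() / realize ordering applied in the stellaris patch above, spelled out as a call-order sketch. This is not a buildable unit on its own; "demo-device", the property name and the parent object are placeholders for this example:

#include "qemu/osdep.h"
#include "hw/qdev-core.h"
#include "qapi/error.h"

/* Create an unparented device, give it a name in the QOM composition
 * tree, then realize it; qdev_realize_and_unref() drops the creation
 * reference so the bus/parent keeps the only remaining one. */
static void create_child_device(Object *parent, BusState *bus)
{
    DeviceState *dev = qdev_new("demo-device");

    object_property_add_child(parent, "demo", OBJECT(dev));
    qdev_realize_and_unref(dev, bus, &error_fatal);
}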
1
Implement the MVE VQSHL insn (encoding T4, which is the
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
vector-shift-by-vector version).
3
2
4
The DO_SQSHL_OP and DO_UQSHL_OP macros here are derived from
3
QDev objects created with qdev_new() need to manually add
5
the neon_helper.c code for qshl_u{8,16,32} and qshl_s{8,16,32}.
4
their parent relationship with object_property_add_child().
6
5
6
Since we don't model the SoC, just use a QOM container.
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Message-id: 20240213155214.13619-5-philmd@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210617121628.20116-33-peter.maydell@linaro.org
10
---
12
---
11
target/arm/helper-mve.h | 8 ++++++++
13
hw/arm/stellaris.c | 11 ++++++++++-
12
target/arm/mve.decode | 12 ++++++++++++
14
1 file changed, 10 insertions(+), 1 deletion(-)
13
target/arm/mve_helper.c | 34 ++++++++++++++++++++++++++++++++++
14
target/arm/translate-mve.c | 2 ++
15
4 files changed, 56 insertions(+)
16
15
17
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
16
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-mve.h
18
--- a/hw/arm/stellaris.c
20
+++ b/target/arm/helper-mve.h
19
+++ b/hw/arm/stellaris.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
20
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
22
DEF_HELPER_FLAGS_4(mve_vqsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
* 400fe000 system control
23
DEF_HELPER_FLAGS_4(mve_vqsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
*/
24
23
25
+DEF_HELPER_FLAGS_4(mve_vqshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+ Object *soc_container;
26
+DEF_HELPER_FLAGS_4(mve_vqshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
DeviceState *gpio_dev[7], *nvic;
27
+DEF_HELPER_FLAGS_4(mve_vqshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
qemu_irq gpio_in[7][8];
27
qemu_irq gpio_out[7][8];
28
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
29
flash_size = (((board->dc0 & 0xffff) + 1) << 1) * 1024;
30
sram_size = ((board->dc0 >> 18) + 1) * 1024;
31
32
+ soc_container = object_new("container");
33
+ object_property_add_child(OBJECT(ms), "soc", soc_container);
28
+
34
+
29
+DEF_HELPER_FLAGS_4(mve_vqshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
35
/* Flash programming is done via the SCU, so pretend it is ROM. */
30
+DEF_HELPER_FLAGS_4(mve_vqshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
memory_region_init_rom(flash, NULL, "stellaris.flash", flash_size,
31
+DEF_HELPER_FLAGS_4(mve_vqshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
37
&error_fatal);
32
+
38
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
33
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
* need its sysclk output.
34
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
*/
35
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
41
ssys_dev = qdev_new(TYPE_STELLARIS_SYS);
36
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
42
+ object_property_add_child(soc_container, "sys", OBJECT(ssys_dev));
37
index XXXXXXX..XXXXXXX 100644
43
38
--- a/target/arm/mve.decode
44
/*
39
+++ b/target/arm/mve.decode
45
* Most devices come preprogrammed with a MAC address in the user data.
40
@@ -XXX,XX +XXX,XX @@
46
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
41
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
47
sysbus_realize_and_unref(SYS_BUS_DEVICE(ssys_dev), &error_fatal);
42
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
48
43
49
nvic = qdev_new(TYPE_ARMV7M);
44
+# The _rev suffix indicates that Vn and Vm are reversed. This is
50
+ object_property_add_child(soc_container, "v7m", OBJECT(nvic));
45
+# the case for shifts. In the Arm ARM these insns are documented
51
qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES);
46
+# with the Vm and Vn fields in their usual places, but in the
52
qdev_prop_set_uint8(nvic, "num-prio-bits", NUM_PRIO_BITS);
47
+# assembly the operands are listed "backwards", ie in the order
53
qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type);
48
+# Qd, Qm, Qn where other insns use Qd, Qn, Qm. For QEMU we choose
54
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
49
+# to consider Vm and Vn as being in different fields in the insn.
55
50
+# This gives us consistency with A64 and Neon.
56
dev = qdev_new(TYPE_STELLARIS_GPTM);
51
+@2op_rev .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qn qn=%qm
57
sbd = SYS_BUS_DEVICE(dev);
52
+
58
+ object_property_add_child(soc_container, "gptm[*]", OBJECT(dev));
53
@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
59
qdev_connect_clock_in(dev, "clk",
54
@2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
60
qdev_get_clock_out(ssys_dev, "SYSCLK"));
55
61
sysbus_realize_and_unref(sbd, &error_fatal);
56
@@ -XXX,XX +XXX,XX @@ VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
62
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
57
VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
63
58
VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
64
if (board->dc1 & (1 << 3)) { /* watchdog present */
59
65
dev = qdev_new(TYPE_LUMINARY_WATCHDOG);
60
+VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
66
-
61
+VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
67
+ object_property_add_child(soc_container, "wdg", OBJECT(dev));
62
+
68
qdev_connect_clock_in(dev, "WDOGCLK",
63
# Vector miscellaneous
69
qdev_get_clock_out(ssys_dev, "SYSCLK"));
64
70
65
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
71
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
66
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
72
SysBusDevice *sbd;
67
index XXXXXXX..XXXXXXX 100644
73
68
--- a/target/arm/mve_helper.c
74
dev = qdev_new("pl011_luminary");
69
+++ b/target/arm/mve_helper.c
75
+ object_property_add_child(soc_container, "uart[*]", OBJECT(dev));
70
@@ -XXX,XX +XXX,XX @@ DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
76
sbd = SYS_BUS_DEVICE(dev);
71
mve_advance_vpt(env); \
77
qdev_prop_set_chr(dev, "chardev", serial_hd(i));
72
}
78
sysbus_realize_and_unref(sbd, &error_fatal);
73
79
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
74
+/* provide unsigned 2-op helpers for all sizes */
80
DeviceState *enet;
75
+#define DO_2OP_SAT_U(OP, FN) \
81
76
+ DO_2OP_SAT(OP##b, 1, uint8_t, FN) \
82
enet = qdev_new("stellaris_enet");
77
+ DO_2OP_SAT(OP##h, 2, uint16_t, FN) \
83
+ object_property_add_child(soc_container, "enet", OBJECT(enet));
78
+ DO_2OP_SAT(OP##w, 4, uint32_t, FN)
84
if (nd) {
79
+
85
qdev_set_nic_properties(enet, nd);
80
+/* provide signed 2-op helpers for all sizes */
86
} else {
81
+#define DO_2OP_SAT_S(OP, FN) \
82
+ DO_2OP_SAT(OP##b, 1, int8_t, FN) \
83
+ DO_2OP_SAT(OP##h, 2, int16_t, FN) \
84
+ DO_2OP_SAT(OP##w, 4, int32_t, FN)
85
+
86
#define DO_AND(N, M) ((N) & (M))
87
#define DO_BIC(N, M) ((N) & ~(M))
88
#define DO_ORR(N, M) ((N) | (M))
89
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT(vqsubsb, 1, int8_t, DO_SQSUB_B)
90
DO_2OP_SAT(vqsubsh, 2, int16_t, DO_SQSUB_H)
91
DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W)
92
93
+/*
94
+ * This wrapper fixes up the impedance mismatch between do_sqrshl_bhs()
95
+ * and friends wanting a uint32_t* sat and our needing a bool*.
96
+ */
97
+#define WRAP_QRSHL_HELPER(FN, N, M, ROUND, satp) \
98
+ ({ \
99
+ uint32_t su32 = 0; \
100
+ typeof(N) r = FN(N, (int8_t)(M), sizeof(N) * 8, ROUND, &su32); \
101
+ if (su32) { \
102
+ *satp = true; \
103
+ } \
104
+ r; \
105
+ })
106
+
107
+#define DO_SQSHL_OP(N, M, satp) \
108
+ WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, false, satp)
109
+#define DO_UQSHL_OP(N, M, satp) \
110
+ WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, false, satp)
111
+
112
+DO_2OP_SAT_S(vqshls, DO_SQSHL_OP)
113
+DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP)
114
+
115
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
116
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
117
uint32_t rm) \
118
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/target/arm/translate-mve.c
121
+++ b/target/arm/translate-mve.c
122
@@ -XXX,XX +XXX,XX @@ DO_2OP(VQADD_S, vqadds)
123
DO_2OP(VQADD_U, vqaddu)
124
DO_2OP(VQSUB_S, vqsubs)
125
DO_2OP(VQSUB_U, vqsubu)
126
+DO_2OP(VQSHL_S, vqshls)
127
+DO_2OP(VQSHL_U, vqshlu)
128
129
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
130
MVEGenTwoOpScalarFn fn)
131
--
2.20.1

--
2.34.1
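A simplified standalone model (8-bit lanes only, no rounding, no predication, invented function name) of the per-element saturating shift-by-register that VQSHL performs: negative shift counts shift right, positive counts shift left and saturate, with *sat standing in for FPSCR.QC:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Saturating shift of one signed 8-bit lane by a signed shift count. */
static int8_t sat_shl_s8(int8_t val, int8_t shift, bool *sat)
{
    if (shift <= -8) {
        return val < 0 ? -1 : 0;          /* everything shifted out */
    }
    if (shift < 0) {
        return val >> -shift;             /* arithmetic right shift */
    }
    if (shift >= 8) {
        if (val == 0) {
            return 0;
        }
        *sat = true;
        return val < 0 ? INT8_MIN : INT8_MAX;
    }
    int32_t r = (int32_t)val << shift;
    if (r != (int8_t)r) {                 /* left shift overflowed */
        *sat = true;
        return val < 0 ? INT8_MIN : INT8_MAX;
    }
    return (int8_t)r;
}

int main(void)
{
    bool qc = false;

    printf("100 << 2 -> %d\n", sat_shl_s8(100, 2, &qc));   /* saturates */
    printf("100 >> 3 -> %d\n", sat_shl_s8(100, -3, &qc));
    printf("QC: %d\n", qc);
    return 0;
}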
1
Implement the vector forms of the MVE VQADD and VQSUB insns.
1
We support two different encodings for the AArch32 IMPDEF
2
CBAR register -- older cores like the Cortex A9, A7, A15
3
have this at 4, c15, c0, 0; newer cores like the
4
Cortex A35, A53, A57 and A72 have it at 1 c15 c0 0.
5
6
When we implemented this we picked which encoding to
7
use based on whether the CPU set ARM_FEATURE_AARCH64.
8
However this isn't right for three cases:
9
* the qemu-system-arm 'max' CPU, which is supposed to be
10
a variant on a Cortex-A57; it ought to use the same
11
encoding the A57 does and which the AArch64 'max'
12
exposes to AArch32 guest code
13
* the Cortex-R52, which is AArch32-only but has the CBAR
14
at the newer encoding (and where we incorrectly are
15
not yet setting ARM_FEATURE_CBAR_RO anyway)
16
* any possible future support for other v8 AArch32
17
only CPUs, or for supporting "boot the CPU into
18
AArch32 mode" on our existing cores like the A57 etc
19
20
Make the decision of the encoding be based on whether
21
the CPU implements the ARM_FEATURE_V8 flag instead.
22
23
This changes the behaviour only for the qemu-system-arm
24
'-cpu max'. We don't expect anybody to be relying on the
25
old behaviour because:
26
* it's not what the real hardware Cortex-A57 does
27
(and that's what our ID register claims we are)
28
* we don't implement the memory-mapped GICv3 support
29
which is the only thing that exists at the peripheral
30
base address pointed to by the register
2
31
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
33
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-32-peter.maydell@linaro.org
34
Message-id: 20240206132931.38376-2-peter.maydell@linaro.org
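For reference, a standalone sketch of the 32-bit CBAR view that the helper.c hunk below assembles from the peripheral base address: base bits [43:32] land in CBAR[11:0] and bits [31:18] in CBAR[31:18]. Illustrative only; the function name is invented for the example:

#include <stdint.h>
#include <stdio.h>

/* Pack a peripheral base address into the 32-bit CBAR layout. */
static uint32_t cbar32_from_periphbase(uint64_t periphbase)
{
    uint32_t hi = (uint32_t)((periphbase >> 32) & 0xfff);          /* [43:32] */
    uint32_t lo = (uint32_t)(((periphbase >> 18) & 0x3fff) << 18); /* [31:18] */
    return lo | hi;
}

int main(void)
{
    printf("0x%08x\n", cbar32_from_periphbase(0x10c0000000ULL));
    return 0;
}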
6
---
35
---
7
target/arm/helper-mve.h | 16 ++++++++++++++++
36
target/arm/helper.c | 2 +-
8
target/arm/mve.decode | 5 +++++
37
1 file changed, 1 insertion(+), 1 deletion(-)
9
target/arm/mve_helper.c | 14 ++++++++++++++
10
target/arm/translate-mve.c | 4 ++++
11
4 files changed, 39 insertions(+)
12
38
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
39
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
41
--- a/target/arm/helper.c
16
+++ b/target/arm/helper-mve.h
42
+++ b/target/arm/helper.c
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrdmulhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
43
@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
18
DEF_HELPER_FLAGS_4(mve_vqrdmulhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
44
* AArch64 cores we might need to add a specific feature flag
19
DEF_HELPER_FLAGS_4(mve_vqrdmulhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
45
* to indicate cores with "flavour 2" CBAR.
20
46
*/
21
+DEF_HELPER_FLAGS_4(mve_vqaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
47
- if (arm_feature(env, ARM_FEATURE_AARCH64)) {
22
+DEF_HELPER_FLAGS_4(mve_vqaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
48
+ if (arm_feature(env, ARM_FEATURE_V8)) {
23
+DEF_HELPER_FLAGS_4(mve_vqaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
49
/* 32 bit view is [31:18] 0...0 [43:32]. */
24
+
50
uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18)
25
+DEF_HELPER_FLAGS_4(mve_vqaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
51
| extract64(cpu->reset_cbar, 32, 12);
26
+DEF_HELPER_FLAGS_4(mve_vqadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vqadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+
29
+DEF_HELPER_FLAGS_4(mve_vqsubsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
30
+DEF_HELPER_FLAGS_4(mve_vqsubsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
+DEF_HELPER_FLAGS_4(mve_vqsubsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
+
33
+DEF_HELPER_FLAGS_4(mve_vqsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+DEF_HELPER_FLAGS_4(mve_vqsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
35
+DEF_HELPER_FLAGS_4(mve_vqsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
+
37
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/mve.decode
43
+++ b/target/arm/mve.decode
44
@@ -XXX,XX +XXX,XX @@ VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
45
VQDMULH 1110 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
46
VQRDMULH 1111 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
47
48
+VQADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
49
+VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
50
+VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
51
+VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
52
+
53
# Vector miscellaneous
54
55
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
56
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/mve_helper.c
59
+++ b/target/arm/mve_helper.c
60
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT(vqrdmulhb, 1, int8_t, DO_QRDMULH_B)
61
DO_2OP_SAT(vqrdmulhh, 2, int16_t, DO_QRDMULH_H)
62
DO_2OP_SAT(vqrdmulhw, 4, int32_t, DO_QRDMULH_W)
63
64
+DO_2OP_SAT(vqaddub, 1, uint8_t, DO_UQADD_B)
65
+DO_2OP_SAT(vqadduh, 2, uint16_t, DO_UQADD_H)
66
+DO_2OP_SAT(vqadduw, 4, uint32_t, DO_UQADD_W)
67
+DO_2OP_SAT(vqaddsb, 1, int8_t, DO_SQADD_B)
68
+DO_2OP_SAT(vqaddsh, 2, int16_t, DO_SQADD_H)
69
+DO_2OP_SAT(vqaddsw, 4, int32_t, DO_SQADD_W)
70
+
71
+DO_2OP_SAT(vqsubub, 1, uint8_t, DO_UQSUB_B)
72
+DO_2OP_SAT(vqsubuh, 2, uint16_t, DO_UQSUB_H)
73
+DO_2OP_SAT(vqsubuw, 4, uint32_t, DO_UQSUB_W)
74
+DO_2OP_SAT(vqsubsb, 1, int8_t, DO_SQSUB_B)
75
+DO_2OP_SAT(vqsubsh, 2, int16_t, DO_SQSUB_H)
76
+DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W)
77
+
78
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
79
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
80
uint32_t rm) \
81
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/arm/translate-mve.c
84
+++ b/target/arm/translate-mve.c
85
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMULL_TS, vmullts)
86
DO_2OP(VMULL_TU, vmulltu)
87
DO_2OP(VQDMULH, vqdmulh)
88
DO_2OP(VQRDMULH, vqrdmulh)
89
+DO_2OP(VQADD_S, vqadds)
90
+DO_2OP(VQADD_U, vqaddu)
91
+DO_2OP(VQSUB_S, vqsubs)
92
+DO_2OP(VQSUB_U, vqsubu)
93
94
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
95
MVEGenTwoOpScalarFn fn)
96
--
52
--
97
2.20.1
53
2.34.1
98
99
diff view generated by jsdifflib
1
Implement the vector forms of the MVE VQDMULH and VQRDMULH insns.
1
The Cortex-R52 implements the Configuration Base Address Register
2
(CBAR), as a read-only register. Add ARM_FEATURE_CBAR_RO to this CPU
3
type, so that our implementation provides the register and the
4
associated qdev property.
2
5
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-31-peter.maydell@linaro.org
8
Message-id: 20240206132931.38376-3-peter.maydell@linaro.org
6
---
9
---
7
target/arm/helper-mve.h | 8 ++++++++
10
target/arm/tcg/cpu32.c | 1 +
8
target/arm/mve.decode | 3 +++
11
1 file changed, 1 insertion(+)
9
target/arm/mve_helper.c | 27 +++++++++++++++++++++++++++
10
target/arm/translate-mve.c | 2 ++
11
4 files changed, 40 insertions(+)
12
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
13
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
15
--- a/target/arm/tcg/cpu32.c
16
+++ b/target/arm/helper-mve.h
16
+++ b/target/arm/tcg/cpu32.c
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
17
@@ -XXX,XX +XXX,XX @@ static void cortex_r52_initfn(Object *obj)
18
DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
18
set_feature(&cpu->env, ARM_FEATURE_PMSA);
19
DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
19
set_feature(&cpu->env, ARM_FEATURE_NEON);
20
20
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
21
+DEF_HELPER_FLAGS_4(mve_vqdmulhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
22
+DEF_HELPER_FLAGS_4(mve_vqdmulhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
cpu->midr = 0x411fd133; /* r1p3 */
23
+DEF_HELPER_FLAGS_4(mve_vqdmulhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
cpu->revidr = 0x00000000;
24
+
24
cpu->reset_fpsid = 0x41034023;
25
+DEF_HELPER_FLAGS_4(mve_vqrdmulhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vqrdmulhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vqrdmulhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+
29
DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/mve.decode
35
+++ b/target/arm/mve.decode
36
@@ -XXX,XX +XXX,XX @@ VMULL_BU 111 1 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
37
VMULL_TS 111 0 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
38
VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
39
40
+VQDMULH 1110 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
41
+VQRDMULH 1111 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
42
+
43
# Vector miscellaneous
44
45
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
46
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/mve_helper.c
49
+++ b/target/arm/mve_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
51
mve_advance_vpt(env); \
52
}
53
54
+#define DO_2OP_SAT(OP, ESIZE, TYPE, FN) \
55
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
56
+ { \
57
+ TYPE *d = vd, *n = vn, *m = vm; \
58
+ uint16_t mask = mve_element_mask(env); \
59
+ unsigned e; \
60
+ bool qc = false; \
61
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
62
+ bool sat = false; \
63
+ TYPE r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], &sat); \
64
+ mergemask(&d[H##ESIZE(e)], r, mask); \
65
+ qc |= sat & mask & 1; \
66
+ } \
67
+ if (qc) { \
68
+ env->vfp.qc[0] = qc; \
69
+ } \
70
+ mve_advance_vpt(env); \
71
+ }
72
+
73
#define DO_AND(N, M) ((N) & (M))
74
#define DO_BIC(N, M) ((N) & ~(M))
75
#define DO_ORR(N, M) ((N) | (M))
76
@@ -XXX,XX +XXX,XX @@ static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
77
#define DO_QRDMULH_W(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 30)) >> 31, \
78
INT32_MIN, INT32_MAX, s)
79
80
+DO_2OP_SAT(vqdmulhb, 1, int8_t, DO_QDMULH_B)
81
+DO_2OP_SAT(vqdmulhh, 2, int16_t, DO_QDMULH_H)
82
+DO_2OP_SAT(vqdmulhw, 4, int32_t, DO_QDMULH_W)
83
+
84
+DO_2OP_SAT(vqrdmulhb, 1, int8_t, DO_QRDMULH_B)
85
+DO_2OP_SAT(vqrdmulhh, 2, int16_t, DO_QRDMULH_H)
86
+DO_2OP_SAT(vqrdmulhw, 4, int32_t, DO_QRDMULH_W)
87
+
88
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
89
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
90
uint32_t rm) \
91
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/arm/translate-mve.c
94
+++ b/target/arm/translate-mve.c
95
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMULL_BS, vmullbs)
96
DO_2OP(VMULL_BU, vmullbu)
97
DO_2OP(VMULL_TS, vmullts)
98
DO_2OP(VMULL_TU, vmulltu)
99
+DO_2OP(VQDMULH, vqdmulh)
100
+DO_2OP(VQRDMULH, vqrdmulh)
101
102
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
103
MVEGenTwoOpScalarFn fn)
104
--
2.20.1

--
2.34.1
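Roughly the per-lane operation that VQRDMULH performs for 16-bit elements, as a standalone sketch (invented names; QC handling simplified to a plain flag):

#include <stdint.h>
#include <stdio.h>

/* Rounding doubling saturating multiply of one 16-bit lane:
 * (n*m + 2^14) >> 15, saturated to the int16_t range. */
static int16_t qrdmulh_h(int16_t n, int16_t m, int *qc)
{
    int32_t r = ((int32_t)n * m + (1 << 14)) >> 15;

    if (r > INT16_MAX) {
        r = INT16_MAX;
        *qc = 1;
    } else if (r < INT16_MIN) {
        r = INT16_MIN;
        *qc = 1;
    }
    return (int16_t)r;
}

int main(void)
{
    int qc = 0;

    /* INT16_MIN * INT16_MIN is the one case that saturates */
    printf("%d\n", qrdmulh_h(INT16_MIN, INT16_MIN, &qc));
    printf("qc=%d\n", qc);
    return 0;
}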
1
Implement the MVE VMLALDAV insn, which multiplies pairs of integer
1
Add the Cortex-R52 IMPDEF sysregs, by defining them here and
2
elements, accumulating them into a 64-bit result in a pair of
2
also by enabling the AUXCR feature which defines the ACTLR
3
general-purpose registers.
3
and HACTLR registers. As is our usual practice, we make these
4
simple reads-as-zero stubs for now.
4
5
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210617121628.20116-20-peter.maydell@linaro.org
8
Message-id: 20240206132931.38376-4-peter.maydell@linaro.org
8
---
9
---
9
target/arm/helper-mve.h | 8 ++++
10
target/arm/tcg/cpu32.c | 108 +++++++++++++++++++++++++++++++++++++++++
10
target/arm/translate.h | 10 ++++
11
1 file changed, 108 insertions(+)
11
target/arm/mve.decode | 15 ++++++
12
target/arm/mve_helper.c | 34 ++++++++++++++
13
target/arm/translate-mve.c | 96 ++++++++++++++++++++++++++++++++++++++
14
5 files changed, 163 insertions(+)
15
12
16
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
13
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-mve.h
15
--- a/target/arm/tcg/cpu32.c
19
+++ b/target/arm/helper-mve.h
16
+++ b/target/arm/tcg/cpu32.c
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmulltsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
17
@@ -XXX,XX +XXX,XX @@ static void cortex_r5_initfn(Object *obj)
21
DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
18
define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
22
DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+
25
+DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
26
+DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
27
+DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
28
+DEF_HELPER_FLAGS_4(mve_vmlaldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
29
+
30
+DEF_HELPER_FLAGS_4(mve_vmlaldavuh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
31
+DEF_HELPER_FLAGS_4(mve_vmlaldavuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
32
diff --git a/target/arm/translate.h b/target/arm/translate.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate.h
35
+++ b/target/arm/translate.h
36
@@ -XXX,XX +XXX,XX @@ static inline int negate(DisasContext *s, int x)
37
return -x;
38
}
19
}
39
20
40
+static inline int plus_1(DisasContext *s, int x)
21
+static const ARMCPRegInfo cortex_r52_cp_reginfo[] = {
41
+{
22
+ { .name = "CPUACTLR", .cp = 15, .opc1 = 0, .crm = 15,
42
+ return x + 1;
23
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
43
+}
24
+ { .name = "IMP_ATCMREGIONR",
44
+
25
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
45
static inline int plus_2(DisasContext *s, int x)
26
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
46
{
27
+ { .name = "IMP_BTCMREGIONR",
47
return x + 2;
28
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
48
@@ -XXX,XX +XXX,XX @@ static inline int times_4(DisasContext *s, int x)
29
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
49
return x * 4;
30
+ { .name = "IMP_CTCMREGIONR",
50
}
31
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 2,
51
32
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
52
+static inline int times_2_plus_1(DisasContext *s, int x)
33
+ { .name = "IMP_CSCTLR",
53
+{
34
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 0,
54
+ return x * 2 + 1;
35
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
55
+}
36
+ { .name = "IMP_BPCTLR",
56
+
37
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 1,
57
static inline int arm_dc_feature(DisasContext *dc, int feature)
38
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
58
{
39
+ { .name = "IMP_MEMPROTCLR",
59
return (dc->features & (1ULL << feature)) != 0;
40
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 2,
60
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
41
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
61
index XXXXXXX..XXXXXXX 100644
42
+ { .name = "IMP_SLAVEPCTLR",
62
--- a/target/arm/mve.decode
43
+ .cp = 15, .opc1 = 0, .crn = 11, .crm = 0, .opc2 = 0,
63
+++ b/target/arm/mve.decode
44
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
64
@@ -XXX,XX +XXX,XX @@ VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op
45
+ { .name = "IMP_PERIPHREGIONR",
65
VDUP 1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0
46
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0,
66
VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1
47
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
67
VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
48
+ { .name = "IMP_FLASHIFREGIONR",
68
+
49
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 1,
69
+# multiply-add long dual accumulate
50
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
70
+# rdahi: bits [3:1] from insn, bit 0 is 1
51
+ { .name = "IMP_BUILDOPTR",
71
+# rdalo: bits [3:1] from insn, bit 0 is 0
52
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 0,
72
+%rdahi 20:3 !function=times_2_plus_1
53
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
73
+%rdalo 13:3 !function=times_2
54
+ { .name = "IMP_PINOPTR",
74
+# size bit is 0 for 16 bit, 1 for 32 bit
55
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7,
75
+%size_16 16:1 !function=plus_1
56
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
76
+
57
+ { .name = "IMP_QOSR",
77
+&vmlaldav rdahi rdalo size qn qm x a
58
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 1,
78
+
59
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
79
+@vmlaldav .... .... . ... ... . ... . .... .... qm:3 . \
60
+ { .name = "IMP_BUSTIMEOUTR",
80
+ qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav
61
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 2,
81
+VMLALDAV_S 1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
62
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
82
+VMLALDAV_U 1111 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
63
+ { .name = "IMP_INTMONR",
83
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
64
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 4,
84
index XXXXXXX..XXXXXXX 100644
65
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
85
--- a/target/arm/mve_helper.c
66
+ { .name = "IMP_ICERR0",
86
+++ b/target/arm/mve_helper.c
67
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 0,
87
@@ -XXX,XX +XXX,XX @@ DO_2OP_S(vhadds, do_vhadd_s)
68
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
88
DO_2OP_U(vhaddu, do_vhadd_u)
69
+ { .name = "IMP_ICERR1",
89
DO_2OP_S(vhsubs, do_vhsub_s)
70
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 1,
90
DO_2OP_U(vhsubu, do_vhsub_u)
71
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
72
+ { .name = "IMP_DCERR0",
73
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 0,
74
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
75
+ { .name = "IMP_DCERR1",
76
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 1,
77
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
78
+ { .name = "IMP_TCMERR0",
79
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 0,
80
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
81
+ { .name = "IMP_TCMERR1",
82
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 1,
83
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
84
+ { .name = "IMP_TCMSYNDR0",
85
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 2,
86
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
87
+ { .name = "IMP_TCMSYNDR1",
88
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 3,
89
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
90
+ { .name = "IMP_FLASHERR0",
91
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 0,
92
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
93
+ { .name = "IMP_FLASHERR1",
94
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 1,
95
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
96
+ { .name = "IMP_CDBGDR0",
97
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 0,
98
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
99
+ { .name = "IMP_CBDGBR1",
100
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 1,
101
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
102
+ { .name = "IMP_TESTR0",
103
+ .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 0,
104
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
105
+ { .name = "IMP_TESTR1",
106
+ .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 1,
107
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
108
+ { .name = "IMP_CDBGDCI",
109
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 15, .opc2 = 0,
110
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
111
+ { .name = "IMP_CDBGDCT",
112
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 0,
113
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
114
+ { .name = "IMP_CDBGICT",
115
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 1,
116
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
117
+ { .name = "IMP_CDBGDCD",
118
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 0,
119
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
120
+ { .name = "IMP_CDBGICD",
121
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 1,
122
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
123
+};
91
+
124
+
92
+
125
+
93
+/*
126
static void cortex_r52_initfn(Object *obj)
94
+ * Multiply add long dual accumulate ops.
127
{
95
+ */
128
ARMCPU *cpu = ARM_CPU(obj);
96
+#define DO_LDAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \
129
@@ -XXX,XX +XXX,XX @@ static void cortex_r52_initfn(Object *obj)
97
+ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
130
set_feature(&cpu->env, ARM_FEATURE_NEON);
98
+ void *vm, uint64_t a) \
131
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
99
+ { \
132
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
100
+ uint16_t mask = mve_element_mask(env); \
133
+ set_feature(&cpu->env, ARM_FEATURE_AUXCR);
101
+ unsigned e; \
134
cpu->midr = 0x411fd133; /* r1p3 */
102
+ TYPE *n = vn, *m = vm; \
135
cpu->revidr = 0x00000000;
103
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
136
cpu->reset_fpsid = 0x41034023;
104
+ if (mask & 1) { \
137
@@ -XXX,XX +XXX,XX @@ static void cortex_r52_initfn(Object *obj)
105
+ if (e & 1) { \
138
106
+ a ODDACC \
139
cpu->pmsav7_dregion = 16;
107
+ (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
140
cpu->pmsav8r_hdregion = 16;
108
+ } else { \
109
+ a EVENACC \
110
+ (int64_t)n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \
111
+ } \
112
+ } \
113
+ } \
114
+ mve_advance_vpt(env); \
115
+ return a; \
116
+ }
117
+
141
+
118
+DO_LDAV(vmlaldavsh, 2, int16_t, false, +=, +=)
142
+ define_arm_cp_regs(cpu, cortex_r52_cp_reginfo);
119
+DO_LDAV(vmlaldavxsh, 2, int16_t, true, +=, +=)
120
+DO_LDAV(vmlaldavsw, 4, int32_t, false, +=, +=)
121
+DO_LDAV(vmlaldavxsw, 4, int32_t, true, +=, +=)
122
+
123
+DO_LDAV(vmlaldavuh, 2, uint16_t, false, +=, +=)
124
+DO_LDAV(vmlaldavuw, 4, uint32_t, false, +=, +=)
125
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/translate-mve.c
128
+++ b/target/arm/translate-mve.c
129
@@ -XXX,XX +XXX,XX @@
130
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
131
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
132
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
133
+typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
134
135
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
136
static inline long mve_qreg_offset(unsigned reg)
137
@@ -XXX,XX +XXX,XX @@ static void mve_update_eci(DisasContext *s)
138
}
139
}
143
}
140
144
141
+static bool mve_skip_first_beat(DisasContext *s)
145
static void cortex_r5f_initfn(Object *obj)
142
+{
143
+ /* Return true if PSR.ECI says we must skip the first beat of this insn */
144
+ switch (s->eci) {
145
+ case ECI_NONE:
146
+ return false;
147
+ case ECI_A0:
148
+ case ECI_A0A1:
149
+ case ECI_A0A1A2:
150
+ case ECI_A0A1A2B0:
151
+ return true;
152
+ default:
153
+ g_assert_not_reached();
154
+ }
155
+}
156
+
157
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn)
158
{
159
TCGv_i32 addr;
160
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMULL_BS, vmullbs)
161
DO_2OP(VMULL_BU, vmullbu)
162
DO_2OP(VMULL_TS, vmullts)
163
DO_2OP(VMULL_TU, vmulltu)
164
+
165
+static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
166
+ MVEGenDualAccOpFn *fn)
167
+{
168
+ TCGv_ptr qn, qm;
169
+ TCGv_i64 rda;
170
+ TCGv_i32 rdalo, rdahi;
171
+
172
+ if (!dc_isar_feature(aa32_mve, s) ||
173
+ !mve_check_qreg_bank(s, a->qn | a->qm) ||
174
+ !fn) {
175
+ return false;
176
+ }
177
+ /*
178
+ * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
179
+ * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
180
+ */
181
+ if (a->rdahi == 13 || a->rdahi == 15) {
182
+ return false;
183
+ }
184
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
185
+ return true;
186
+ }
187
+
188
+ qn = mve_qreg_ptr(a->qn);
189
+ qm = mve_qreg_ptr(a->qm);
190
+
191
+ /*
192
+ * This insn is subject to beat-wise execution. Partial execution
193
+ * of an A=0 (no-accumulate) insn which does not execute the first
194
+ * beat must start with the current rda value, not 0.
195
+ */
196
+ if (a->a || mve_skip_first_beat(s)) {
197
+ rda = tcg_temp_new_i64();
198
+ rdalo = load_reg(s, a->rdalo);
199
+ rdahi = load_reg(s, a->rdahi);
200
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
201
+ tcg_temp_free_i32(rdalo);
202
+ tcg_temp_free_i32(rdahi);
203
+ } else {
204
+ rda = tcg_const_i64(0);
205
+ }
206
+
207
+ fn(rda, cpu_env, qn, qm, rda);
208
+ tcg_temp_free_ptr(qn);
209
+ tcg_temp_free_ptr(qm);
210
+
211
+ rdalo = tcg_temp_new_i32();
212
+ rdahi = tcg_temp_new_i32();
213
+ tcg_gen_extrl_i64_i32(rdalo, rda);
214
+ tcg_gen_extrh_i64_i32(rdahi, rda);
215
+ store_reg(s, a->rdalo, rdalo);
216
+ store_reg(s, a->rdahi, rdahi);
217
+ tcg_temp_free_i64(rda);
218
+ mve_update_eci(s);
219
+ return true;
220
+}
221
+
222
+static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
223
+{
224
+ static MVEGenDualAccOpFn * const fns[4][2] = {
225
+ { NULL, NULL },
226
+ { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
227
+ { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
228
+ { NULL, NULL },
229
+ };
230
+ return do_long_dual_acc(s, a, fns[a->size][a->x]);
231
+}
232
+
233
+static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
234
+{
235
+ static MVEGenDualAccOpFn * const fns[4][2] = {
236
+ { NULL, NULL },
237
+ { gen_helper_mve_vmlaldavuh, NULL },
238
+ { gen_helper_mve_vmlaldavuw, NULL },
239
+ { NULL, NULL },
240
+ };
241
+ return do_long_dual_acc(s, a, fns[a->size][a->x]);
242
+}
243
--
2.20.1

--
2.34.1
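A standalone sketch (predication and beat-wise execution ignored; names invented) of the multiply-accumulate that VMLALDAV performs, with the 64-bit result split across RdaHi:RdaLo the way the instruction returns it:

#include <stdint.h>
#include <stdio.h>

/* Multiply corresponding signed 16-bit lanes and accumulate the
 * products into a 64-bit running total. */
static int64_t vmlaldav_s16(const int16_t *n, const int16_t *m,
                            int lanes, int64_t acc)
{
    for (int e = 0; e < lanes; e++) {
        acc += (int64_t)n[e] * m[e];
    }
    return acc;
}

int main(void)
{
    int16_t n[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    int16_t m[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
    int64_t acc = vmlaldav_s16(n, m, 8, 0);
    uint32_t rdalo = (uint32_t)acc;
    uint32_t rdahi = (uint32_t)((uint64_t)acc >> 32);

    printf("acc=%lld RdaHi=0x%08x RdaLo=0x%08x\n",
           (long long)acc, rdahi, rdalo);
    return 0;
}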
1
Implement the MVE VQDMULL scalar insn. This multiplies the top or
1
Architecturally, the AArch32 MSR/MRS to/from banked register
2
bottom half of each element by the scalar, doubles and saturates
2
instructions are UNPREDICTABLE for attempts to access a banked
3
to a double-width result.
3
register that the guest could access in a more direct way (e.g.
4
using this insn to access r8_fiq when already in FIQ mode). QEMU has
5
chosen to UNDEF on all of these.
4
6
5
Note that this encoding overlaps with VQADD and VQSUB; it uses
7
However, for the case of accessing SPSR_hyp from hyp mode, it turns
6
what in VQADD and VQSUB would be the 'size=0b11' encoding.
8
out that real hardware permits this, with the same effect as if the
9
guest had directly written to SPSR. Further, there is some
10
guest code out there that assumes it can do this, because it
11
happens to work on hardware: an example Cortex-R52 startup code
12
fragment uses this, and it got copied into various other places,
13
including Zephyr. Zephyr was fixed to not use this:
14
https://github.com/zephyrproject-rtos/zephyr/issues/47330
15
but other examples are still out there, like the selftest
16
binary for the MPS3-AN536.
17
18
For convenience of being able to run guest code, permit
19
this UNPREDICTABLE access instead of UNDEFing it.
7
20
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20210617121628.20116-30-peter.maydell@linaro.org
23
Message-id: 20240206132931.38376-5-peter.maydell@linaro.org
11
---
24
---
12
target/arm/helper-mve.h | 5 +++
25
target/arm/tcg/op_helper.c | 43 ++++++++++++++++++++++++++------------
13
target/arm/mve.decode | 23 +++++++++++---
26
target/arm/tcg/translate.c | 19 +++++++++++------
14
target/arm/mve_helper.c | 65 ++++++++++++++++++++++++++++++++++++++
27
2 files changed, 43 insertions(+), 19 deletions(-)
15
target/arm/translate-mve.c | 30 ++++++++++++++++++
16
4 files changed, 119 insertions(+), 4 deletions(-)
17
28
18
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
29
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
19
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper-mve.h
31
--- a/target/arm/tcg/op_helper.c
21
+++ b/target/arm/helper-mve.h
32
+++ b/target/arm/tcg/op_helper.c
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
@@ -XXX,XX +XXX,XX @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
23
DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
*/
24
DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
int curmode = env->uncached_cpsr & CPSR_M;
25
36
26
+DEF_HELPER_FLAGS_4(mve_vqdmullb_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
37
- if (regno == 17) {
27
+DEF_HELPER_FLAGS_4(mve_vqdmullb_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
- /* ELR_Hyp: a special case because access from tgtmode is OK */
28
+DEF_HELPER_FLAGS_4(mve_vqdmullt_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
- if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
29
+DEF_HELPER_FLAGS_4(mve_vqdmullt_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
- goto undef;
30
+
41
+ if (tgtmode == ARM_CPU_MODE_HYP) {
31
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
42
+ /*
32
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
43
+ * Handle Hyp target regs first because some are special cases
33
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
44
+ * which don't want the usual "not accessible from tgtmode" check.
34
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
45
+ */
46
+ switch (regno) {
47
+ case 16 ... 17: /* ELR_Hyp, SPSR_Hyp */
48
+ if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
49
+ goto undef;
50
+ }
51
+ break;
52
+ case 13:
53
+ if (curmode != ARM_CPU_MODE_MON) {
54
+ goto undef;
55
+ }
56
+ break;
57
+ default:
58
+ g_assert_not_reached();
59
}
60
return;
61
}
62
@@ -XXX,XX +XXX,XX @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
63
}
64
}
65
66
- if (tgtmode == ARM_CPU_MODE_HYP) {
67
- /* SPSR_Hyp, r13_hyp: accessible from Monitor mode only */
68
- if (curmode != ARM_CPU_MODE_MON) {
69
- goto undef;
70
- }
71
- }
72
-
73
return;
74
75
undef:
76
@@ -XXX,XX +XXX,XX @@ void HELPER(msr_banked)(CPUARMState *env, uint32_t value, uint32_t tgtmode,
77
78
switch (regno) {
79
case 16: /* SPSRs */
80
- env->banked_spsr[bank_number(tgtmode)] = value;
81
+ if (tgtmode == (env->uncached_cpsr & CPSR_M)) {
82
+ /* Only happens for SPSR_Hyp access in Hyp mode */
83
+ env->spsr = value;
84
+ } else {
85
+ env->banked_spsr[bank_number(tgtmode)] = value;
86
+ }
87
break;
88
case 17: /* ELR_Hyp */
89
env->elr_el[2] = value;
90
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mrs_banked)(CPUARMState *env, uint32_t tgtmode, uint32_t regno)
91
92
switch (regno) {
93
case 16: /* SPSRs */
94
- return env->banked_spsr[bank_number(tgtmode)];
95
+ if (tgtmode == (env->uncached_cpsr & CPSR_M)) {
96
+ /* Only happens for SPSR_Hyp access in Hyp mode */
97
+ return env->spsr;
98
+ } else {
99
+ return env->banked_spsr[bank_number(tgtmode)];
100
+ }
101
case 17: /* ELR_Hyp */
102
return env->elr_el[2];
103
case 13:
104
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
35
index XXXXXXX..XXXXXXX 100644
105
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/mve.decode
106
--- a/target/arm/tcg/translate.c
37
+++ b/target/arm/mve.decode
107
+++ b/target/arm/tcg/translate.c
38
@@ -XXX,XX +XXX,XX @@
108
@@ -XXX,XX +XXX,XX @@ static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
39
%qm 5:1 1:3
109
break;
40
%qn 7:1 17:3
110
case ARM_CPU_MODE_HYP:
41
111
/*
42
+# VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
112
- * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
43
+%size_28 28:1 !function=plus_1
113
- * (and so we can forbid accesses from EL2 or below). elr_hyp
44
+
114
- * can be accessed also from Hyp mode, so forbid accesses from
45
&vldr_vstr rn qd imm p a w size l u
115
- * EL0 or EL1.
46
&1op qd qm size
116
+ * r13_hyp can only be accessed from Monitor mode, and so we
47
&2op qd qm qn size
117
+ * can forbid accesses from EL2 or below.
48
@@ -XXX,XX +XXX,XX @@
118
+ * elr_hyp can be accessed also from Hyp mode, so forbid
49
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
119
+ * accesses from EL0 or EL1.
50
120
+ * SPSR_hyp is supposed to be in the same category as r13_hyp
51
@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
121
+ * and UNPREDICTABLE if accessed from anything except Monitor
52
+@2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
122
+ * mode. However there is some real-world code that will do
53
123
+ * it because at least some hardware happens to permit the
54
# Vector loads and stores
124
+ * access. (Notably a standard Cortex-R52 startup code fragment
55
125
+ * does this.) So we permit SPSR_hyp from Hyp mode also, to allow
56
@@ -XXX,XX +XXX,XX @@ VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
126
+ * this (incorrect) guest code to run.
57
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
127
*/
58
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
128
- if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
59
129
- (s->current_el < 3 && *regno != 17)) {
60
-VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
130
+ if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2
61
-VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
131
+ || (s->current_el < 3 && *regno != 16 && *regno != 17)) {
62
-VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
132
goto undef;
63
-VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
133
}
64
+{
134
break;
65
+ VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
66
+ VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
67
+ VQDMULLB_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 110 .... @2scalar_nosz \
68
+ size=%size_28
69
+}
70
+
71
+{
72
+ VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
73
+ VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
74
+ VQDMULLT_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 110 .... @2scalar_nosz \
75
+ size=%size_28
76
+}
77
+
78
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
79
80
VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
81
VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
82
83
+
84
# Predicate operations
85
%mask_22_13 22:1 13:3
86
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
87
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/mve_helper.c
90
+++ b/target/arm/mve_helper.c
91
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT_SCALAR(vqrdmulh_scalarb, 1, int8_t, DO_QRDMULH_B)
92
DO_2OP_SAT_SCALAR(vqrdmulh_scalarh, 2, int16_t, DO_QRDMULH_H)
93
DO_2OP_SAT_SCALAR(vqrdmulh_scalarw, 4, int32_t, DO_QRDMULH_W)
94
95
+/*
96
+ * Long saturating scalar ops. As with DO_2OP_L, TYPE and H are for the
97
+ * input (smaller) type and LESIZE, LTYPE, LH for the output (long) type.
98
+ * SATMASK specifies which bits of the predicate mask matter for determining
99
+ * whether to propagate a saturation indication into FPSCR.QC -- for
100
+ * the 16x16->32 case we must check only the bit corresponding to the T or B
101
+ * half that we used, but for the 32x32->64 case we propagate if the mask
102
+ * bit is set for either half.
103
+ */
104
+#define DO_2OP_SAT_SCALAR_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN, SATMASK) \
105
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
106
+ uint32_t rm) \
107
+ { \
108
+ LTYPE *d = vd; \
109
+ TYPE *n = vn; \
110
+ TYPE m = rm; \
111
+ uint16_t mask = mve_element_mask(env); \
112
+ unsigned le; \
113
+ bool qc = false; \
114
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
115
+ bool sat = false; \
116
+ LTYPE r = FN((LTYPE)n[H##ESIZE(le * 2 + TOP)], m, &sat); \
117
+ mergemask(&d[H##LESIZE(le)], r, mask); \
118
+ qc |= sat && (mask & SATMASK); \
119
+ } \
120
+ if (qc) { \
121
+ env->vfp.qc[0] = qc; \
122
+ } \
123
+ mve_advance_vpt(env); \
124
+ }
125
+
126
+static inline int32_t do_qdmullh(int16_t n, int16_t m, bool *sat)
127
+{
128
+ int64_t r = ((int64_t)n * m) * 2;
129
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat);
130
+}
131
+
132
+static inline int64_t do_qdmullw(int32_t n, int32_t m, bool *sat)
133
+{
134
+ /* The multiply can't overflow, but the doubling might */
135
+ int64_t r = (int64_t)n * m;
136
+ if (r > INT64_MAX / 2) {
137
+ *sat = true;
138
+ return INT64_MAX;
139
+ } else if (r < INT64_MIN / 2) {
140
+ *sat = true;
141
+ return INT64_MIN;
142
+ } else {
143
+ return r * 2;
144
+ }
145
+}
146
+
147
+#define SATMASK16B 1
148
+#define SATMASK16T (1 << 2)
149
+#define SATMASK32 ((1 << 4) | 1)
150
+
151
+DO_2OP_SAT_SCALAR_L(vqdmullb_scalarh, 0, 2, int16_t, 4, int32_t, \
152
+ do_qdmullh, SATMASK16B)
153
+DO_2OP_SAT_SCALAR_L(vqdmullb_scalarw, 0, 4, int32_t, 8, int64_t, \
154
+ do_qdmullw, SATMASK32)
155
+DO_2OP_SAT_SCALAR_L(vqdmullt_scalarh, 1, 2, int16_t, 4, int32_t, \
156
+ do_qdmullh, SATMASK16T)
157
+DO_2OP_SAT_SCALAR_L(vqdmullt_scalarw, 1, 4, int32_t, 8, int64_t, \
158
+ do_qdmullw, SATMASK32)
159
+
160
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
161
{
162
m &= 0xff;
163
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/target/arm/translate-mve.c
166
+++ b/target/arm/translate-mve.c
167
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
168
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
169
DO_2OP_SCALAR(VBRSR, vbrsr)
170
171
+static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
172
+{
173
+ static MVEGenTwoOpScalarFn * const fns[] = {
174
+ NULL,
175
+ gen_helper_mve_vqdmullb_scalarh,
176
+ gen_helper_mve_vqdmullb_scalarw,
177
+ NULL,
178
+ };
179
+ if (a->qd == a->qn && a->size == MO_32) {
180
+ /* UNPREDICTABLE; we choose to undef */
181
+ return false;
182
+ }
183
+ return do_2op_scalar(s, a, fns[a->size]);
184
+}
185
+
186
+static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
187
+{
188
+ static MVEGenTwoOpScalarFn * const fns[] = {
189
+ NULL,
190
+ gen_helper_mve_vqdmullt_scalarh,
191
+ gen_helper_mve_vqdmullt_scalarw,
192
+ NULL,
193
+ };
194
+ if (a->qd == a->qn && a->size == MO_32) {
195
+ /* UNPREDICTABLE; we choose to undef */
196
+ return false;
197
+ }
198
+ return do_2op_scalar(s, a, fns[a->size]);
199
+}
200
+
201
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
202
MVEGenDualAccOpFn *fn)
203
{
204
--
135
--
205
2.20.1
136
2.34.1
206
207
diff view generated by jsdifflib
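(A brief illustrative aside on the do_qdmullw() helper in the VQDMULL scalar patch above: for the 32x32->64 form the multiply itself always fits in 64 bits, so only the doubling step can overflow, and that is the case the helper checks for. The standalone sketch below is not the QEMU code; the function name and test values are invented for illustration and it assumes a C99 environment.)

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Doubling saturating multiply, 32x32 -> 64: the product fits, the doubling may not */
static int64_t qdmull_w(int32_t n, int32_t m, bool *sat)
{
    int64_t r = (int64_t)n * m;     /* cannot overflow int64_t */
    if (r > INT64_MAX / 2) {        /* 2 * r would exceed INT64_MAX */
        *sat = true;
        return INT64_MAX;
    }
    if (r < INT64_MIN / 2) {        /* 2 * r would go below INT64_MIN */
        *sat = true;
        return INT64_MIN;
    }
    return r * 2;
}

int main(void)
{
    bool sat = false;
    /* INT32_MIN * INT32_MIN is 2^62; doubling it does not fit, so the result saturates */
    int64_t r = qdmull_w(INT32_MIN, INT32_MIN, &sat);
    printf("%lld sat=%d\n", (long long)r, sat);
    return 0;
}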
1
Implement the MVE VQDMULH and VQRDMULH scalar insns, which multiply
1
We currently guard the CFG3 register read with
2
elements by the scalar, double, possibly round, take the high half
2
(scc_partno(s) == 0x524 && scc_partno(s) == 0x547)
3
and saturate.
3
which is clearly wrong as it is never true.
4
4
5
This register is present on all board types except AN524
6
and AN547; correct the condition.
7
8
Fixes: 6ac80818941829c0 ("hw/misc/mps2-scc: Implement changes for AN547")
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210617121628.20116-29-peter.maydell@linaro.org
12
Message-id: 20240206132931.38376-6-peter.maydell@linaro.org
8
---
13
---
9
target/arm/helper-mve.h | 8 ++++++++
14
hw/misc/mps2-scc.c | 2 +-
10
target/arm/mve.decode | 3 +++
15
1 file changed, 1 insertion(+), 1 deletion(-)
11
target/arm/mve_helper.c | 25 +++++++++++++++++++++++++
12
target/arm/translate-mve.c | 2 ++
13
4 files changed, 38 insertions(+)
14
16
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
17
diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper-mve.h
19
--- a/hw/misc/mps2-scc.c
18
+++ b/target/arm/helper-mve.h
20
+++ b/hw/misc/mps2-scc.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
@@ -XXX,XX +XXX,XX @@ static uint64_t mps2_scc_read(void *opaque, hwaddr offset, unsigned size)
20
DEF_HELPER_FLAGS_4(mve_vqsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
r = s->cfg2;
21
DEF_HELPER_FLAGS_4(mve_vqsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
break;
22
24
case A_CFG3:
23
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
25
- if (scc_partno(s) == 0x524 && scc_partno(s) == 0x547) {
24
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
26
+ if (scc_partno(s) == 0x524 || scc_partno(s) == 0x547) {
25
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
/* CFG3 reserved on AN524 */
26
+
28
goto bad_offset;
27
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
}
28
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
+
31
DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/mve.decode
37
+++ b/target/arm/mve.decode
38
@@ -XXX,XX +XXX,XX @@ VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
39
VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
40
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
41
42
+VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
43
+VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
44
+
45
# Predicate operations
46
%mask_22_13 22:1 13:3
47
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
48
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/mve_helper.c
51
+++ b/target/arm/mve_helper.c
52
@@ -XXX,XX +XXX,XX @@ static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
53
#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
54
#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
55
56
+/*
57
+ * For QDMULH and QRDMULH we simplify "double and shift by esize" into
58
+ * "shift by esize-1", adjusting the QRDMULH rounding constant to match.
59
+ */
60
+#define DO_QDMULH_B(n, m, s) do_sat_bhw(((int64_t)n * m) >> 7, \
61
+ INT8_MIN, INT8_MAX, s)
62
+#define DO_QDMULH_H(n, m, s) do_sat_bhw(((int64_t)n * m) >> 15, \
63
+ INT16_MIN, INT16_MAX, s)
64
+#define DO_QDMULH_W(n, m, s) do_sat_bhw(((int64_t)n * m) >> 31, \
65
+ INT32_MIN, INT32_MAX, s)
66
+
67
+#define DO_QRDMULH_B(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 6)) >> 7, \
68
+ INT8_MIN, INT8_MAX, s)
69
+#define DO_QRDMULH_H(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 14)) >> 15, \
70
+ INT16_MIN, INT16_MAX, s)
71
+#define DO_QRDMULH_W(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 30)) >> 31, \
72
+ INT32_MIN, INT32_MAX, s)
73
+
74
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
75
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
76
uint32_t rm) \
77
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT_SCALAR(vqsubs_scalarb, 1, int8_t, DO_SQSUB_B)
78
DO_2OP_SAT_SCALAR(vqsubs_scalarh, 2, int16_t, DO_SQSUB_H)
79
DO_2OP_SAT_SCALAR(vqsubs_scalarw, 4, int32_t, DO_SQSUB_W)
80
81
+DO_2OP_SAT_SCALAR(vqdmulh_scalarb, 1, int8_t, DO_QDMULH_B)
82
+DO_2OP_SAT_SCALAR(vqdmulh_scalarh, 2, int16_t, DO_QDMULH_H)
83
+DO_2OP_SAT_SCALAR(vqdmulh_scalarw, 4, int32_t, DO_QDMULH_W)
84
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarb, 1, int8_t, DO_QRDMULH_B)
85
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarh, 2, int16_t, DO_QRDMULH_H)
86
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarw, 4, int32_t, DO_QRDMULH_W)
87
+
88
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
89
{
90
m &= 0xff;
91
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/arm/translate-mve.c
94
+++ b/target/arm/translate-mve.c
95
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
96
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
97
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
98
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
99
+DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
100
+DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
101
DO_2OP_SCALAR(VBRSR, vbrsr)
102
103
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
104
--
30
--
105
2.20.1
31
2.34.1
106
32
107
33
diff view generated by jsdifflib
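(An illustrative aside on the DO_QRDMULH_* macros in the VQDMULH/VQRDMULH patch above: the architectural "double, round, shift right by esize" is folded into "shift right by esize-1" with a halved rounding constant. The standalone check below is not QEMU code, ignores saturation, and assumes the usual arithmetic right shift of negative values; it only demonstrates that the two rounding forms agree for the 16-bit element size.)

#include <stdint.h>
#include <assert.h>

static int64_t qrdmulh16_spec(int16_t n, int16_t m)
{
    /* Architectural form: double the product, add 2^15, keep bits [31:16] */
    return ((int64_t)n * m * 2 + (1 << 15)) >> 16;
}

static int64_t qrdmulh16_folded(int16_t n, int16_t m)
{
    /* Folded form used by the helpers: shift by esize-1 with a 2^14 rounding constant */
    return ((int64_t)n * m + (1 << 14)) >> 15;
}

int main(void)
{
    for (int32_t n = -32768; n < 32768; n += 257) {
        for (int32_t m = -32768; m < 32768; m += 263) {
            assert(qrdmulh16_spec((int16_t)n, (int16_t)m) ==
                   qrdmulh16_folded((int16_t)n, (int16_t)m));
        }
    }
    return 0;
}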
1
Implement MVE VHADD and VHSUB insns, which perform an addition
1
The MPS SCC device has a lot of different flavours for the various
2
or subtraction and then halve the result.
2
different MPS FPGA images, which look mostly similar but have
3
differences in how particular registers are handled. Currently we
4
handle this with a lot of open-coded checks on scc_partno(), but
5
as we add more board types this is getting a bit hard to read.
6
7
Factor out the conditions into some functions which we can
8
give more descriptive names to.
3
9
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210617121628.20116-18-peter.maydell@linaro.org
13
Message-id: 20240206132931.38376-7-peter.maydell@linaro.org
7
---
14
---
8
target/arm/helper-mve.h | 14 ++++++++++++++
15
hw/misc/mps2-scc.c | 45 +++++++++++++++++++++++++++++++--------------
9
target/arm/mve.decode | 5 +++++
16
1 file changed, 31 insertions(+), 14 deletions(-)
10
target/arm/mve_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-mve.c | 4 ++++
12
4 files changed, 48 insertions(+)
13
17
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
18
diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
20
--- a/hw/misc/mps2-scc.c
17
+++ b/target/arm/helper-mve.h
21
+++ b/hw/misc/mps2-scc.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vabdsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
@@ -XXX,XX +XXX,XX @@ static int scc_partno(MPS2SCC *s)
19
DEF_HELPER_FLAGS_4(mve_vabdub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
23
return extract32(s->id, 4, 8);
20
DEF_HELPER_FLAGS_4(mve_vabduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
}
21
DEF_HELPER_FLAGS_4(mve_vabduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
22
+
26
+/* Is CFG_REG2 present? */
23
+DEF_HELPER_FLAGS_4(mve_vhaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+static bool have_cfg2(MPS2SCC *s)
24
+DEF_HELPER_FLAGS_4(mve_vhaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+DEF_HELPER_FLAGS_4(mve_vhaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vhaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vhadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+DEF_HELPER_FLAGS_4(mve_vhadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
29
+
30
+DEF_HELPER_FLAGS_4(mve_vhsubsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
+DEF_HELPER_FLAGS_4(mve_vhsubsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
+DEF_HELPER_FLAGS_4(mve_vhsubsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
33
+DEF_HELPER_FLAGS_4(mve_vhsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+DEF_HELPER_FLAGS_4(mve_vhsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
35
+DEF_HELPER_FLAGS_4(mve_vhsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/mve.decode
39
+++ b/target/arm/mve.decode
40
@@ -XXX,XX +XXX,XX @@ VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
41
VABD_S 111 0 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
42
VABD_U 111 1 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
43
44
+VHADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
45
+VHADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
46
+VHSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
47
+VHSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
48
+
49
# Vector miscellaneous
50
51
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
52
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/mve_helper.c
55
+++ b/target/arm/mve_helper.c
56
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vminu, DO_MIN)
57
58
DO_2OP_S(vabds, DO_ABD)
59
DO_2OP_U(vabdu, DO_ABD)
60
+
61
+static inline uint32_t do_vhadd_u(uint32_t n, uint32_t m)
62
+{
28
+{
63
+ return ((uint64_t)n + m) >> 1;
29
+ return scc_partno(s) == 0x524 || scc_partno(s) == 0x547;
64
+}
30
+}
65
+
31
+
66
+static inline int32_t do_vhadd_s(int32_t n, int32_t m)
32
+/* Is CFG_REG3 present? */
33
+static bool have_cfg3(MPS2SCC *s)
67
+{
34
+{
68
+ return ((int64_t)n + m) >> 1;
35
+ return scc_partno(s) != 0x524 && scc_partno(s) != 0x547;
69
+}
36
+}
70
+
37
+
71
+static inline uint32_t do_vhsub_u(uint32_t n, uint32_t m)
38
+/* Is CFG_REG5 present? */
39
+static bool have_cfg5(MPS2SCC *s)
72
+{
40
+{
73
+ return ((uint64_t)n - m) >> 1;
41
+ return scc_partno(s) == 0x524 || scc_partno(s) == 0x547;
74
+}
42
+}
75
+
43
+
76
+static inline int32_t do_vhsub_s(int32_t n, int32_t m)
44
+/* Is CFG_REG6 present? */
45
+static bool have_cfg6(MPS2SCC *s)
77
+{
46
+{
78
+ return ((int64_t)n - m) >> 1;
47
+ return scc_partno(s) == 0x524;
79
+}
48
+}
80
+
49
+
81
+DO_2OP_S(vhadds, do_vhadd_s)
50
/* Handle a write via the SYS_CFG channel to the specified function/device.
82
+DO_2OP_U(vhaddu, do_vhadd_u)
51
* Return false on error (reported to guest via SYS_CFGCTRL ERROR bit).
83
+DO_2OP_S(vhsubs, do_vhsub_s)
52
*/
84
+DO_2OP_U(vhsubu, do_vhsub_u)
53
@@ -XXX,XX +XXX,XX @@ static uint64_t mps2_scc_read(void *opaque, hwaddr offset, unsigned size)
85
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
54
r = s->cfg1;
86
index XXXXXXX..XXXXXXX 100644
55
break;
87
--- a/target/arm/translate-mve.c
56
case A_CFG2:
88
+++ b/target/arm/translate-mve.c
57
- if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
89
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMIN_S, vmins)
58
- /* CFG2 reserved on other boards */
90
DO_2OP(VMIN_U, vminu)
59
+ if (!have_cfg2(s)) {
91
DO_2OP(VABD_S, vabds)
60
goto bad_offset;
92
DO_2OP(VABD_U, vabdu)
61
}
93
+DO_2OP(VHADD_S, vhadds)
62
r = s->cfg2;
94
+DO_2OP(VHADD_U, vhaddu)
63
break;
95
+DO_2OP(VHSUB_S, vhsubs)
64
case A_CFG3:
96
+DO_2OP(VHSUB_U, vhsubu)
65
- if (scc_partno(s) == 0x524 || scc_partno(s) == 0x547) {
66
- /* CFG3 reserved on AN524 */
67
+ if (!have_cfg3(s)) {
68
goto bad_offset;
69
}
70
/* These are user-settable DIP switches on the board. We don't
71
@@ -XXX,XX +XXX,XX @@ static uint64_t mps2_scc_read(void *opaque, hwaddr offset, unsigned size)
72
r = s->cfg4;
73
break;
74
case A_CFG5:
75
- if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
76
- /* CFG5 reserved on other boards */
77
+ if (!have_cfg5(s)) {
78
goto bad_offset;
79
}
80
r = s->cfg5;
81
break;
82
case A_CFG6:
83
- if (scc_partno(s) != 0x524) {
84
- /* CFG6 reserved on other boards */
85
+ if (!have_cfg6(s)) {
86
goto bad_offset;
87
}
88
r = s->cfg6;
89
@@ -XXX,XX +XXX,XX @@ static void mps2_scc_write(void *opaque, hwaddr offset, uint64_t value,
90
}
91
break;
92
case A_CFG2:
93
- if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
94
- /* CFG2 reserved on other boards */
95
+ if (!have_cfg2(s)) {
96
goto bad_offset;
97
}
98
/* AN524: QSPI Select signal */
99
s->cfg2 = value;
100
break;
101
case A_CFG5:
102
- if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
103
- /* CFG5 reserved on other boards */
104
+ if (!have_cfg5(s)) {
105
goto bad_offset;
106
}
107
/* AN524: ACLK frequency in Hz */
108
s->cfg5 = value;
109
break;
110
case A_CFG6:
111
- if (scc_partno(s) != 0x524) {
112
- /* CFG6 reserved on other boards */
113
+ if (!have_cfg6(s)) {
114
goto bad_offset;
115
}
116
/* AN524: Clock divider for BRAM */
97
--
117
--
98
2.20.1
118
2.34.1
99
119
100
120
diff view generated by jsdifflib
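(A short illustrative note on the do_vhadd_u()/do_vhsub_u() helpers in the VHADD/VHSUB patch above: the operands are widened to 64 bits before the add or subtract, so the carry or borrow is preserved, and the halving then happens on the wide intermediate. Standalone sketch only, with invented function names; not the QEMU code.)

#include <stdint.h>
#include <stdio.h>

/* Halving add: widen first so the intermediate sum cannot wrap */
static uint32_t vhadd_u32(uint32_t n, uint32_t m)
{
    return (uint32_t)(((uint64_t)n + m) >> 1);
}

/* Halving subtract: the signed 64-bit difference keeps the borrow
 * (assumes arithmetic right shift of negative values, as the helpers do)
 */
static int32_t vhsub_s32(int32_t n, int32_t m)
{
    return (int32_t)(((int64_t)n - m) >> 1);
}

int main(void)
{
    /* 0xffffffff + 0xffffffff wraps in 32 bits, but halves correctly when widened */
    printf("0x%08x\n", vhadd_u32(0xffffffffu, 0xffffffffu));
    /* INT32_MIN - 1 also needs the wide intermediate before halving */
    printf("%d\n", (int)vhsub_s32(INT32_MIN, 1));
    return 0;
}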
1
Implement the MVE VQADD and VQSUB insns, which perform saturating
1
The MPS2 SCC device is broadly the same for all FPGA images, but has
2
addition or subtraction of a scalar to/from each element. Note that individual bytes of
2
minor differences in the behaviour of the CFG registers depending on
3
each result element are used or discarded according to the predicate
3
the image. In many cases we don't really care about the functionality
4
mask, but FPSCR.QC is only set if the predicate mask for the lowest
4
controlled by these registers and a reads-as-written or similar
5
byte of the element is set.
5
behaviour is sufficient for the moment.
6
7
For the AN536 the required behaviour is:
8
9
* A_CFG0 has CPU reset and halt bits
10
- implement as reads-as-written for the moment
11
* A_CFG1 has flash or ATCM address 0 remap handling
12
- QEMU doesn't model this; implement as reads-as-written
13
* A_CFG2 has QSPI select (like AN524)
14
- implemented (no behaviour, as with AN524)
15
* A_CFG3 is MCC_MSB_ADDR "additional MCC addressing bits"
16
- QEMU doesn't care about these, so use the existing
17
RAZ behaviour for convenience
18
* A_CFG4 is board rev (like all other images)
19
- no change needed
20
* A_CFG5 is the ACLK frequency in Hz (like AN524)
21
- implemented as reads-as-written, as for other boards
22
* A_CFG6 is core 0 vector table base address
23
- implemented as reads-as-written for the moment
24
* A_CFG7 is core 1 vector table base address
25
- implemented as reads-as-written for the moment
26
27
Make the changes necessary for this; leave TODO comments where
28
appropriate to indicate where we might want to come back and
29
implement things like CPU reset.
30
31
The other aspects of the device specific to this FPGA image (like the
32
values of the board ID and similar registers) will be set via the
33
device's qdev properties.
6
34
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
35
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
36
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210617121628.20116-28-peter.maydell@linaro.org
37
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
38
Message-id: 20240206132931.38376-8-peter.maydell@linaro.org
10
---
39
---
11
target/arm/helper-mve.h | 16 ++++++++++
40
include/hw/misc/mps2-scc.h | 1 +
12
target/arm/mve.decode | 5 +++
41
hw/misc/mps2-scc.c | 101 +++++++++++++++++++++++++++++++++----
13
target/arm/mve_helper.c | 62 ++++++++++++++++++++++++++++++++++++++
42
2 files changed, 92 insertions(+), 10 deletions(-)
14
target/arm/translate-mve.c | 4 +++
43
15
4 files changed, 87 insertions(+)
44
diff --git a/include/hw/misc/mps2-scc.h b/include/hw/misc/mps2-scc.h
16
17
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
18
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-mve.h
46
--- a/include/hw/misc/mps2-scc.h
20
+++ b/target/arm/helper-mve.h
47
+++ b/include/hw/misc/mps2-scc.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
48
@@ -XXX,XX +XXX,XX @@ struct MPS2SCC {
22
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
49
uint32_t cfg4;
23
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
50
uint32_t cfg5;
24
51
uint32_t cfg6;
25
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
52
+ uint32_t cfg7;
26
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
53
uint32_t cfgdata_rtn;
27
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
54
uint32_t cfgdata_out;
28
+
55
uint32_t cfgctrl;
29
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
56
diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c
30
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
+
33
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
+
37
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
+
41
DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
42
DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
43
DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
44
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
45
index XXXXXXX..XXXXXXX 100644
57
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/mve.decode
58
--- a/hw/misc/mps2-scc.c
47
+++ b/target/arm/mve.decode
59
+++ b/hw/misc/mps2-scc.c
48
@@ -XXX,XX +XXX,XX @@ VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
60
@@ -XXX,XX +XXX,XX @@ REG32(CFG3, 0xc)
49
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
61
REG32(CFG4, 0x10)
50
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
62
REG32(CFG5, 0x14)
51
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
63
REG32(CFG6, 0x18)
52
+
64
+REG32(CFG7, 0x1c)
53
+VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
65
REG32(CFGDATA_RTN, 0xa0)
54
+VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
66
REG32(CFGDATA_OUT, 0xa4)
55
+VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
67
REG32(CFGCTRL, 0xa8)
56
+VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
68
@@ -XXX,XX +XXX,XX @@ static int scc_partno(MPS2SCC *s)
57
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
69
/* Is CFG_REG2 present? */
58
70
static bool have_cfg2(MPS2SCC *s)
59
# Predicate operations
71
{
60
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
72
- return scc_partno(s) == 0x524 || scc_partno(s) == 0x547;
61
index XXXXXXX..XXXXXXX 100644
73
+ return scc_partno(s) == 0x524 || scc_partno(s) == 0x547 ||
62
--- a/target/arm/mve_helper.c
74
+ scc_partno(s) == 0x536;
63
+++ b/target/arm/mve_helper.c
75
}
64
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhaddu, do_vhadd_u)
76
65
DO_2OP_S(vhsubs, do_vhsub_s)
77
/* Is CFG_REG3 present? */
66
DO_2OP_U(vhsubu, do_vhsub_u)
78
static bool have_cfg3(MPS2SCC *s)
67
79
{
68
+static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
80
- return scc_partno(s) != 0x524 && scc_partno(s) != 0x547;
69
+{
81
+ return scc_partno(s) != 0x524 && scc_partno(s) != 0x547 &&
70
+ if (val > max) {
82
+ scc_partno(s) != 0x536;
71
+ *s = true;
83
}
72
+ return max;
84
73
+ } else if (val < min) {
85
/* Is CFG_REG5 present? */
74
+ *s = true;
86
static bool have_cfg5(MPS2SCC *s)
75
+ return min;
87
{
88
- return scc_partno(s) == 0x524 || scc_partno(s) == 0x547;
89
+ return scc_partno(s) == 0x524 || scc_partno(s) == 0x547 ||
90
+ scc_partno(s) == 0x536;
91
}
92
93
/* Is CFG_REG6 present? */
94
static bool have_cfg6(MPS2SCC *s)
95
{
96
- return scc_partno(s) == 0x524;
97
+ return scc_partno(s) == 0x524 || scc_partno(s) == 0x536;
98
+}
99
+
100
+/* Is CFG_REG7 present? */
101
+static bool have_cfg7(MPS2SCC *s)
102
+{
103
+ return scc_partno(s) == 0x536;
104
+}
105
+
106
+/* Does CFG_REG0 drive the 'remap' GPIO output? */
107
+static bool cfg0_is_remap(MPS2SCC *s)
108
+{
109
+ return scc_partno(s) != 0x536;
110
+}
111
+
112
+/* Is CFG_REG1 driving a set of LEDs? */
113
+static bool cfg1_is_leds(MPS2SCC *s)
114
+{
115
+ return scc_partno(s) != 0x536;
116
}
117
118
/* Handle a write via the SYS_CFG channel to the specified function/device.
119
@@ -XXX,XX +XXX,XX @@ static uint64_t mps2_scc_read(void *opaque, hwaddr offset, unsigned size)
120
if (!have_cfg3(s)) {
121
goto bad_offset;
122
}
123
- /* These are user-settable DIP switches on the board. We don't
124
+ /*
125
+ * These are user-settable DIP switches on the board. We don't
126
* model that, so just return zeroes.
127
+ *
128
+ * TODO: for AN536 this is MCC_MSB_ADDR "additional MCC addressing
129
+ * bits". These change which part of the DDR4 the motherboard
130
+ * configuration controller can see in its memory map (see the
131
+ * appnote section 2.4). QEMU doesn't model the MCC at all, so these
132
+ * bits are not interesting to us; read-as-zero is as good as anything
133
+ * else.
134
*/
135
r = 0;
136
break;
137
@@ -XXX,XX +XXX,XX @@ static uint64_t mps2_scc_read(void *opaque, hwaddr offset, unsigned size)
138
}
139
r = s->cfg6;
140
break;
141
+ case A_CFG7:
142
+ if (!have_cfg7(s)) {
143
+ goto bad_offset;
144
+ }
145
+ r = s->cfg7;
146
+ break;
147
case A_CFGDATA_RTN:
148
r = s->cfgdata_rtn;
149
break;
150
@@ -XXX,XX +XXX,XX @@ static void mps2_scc_write(void *opaque, hwaddr offset, uint64_t value,
151
* we always reflect bit 0 in the 'remap' GPIO output line,
152
* and let the board wire it up or not as it chooses.
153
* TODO on some boards bit 1 is CPU_WAIT.
154
+ *
155
+ * TODO: on the AN536 this register controls reset and halt
156
+ * for both CPUs. For the moment we don't implement this, so the
157
+ * register just reads as written.
158
*/
159
s->cfg0 = value;
160
- qemu_set_irq(s->remap, s->cfg0 & 1);
161
+ if (cfg0_is_remap(s)) {
162
+ qemu_set_irq(s->remap, s->cfg0 & 1);
163
+ }
164
break;
165
case A_CFG1:
166
s->cfg1 = value;
167
- for (size_t i = 0; i < ARRAY_SIZE(s->led); i++) {
168
- led_set_state(s->led[i], extract32(value, i, 1));
169
+ /*
170
+ * On most boards this register drives LEDs.
171
+ *
172
+ * TODO: for AN536 this controls whether flash and ATCM are
173
+ * enabled or disabled on reset. QEMU doesn't model this, and
174
+ * always wires up RAM in the ATCM area and ROM in the flash area.
175
+ */
176
+ if (cfg1_is_leds(s)) {
177
+ for (size_t i = 0; i < ARRAY_SIZE(s->led); i++) {
178
+ led_set_state(s->led[i], extract32(value, i, 1));
179
+ }
180
}
181
break;
182
case A_CFG2:
183
if (!have_cfg2(s)) {
184
goto bad_offset;
185
}
186
- /* AN524: QSPI Select signal */
187
+ /* AN524, AN536: QSPI Select signal */
188
s->cfg2 = value;
189
break;
190
case A_CFG5:
191
if (!have_cfg5(s)) {
192
goto bad_offset;
193
}
194
- /* AN524: ACLK frequency in Hz */
195
+ /* AN524, AN536: ACLK frequency in Hz */
196
s->cfg5 = value;
197
break;
198
case A_CFG6:
199
@@ -XXX,XX +XXX,XX @@ static void mps2_scc_write(void *opaque, hwaddr offset, uint64_t value,
200
goto bad_offset;
201
}
202
/* AN524: Clock divider for BRAM */
203
+ /* AN536: Core 0 vector table base address */
204
+ s->cfg6 = value;
205
+ break;
206
+ case A_CFG7:
207
+ if (!have_cfg7(s)) {
208
+ goto bad_offset;
209
+ }
210
+ /* AN536: Core 1 vector table base address */
211
s->cfg6 = value;
212
break;
213
case A_CFGDATA_OUT:
214
@@ -XXX,XX +XXX,XX @@ static void mps2_scc_finalize(Object *obj)
215
g_free(s->oscclk_reset);
216
}
217
218
+static bool cfg7_needed(void *opaque)
219
+{
220
+ MPS2SCC *s = opaque;
221
+
222
+ return have_cfg7(s);
223
+}
224
+
225
+static const VMStateDescription vmstate_cfg7 = {
226
+ .name = "mps2-scc/cfg7",
227
+ .version_id = 1,
228
+ .minimum_version_id = 1,
229
+ .needed = cfg7_needed,
230
+ .fields = (const VMStateField[]) {
231
+ VMSTATE_UINT32(cfg7, MPS2SCC),
232
+ VMSTATE_END_OF_LIST()
76
+ }
233
+ }
77
+ return val;
234
+};
78
+}
235
+
79
+
236
static const VMStateDescription mps2_scc_vmstate = {
80
+#define DO_SQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, INT8_MIN, INT8_MAX, s)
237
.name = "mps2-scc",
81
+#define DO_SQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, INT16_MIN, INT16_MAX, s)
238
.version_id = 3,
82
+#define DO_SQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, INT32_MIN, INT32_MAX, s)
239
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription mps2_scc_vmstate = {
83
+
240
VMSTATE_VARRAY_UINT32(oscclk, MPS2SCC, num_oscclk,
84
+#define DO_UQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT8_MAX, s)
241
0, vmstate_info_uint32, uint32_t),
85
+#define DO_UQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT16_MAX, s)
242
VMSTATE_END_OF_LIST()
86
+#define DO_UQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT32_MAX, s)
243
+ },
87
+
244
+ .subsections = (const VMStateDescription * const []) {
88
+#define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
245
+ &vmstate_cfg7,
89
+#define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
246
+ NULL
90
+#define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
91
+
92
+#define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
93
+#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
94
+#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
95
96
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
97
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
98
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhsubu, do_vhsub_u)
99
mve_advance_vpt(env); \
100
}
247
}
101
248
};
102
+#define DO_2OP_SAT_SCALAR(OP, ESIZE, TYPE, FN) \
249
103
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
104
+ uint32_t rm) \
105
+ { \
106
+ TYPE *d = vd, *n = vn; \
107
+ TYPE m = rm; \
108
+ uint16_t mask = mve_element_mask(env); \
109
+ unsigned e; \
110
+ bool qc = false; \
111
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
112
+ bool sat = false; \
113
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m, &sat), \
114
+ mask); \
115
+ qc |= sat & mask & 1; \
116
+ } \
117
+ if (qc) { \
118
+ env->vfp.qc[0] = qc; \
119
+ } \
120
+ mve_advance_vpt(env); \
121
+ }
122
+
123
/* provide unsigned 2-op scalar helpers for all sizes */
124
#define DO_2OP_SCALAR_U(OP, FN) \
125
DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
126
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
127
DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
128
DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
129
130
+DO_2OP_SAT_SCALAR(vqaddu_scalarb, 1, uint8_t, DO_UQADD_B)
131
+DO_2OP_SAT_SCALAR(vqaddu_scalarh, 2, uint16_t, DO_UQADD_H)
132
+DO_2OP_SAT_SCALAR(vqaddu_scalarw, 4, uint32_t, DO_UQADD_W)
133
+DO_2OP_SAT_SCALAR(vqadds_scalarb, 1, int8_t, DO_SQADD_B)
134
+DO_2OP_SAT_SCALAR(vqadds_scalarh, 2, int16_t, DO_SQADD_H)
135
+DO_2OP_SAT_SCALAR(vqadds_scalarw, 4, int32_t, DO_SQADD_W)
136
+
137
+DO_2OP_SAT_SCALAR(vqsubu_scalarb, 1, uint8_t, DO_UQSUB_B)
138
+DO_2OP_SAT_SCALAR(vqsubu_scalarh, 2, uint16_t, DO_UQSUB_H)
139
+DO_2OP_SAT_SCALAR(vqsubu_scalarw, 4, uint32_t, DO_UQSUB_W)
140
+DO_2OP_SAT_SCALAR(vqsubs_scalarb, 1, int8_t, DO_SQSUB_B)
141
+DO_2OP_SAT_SCALAR(vqsubs_scalarh, 2, int16_t, DO_SQSUB_H)
142
+DO_2OP_SAT_SCALAR(vqsubs_scalarw, 4, int32_t, DO_SQSUB_W)
143
+
144
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
145
{
146
m &= 0xff;
147
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/arm/translate-mve.c
150
+++ b/target/arm/translate-mve.c
151
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
152
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
153
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
154
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
155
+DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
156
+DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
157
+DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
158
+DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
159
DO_2OP_SCALAR(VBRSR, vbrsr)
160
161
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
162
--
250
--
163
2.20.1
251
2.34.1
164
252
165
253
diff view generated by jsdifflib
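(One more illustrative aside, this time on the do_sat_bhw() clamp that the DO_SQADD_*/DO_UQSUB_* macros in the VQADD/VQSUB scalar patch above rely on: each per-size saturating op computes a 64-bit intermediate and clamps it to the destination range, recording a flag that later feeds FPSCR.QC. Standalone sketch with invented names and arbitrary test values; not the QEMU code.)

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Clamp a 64-bit intermediate into [min, max] and record whether it saturated */
static int64_t sat_clamp(int64_t val, int64_t min, int64_t max, bool *sat)
{
    if (val > max) {
        *sat = true;
        return max;
    }
    if (val < min) {
        *sat = true;
        return min;
    }
    return val;
}

int main(void)
{
    bool sat = false;

    /* Signed byte saturating add: 100 + 100 = 200 clamps to INT8_MAX (127) */
    int64_t r = sat_clamp((int64_t)100 + 100, INT8_MIN, INT8_MAX, &sat);
    printf("%lld sat=%d\n", (long long)r, sat);

    /* Unsigned byte saturating subtract: 10 - 20 clamps to 0 */
    sat = false;
    r = sat_clamp((int64_t)10 - 20, 0, UINT8_MAX, &sat);
    printf("%lld sat=%d\n", (long long)r, sat);
    return 0;
}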
1
Implement the forms of the MVE VLDR and VSTR insns which perform
1
The AN536 is another FPGA image for the MPS3 development board. Unlike
2
non-widening loads and stores of bytes, halfwords or words between
2
the FPGA images we already model, this board uses a Cortex-R
3
memory and vector elements of the same width (encodings T5, T6, T7).
3
family CPU, and it does not use anything like the M-profile
4
4
"Subsystem for Embedded" SoC-equivalent that we model in hw/arm/armsse.c.
5
(At the moment we know for MVE and M-profile in general that
5
It's therefore more convenient for us to model it as a completely
6
vfp_access_check() can never return false, but we include the
6
separate C file.
7
conventional return-true-on-failure check for consistency
7
8
with non-M-profile translation code.)
8
This commit adds the basic skeleton of the board model, and the
9
code to create all the RAM and ROM. We assume that we're probably
10
going to want to add more images in future, so use the same
11
base class/subclass setup that mps2-tz.c uses, even though at
12
the moment there's only a single subclass.
13
14
Following commits will add the CPUs and the peripherals.
9
15
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Message-id: 20210617121628.20116-2-peter.maydell@linaro.org
18
Message-id: 20240206132931.38376-9-peter.maydell@linaro.org
13
---
19
---
14
target/arm/{translate-mve.c => helper-mve.h} | 19 +-
20
MAINTAINERS | 3 +-
15
target/arm/helper.h | 2 +
21
configs/devices/arm-softmmu/default.mak | 1 +
16
target/arm/internals.h | 11 ++
22
hw/arm/mps3r.c | 239 ++++++++++++++++++++++++
17
target/arm/mve.decode | 22 +++
23
hw/arm/Kconfig | 5 +
18
target/arm/mve_helper.c | 172 +++++++++++++++++++
24
hw/arm/meson.build | 1 +
19
target/arm/translate-mve.c | 119 +++++++++++++
25
5 files changed, 248 insertions(+), 1 deletion(-)
20
target/arm/meson.build | 1 +
26
create mode 100644 hw/arm/mps3r.c
21
7 files changed, 334 insertions(+), 12 deletions(-)
27
22
copy target/arm/{translate-mve.c => helper-mve.h} (61%)
28
diff --git a/MAINTAINERS b/MAINTAINERS
23
create mode 100644 target/arm/mve_helper.c
24
25
diff --git a/target/arm/translate-mve.c b/target/arm/helper-mve.h
26
similarity index 61%
27
copy from target/arm/translate-mve.c
28
copy to target/arm/helper-mve.h
29
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-mve.c
30
--- a/MAINTAINERS
31
+++ b/target/arm/helper-mve.h
31
+++ b/MAINTAINERS
32
@@ -XXX,XX +XXX,XX @@
32
@@ -XXX,XX +XXX,XX @@ F: include/hw/misc/imx7_*.h
33
/*
33
F: hw/pci-host/designware.c
34
- * ARM translation: M-profile MVE instructions
34
F: include/hw/pci-host/designware.h
35
+ * M-profile MVE specific helper definitions
35
36
*
36
-MPS2
37
* Copyright (c) 2021 Linaro, Ltd.
37
+MPS2 / MPS3
38
*
38
M: Peter Maydell <peter.maydell@linaro.org>
39
@@ -XXX,XX +XXX,XX @@
39
L: qemu-arm@nongnu.org
40
* You should have received a copy of the GNU Lesser General Public
40
S: Maintained
41
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
41
F: hw/arm/mps2.c
42
*/
42
F: hw/arm/mps2-tz.c
43
-
43
+F: hw/arm/mps3r.c
44
-#include "qemu/osdep.h"
44
F: hw/misc/mps2-*.c
45
-#include "tcg/tcg-op.h"
45
F: include/hw/misc/mps2-*.h
46
-#include "tcg/tcg-op-gvec.h"
46
F: hw/arm/armsse.c
47
-#include "exec/exec-all.h"
47
diff --git a/configs/devices/arm-softmmu/default.mak b/configs/devices/arm-softmmu/default.mak
48
-#include "exec/gen-icount.h"
49
-#include "translate.h"
50
-#include "translate-a32.h"
51
-
52
-/* Include the generated decoder */
53
-#include "decode-mve.c.inc"
54
+DEF_HELPER_FLAGS_3(mve_vldrb, TCG_CALL_NO_WG, void, env, ptr, i32)
55
+DEF_HELPER_FLAGS_3(mve_vldrh, TCG_CALL_NO_WG, void, env, ptr, i32)
56
+DEF_HELPER_FLAGS_3(mve_vldrw, TCG_CALL_NO_WG, void, env, ptr, i32)
57
+DEF_HELPER_FLAGS_3(mve_vstrb, TCG_CALL_NO_WG, void, env, ptr, i32)
58
+DEF_HELPER_FLAGS_3(mve_vstrh, TCG_CALL_NO_WG, void, env, ptr, i32)
59
+DEF_HELPER_FLAGS_3(mve_vstrw, TCG_CALL_NO_WG, void, env, ptr, i32)
60
diff --git a/target/arm/helper.h b/target/arm/helper.h
61
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/helper.h
49
--- a/configs/devices/arm-softmmu/default.mak
63
+++ b/target/arm/helper.h
50
+++ b/configs/devices/arm-softmmu/default.mak
64
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
51
@@ -XXX,XX +XXX,XX @@ CONFIG_ARM_VIRT=y
65
#include "helper-a64.h"
52
# CONFIG_INTEGRATOR=n
66
#include "helper-sve.h"
53
# CONFIG_FSL_IMX31=n
67
#endif
54
# CONFIG_MUSICPAL=n
68
+
55
+# CONFIG_MPS3R=n
69
+#include "helper-mve.h"
56
# CONFIG_MUSCA=n
70
diff --git a/target/arm/internals.h b/target/arm/internals.h
57
# CONFIG_CHEETAH=n
71
index XXXXXXX..XXXXXXX 100644
58
# CONFIG_SX1=n
72
--- a/target/arm/internals.h
59
diff --git a/hw/arm/mps3r.c b/hw/arm/mps3r.c
73
+++ b/target/arm/internals.h
74
@@ -XXX,XX +XXX,XX @@ static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr)
75
return ptr;
76
}
77
78
+/* Values for M-profile PSR.ECI for MVE insns */
79
+enum MVEECIState {
80
+ ECI_NONE = 0, /* No completed beats */
81
+ ECI_A0 = 1, /* Completed: A0 */
82
+ ECI_A0A1 = 2, /* Completed: A0, A1 */
83
+ /* 3 is reserved */
84
+ ECI_A0A1A2 = 4, /* Completed: A0, A1, A2 */
85
+ ECI_A0A1A2B0 = 5, /* Completed: A0, A1, A2, B0 */
86
+ /* All other values reserved */
87
+};
88
+
89
#endif
90
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/mve.decode
93
+++ b/target/arm/mve.decode
94
@@ -XXX,XX +XXX,XX @@
95
#
96
# This file is processed by scripts/decodetree.py
97
#
98
+
99
+%qd 22:1 13:3
100
+
101
+&vldr_vstr rn qd imm p a w size l
102
+
103
+@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd
104
+
105
+# Vector loads and stores
106
+
107
+# Non-widening loads/stores (P=0 W=0 is 'related encoding')
108
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111100 ....... @vldr_vstr \
109
+ size=0 p=0 w=1
110
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111101 ....... @vldr_vstr \
111
+ size=1 p=0 w=1
112
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111110 ....... @vldr_vstr \
113
+ size=2 p=0 w=1
114
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111100 ....... @vldr_vstr \
115
+ size=0 p=1
116
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
117
+ size=1 p=1
118
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
119
+ size=2 p=1
120
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
121
new file mode 100644
60
new file mode 100644
122
index XXXXXXX..XXXXXXX
61
index XXXXXXX..XXXXXXX
123
--- /dev/null
62
--- /dev/null
124
+++ b/target/arm/mve_helper.c
63
+++ b/hw/arm/mps3r.c
125
@@ -XXX,XX +XXX,XX @@
64
@@ -XXX,XX +XXX,XX @@
126
+/*
65
+/*
127
+ * M-profile MVE Operations
66
+ * Arm MPS3 board emulation for Cortex-R-based FPGA images.
67
+ * (For M-profile images see mps2.c and mps2tz.c.)
128
+ *
68
+ *
129
+ * Copyright (c) 2021 Linaro, Ltd.
69
+ * Copyright (c) 2017 Linaro Limited
70
+ * Written by Peter Maydell
130
+ *
71
+ *
131
+ * This library is free software; you can redistribute it and/or
72
+ * This program is free software; you can redistribute it and/or modify
132
+ * modify it under the terms of the GNU Lesser General Public
73
+ * it under the terms of the GNU General Public License version 2 or
133
+ * License as published by the Free Software Foundation; either
74
+ * (at your option) any later version.
134
+ * version 2.1 of the License, or (at your option) any later version.
75
+ */
76
+
77
+/*
78
+ * The MPS3 is an FPGA based dev board. This file handles FPGA images
79
+ * which use the Cortex-R CPUs. We model these separately from the
80
+ * M-profile images, because on M-profile the FPGA image is based on
81
+ * a "Subsystem for Embedded" which is similar to an SoC, whereas
82
+ * the R-profile FPGA images don't have that abstraction layer.
135
+ *
83
+ *
136
+ * This library is distributed in the hope that it will be useful,
84
+ * We model the following FPGA images here:
137
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
85
+ * "mps3-an536" -- dual Cortex-R52 as documented in Arm Application Note AN536
138
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
139
+ * Lesser General Public License for more details.
140
+ *
86
+ *
141
+ * You should have received a copy of the GNU Lesser General Public
87
+ * Application Note AN536:
142
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
88
+ * https://developer.arm.com/documentation/dai0536/latest/
143
+ */
89
+ */
144
+
90
+
145
+#include "qemu/osdep.h"
91
+#include "qemu/osdep.h"
92
+#include "qemu/units.h"
93
+#include "qapi/error.h"
94
+#include "exec/address-spaces.h"
146
+#include "cpu.h"
95
+#include "cpu.h"
147
+#include "internals.h"
96
+#include "hw/boards.h"
148
+#include "vec_internal.h"
97
+#include "hw/arm/boot.h"
149
+#include "exec/helper-proto.h"
98
+
150
+#include "exec/cpu_ldst.h"
99
+/* Define the layout of RAM and ROM in a board */
151
+#include "exec/exec-all.h"
100
+typedef struct RAMInfo {
152
+
101
+ const char *name;
153
+static uint16_t mve_element_mask(CPUARMState *env)
102
+ hwaddr base;
103
+ hwaddr size;
104
+ int mrindex; /* index into rams[]; -1 for the system RAM block */
105
+ int flags;
106
+} RAMInfo;
107
+
108
+/*
109
+ * The MPS3 DDR is 3GiB, but on a 32-bit host QEMU doesn't permit
110
+ * emulation of that much guest RAM, so artificially make it smaller.
111
+ */
112
+#if HOST_LONG_BITS == 32
113
+#define MPS3_DDR_SIZE (1 * GiB)
114
+#else
115
+#define MPS3_DDR_SIZE (3 * GiB)
116
+#endif
117
+
118
+/*
119
+ * Flag values:
120
+ * IS_MAIN: this is the main machine RAM
121
+ * IS_ROM: this area is read-only
122
+ */
123
+#define IS_MAIN 1
124
+#define IS_ROM 2
125
+
126
+#define MPS3R_RAM_MAX 9
127
+
128
+typedef enum MPS3RFPGAType {
129
+ FPGA_AN536,
130
+} MPS3RFPGAType;
131
+
132
+struct MPS3RMachineClass {
133
+ MachineClass parent;
134
+ MPS3RFPGAType fpga_type;
135
+ const RAMInfo *raminfo;
136
+};
137
+
138
+struct MPS3RMachineState {
139
+ MachineState parent;
140
+ MemoryRegion ram[MPS3R_RAM_MAX];
141
+};
142
+
143
+#define TYPE_MPS3R_MACHINE "mps3r"
144
+#define TYPE_MPS3R_AN536_MACHINE MACHINE_TYPE_NAME("mps3-an536")
145
+
146
+OBJECT_DECLARE_TYPE(MPS3RMachineState, MPS3RMachineClass, MPS3R_MACHINE)
147
+
148
+static const RAMInfo an536_raminfo[] = {
149
+ {
150
+ .name = "ATCM",
151
+ .base = 0x00000000,
152
+ .size = 0x00008000,
153
+ .mrindex = 0,
154
+ }, {
155
+ /* We model the QSPI flash as simple ROM for now */
156
+ .name = "QSPI",
157
+ .base = 0x08000000,
158
+ .size = 0x00800000,
159
+ .flags = IS_ROM,
160
+ .mrindex = 1,
161
+ }, {
162
+ .name = "BRAM",
163
+ .base = 0x10000000,
164
+ .size = 0x00080000,
165
+ .mrindex = 2,
166
+ }, {
167
+ .name = "DDR",
168
+ .base = 0x20000000,
169
+ .size = MPS3_DDR_SIZE,
170
+ .mrindex = -1,
171
+ }, {
172
+ .name = "ATCM0",
173
+ .base = 0xee000000,
174
+ .size = 0x00008000,
175
+ .mrindex = 3,
176
+ }, {
177
+ .name = "BTCM0",
178
+ .base = 0xee100000,
179
+ .size = 0x00008000,
180
+ .mrindex = 4,
181
+ }, {
182
+ .name = "CTCM0",
183
+ .base = 0xee200000,
184
+ .size = 0x00008000,
185
+ .mrindex = 5,
186
+ }, {
187
+ .name = "ATCM1",
188
+ .base = 0xee400000,
189
+ .size = 0x00008000,
190
+ .mrindex = 6,
191
+ }, {
192
+ .name = "BTCM1",
193
+ .base = 0xee500000,
194
+ .size = 0x00008000,
195
+ .mrindex = 7,
196
+ }, {
197
+ .name = "CTCM1",
198
+ .base = 0xee600000,
199
+ .size = 0x00008000,
200
+ .mrindex = 8,
201
+ }, {
202
+ .name = NULL,
203
+ }
204
+};
205
+
206
+static MemoryRegion *mr_for_raminfo(MPS3RMachineState *mms,
207
+ const RAMInfo *raminfo)
208
+{
209
+ /* Return an initialized MemoryRegion for the RAMInfo. */
210
+ MemoryRegion *ram;
211
+
212
+ if (raminfo->mrindex < 0) {
213
+ /* Means this RAMInfo is for QEMU's "system memory" */
214
+ MachineState *machine = MACHINE(mms);
215
+ assert(!(raminfo->flags & IS_ROM));
216
+ return machine->ram;
217
+ }
218
+
219
+ assert(raminfo->mrindex < MPS3R_RAM_MAX);
220
+ ram = &mms->ram[raminfo->mrindex];
221
+
222
+ memory_region_init_ram(ram, NULL, raminfo->name,
223
+ raminfo->size, &error_fatal);
224
+ if (raminfo->flags & IS_ROM) {
225
+ memory_region_set_readonly(ram, true);
226
+ }
227
+ return ram;
228
+}
229
+
230
+static void mps3r_common_init(MachineState *machine)
231
+{
232
+ MPS3RMachineState *mms = MPS3R_MACHINE(machine);
233
+ MPS3RMachineClass *mmc = MPS3R_MACHINE_GET_CLASS(mms);
234
+ MemoryRegion *sysmem = get_system_memory();
235
+
236
+ for (const RAMInfo *ri = mmc->raminfo; ri->name; ri++) {
237
+ MemoryRegion *mr = mr_for_raminfo(mms, ri);
238
+ memory_region_add_subregion(sysmem, ri->base, mr);
239
+ }
240
+}
241
+
242
+static void mps3r_set_default_ram_info(MPS3RMachineClass *mmc)
154
+{
243
+{
155
+ /*
244
+ /*
156
+ * Return the mask of which elements in the MVE vector should be
245
+ * Set mc->default_ram_size and default_ram_id from the
157
+ * updated. This is a combination of multiple things:
246
+ * information in mmc->raminfo.
158
+ * (1) by default, we update every lane in the vector
159
+ * (2) VPT predication stores its state in the VPR register;
160
+ * (3) low-overhead-branch tail predication will mask out part
161
+ * the vector on the final iteration of the loop
162
+ * (4) if EPSR.ECI is set then we must execute only some beats
163
+ * of the insn
164
+ * We combine all these into a 16-bit result with the same semantics
165
+ * as VPR.P0: 0 to mask the lane, 1 if it is active.
166
+ * 8-bit vector ops will look at all bits of the result;
167
+ * 16-bit ops will look at bits 0, 2, 4, ...;
168
+ * 32-bit ops will look at bits 0, 4, 8 and 12.
169
+ * Compare pseudocode GetCurInstrBeat(), though that only returns
170
+ * the 4-bit slice of the mask corresponding to a single beat.
171
+ */
247
+ */
172
+ uint16_t mask = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);
248
+ MachineClass *mc = MACHINE_CLASS(mmc);
173
+
249
+ const RAMInfo *p;
174
+ if (!(env->v7m.vpr & R_V7M_VPR_MASK01_MASK)) {
250
+
175
+ mask |= 0xff;
251
+ for (p = mmc->raminfo; p->name; p++) {
176
+ }
252
+ if (p->mrindex < 0) {
177
+ if (!(env->v7m.vpr & R_V7M_VPR_MASK23_MASK)) {
253
+ /* Found the entry for "system memory" */
178
+ mask |= 0xff00;
254
+ mc->default_ram_size = p->size;
179
+ }
255
+ mc->default_ram_id = p->name;
180
+
256
+ return;
181
+ if (env->v7m.ltpsize < 4 &&
182
+ env->regs[14] <= (1 << (4 - env->v7m.ltpsize))) {
183
+ /*
184
+ * Tail predication active, and this is the last loop iteration.
185
+ * The element size is (1 << ltpsize), and we only want to process
186
+ * loopcount elements, so we want to retain the least significant
187
+ * (loopcount * esize) predicate bits and zero out bits above that.
188
+ */
189
+ int masklen = env->regs[14] << env->v7m.ltpsize;
190
+ assert(masklen <= 16);
191
+ mask &= MAKE_64BIT_MASK(0, masklen);
192
+ }
193
+
194
+ if ((env->condexec_bits & 0xf) == 0) {
195
+ /*
196
+ * ECI bits indicate which beats are already executed;
197
+ * we handle this by effectively predicating them out.
198
+ */
199
+ int eci = env->condexec_bits >> 4;
200
+ switch (eci) {
201
+ case ECI_NONE:
202
+ break;
203
+ case ECI_A0:
204
+ mask &= 0xfff0;
205
+ break;
206
+ case ECI_A0A1:
207
+ mask &= 0xff00;
208
+ break;
209
+ case ECI_A0A1A2:
210
+ case ECI_A0A1A2B0:
211
+ mask &= 0xf000;
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
257
+ }
216
+ }
258
+ }
217
+
259
+ g_assert_not_reached();
218
+ return mask;
260
+}
219
+}
+
+static void mve_advance_vpt(CPUARMState *env)
+{
+    /* Advance the VPT and ECI state if necessary */
+    uint32_t vpr = env->v7m.vpr;
+    unsigned mask01, mask23;
+
+    if ((env->condexec_bits & 0xf) == 0) {
+        env->condexec_bits = (env->condexec_bits == (ECI_A0A1A2B0 << 4)) ?
+            (ECI_A0 << 4) : (ECI_NONE << 4);
+    }
+
+    if (!(vpr & (R_V7M_VPR_MASK01_MASK | R_V7M_VPR_MASK23_MASK))) {
+        /* VPT not enabled, nothing to do */
+        return;
+    }
+
+    mask01 = FIELD_EX32(vpr, V7M_VPR, MASK01);
+    mask23 = FIELD_EX32(vpr, V7M_VPR, MASK23);
+    if (mask01 > 8) {
+        /* high bit set, but not 0b1000: invert the relevant half of P0 */
+        vpr ^= 0xff;
+    }
+    if (mask23 > 8) {
+        /* high bit set, but not 0b1000: invert the relevant half of P0 */
+        vpr ^= 0xff00;
+    }
+    vpr = FIELD_DP32(vpr, V7M_VPR, MASK01, mask01 << 1);
+    vpr = FIELD_DP32(vpr, V7M_VPR, MASK23, mask23 << 1);
+    env->v7m.vpr = vpr;
+}
+
+
+#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \
+    void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
+    { \
+        TYPE *d = vd; \
+        uint16_t mask = mve_element_mask(env); \
+        unsigned b, e; \
+        /* \
+         * R_SXTM allows the dest reg to become UNKNOWN for abandoned \
+         * beats so we don't care if we update part of the dest and \
+         * then take an exception. \
+         */ \
+        for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
+            if (mask & (1 << b)) { \
+                d[H##ESIZE(e)] = cpu_##LDTYPE##_data_ra(env, addr, GETPC()); \
+            } \
+            addr += MSIZE; \
+        } \
+        mve_advance_vpt(env); \
+    }
+
+#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \
+    void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
+    { \
+        TYPE *d = vd; \
+        uint16_t mask = mve_element_mask(env); \
+        unsigned b, e; \
+        for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
+            if (mask & (1 << b)) { \
+                cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
+            } \
+            addr += MSIZE; \
+        } \
+        mve_advance_vpt(env); \
+    }
+
+DO_VLDR(vldrb, 1, ldub, 1, uint8_t)
+DO_VLDR(vldrh, 2, lduw, 2, uint16_t)
+DO_VLDR(vldrw, 4, ldl, 4, uint32_t)
+
+DO_VSTR(vstrb, 1, stb, 1, uint8_t)
+DO_VSTR(vstrh, 2, stw, 2, uint16_t)
+DO_VSTR(vstrw, 4, stl, 4, uint32_t)
+
+#undef DO_VLDR
+#undef DO_VSTR
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@
 
 /* Include the generated decoder */
 #include "decode-mve.c.inc"
+
+typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+
+/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
+static inline long mve_qreg_offset(unsigned reg)
+{
+    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
+}
+
+static TCGv_ptr mve_qreg_ptr(unsigned reg)
+{
+    TCGv_ptr ret = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
+    return ret;
+}
+
+static bool mve_check_qreg_bank(DisasContext *s, int qmask)
+{
+    /*
+     * Check whether Qregs are in range. For v8.1M only Q0..Q7
+     * are supported, see VFPSmallRegisterBank().
+     */
+    return qmask < 8;
+}
+
+static bool mve_eci_check(DisasContext *s)
+{
+    /*
+     * This is a beatwise insn: check that ECI is valid (not a
+     * reserved value) and note that we are handling it.
+     * Return true if OK, false if we generated an exception.
+     */
+    s->eci_handled = true;
+    switch (s->eci) {
+    case ECI_NONE:
+    case ECI_A0:
+    case ECI_A0A1:
+    case ECI_A0A1A2:
+    case ECI_A0A1A2B0:
+        return true;
+    default:
+        /* Reserved value: INVSTATE UsageFault */
+        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
+                           default_exception_el(s));
+        return false;
+    }
+}
+
+static void mve_update_eci(DisasContext *s)
+{
+    /*
+     * The helper function will always update the CPUState field,
+     * so we only need to update the DisasContext field.
+     */
+    if (s->eci) {
+        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
+    }
+}
+
+static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn)
+{
+    TCGv_i32 addr;
+    uint32_t offset;
+    TCGv_ptr qreg;
+
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !mve_check_qreg_bank(s, a->qd) ||
+        !fn) {
+        return false;
+    }
+
+    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
+    if (a->rn == 15 || (a->rn == 13 && a->w)) {
+        return false;
+    }
+
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    offset = a->imm << a->size;
+    if (!a->a) {
+        offset = -offset;
+    }
+    addr = load_reg(s, a->rn);
+    if (a->p) {
+        tcg_gen_addi_i32(addr, addr, offset);
+    }
+
+    qreg = mve_qreg_ptr(a->qd);
+    fn(cpu_env, qreg, addr);
+    tcg_temp_free_ptr(qreg);
+
+    /*
+     * Writeback always happens after the last beat of the insn,
+     * regardless of predication
+     */
+    if (a->w) {
+        if (!a->p) {
+            tcg_gen_addi_i32(addr, addr, offset);
+        }
+        store_reg(s, a->rn, addr);
+    } else {
+        tcg_temp_free_i32(addr);
+    }
+    mve_update_eci(s);
+    return true;
+}
+
+static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
+{
+    static MVEGenLdStFn * const ldstfns[4][2] = {
+        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
+        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
+        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
+        { NULL, NULL }
+    };
+    return do_ldst(s, a, ldstfns[a->size][a->l]);
+}
diff --git a/target/arm/meson.build b/target/arm/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -XXX,XX +XXX,XX @@ arm_ss.add(files(
   'helper.c',
   'iwmmxt_helper.c',
   'm_helper.c',
+  'mve_helper.c',
   'neon_helper.c',
   'op_helper.c',
   'tlb_helper.c',
--
2.20.1
+
+static void mps3r_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->init = mps3r_common_init;
+}
+
+static void mps3r_an536_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    MPS3RMachineClass *mmc = MPS3R_MACHINE_CLASS(oc);
+    static const char * const valid_cpu_types[] = {
+        ARM_CPU_TYPE_NAME("cortex-r52"),
+        NULL
+    };
+
+    mc->desc = "ARM MPS3 with AN536 FPGA image for Cortex-R52";
+    mc->default_cpus = 2;
+    mc->min_cpus = mc->default_cpus;
+    mc->max_cpus = mc->default_cpus;
+    mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-r52");
+    mc->valid_cpu_types = valid_cpu_types;
+    mmc->raminfo = an536_raminfo;
+    mps3r_set_default_ram_info(mmc);
+}
+
+static const TypeInfo mps3r_machine_types[] = {
+    {
+        .name = TYPE_MPS3R_MACHINE,
+        .parent = TYPE_MACHINE,
+        .abstract = true,
+        .instance_size = sizeof(MPS3RMachineState),
+        .class_size = sizeof(MPS3RMachineClass),
+        .class_init = mps3r_class_init,
+    }, {
+        .name = TYPE_MPS3R_AN536_MACHINE,
+        .parent = TYPE_MPS3R_MACHINE,
+        .class_init = mps3r_an536_class_init,
+    },
+};
+
+DEFINE_TYPES(mps3r_machine_types);
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -XXX,XX +XXX,XX @@ config MAINSTONE
     select PFLASH_CFI01
     select SMC91C111
 
+config MPS3R
+    bool
+    default y
+    depends on TCG && ARM
+
 config MUSCA
     bool
     default y
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
 arm_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
 arm_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mainstone.c'))
 arm_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
+arm_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
 arm_ss.add(when: 'CONFIG_MUSICPAL', if_true: files('musicpal.c'))
 arm_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
 arm_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
--
2.34.1
Create the CPUs, the GIC, and the per-CPU RAM block for
the mps3-an536 board.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240206132931.38376-10-peter.maydell@linaro.org
---
 hw/arm/mps3r.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 177 insertions(+), 3 deletions(-)

diff --git a/hw/arm/mps3r.c b/hw/arm/mps3r.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/mps3r.c
+++ b/hw/arm/mps3r.c
The M-profile architecture requires that accesses to FPCXT_NS when
there is no active FP state must not take a NOCP fault even if the
FPU is disabled. We were not implementing this correctly, because
in our decode we catch the NOCP faults early in m-nocp.decode.

Fix this bug by moving all the handling of M-profile FP system
register accesses from vfp.decode into m-nocp.decode and putting
it above the NOCP blocks. This provides the correct behaviour:
 * for accesses other than FPCXT_NS the trans functions call
   vfp_access_check(), which will check for FPU disabled and
   raise a NOCP exception if necessary
 * for FPCXT_NS we have the special case code that doesn't
   call vfp_access_check()
 * when these trans functions want to raise an UNDEF they return
   false, so the decoder will fall through into the NOCP blocks.
   This means that NOCP correctly takes precedence over UNDEF
   for these insns. (This is a difference from the other insns
   handled by m-nocp.decode, where UNDEF takes precedence and
   which we implement by having those trans functions call
   unallocated_encoding() in the appropriate places.)

[Note for backport to stable: this commit has a semantic dependency
on commit 9a486856e9173af, which was not marked as cc-stable because
we didn't know we'd need it for a for-stable bugfix.]

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210618141019.10671-4-peter.maydell@linaro.org
---
 target/arm/translate-a32.h    |   1 +
 target/arm/m-nocp.decode      |  24 ++
 target/arm/vfp.decode         |  14 -
 target/arm/translate-m-nocp.c | 514 +++++++++++++++++++++++++++++++++
 target/arm/translate-vfp.c    | 517 +---------------------------------
 5 files changed, 542 insertions(+), 528 deletions(-)

diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -XXX,XX +XXX,XX @@ bool disas_neon_shared(DisasContext *s, uint32_t insn);
43
void load_reg_var(DisasContext *s, TCGv_i32 var, int reg);
44
void arm_gen_condlabel(DisasContext *s);
45
bool vfp_access_check(DisasContext *s);
46
+void gen_preserve_fp_state(DisasContext *s);
47
void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop);
48
void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop);
49
void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop);
50
diff --git a/target/arm/m-nocp.decode b/target/arm/m-nocp.decode
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/arm/m-nocp.decode
53
+++ b/target/arm/m-nocp.decode
54
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@
55
56
&nocp cp
57
58
+# M-profile VLDR/VSTR to sysreg
59
+%vldr_sysreg 22:1 13:3
60
+%imm7_0x4 0:7 !function=times_4
61
+
62
+&vldr_sysreg rn reg imm a w p
63
+@vldr_sysreg .... ... . a:1 . . . rn:4 ... . ... .. ....... \
64
+ reg=%vldr_sysreg imm=%imm7_0x4 &vldr_sysreg
65
+
66
{
67
# Special cases which do not take an early NOCP: VLLDM and VLSTM
68
VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 op:1 000 0000
69
@@ -XXX,XX +XXX,XX @@
70
VSCCLRM 1110 1100 1.01 1111 .... 1011 imm:7 0 vd=%vd_dp size=3
71
VSCCLRM 1110 1100 1.01 1111 .... 1010 imm:8 vd=%vd_sp size=2
72
73
+ # FP system register accesses: these are a special case because accesses
74
+ # to FPCXT_NS succeed even if the FPU is disabled. We therefore need
75
+ # to handle them before the big NOCP blocks. Note that within these
76
+ # insns NOCP still has higher priority than UNDEFs; this is implemented
77
+ # by their returning 'false' for UNDEF so as to fall through into the
78
+ # NOCP check (in contrast to VLLDM etc, which call unallocated_encoding()
79
+ # for the UNDEFs there that must take precedence over NOCP.)
80
+
81
+ VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
82
+
83
+ # P=0 W=0 is SEE "Related encodings", so split into two patterns
84
+ VLDR_sysreg ---- 110 1 . . w:1 1 .... ... 0 111 11 ....... @vldr_sysreg p=1
85
+ VLDR_sysreg ---- 110 0 . . 1 1 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
86
+ VSTR_sysreg ---- 110 1 . . w:1 0 .... ... 0 111 11 ....... @vldr_sysreg p=1
87
+ VSTR_sysreg ---- 110 0 . . 1 0 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
88
+
89
NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- &nocp
90
NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- &nocp
91
# From v8.1M onwards this range will also NOCP:
92
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/arm/vfp.decode
95
+++ b/target/arm/vfp.decode
96
@@ -XXX,XX +XXX,XX @@ VLDR_VSTR_hp ---- 1101 u:1 .0 l:1 rn:4 .... 1001 imm:8 vd=%vd_sp
97
VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp
98
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp
99
100
-# M-profile VLDR/VSTR to sysreg
101
-%vldr_sysreg 22:1 13:3
102
-%imm7_0x4 0:7 !function=times_4
103
-
104
-&vldr_sysreg rn reg imm a w p
105
-@vldr_sysreg .... ... . a:1 . . . rn:4 ... . ... .. ....... \
106
- reg=%vldr_sysreg imm=%imm7_0x4 &vldr_sysreg
107
-
108
-# P=0 W=0 is SEE "Related encodings", so split into two patterns
109
-VLDR_sysreg ---- 110 1 . . w:1 1 .... ... 0 111 11 ....... @vldr_sysreg p=1
110
-VLDR_sysreg ---- 110 0 . . 1 1 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
111
-VSTR_sysreg ---- 110 1 . . w:1 0 .... ... 0 111 11 ....... @vldr_sysreg p=1
112
-VSTR_sysreg ---- 110 0 . . 1 0 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
113
-
114
# We split the load/store multiple up into two patterns to avoid
115
# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
116
# grouping:
117
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/target/arm/translate-m-nocp.c
120
+++ b/target/arm/translate-m-nocp.c
121
@@ -XXX,XX +XXX,XX @@
122
123
#include "qemu/osdep.h"
15
#include "qemu/osdep.h"
124
#include "tcg/tcg-op.h"
16
#include "qemu/units.h"
125
+#include "tcg/tcg-op-gvec.h"
17
#include "qapi/error.h"
126
#include "translate.h"
18
+#include "qapi/qmp/qlist.h"
127
#include "translate-a32.h"
19
#include "exec/address-spaces.h"
128
20
#include "cpu.h"
129
@@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
21
#include "hw/boards.h"
130
return true;
22
+#include "hw/qdev-properties.h"
23
#include "hw/arm/boot.h"
24
+#include "hw/arm/bsa.h"
25
+#include "hw/intc/arm_gicv3.h"
26
27
/* Define the layout of RAM and ROM in a board */
28
typedef struct RAMInfo {
29
@@ -XXX,XX +XXX,XX @@ typedef struct RAMInfo {
30
#define IS_ROM 2
31
32
#define MPS3R_RAM_MAX 9
33
+#define MPS3R_CPU_MAX 2
34
+
35
+#define PERIPHBASE 0xf0000000
36
+#define NUM_SPIS 96
37
38
typedef enum MPS3RFPGAType {
39
FPGA_AN536,
40
@@ -XXX,XX +XXX,XX @@ struct MPS3RMachineClass {
41
MachineClass parent;
42
MPS3RFPGAType fpga_type;
43
const RAMInfo *raminfo;
44
+ hwaddr loader_start;
45
};
46
47
struct MPS3RMachineState {
48
MachineState parent;
49
+ struct arm_boot_info bootinfo;
50
MemoryRegion ram[MPS3R_RAM_MAX];
51
+ Object *cpu[MPS3R_CPU_MAX];
52
+ MemoryRegion cpu_sysmem[MPS3R_CPU_MAX];
53
+ MemoryRegion sysmem_alias[MPS3R_CPU_MAX];
54
+ MemoryRegion cpu_ram[MPS3R_CPU_MAX];
55
+ GICv3State gic;
56
};
57
58
#define TYPE_MPS3R_MACHINE "mps3r"
59
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *mr_for_raminfo(MPS3RMachineState *mms,
60
return ram;
131
}
61
}
132
62
133
+/*
63
+/*
134
+ * M-profile provides two different sets of instructions that can
64
+ * There is no defined secondary boot protocol for Linux for the AN536,
135
+ * access floating point system registers: VMSR/VMRS (which move
65
+ * because real hardware has a restriction that atomic operations between
136
+ * to/from a general purpose register) and VLDR/VSTR sysreg (which
66
+ * the two CPUs do not function correctly, and so true SMP is not
137
+ * move directly to/from memory). In some cases there are also side
67
+ * possible. Therefore for cases where the user is directly booting
138
+ * effects which must happen after any write to memory (which could
68
+ * a kernel, we treat the system as essentially uniprocessor, and
139
+ * cause an exception). So we implement the common logic for the
69
+ * put the secondary CPU into power-off state (as if the user on the
140
+ * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
70
+ * real hardware had configured the secondary to be halted via the
141
+ * which take pointers to callback functions which will perform the
71
+ * SCC config registers).
142
+ * actual "read/write general purpose register" and "read/write
72
+ *
143
+ * memory" operations.
73
+ * Note that the default secondary boot code would not work here anyway
74
+ * as it assumes a GICv2, and we have a GICv3.
144
+ */
75
+ */
145
+
76
+static void mps3r_write_secondary_boot(ARMCPU *cpu,
146
+/*
77
+ const struct arm_boot_info *info)
147
+ * Emit code to store the sysreg to its final destination; frees the
148
+ * TCG temp 'value' it is passed.
149
+ */
150
+typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
151
+/*
152
+ * Emit code to load the value to be copied to the sysreg; returns
153
+ * a new TCG temporary
154
+ */
155
+typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
156
+
157
+/* Common decode/access checks for fp sysreg read/write */
158
+typedef enum FPSysRegCheckResult {
159
+ FPSysRegCheckFailed, /* caller should return false */
160
+ FPSysRegCheckDone, /* caller should return true */
161
+ FPSysRegCheckContinue, /* caller should continue generating code */
162
+} FPSysRegCheckResult;
163
+
164
+static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
165
+{
166
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
167
+ return FPSysRegCheckFailed;
168
+ }
169
+
170
+ switch (regno) {
171
+ case ARM_VFP_FPSCR:
172
+ case QEMU_VFP_FPSCR_NZCV:
173
+ break;
174
+ case ARM_VFP_FPSCR_NZCVQC:
175
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
176
+ return FPSysRegCheckFailed;
177
+ }
178
+ break;
179
+ case ARM_VFP_FPCXT_S:
180
+ case ARM_VFP_FPCXT_NS:
181
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
182
+ return FPSysRegCheckFailed;
183
+ }
184
+ if (!s->v8m_secure) {
185
+ return FPSysRegCheckFailed;
186
+ }
187
+ break;
188
+ case ARM_VFP_VPR:
189
+ case ARM_VFP_P0:
190
+ if (!dc_isar_feature(aa32_mve, s)) {
191
+ return FPSysRegCheckFailed;
192
+ }
193
+ break;
194
+ default:
195
+ return FPSysRegCheckFailed;
196
+ }
197
+
198
+ /*
199
+ * FPCXT_NS is a special case: it has specific handling for
200
+ * "current FP state is inactive", and must do the PreserveFPState()
201
+ * but not the usual full set of actions done by ExecuteFPCheck().
202
+ * So we don't call vfp_access_check() and the callers must handle this.
203
+ */
204
+ if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
205
+ return FPSysRegCheckDone;
206
+ }
207
+ return FPSysRegCheckContinue;
208
+}
209
+
210
+static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
211
+ TCGLabel *label)
212
+{
78
+{
213
+ /*
79
+ /*
214
+ * FPCXT_NS is a special case: it has specific handling for
80
+ * Power the secondary CPU off. This means we don't need to write any
215
+ * "current FP state is inactive", and must do the PreserveFPState()
81
+ * boot code into guest memory. Note that the 'cpu' argument to this
216
+ * but not the usual full set of actions done by ExecuteFPCheck().
82
+ * function is the primary CPU we passed to arm_load_kernel(), not
217
+ * We don't have a TB flag that matches the fpInactive check, so we
83
+ * the secondary. Loop around all the other CPUs, as the boot.c
218
+ * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
84
+ * code does for the "disable secondaries if PSCI is enabled" case.
219
+ *
220
+ * Emit code that checks fpInactive and does a conditional
221
+ * branch to label based on it:
222
+ * if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
223
+ * if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
224
+ */
85
+ */
225
+ assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
86
+ for (CPUState *cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
226
+
87
+ if (cs != first_cpu) {
227
+ /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
88
+ object_property_set_bool(OBJECT(cs), "start-powered-off", true,
228
+ TCGv_i32 aspen, fpca;
89
+ &error_abort);
229
+ aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
230
+ fpca = load_cpu_field(v7m.control[M_REG_S]);
231
+ tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
232
+ tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
233
+ tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
234
+ tcg_gen_or_i32(fpca, fpca, aspen);
235
+ tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
236
+ tcg_temp_free_i32(aspen);
237
+ tcg_temp_free_i32(fpca);
238
+}
239
+
240
+static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
241
+ fp_sysreg_loadfn *loadfn,
242
+ void *opaque)
243
+{
244
+ /* Do a write to an M-profile floating point system register */
245
+ TCGv_i32 tmp;
246
+ TCGLabel *lab_end = NULL;
247
+
248
+ switch (fp_sysreg_checks(s, regno)) {
249
+ case FPSysRegCheckFailed:
250
+ return false;
251
+ case FPSysRegCheckDone:
252
+ return true;
253
+ case FPSysRegCheckContinue:
254
+ break;
255
+ }
256
+
257
+ switch (regno) {
258
+ case ARM_VFP_FPSCR:
259
+ tmp = loadfn(s, opaque);
260
+ gen_helper_vfp_set_fpscr(cpu_env, tmp);
261
+ tcg_temp_free_i32(tmp);
262
+ gen_lookup_tb(s);
263
+ break;
264
+ case ARM_VFP_FPSCR_NZCVQC:
265
+ {
266
+ TCGv_i32 fpscr;
267
+ tmp = loadfn(s, opaque);
268
+ if (dc_isar_feature(aa32_mve, s)) {
269
+ /* QC is only present for MVE; otherwise RES0 */
270
+ TCGv_i32 qc = tcg_temp_new_i32();
271
+ tcg_gen_andi_i32(qc, tmp, FPCR_QC);
272
+ /*
273
+ * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
274
+ * here writing the same value into all elements is simplest.
275
+ */
276
+ tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
277
+ 16, 16, qc);
278
+ }
90
+ }
279
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
280
+ fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
281
+ tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
282
+ tcg_gen_or_i32(fpscr, fpscr, tmp);
283
+ store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
284
+ tcg_temp_free_i32(tmp);
285
+ break;
286
+ }
287
+ case ARM_VFP_FPCXT_NS:
288
+ lab_end = gen_new_label();
289
+ /* fpInactive case: write is a NOP, so branch to end */
290
+ gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
291
+ /*
292
+ * !fpInactive: if FPU disabled, take NOCP exception;
293
+ * otherwise PreserveFPState(), and then FPCXT_NS writes
294
+ * behave the same as FPCXT_S writes.
295
+ */
296
+ if (s->fp_excp_el) {
297
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
298
+ syn_uncategorized(), s->fp_excp_el);
299
+ /*
300
+ * This was only a conditional exception, so override
301
+ * gen_exception_insn()'s default to DISAS_NORETURN
302
+ */
303
+ s->base.is_jmp = DISAS_NEXT;
304
+ break;
305
+ }
306
+ gen_preserve_fp_state(s);
307
+ /* fall through */
308
+ case ARM_VFP_FPCXT_S:
309
+ {
310
+ TCGv_i32 sfpa, control;
311
+ /*
312
+ * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
313
+ * bits [27:0] from value and zeroes bits [31:28].
314
+ */
315
+ tmp = loadfn(s, opaque);
316
+ sfpa = tcg_temp_new_i32();
317
+ tcg_gen_shri_i32(sfpa, tmp, 31);
318
+ control = load_cpu_field(v7m.control[M_REG_S]);
319
+ tcg_gen_deposit_i32(control, control, sfpa,
320
+ R_V7M_CONTROL_SFPA_SHIFT, 1);
321
+ store_cpu_field(control, v7m.control[M_REG_S]);
322
+ tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
323
+ gen_helper_vfp_set_fpscr(cpu_env, tmp);
324
+ tcg_temp_free_i32(tmp);
325
+ tcg_temp_free_i32(sfpa);
326
+ break;
327
+ }
328
+ case ARM_VFP_VPR:
329
+ /* Behaves as NOP if not privileged */
330
+ if (IS_USER(s)) {
331
+ break;
332
+ }
333
+ tmp = loadfn(s, opaque);
334
+ store_cpu_field(tmp, v7m.vpr);
335
+ break;
336
+ case ARM_VFP_P0:
337
+ {
338
+ TCGv_i32 vpr;
339
+ tmp = loadfn(s, opaque);
340
+ vpr = load_cpu_field(v7m.vpr);
341
+ tcg_gen_deposit_i32(vpr, vpr, tmp,
342
+ R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
343
+ store_cpu_field(vpr, v7m.vpr);
344
+ tcg_temp_free_i32(tmp);
345
+ break;
346
+ }
347
+ default:
348
+ g_assert_not_reached();
349
+ }
350
+ if (lab_end) {
351
+ gen_set_label(lab_end);
352
+ }
353
+ return true;
354
+}
355
+
356
+static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
357
+ fp_sysreg_storefn *storefn,
358
+ void *opaque)
359
+{
360
+ /* Do a read from an M-profile floating point system register */
361
+ TCGv_i32 tmp;
362
+ TCGLabel *lab_end = NULL;
363
+ bool lookup_tb = false;
364
+
365
+ switch (fp_sysreg_checks(s, regno)) {
366
+ case FPSysRegCheckFailed:
367
+ return false;
368
+ case FPSysRegCheckDone:
369
+ return true;
370
+ case FPSysRegCheckContinue:
371
+ break;
372
+ }
373
+
374
+ if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
375
+ /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
376
+ regno = QEMU_VFP_FPSCR_NZCV;
377
+ }
378
+
379
+ switch (regno) {
380
+ case ARM_VFP_FPSCR:
381
+ tmp = tcg_temp_new_i32();
382
+ gen_helper_vfp_get_fpscr(tmp, cpu_env);
383
+ storefn(s, opaque, tmp);
384
+ break;
385
+ case ARM_VFP_FPSCR_NZCVQC:
386
+ tmp = tcg_temp_new_i32();
387
+ gen_helper_vfp_get_fpscr(tmp, cpu_env);
388
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
389
+ storefn(s, opaque, tmp);
390
+ break;
391
+ case QEMU_VFP_FPSCR_NZCV:
392
+ /*
393
+ * Read just NZCV; this is a special case to avoid the
394
+ * helper call for the "VMRS to CPSR.NZCV" insn.
395
+ */
396
+ tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
397
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
398
+ storefn(s, opaque, tmp);
399
+ break;
400
+ case ARM_VFP_FPCXT_S:
401
+ {
402
+ TCGv_i32 control, sfpa, fpscr;
403
+ /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
404
+ tmp = tcg_temp_new_i32();
405
+ sfpa = tcg_temp_new_i32();
406
+ gen_helper_vfp_get_fpscr(tmp, cpu_env);
407
+ tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
408
+ control = load_cpu_field(v7m.control[M_REG_S]);
409
+ tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
410
+ tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
411
+ tcg_gen_or_i32(tmp, tmp, sfpa);
412
+ tcg_temp_free_i32(sfpa);
413
+ /*
414
+ * Store result before updating FPSCR etc, in case
415
+ * it is a memory write which causes an exception.
416
+ */
417
+ storefn(s, opaque, tmp);
418
+ /*
419
+ * Now we must reset FPSCR from FPDSCR_NS, and clear
420
+ * CONTROL.SFPA; so we'll end the TB here.
421
+ */
422
+ tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
423
+ store_cpu_field(control, v7m.control[M_REG_S]);
424
+ fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
425
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
426
+ tcg_temp_free_i32(fpscr);
427
+ lookup_tb = true;
428
+ break;
429
+ }
430
+ case ARM_VFP_FPCXT_NS:
431
+ {
432
+ TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
433
+ TCGLabel *lab_active = gen_new_label();
434
+
435
+ lookup_tb = true;
436
+
437
+ gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
438
+ /* fpInactive case: reads as FPDSCR_NS */
439
+ TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
440
+ storefn(s, opaque, tmp);
441
+ lab_end = gen_new_label();
442
+ tcg_gen_br(lab_end);
443
+
444
+ gen_set_label(lab_active);
445
+ /*
446
+ * !fpInactive: if FPU disabled, take NOCP exception;
447
+ * otherwise PreserveFPState(), and then FPCXT_NS
448
+ * reads the same as FPCXT_S.
449
+ */
450
+ if (s->fp_excp_el) {
451
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
452
+ syn_uncategorized(), s->fp_excp_el);
453
+ /*
454
+ * This was only a conditional exception, so override
455
+ * gen_exception_insn()'s default to DISAS_NORETURN
456
+ */
457
+ s->base.is_jmp = DISAS_NEXT;
458
+ break;
459
+ }
460
+ gen_preserve_fp_state(s);
461
+ tmp = tcg_temp_new_i32();
462
+ sfpa = tcg_temp_new_i32();
463
+ fpscr = tcg_temp_new_i32();
464
+ gen_helper_vfp_get_fpscr(fpscr, cpu_env);
465
+ tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
466
+ control = load_cpu_field(v7m.control[M_REG_S]);
467
+ tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
468
+ tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
469
+ tcg_gen_or_i32(tmp, tmp, sfpa);
470
+ tcg_temp_free_i32(control);
471
+ /* Store result before updating FPSCR, in case it faults */
472
+ storefn(s, opaque, tmp);
473
+ /* If SFPA is zero then set FPSCR from FPDSCR_NS */
474
+ fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
475
+ zero = tcg_const_i32(0);
476
+ tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
477
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
478
+ tcg_temp_free_i32(zero);
479
+ tcg_temp_free_i32(sfpa);
480
+ tcg_temp_free_i32(fpdscr);
481
+ tcg_temp_free_i32(fpscr);
482
+ break;
483
+ }
484
+ case ARM_VFP_VPR:
485
+ /* Behaves as NOP if not privileged */
486
+ if (IS_USER(s)) {
487
+ break;
488
+ }
489
+ tmp = load_cpu_field(v7m.vpr);
490
+ storefn(s, opaque, tmp);
491
+ break;
492
+ case ARM_VFP_P0:
493
+ tmp = load_cpu_field(v7m.vpr);
494
+ tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
495
+ storefn(s, opaque, tmp);
496
+ break;
497
+ default:
498
+ g_assert_not_reached();
499
+ }
500
+
501
+ if (lab_end) {
502
+ gen_set_label(lab_end);
503
+ }
504
+ if (lookup_tb) {
505
+ gen_lookup_tb(s);
506
+ }
507
+ return true;
508
+}
509
+
510
+static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
511
+{
512
+ arg_VMSR_VMRS *a = opaque;
513
+
514
+ if (a->rt == 15) {
515
+ /* Set the 4 flag bits in the CPSR */
516
+ gen_set_nzcv(value);
517
+ tcg_temp_free_i32(value);
518
+ } else {
519
+ store_reg(s, a->rt, value);
520
+ }
91
+ }
521
+}
92
+}
522
+
93
+
523
+static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
94
+static void mps3r_secondary_cpu_reset(ARMCPU *cpu,
95
+ const struct arm_boot_info *info)
524
+{
96
+{
525
+ arg_VMSR_VMRS *a = opaque;
97
+ /* We don't need to do anything here because the CPU will be off */
526
+
527
+ return load_reg(s, a->rt);
528
+}
98
+}
529
+
99
+
530
+static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
100
+static void create_gic(MPS3RMachineState *mms, MemoryRegion *sysmem)
531
+{
101
+{
102
+ MachineState *machine = MACHINE(mms);
103
+ DeviceState *gicdev;
104
+ QList *redist_region_count;
105
+
106
+ object_initialize_child(OBJECT(mms), "gic", &mms->gic, TYPE_ARM_GICV3);
107
+ gicdev = DEVICE(&mms->gic);
108
+ qdev_prop_set_uint32(gicdev, "num-cpu", machine->smp.cpus);
109
+ qdev_prop_set_uint32(gicdev, "num-irq", NUM_SPIS + GIC_INTERNAL);
110
+ redist_region_count = qlist_new();
111
+ qlist_append_int(redist_region_count, machine->smp.cpus);
112
+ qdev_prop_set_array(gicdev, "redist-region-count", redist_region_count);
113
+ object_property_set_link(OBJECT(&mms->gic), "sysmem",
114
+ OBJECT(sysmem), &error_fatal);
115
+ sysbus_realize(SYS_BUS_DEVICE(&mms->gic), &error_fatal);
116
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->gic), 0, PERIPHBASE);
117
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->gic), 1, PERIPHBASE + 0x100000);
532
+ /*
118
+ /*
533
+ * Accesses to R15 are UNPREDICTABLE; we choose to undef.
119
+ * Wire the outputs from each CPU's generic timer and the GICv3
534
+ * FPSCR -> r15 is a special case which writes to the PSR flags;
120
+ * maintenance interrupt signal to the appropriate GIC PPI inputs,
535
+ * set a->reg to a special value to tell gen_M_fp_sysreg_read()
121
+ * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs.
536
+ * we only care about the top 4 bits of FPSCR there.
537
+ */
122
+ */
538
+ if (a->rt == 15) {
123
+ for (int i = 0; i < machine->smp.cpus; i++) {
539
+ if (a->l && a->reg == ARM_VFP_FPSCR) {
124
+ DeviceState *cpudev = DEVICE(mms->cpu[i]);
540
+ a->reg = QEMU_VFP_FPSCR_NZCV;
125
+ SysBusDevice *gicsbd = SYS_BUS_DEVICE(&mms->gic);
541
+ } else {
126
+ int intidbase = NUM_SPIS + i * GIC_INTERNAL;
542
+ return false;
127
+ int irq;
128
+ /*
129
+ * Mapping from the output timer irq lines from the CPU to the
130
+ * GIC PPI inputs used for this board. This isn't a BSA board,
131
+ * but it uses the standard convention for the PPI numbers.
132
+ */
133
+ const int timer_irq[] = {
134
+ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
135
+ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
136
+ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ,
137
+ };
138
+
139
+ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
140
+ qdev_connect_gpio_out(cpudev, irq,
141
+ qdev_get_gpio_in(gicdev,
142
+ intidbase + timer_irq[irq]));
543
+ }
143
+ }
544
+ }
144
+
545
+
145
+ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0,
546
+ if (a->l) {
146
+ qdev_get_gpio_in(gicdev,
547
+ /* VMRS, move FP system register to gp register */
147
+ intidbase + ARCH_GIC_MAINT_IRQ));
548
+ return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
148
+
549
+ } else {
149
+ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
550
+ /* VMSR, move gp register to FP system register */
150
+ qdev_get_gpio_in(gicdev,
551
+ return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
151
+ intidbase + VIRTUAL_PMU_IRQ));
152
+
153
+ sysbus_connect_irq(gicsbd, i,
154
+ qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
155
+ sysbus_connect_irq(gicsbd, i + machine->smp.cpus,
156
+ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
157
+ sysbus_connect_irq(gicsbd, i + 2 * machine->smp.cpus,
158
+ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
159
+ sysbus_connect_irq(gicsbd, i + 3 * machine->smp.cpus,
160
+ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
552
+ }
161
+ }
553
+}
162
+}
554
+
163
+
555
+static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
164
static void mps3r_common_init(MachineState *machine)
556
+{
165
{
557
+ arg_vldr_sysreg *a = opaque;
166
MPS3RMachineState *mms = MPS3R_MACHINE(machine);
558
+ uint32_t offset = a->imm;
167
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
559
+ TCGv_i32 addr;
168
MemoryRegion *mr = mr_for_raminfo(mms, ri);
560
+
169
memory_region_add_subregion(sysmem, ri->base, mr);
561
+ if (!a->a) {
170
}
562
+ offset = -offset;
171
+
172
+ assert(machine->smp.cpus <= MPS3R_CPU_MAX);
173
+ for (int i = 0; i < machine->smp.cpus; i++) {
174
+ g_autofree char *sysmem_name = g_strdup_printf("cpu-%d-memory", i);
175
+ g_autofree char *ramname = g_strdup_printf("cpu-%d-memory", i);
176
+ g_autofree char *alias_name = g_strdup_printf("sysmem-alias-%d", i);
177
+
178
+ /*
179
+ * Each CPU has some private RAM/peripherals, so create the container
180
+ * which will house those, with the whole-machine system memory being
181
+ * used where there's no CPU-specific device. Note that we need the
182
+ * sysmem_alias aliases because we can't put one MR (the original
183
+ * 'sysmem') into more than one other MR.
184
+ */
185
+ memory_region_init(&mms->cpu_sysmem[i], OBJECT(machine),
186
+ sysmem_name, UINT64_MAX);
187
+ memory_region_init_alias(&mms->sysmem_alias[i], OBJECT(machine),
188
+ alias_name, sysmem, 0, UINT64_MAX);
189
+ memory_region_add_subregion_overlap(&mms->cpu_sysmem[i], 0,
190
+ &mms->sysmem_alias[i], -1);
191
+
192
+ mms->cpu[i] = object_new(machine->cpu_type);
193
+ object_property_set_link(mms->cpu[i], "memory",
194
+ OBJECT(&mms->cpu_sysmem[i]), &error_abort);
195
+ object_property_set_int(mms->cpu[i], "reset-cbar",
196
+ PERIPHBASE, &error_abort);
197
+ qdev_realize(DEVICE(mms->cpu[i]), NULL, &error_fatal);
198
+ object_unref(mms->cpu[i]);
199
+
200
+ /* Per-CPU RAM */
201
+ memory_region_init_ram(&mms->cpu_ram[i], NULL, ramname,
202
+ 0x1000, &error_fatal);
203
+ memory_region_add_subregion(&mms->cpu_sysmem[i], 0xe7c01000,
204
+ &mms->cpu_ram[i]);
563
+ }
205
+ }
564
+
206
+
565
+ addr = load_reg(s, a->rn);
207
+ create_gic(mms, sysmem);
566
+ if (a->p) {
208
+
567
+ tcg_gen_addi_i32(addr, addr, offset);
209
+ mms->bootinfo.ram_size = machine->ram_size;
568
+ }
210
+ mms->bootinfo.board_id = -1;
569
+
211
+ mms->bootinfo.loader_start = mmc->loader_start;
570
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
212
+ mms->bootinfo.write_secondary_boot = mps3r_write_secondary_boot;
571
+ gen_helper_v8m_stackcheck(cpu_env, addr);
213
+ mms->bootinfo.secondary_cpu_reset_hook = mps3r_secondary_cpu_reset;
572
+ }
214
+ arm_load_kernel(ARM_CPU(mms->cpu[0]), machine, &mms->bootinfo);
573
+
574
+ gen_aa32_st_i32(s, value, addr, get_mem_index(s),
575
+ MO_UL | MO_ALIGN | s->be_data);
576
+ tcg_temp_free_i32(value);
577
+
578
+ if (a->w) {
579
+ /* writeback */
580
+ if (!a->p) {
581
+ tcg_gen_addi_i32(addr, addr, offset);
582
+ }
583
+ store_reg(s, a->rn, addr);
584
+ } else {
585
+ tcg_temp_free_i32(addr);
586
+ }
587
+}
588
+
589
+static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
590
+{
591
+ arg_vldr_sysreg *a = opaque;
592
+ uint32_t offset = a->imm;
593
+ TCGv_i32 addr;
594
+ TCGv_i32 value = tcg_temp_new_i32();
595
+
596
+ if (!a->a) {
597
+ offset = -offset;
598
+ }
599
+
600
+ addr = load_reg(s, a->rn);
601
+ if (a->p) {
602
+ tcg_gen_addi_i32(addr, addr, offset);
603
+ }
604
+
605
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
606
+ gen_helper_v8m_stackcheck(cpu_env, addr);
607
+ }
608
+
609
+ gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
610
+ MO_UL | MO_ALIGN | s->be_data);
611
+
612
+ if (a->w) {
613
+ /* writeback */
614
+ if (!a->p) {
615
+ tcg_gen_addi_i32(addr, addr, offset);
616
+ }
617
+ store_reg(s, a->rn, addr);
618
+ } else {
619
+ tcg_temp_free_i32(addr);
620
+ }
621
+ return value;
622
+}
623
+
624
+static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
625
+{
626
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
627
+ return false;
628
+ }
629
+ if (a->rn == 15) {
630
+ return false;
631
+ }
632
+ return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
633
+}
634
+
635
+static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
636
+{
637
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
638
+ return false;
639
+ }
640
+ if (a->rn == 15) {
641
+ return false;
642
+ }
643
+ return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
644
+}
645
+
646
static bool trans_NOCP(DisasContext *s, arg_nocp *a)
647
{
648
/*
649
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
650
index XXXXXXX..XXXXXXX 100644
651
--- a/target/arm/translate-vfp.c
652
+++ b/target/arm/translate-vfp.c
653
@@ -XXX,XX +XXX,XX @@ static inline long vfp_f16_offset(unsigned reg, bool top)
654
* Generate code for M-profile lazy FP state preservation if needed;
655
* this corresponds to the pseudocode PreserveFPState() function.
656
*/
657
-static void gen_preserve_fp_state(DisasContext *s)
658
+void gen_preserve_fp_state(DisasContext *s)
659
{
660
if (s->v7m_lspact) {
661
/*
662
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
663
return true;
664
}
215
}
665
216
666
-/*
217
static void mps3r_set_default_ram_info(MPS3RMachineClass *mmc)
667
- * M-profile provides two different sets of instructions that can
218
@@ -XXX,XX +XXX,XX @@ static void mps3r_set_default_ram_info(MPS3RMachineClass *mmc)
668
- * access floating point system registers: VMSR/VMRS (which move
219
/* Found the entry for "system memory" */
669
- * to/from a general purpose register) and VLDR/VSTR sysreg (which
220
mc->default_ram_size = p->size;
670
- * move directly to/from memory). In some cases there are also side
221
mc->default_ram_id = p->name;
671
- * effects which must happen after any write to memory (which could
222
+ mmc->loader_start = p->base;
672
- * cause an exception). So we implement the common logic for the
223
return;
673
- * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
224
}
674
- * which take pointers to callback functions which will perform the
675
- * actual "read/write general purpose register" and "read/write
676
- * memory" operations.
677
- */
678
-
679
-/*
680
- * Emit code to store the sysreg to its final destination; frees the
681
- * TCG temp 'value' it is passed.
682
- */
683
-typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
684
-/*
685
- * Emit code to load the value to be copied to the sysreg; returns
686
- * a new TCG temporary
687
- */
688
-typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
689
-
690
-/* Common decode/access checks for fp sysreg read/write */
691
-typedef enum FPSysRegCheckResult {
692
- FPSysRegCheckFailed, /* caller should return false */
693
- FPSysRegCheckDone, /* caller should return true */
694
- FPSysRegCheckContinue, /* caller should continue generating code */
695
-} FPSysRegCheckResult;
696
-
697
-static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
698
-{
699
- if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
700
- return FPSysRegCheckFailed;
701
- }
702
-
703
- switch (regno) {
704
- case ARM_VFP_FPSCR:
705
- case QEMU_VFP_FPSCR_NZCV:
706
- break;
707
- case ARM_VFP_FPSCR_NZCVQC:
708
- if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
709
- return FPSysRegCheckFailed;
710
- }
711
- break;
712
- case ARM_VFP_FPCXT_S:
713
- case ARM_VFP_FPCXT_NS:
714
- if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
715
- return FPSysRegCheckFailed;
716
- }
717
- if (!s->v8m_secure) {
718
- return FPSysRegCheckFailed;
719
- }
720
- break;
721
- case ARM_VFP_VPR:
722
- case ARM_VFP_P0:
723
- if (!dc_isar_feature(aa32_mve, s)) {
724
- return FPSysRegCheckFailed;
725
- }
726
- break;
727
- default:
728
- return FPSysRegCheckFailed;
729
- }
730
-
731
- /*
732
- * FPCXT_NS is a special case: it has specific handling for
733
- * "current FP state is inactive", and must do the PreserveFPState()
734
- * but not the usual full set of actions done by ExecuteFPCheck().
735
- * So we don't call vfp_access_check() and the callers must handle this.
736
- */
737
- if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
738
- return FPSysRegCheckDone;
739
- }
740
- return FPSysRegCheckContinue;
741
-}
742
-
743
-static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
744
- TCGLabel *label)
745
-{
746
- /*
747
- * FPCXT_NS is a special case: it has specific handling for
748
- * "current FP state is inactive", and must do the PreserveFPState()
749
- * but not the usual full set of actions done by ExecuteFPCheck().
750
- * We don't have a TB flag that matches the fpInactive check, so we
751
- * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
752
- *
753
- * Emit code that checks fpInactive and does a conditional
754
- * branch to label based on it:
755
- * if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
756
- * if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
757
- */
758
- assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
759
-
760
- /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
761
- TCGv_i32 aspen, fpca;
762
- aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
763
- fpca = load_cpu_field(v7m.control[M_REG_S]);
764
- tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
765
- tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
766
- tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
767
- tcg_gen_or_i32(fpca, fpca, aspen);
768
- tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
769
- tcg_temp_free_i32(aspen);
770
- tcg_temp_free_i32(fpca);
771
-}
772
-
773
-static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
774
- fp_sysreg_loadfn *loadfn,
775
- void *opaque)
776
-{
777
- /* Do a write to an M-profile floating point system register */
778
- TCGv_i32 tmp;
779
- TCGLabel *lab_end = NULL;
780
-
781
- switch (fp_sysreg_checks(s, regno)) {
782
- case FPSysRegCheckFailed:
783
- return false;
784
- case FPSysRegCheckDone:
785
- return true;
786
- case FPSysRegCheckContinue:
787
- break;
788
- }
789
-
790
- switch (regno) {
791
- case ARM_VFP_FPSCR:
792
- tmp = loadfn(s, opaque);
793
- gen_helper_vfp_set_fpscr(cpu_env, tmp);
794
- tcg_temp_free_i32(tmp);
795
- gen_lookup_tb(s);
796
- break;
797
- case ARM_VFP_FPSCR_NZCVQC:
798
- {
799
- TCGv_i32 fpscr;
800
- tmp = loadfn(s, opaque);
801
- if (dc_isar_feature(aa32_mve, s)) {
802
- /* QC is only present for MVE; otherwise RES0 */
803
- TCGv_i32 qc = tcg_temp_new_i32();
804
- tcg_gen_andi_i32(qc, tmp, FPCR_QC);
805
- /*
806
- * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
807
- * here writing the same value into all elements is simplest.
808
- */
809
- tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
810
- 16, 16, qc);
811
- }
812
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
813
- fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
814
- tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
815
- tcg_gen_or_i32(fpscr, fpscr, tmp);
816
- store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
817
- tcg_temp_free_i32(tmp);
818
- break;
819
- }
820
- case ARM_VFP_FPCXT_NS:
821
- lab_end = gen_new_label();
822
- /* fpInactive case: write is a NOP, so branch to end */
823
- gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
824
- /*
825
- * !fpInactive: if FPU disabled, take NOCP exception;
826
- * otherwise PreserveFPState(), and then FPCXT_NS writes
827
- * behave the same as FPCXT_S writes.
828
- */
829
- if (s->fp_excp_el) {
830
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
831
- syn_uncategorized(), s->fp_excp_el);
832
- /*
833
- * This was only a conditional exception, so override
834
- * gen_exception_insn()'s default to DISAS_NORETURN
835
- */
836
- s->base.is_jmp = DISAS_NEXT;
837
- break;
838
- }
839
- gen_preserve_fp_state(s);
840
- /* fall through */
841
- case ARM_VFP_FPCXT_S:
842
- {
843
- TCGv_i32 sfpa, control;
844
- /*
845
- * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
846
- * bits [27:0] from value and zeroes bits [31:28].
847
- */
848
- tmp = loadfn(s, opaque);
849
- sfpa = tcg_temp_new_i32();
850
- tcg_gen_shri_i32(sfpa, tmp, 31);
851
- control = load_cpu_field(v7m.control[M_REG_S]);
852
- tcg_gen_deposit_i32(control, control, sfpa,
853
- R_V7M_CONTROL_SFPA_SHIFT, 1);
854
- store_cpu_field(control, v7m.control[M_REG_S]);
855
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
856
- gen_helper_vfp_set_fpscr(cpu_env, tmp);
857
- tcg_temp_free_i32(tmp);
858
- tcg_temp_free_i32(sfpa);
859
- break;
860
- }
861
- case ARM_VFP_VPR:
862
- /* Behaves as NOP if not privileged */
863
- if (IS_USER(s)) {
864
- break;
865
- }
866
- tmp = loadfn(s, opaque);
867
- store_cpu_field(tmp, v7m.vpr);
868
- break;
869
- case ARM_VFP_P0:
870
- {
871
- TCGv_i32 vpr;
872
- tmp = loadfn(s, opaque);
873
- vpr = load_cpu_field(v7m.vpr);
874
- tcg_gen_deposit_i32(vpr, vpr, tmp,
875
- R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
876
- store_cpu_field(vpr, v7m.vpr);
877
- tcg_temp_free_i32(tmp);
878
- break;
879
- }
880
- default:
881
- g_assert_not_reached();
882
- }
883
- if (lab_end) {
884
- gen_set_label(lab_end);
885
- }
886
- return true;
887
-}
888
-
889
-static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
890
- fp_sysreg_storefn *storefn,
891
- void *opaque)
892
-{
893
- /* Do a read from an M-profile floating point system register */
894
- TCGv_i32 tmp;
895
- TCGLabel *lab_end = NULL;
896
- bool lookup_tb = false;
897
-
898
- switch (fp_sysreg_checks(s, regno)) {
899
- case FPSysRegCheckFailed:
900
- return false;
901
- case FPSysRegCheckDone:
902
- return true;
903
- case FPSysRegCheckContinue:
904
- break;
905
- }
906
-
907
- if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
908
- /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
909
- regno = QEMU_VFP_FPSCR_NZCV;
910
- }
911
-
912
- switch (regno) {
913
- case ARM_VFP_FPSCR:
914
- tmp = tcg_temp_new_i32();
915
- gen_helper_vfp_get_fpscr(tmp, cpu_env);
916
- storefn(s, opaque, tmp);
917
- break;
918
- case ARM_VFP_FPSCR_NZCVQC:
919
- tmp = tcg_temp_new_i32();
920
- gen_helper_vfp_get_fpscr(tmp, cpu_env);
921
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
922
- storefn(s, opaque, tmp);
923
- break;
924
- case QEMU_VFP_FPSCR_NZCV:
925
- /*
926
- * Read just NZCV; this is a special case to avoid the
927
- * helper call for the "VMRS to CPSR.NZCV" insn.
928
- */
929
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
930
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
931
- storefn(s, opaque, tmp);
932
- break;
933
- case ARM_VFP_FPCXT_S:
934
- {
935
- TCGv_i32 control, sfpa, fpscr;
936
- /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
937
- tmp = tcg_temp_new_i32();
938
- sfpa = tcg_temp_new_i32();
939
- gen_helper_vfp_get_fpscr(tmp, cpu_env);
940
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
941
- control = load_cpu_field(v7m.control[M_REG_S]);
942
- tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
943
- tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
944
- tcg_gen_or_i32(tmp, tmp, sfpa);
945
- tcg_temp_free_i32(sfpa);
946
- /*
947
- * Store result before updating FPSCR etc, in case
948
- * it is a memory write which causes an exception.
949
- */
950
- storefn(s, opaque, tmp);
951
- /*
952
- * Now we must reset FPSCR from FPDSCR_NS, and clear
953
- * CONTROL.SFPA; so we'll end the TB here.
954
- */
955
- tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
956
- store_cpu_field(control, v7m.control[M_REG_S]);
957
- fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
958
- gen_helper_vfp_set_fpscr(cpu_env, fpscr);
959
- tcg_temp_free_i32(fpscr);
960
- lookup_tb = true;
961
- break;
962
- }
963
- case ARM_VFP_FPCXT_NS:
964
- {
965
- TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
966
- TCGLabel *lab_active = gen_new_label();
967
-
968
- lookup_tb = true;
969
-
970
- gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
971
- /* fpInactive case: reads as FPDSCR_NS */
972
- TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
973
- storefn(s, opaque, tmp);
974
- lab_end = gen_new_label();
975
- tcg_gen_br(lab_end);
976
-
977
- gen_set_label(lab_active);
978
- /*
979
- * !fpInactive: if FPU disabled, take NOCP exception;
980
- * otherwise PreserveFPState(), and then FPCXT_NS
981
- * reads the same as FPCXT_S.
982
- */
983
- if (s->fp_excp_el) {
984
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
985
- syn_uncategorized(), s->fp_excp_el);
986
- /*
987
- * This was only a conditional exception, so override
988
- * gen_exception_insn()'s default to DISAS_NORETURN
989
- */
990
- s->base.is_jmp = DISAS_NEXT;
991
- break;
992
- }
993
- gen_preserve_fp_state(s);
994
- tmp = tcg_temp_new_i32();
995
- sfpa = tcg_temp_new_i32();
996
- fpscr = tcg_temp_new_i32();
997
- gen_helper_vfp_get_fpscr(fpscr, cpu_env);
998
- tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
999
- control = load_cpu_field(v7m.control[M_REG_S]);
1000
- tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
1001
- tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
1002
- tcg_gen_or_i32(tmp, tmp, sfpa);
1003
- tcg_temp_free_i32(control);
1004
- /* Store result before updating FPSCR, in case it faults */
1005
- storefn(s, opaque, tmp);
1006
- /* If SFPA is zero then set FPSCR from FPDSCR_NS */
1007
- fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
1008
- zero = tcg_const_i32(0);
1009
- tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
1010
- gen_helper_vfp_set_fpscr(cpu_env, fpscr);
1011
- tcg_temp_free_i32(zero);
1012
- tcg_temp_free_i32(sfpa);
1013
- tcg_temp_free_i32(fpdscr);
1014
- tcg_temp_free_i32(fpscr);
1015
- break;
1016
- }
1017
- case ARM_VFP_VPR:
1018
- /* Behaves as NOP if not privileged */
1019
- if (IS_USER(s)) {
1020
- break;
1021
- }
1022
- tmp = load_cpu_field(v7m.vpr);
1023
- storefn(s, opaque, tmp);
1024
- break;
1025
- case ARM_VFP_P0:
1026
- tmp = load_cpu_field(v7m.vpr);
1027
- tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
1028
- storefn(s, opaque, tmp);
1029
- break;
1030
- default:
1031
- g_assert_not_reached();
1032
- }
1033
-
1034
- if (lab_end) {
1035
- gen_set_label(lab_end);
1036
- }
1037
- if (lookup_tb) {
1038
- gen_lookup_tb(s);
1039
- }
1040
- return true;
1041
-}
1042
-
1043
-static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
1044
-{
1045
- arg_VMSR_VMRS *a = opaque;
1046
-
1047
- if (a->rt == 15) {
1048
- /* Set the 4 flag bits in the CPSR */
1049
- gen_set_nzcv(value);
1050
- tcg_temp_free_i32(value);
1051
- } else {
1052
- store_reg(s, a->rt, value);
1053
- }
1054
-}
1055
-
1056
-static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
1057
-{
1058
- arg_VMSR_VMRS *a = opaque;
1059
-
1060
- return load_reg(s, a->rt);
1061
-}
1062
-
1063
-static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
1064
-{
1065
- /*
1066
- * Accesses to R15 are UNPREDICTABLE; we choose to undef.
1067
- * FPSCR -> r15 is a special case which writes to the PSR flags;
1068
- * set a->reg to a special value to tell gen_M_fp_sysreg_read()
1069
- * we only care about the top 4 bits of FPSCR there.
1070
- */
1071
- if (a->rt == 15) {
1072
- if (a->l && a->reg == ARM_VFP_FPSCR) {
1073
- a->reg = QEMU_VFP_FPSCR_NZCV;
1074
- } else {
1075
- return false;
1076
- }
1077
- }
1078
-
1079
- if (a->l) {
1080
- /* VMRS, move FP system register to gp register */
1081
- return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
1082
- } else {
1083
- /* VMSR, move gp register to FP system register */
1084
- return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
1085
- }
1086
-}
1087
-
1088
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
1089
{
1090
TCGv_i32 tmp;
1091
bool ignore_vfp_enabled = false;
1092
1093
if (arm_dc_feature(s, ARM_FEATURE_M)) {
1094
- return gen_M_VMSR_VMRS(s, a);
1095
+ /* M profile version was already handled in m-nocp.decode */
1096
+ return false;
1097
}
225
}
1098
226
@@ -XXX,XX +XXX,XX @@ static void mps3r_an536_class_init(ObjectClass *oc, void *data)
1099
if (!dc_isar_feature(aa32_fpsp_v2, s)) {
227
};
1100
@@ -XXX,XX +XXX,XX @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
228
1101
return true;
229
mc->desc = "ARM MPS3 with AN536 FPGA image for Cortex-R52";
1102
}
230
- mc->default_cpus = 2;
1103
231
- mc->min_cpus = mc->default_cpus;
1104
-static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
232
- mc->max_cpus = mc->default_cpus;
1105
-{
233
+ /*
1106
- arg_vldr_sysreg *a = opaque;
234
+ * In the real FPGA image there are always two cores, but the standard
1107
- uint32_t offset = a->imm;
235
+ * initial setting for the SCC SYSCON 0x000 register is 0x21, meaning
1108
- TCGv_i32 addr;
236
+ * that the second core is held in reset and halted. Many images built for
1109
-
237
+ * the board do not expect the second core to run at startup (especially
1110
- if (!a->a) {
238
+ * since on the real FPGA image it is not possible to use LDREX/STREX
1111
- offset = -offset;
239
+ * in RAM between the two cores, so a true SMP setup isn't supported).
1112
- }
240
+ *
1113
-
241
+ * As QEMU's equivalent of this, we support both -smp 1 and -smp 2,
1114
- addr = load_reg(s, a->rn);
242
+ * with the default being -smp 1. This seems a more intuitive UI for
1115
- if (a->p) {
243
+ * QEMU users than, for instance, having a machine property to allow
1116
- tcg_gen_addi_i32(addr, addr, offset);
244
+ * the user to set the initial value of the SYSCON 0x000 register.
1117
- }
245
+ */
1118
-
246
+ mc->default_cpus = 1;
1119
- if (s->v8m_stackcheck && a->rn == 13 && a->w) {
247
+ mc->min_cpus = 1;
1120
- gen_helper_v8m_stackcheck(cpu_env, addr);
248
+ mc->max_cpus = 2;
1121
- }
249
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-r52");
1122
-
250
mc->valid_cpu_types = valid_cpu_types;
1123
- gen_aa32_st_i32(s, value, addr, get_mem_index(s),
251
mmc->raminfo = an536_raminfo;
1124
- MO_UL | MO_ALIGN | s->be_data);
1125
- tcg_temp_free_i32(value);
1126
-
1127
- if (a->w) {
1128
- /* writeback */
1129
- if (!a->p) {
1130
- tcg_gen_addi_i32(addr, addr, offset);
1131
- }
1132
- store_reg(s, a->rn, addr);
1133
- } else {
1134
- tcg_temp_free_i32(addr);
1135
- }
1136
-}
1137
-
1138
-static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
1139
-{
1140
- arg_vldr_sysreg *a = opaque;
1141
- uint32_t offset = a->imm;
1142
- TCGv_i32 addr;
1143
- TCGv_i32 value = tcg_temp_new_i32();
1144
-
1145
- if (!a->a) {
1146
- offset = -offset;
1147
- }
1148
-
1149
- addr = load_reg(s, a->rn);
1150
- if (a->p) {
1151
- tcg_gen_addi_i32(addr, addr, offset);
1152
- }
1153
-
1154
- if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1155
- gen_helper_v8m_stackcheck(cpu_env, addr);
1156
- }
1157
-
1158
- gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
1159
- MO_UL | MO_ALIGN | s->be_data);
1160
-
1161
- if (a->w) {
1162
- /* writeback */
1163
- if (!a->p) {
1164
- tcg_gen_addi_i32(addr, addr, offset);
1165
- }
1166
- store_reg(s, a->rn, addr);
1167
- } else {
1168
- tcg_temp_free_i32(addr);
1169
- }
1170
- return value;
1171
-}
1172
-
1173
-static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1174
-{
1175
- if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1176
- return false;
1177
- }
1178
- if (a->rn == 15) {
1179
- return false;
1180
- }
1181
- return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
1182
-}
1183
-
1184
-static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1185
-{
1186
- if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1187
- return false;
1188
- }
1189
- if (a->rn == 15) {
1190
- return false;
1191
- }
1192
- return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
1193
-}
1194
1195
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
1196
{
1197
--
252
--
1198
2.20.1
253
2.34.1
1199
1200
This board has a lot of UARTs: there is one UART per CPU in the
per-CPU peripheral part of the address map, whose interrupts are
connected as per-CPU interrupt lines. Then there are 4 UARTs in the
normal part of the peripheral space, whose interrupts are shared
peripheral interrupts.

Connect and wire them all up; this involves some OR gates where
multiple overflow interrupts are wired into one GIC input.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240206132931.38376-11-peter.maydell@linaro.org
---
 hw/arm/mps3r.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/hw/arm/mps3r.c b/hw/arm/mps3r.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/mps3r.c
+++ b/hw/arm/mps3r.c
Factor the code in full_vfp_access_check() which updates the
ownership of the FP context and creates a new FP context
out into its own function.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210618141019.10671-6-peter.maydell@linaro.org
---
 target/arm/translate-vfp.c | 104 +++++++++++++++++++++----------------
 1 file changed, 58 insertions(+), 46 deletions(-)

diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -XXX,XX +XXX,XX @@ void gen_preserve_fp_state(DisasContext *s)
21
@@ -XXX,XX +XXX,XX @@
22
#include "qapi/qmp/qlist.h"
23
#include "exec/address-spaces.h"
24
#include "cpu.h"
25
+#include "sysemu/sysemu.h"
26
#include "hw/boards.h"
27
+#include "hw/or-irq.h"
28
#include "hw/qdev-properties.h"
29
#include "hw/arm/boot.h"
30
#include "hw/arm/bsa.h"
31
+#include "hw/char/cmsdk-apb-uart.h"
32
#include "hw/intc/arm_gicv3.h"
33
34
/* Define the layout of RAM and ROM in a board */
35
@@ -XXX,XX +XXX,XX @@ typedef struct RAMInfo {
36
37
#define MPS3R_RAM_MAX 9
38
#define MPS3R_CPU_MAX 2
39
+#define MPS3R_UART_MAX 4 /* shared UART count */
40
41
#define PERIPHBASE 0xf0000000
42
#define NUM_SPIS 96
43
@@ -XXX,XX +XXX,XX @@ struct MPS3RMachineState {
44
MemoryRegion sysmem_alias[MPS3R_CPU_MAX];
45
MemoryRegion cpu_ram[MPS3R_CPU_MAX];
46
GICv3State gic;
47
+ /* per-CPU UARTs followed by the shared UARTs */
48
+ CMSDKAPBUART uart[MPS3R_CPU_MAX + MPS3R_UART_MAX];
49
+ OrIRQState cpu_uart_oflow[MPS3R_CPU_MAX];
50
+ OrIRQState uart_oflow;
51
};
52
53
#define TYPE_MPS3R_MACHINE "mps3r"
54
@@ -XXX,XX +XXX,XX @@ struct MPS3RMachineState {
55
56
OBJECT_DECLARE_TYPE(MPS3RMachineState, MPS3RMachineClass, MPS3R_MACHINE)
57
58
+/*
59
+ * Main clock frequency CLK in Hz (50MHz). In the image there are also
60
+ * ACLK, MCLK, GPUCLK and PERIPHCLK at the same frequency; for our
61
+ * model we just roll them all into one.
62
+ */
63
+#define CLK_FRQ 50000000
64
+
65
static const RAMInfo an536_raminfo[] = {
66
{
67
.name = "ATCM",
68
@@ -XXX,XX +XXX,XX @@ static void create_gic(MPS3RMachineState *mms, MemoryRegion *sysmem)
17
}
69
}
18
}
70
}
19
71
20
+/*
72
+/*
21
+ * Generate code for M-profile FP context handling: update the
73
+ * Create UART uartno, and map it into the MemoryRegion mem at address baseaddr.
22
+ * ownership of the FP context, and create a new context if
74
+ * The qemu_irq arguments are where we connect the various IRQs from the UART.
23
+ * necessary. This corresponds to the parts of the pseudocode
24
+ * ExecuteFPCheck() after the inital PreserveFPState() call.
25
+ */
75
+ */
26
+static void gen_update_fp_context(DisasContext *s)
76
+static void create_uart(MPS3RMachineState *mms, int uartno, MemoryRegion *mem,
77
+ hwaddr baseaddr, qemu_irq txirq, qemu_irq rxirq,
78
+ qemu_irq txoverirq, qemu_irq rxoverirq,
79
+ qemu_irq combirq)
27
+{
80
+{
28
+ /* Update ownership of FP context: set FPCCR.S to match current state */
81
+ g_autofree char *s = g_strdup_printf("uart%d", uartno);
29
+ if (s->v8m_fpccr_s_wrong) {
82
+ SysBusDevice *sbd;
30
+ TCGv_i32 tmp;
31
+
83
+
32
+ tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
84
+ assert(uartno < ARRAY_SIZE(mms->uart));
33
+ if (s->v8m_secure) {
85
+ object_initialize_child(OBJECT(mms), s, &mms->uart[uartno],
34
+ tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
86
+ TYPE_CMSDK_APB_UART);
35
+ } else {
87
+ qdev_prop_set_uint32(DEVICE(&mms->uart[uartno]), "pclk-frq", CLK_FRQ);
36
+ tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
88
+ qdev_prop_set_chr(DEVICE(&mms->uart[uartno]), "chardev", serial_hd(uartno));
37
+ }
89
+ sbd = SYS_BUS_DEVICE(&mms->uart[uartno]);
38
+ store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
90
+ sysbus_realize(sbd, &error_fatal);
39
+ /* Don't need to do this for any further FP insns in this TB */
91
+ memory_region_add_subregion(mem, baseaddr,
40
+ s->v8m_fpccr_s_wrong = false;
92
+ sysbus_mmio_get_region(sbd, 0));
41
+ }
93
+ sysbus_connect_irq(sbd, 0, txirq);
42
+
94
+ sysbus_connect_irq(sbd, 1, rxirq);
43
+ if (s->v7m_new_fp_ctxt_needed) {
95
+ sysbus_connect_irq(sbd, 2, txoverirq);
44
+ /*
96
+ sysbus_connect_irq(sbd, 3, rxoverirq);
45
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
97
+ sysbus_connect_irq(sbd, 4, combirq);
46
+ * the FPSCR, and VPR.
47
+ */
48
+ TCGv_i32 control, fpscr;
49
+ uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
50
+
51
+ fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
52
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
53
+ tcg_temp_free_i32(fpscr);
54
+ if (dc_isar_feature(aa32_mve, s)) {
55
+ TCGv_i32 z32 = tcg_const_i32(0);
56
+ store_cpu_field(z32, v7m.vpr);
57
+ }
58
+
59
+ /*
60
+ * We don't need to arrange to end the TB, because the only
61
+ * parts of FPSCR which we cache in the TB flags are the VECLEN
62
+ * and VECSTRIDE, and those don't exist for M-profile.
63
+ */
64
+
65
+ if (s->v8m_secure) {
66
+ bits |= R_V7M_CONTROL_SFPA_MASK;
67
+ }
68
+ control = load_cpu_field(v7m.control[M_REG_S]);
69
+ tcg_gen_ori_i32(control, control, bits);
70
+ store_cpu_field(control, v7m.control[M_REG_S]);
71
+ /* Don't need to do this for any further FP insns in this TB */
72
+ s->v7m_new_fp_ctxt_needed = false;
73
+ }
74
+}
98
+}
75
+
99
+
76
/*
100
static void mps3r_common_init(MachineState *machine)
77
* Check that VFP access is enabled. If it is, do the necessary
101
{
78
* M-profile lazy-FP handling and then return true.
102
MPS3RMachineState *mms = MPS3R_MACHINE(machine);
79
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
103
MPS3RMachineClass *mmc = MPS3R_MACHINE_GET_CLASS(mms);
80
/* Trigger lazy-state preservation if necessary */
104
MemoryRegion *sysmem = get_system_memory();
81
gen_preserve_fp_state(s);
105
+ DeviceState *gicdev;
82
106
83
- /* Update ownership of FP context: set FPCCR.S to match current state */
107
for (const RAMInfo *ri = mmc->raminfo; ri->name; ri++) {
84
- if (s->v8m_fpccr_s_wrong) {
108
MemoryRegion *mr = mr_for_raminfo(mms, ri);
85
- TCGv_i32 tmp;
109
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
86
-
87
- tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
88
- if (s->v8m_secure) {
89
- tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
90
- } else {
91
- tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
92
- }
93
- store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
94
- /* Don't need to do this for any further FP insns in this TB */
95
- s->v8m_fpccr_s_wrong = false;
96
- }
97
-
98
- if (s->v7m_new_fp_ctxt_needed) {
99
- /*
100
- * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
101
- * the FPSCR, and VPR.
102
- */
103
- TCGv_i32 control, fpscr;
104
- uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
105
-
106
- fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
107
- gen_helper_vfp_set_fpscr(cpu_env, fpscr);
108
- tcg_temp_free_i32(fpscr);
109
- if (dc_isar_feature(aa32_mve, s)) {
110
- TCGv_i32 z32 = tcg_const_i32(0);
111
- store_cpu_field(z32, v7m.vpr);
112
- }
113
-
114
- /*
115
- * We don't need to arrange to end the TB, because the only
116
- * parts of FPSCR which we cache in the TB flags are the VECLEN
117
- * and VECSTRIDE, and those don't exist for M-profile.
118
- */
119
-
120
- if (s->v8m_secure) {
121
- bits |= R_V7M_CONTROL_SFPA_MASK;
122
- }
123
- control = load_cpu_field(v7m.control[M_REG_S]);
124
- tcg_gen_ori_i32(control, control, bits);
125
- store_cpu_field(control, v7m.control[M_REG_S]);
126
- /* Don't need to do this for any further FP insns in this TB */
127
- s->v7m_new_fp_ctxt_needed = false;
128
- }
129
+ /* Update ownership of FP context and create new FP context if needed */
130
+ gen_update_fp_context(s);
131
}
110
}
132
111
133
return true;
112
create_gic(mms, sysmem);
113
+ gicdev = DEVICE(&mms->gic);
114
+
115
+ /*
116
+ * UARTs 0 and 1 are per-CPU; their interrupts are wired to
117
+ * the relevant CPU's PPI 0..3, aka INTID 16..19
118
+ */
119
+ for (int i = 0; i < machine->smp.cpus; i++) {
120
+ int intidbase = NUM_SPIS + i * GIC_INTERNAL;
121
+ g_autofree char *s = g_strdup_printf("cpu-uart-oflow-orgate%d", i);
122
+ DeviceState *orgate;
123
+
124
+ /* The two overflow IRQs from the UART are ORed together into PPI 3 */
125
+ object_initialize_child(OBJECT(mms), s, &mms->cpu_uart_oflow[i],
126
+ TYPE_OR_IRQ);
127
+ orgate = DEVICE(&mms->cpu_uart_oflow[i]);
128
+ qdev_prop_set_uint32(orgate, "num-lines", 2);
129
+ qdev_realize(orgate, NULL, &error_fatal);
130
+ qdev_connect_gpio_out(orgate, 0,
131
+ qdev_get_gpio_in(gicdev, intidbase + 19));
132
+
133
+ create_uart(mms, i, &mms->cpu_sysmem[i], 0xe7c00000,
134
+ qdev_get_gpio_in(gicdev, intidbase + 17), /* tx */
135
+ qdev_get_gpio_in(gicdev, intidbase + 16), /* rx */
136
+ qdev_get_gpio_in(orgate, 0), /* txover */
137
+ qdev_get_gpio_in(orgate, 1), /* rxover */
138
+ qdev_get_gpio_in(gicdev, intidbase + 18) /* combined */);
139
+ }
140
+ /*
141
+ * UARTs 2 to 5 are whole-system; all overflow IRQs are ORed
142
+ * together into IRQ 17
143
+ */
144
+ object_initialize_child(OBJECT(mms), "uart-oflow-orgate",
145
+ &mms->uart_oflow, TYPE_OR_IRQ);
146
+ qdev_prop_set_uint32(DEVICE(&mms->uart_oflow), "num-lines",
147
+ MPS3R_UART_MAX * 2);
148
+ qdev_realize(DEVICE(&mms->uart_oflow), NULL, &error_fatal);
149
+ qdev_connect_gpio_out(DEVICE(&mms->uart_oflow), 0,
150
+ qdev_get_gpio_in(gicdev, 17));
151
+
152
+ for (int i = 0; i < MPS3R_UART_MAX; i++) {
153
+ hwaddr baseaddr = 0xe0205000 + i * 0x1000;
154
+ int rxirq = 5 + i * 2, txirq = 6 + i * 2, combirq = 13 + i;
155
+
156
+ create_uart(mms, i + MPS3R_CPU_MAX, sysmem, baseaddr,
157
+ qdev_get_gpio_in(gicdev, txirq),
158
+ qdev_get_gpio_in(gicdev, rxirq),
159
+ qdev_get_gpio_in(DEVICE(&mms->uart_oflow), i * 2),
160
+ qdev_get_gpio_in(DEVICE(&mms->uart_oflow), i * 2 + 1),
161
+ qdev_get_gpio_in(gicdev, combirq));
162
+ }
163
164
mms->bootinfo.ram_size = machine->ram_size;
165
mms->bootinfo.board_id = -1;
134
--
166
--
135
2.20.1
167
2.34.1
136
168
137
169
diff view generated by jsdifflib
Deleted patch
1
Instead of open-coding the "take NOCP exception if FPU disabled,
2
otherwise call gen_preserve_fp_state()" code in the accessors for
3
FPCXT_NS, add an argument to vfp_access_check_m() which tells it to
4
skip the gen_update_fp_context() call, so we can use it for the
5
FPCXT_NS case.
6
1
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210618141019.10671-8-peter.maydell@linaro.org
10
---
11
target/arm/translate-a32.h | 2 +-
12
target/arm/translate-m-nocp.c | 10 ++--------
13
target/arm/translate-vfp.c | 13 ++++++++-----
14
3 files changed, 11 insertions(+), 14 deletions(-)
15
16
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a32.h
19
+++ b/target/arm/translate-a32.h
20
@@ -XXX,XX +XXX,XX @@ bool disas_neon_shared(DisasContext *s, uint32_t insn);
21
void load_reg_var(DisasContext *s, TCGv_i32 var, int reg);
22
void arm_gen_condlabel(DisasContext *s);
23
bool vfp_access_check(DisasContext *s);
24
-void gen_preserve_fp_state(DisasContext *s);
25
+bool vfp_access_check_m(DisasContext *s, bool skip_context_update);
26
void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop);
27
void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop);
28
void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop);
29
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-m-nocp.c
32
+++ b/target/arm/translate-m-nocp.c
33
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
34
* otherwise PreserveFPState(), and then FPCXT_NS writes
35
* behave the same as FPCXT_S writes.
36
*/
37
- if (s->fp_excp_el) {
38
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
39
- syn_uncategorized(), s->fp_excp_el);
40
+ if (!vfp_access_check_m(s, true)) {
41
/*
42
* This was only a conditional exception, so override
43
* gen_exception_insn()'s default to DISAS_NORETURN
44
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
45
s->base.is_jmp = DISAS_NEXT;
46
break;
47
}
48
- gen_preserve_fp_state(s);
49
}
50
/* fall through */
51
case ARM_VFP_FPCXT_S:
52
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
53
* otherwise PreserveFPState(), and then FPCXT_NS
54
* reads the same as FPCXT_S.
55
*/
56
- if (s->fp_excp_el) {
57
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
58
- syn_uncategorized(), s->fp_excp_el);
59
+ if (!vfp_access_check_m(s, true)) {
60
/*
61
* This was only a conditional exception, so override
62
* gen_exception_insn()'s default to DISAS_NORETURN
63
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
64
s->base.is_jmp = DISAS_NEXT;
65
break;
66
}
67
- gen_preserve_fp_state(s);
68
tmp = tcg_temp_new_i32();
69
sfpa = tcg_temp_new_i32();
70
fpscr = tcg_temp_new_i32();
71
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/arm/translate-vfp.c
74
+++ b/target/arm/translate-vfp.c
75
@@ -XXX,XX +XXX,XX @@ static inline long vfp_f16_offset(unsigned reg, bool top)
76
* Generate code for M-profile lazy FP state preservation if needed;
77
* this corresponds to the pseudocode PreserveFPState() function.
78
*/
79
-void gen_preserve_fp_state(DisasContext *s)
80
+static void gen_preserve_fp_state(DisasContext *s)
81
{
82
if (s->v7m_lspact) {
83
/*
84
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
85
* If VFP is enabled, do the necessary M-profile lazy-FP handling and then
86
* return true. If not, emit code to generate an appropriate exception and
87
* return false.
88
+ * skip_context_update is true to skip the "update FP context" part of this.
89
*/
90
-static bool vfp_access_check_m(DisasContext *s)
91
+bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
92
{
93
if (s->fp_excp_el) {
94
/*
95
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_m(DisasContext *s)
96
/* Trigger lazy-state preservation if necessary */
97
gen_preserve_fp_state(s);
98
99
- /* Update ownership of FP context and create new FP context if needed */
100
- gen_update_fp_context(s);
101
+ if (!skip_context_update) {
102
+ /* Update ownership of FP context and create new FP context if needed */
103
+ gen_update_fp_context(s);
104
+ }
105
106
return true;
107
}
108
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_m(DisasContext *s)
109
bool vfp_access_check(DisasContext *s)
110
{
111
if (arm_dc_feature(s, ARM_FEATURE_M)) {
112
- return vfp_access_check_m(s);
113
+ return vfp_access_check_m(s, false);
114
} else {
115
return vfp_access_check_a(s, false);
116
}
117
--
118
2.20.1
119
120
diff view generated by jsdifflib
Deleted patch
1
Implement the variants of MVE VLDR (encodings T1, T2) which perform
2
"widening" loads where bytes or halfwords are loaded from memory and
3
zero or sign-extended into halfword or word length vector elements,
4
and the narrowing MVE VSTR (encodings T1, T2) where bytes or
5
halfwords are stored from halfword or word elements.
6
1
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210617121628.20116-3-peter.maydell@linaro.org
10
---
11
target/arm/helper-mve.h | 10 ++++++++++
12
target/arm/mve.decode | 25 +++++++++++++++++++++++--
13
target/arm/mve_helper.c | 11 +++++++++++
14
target/arm/translate-mve.c | 14 ++++++++++++++
15
4 files changed, 58 insertions(+), 2 deletions(-)
16
17
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-mve.h
20
+++ b/target/arm/helper-mve.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vldrw, TCG_CALL_NO_WG, void, env, ptr, i32)
22
DEF_HELPER_FLAGS_3(mve_vstrb, TCG_CALL_NO_WG, void, env, ptr, i32)
23
DEF_HELPER_FLAGS_3(mve_vstrh, TCG_CALL_NO_WG, void, env, ptr, i32)
24
DEF_HELPER_FLAGS_3(mve_vstrw, TCG_CALL_NO_WG, void, env, ptr, i32)
25
+
26
+DEF_HELPER_FLAGS_3(mve_vldrb_sh, TCG_CALL_NO_WG, void, env, ptr, i32)
27
+DEF_HELPER_FLAGS_3(mve_vldrb_sw, TCG_CALL_NO_WG, void, env, ptr, i32)
28
+DEF_HELPER_FLAGS_3(mve_vldrb_uh, TCG_CALL_NO_WG, void, env, ptr, i32)
29
+DEF_HELPER_FLAGS_3(mve_vldrb_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
30
+DEF_HELPER_FLAGS_3(mve_vldrh_sw, TCG_CALL_NO_WG, void, env, ptr, i32)
31
+DEF_HELPER_FLAGS_3(mve_vldrh_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
32
+DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
33
+DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
34
+DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
35
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/mve.decode
38
+++ b/target/arm/mve.decode
39
@@ -XXX,XX +XXX,XX @@
40
41
%qd 22:1 13:3
42
43
-&vldr_vstr rn qd imm p a w size l
44
+&vldr_vstr rn qd imm p a w size l u
45
46
-@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd
47
+@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
48
+# Note that both Rn and Qd are 3 bits only (no D bit)
49
+@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
50
51
# Vector loads and stores
52
53
+# Widening loads and narrowing stores:
54
+# for these P=0 W=0 is 'related encoding'; sz=11 is 'related encoding'
55
+# This means we need to expand out to multiple patterns for P, W, SZ.
56
+# For stores the U bit must be 0 but we catch that in the trans_ function.
57
+# The naming scheme here is "VLDSTB_H == in-memory byte load/store to/from
58
+# signed halfword element in register", etc.
59
+VLDSTB_H 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 01 ....... @vldst_wn \
60
+ p=0 w=1 size=1
61
+VLDSTB_H 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 01 ....... @vldst_wn \
62
+ p=1 size=1
63
+VLDSTB_W 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 10 ....... @vldst_wn \
64
+ p=0 w=1 size=2
65
+VLDSTB_W 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 10 ....... @vldst_wn \
66
+ p=1 size=2
67
+VLDSTH_W 111 . 110 0 a:1 0 1 . 1 ... ... 0 111 10 ....... @vldst_wn \
68
+ p=0 w=1 size=2
69
+VLDSTH_W 111 . 110 1 a:1 0 w:1 . 1 ... ... 0 111 10 ....... @vldst_wn \
70
+ p=1 size=2
71
+
72
# Non-widening loads/stores (P=0 W=0 is 'related encoding')
73
VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111100 ....... @vldr_vstr \
74
size=0 p=0 w=1
75
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/mve_helper.c
78
+++ b/target/arm/mve_helper.c
79
@@ -XXX,XX +XXX,XX @@ DO_VSTR(vstrb, 1, stb, 1, uint8_t)
80
DO_VSTR(vstrh, 2, stw, 2, uint16_t)
81
DO_VSTR(vstrw, 4, stl, 4, uint32_t)
82
83
+DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t)
84
+DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t)
85
+DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t)
86
+DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t)
87
+DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t)
88
+DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t)
89
+
90
+DO_VSTR(vstrb_h, 1, stb, 2, int16_t)
91
+DO_VSTR(vstrb_w, 1, stb, 4, int32_t)
92
+DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
93
+
94
#undef DO_VLDR
95
#undef DO_VSTR
96
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/arm/translate-mve.c
99
+++ b/target/arm/translate-mve.c
100
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
101
};
102
return do_ldst(s, a, ldstfns[a->size][a->l]);
103
}
104
+
105
+#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST) \
106
+ static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
107
+ { \
108
+ static MVEGenLdStFn * const ldstfns[2][2] = { \
109
+ { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
110
+ { NULL, gen_helper_mve_##ULD }, \
111
+ }; \
112
+ return do_ldst(s, a, ldstfns[a->u][a->l]); \
113
+ }
114
+
115
+DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
116
+DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
117
+DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
118
--
119
2.20.1
120
121
diff view generated by jsdifflib
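As a side note on the widening forms in the patch above: the in-memory element size and the register element size differ, so each loaded byte or halfword is sign- or zero-extended into a wider lane. The following is a minimal standalone C sketch of the VLDRB.S16-style pattern only (the function name is invented for illustration; the real QEMU helpers additionally apply the VPT/ECI predicate mask and the H-macro lane ordering):

#include <stdint.h>
#include <string.h>

/* Load 8 bytes from memory and sign-extend each into a 16-bit element,
 * i.e. a "widening" byte load into halfword lanes.
 */
static void vldrb_sh_sketch(int16_t dst[8], const void *mem)
{
    int8_t bytes[8];
    memcpy(bytes, mem, sizeof(bytes));  /* in-memory element size: 1 byte */
    for (int e = 0; e < 8; e++) {
        dst[e] = bytes[e];              /* implicit sign extension to 16 bits */
    }
}

The narrowing stores go the other way: a 16- or 32-bit register lane is truncated to the in-memory element size before being written out.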
Deleted patch
1
Implement the MVE VCLZ insn (and the necessary machinery
2
for MVE 1-input vector ops).
3
1
4
Note that for non-load instructions predication is always performed
5
at a byte level granularity regardless of element size (R_ZLSJ),
6
and so the masking logic here differs from that used in the VLDR
7
and VSTR helpers.
8
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20210617121628.20116-4-peter.maydell@linaro.org
12
---
13
target/arm/helper-mve.h | 4 ++
14
target/arm/mve.decode | 8 ++++
15
target/arm/mve_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
16
target/arm/translate-mve.c | 38 ++++++++++++++++++
17
4 files changed, 132 insertions(+)
18
19
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/helper-mve.h
22
+++ b/target/arm/helper-mve.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vldrh_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
24
DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
25
DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
26
DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
27
+
28
+DEF_HELPER_FLAGS_3(mve_vclzb, TCG_CALL_NO_WG, void, env, ptr, ptr)
29
+DEF_HELPER_FLAGS_3(mve_vclzh, TCG_CALL_NO_WG, void, env, ptr, ptr)
30
+DEF_HELPER_FLAGS_3(mve_vclzw, TCG_CALL_NO_WG, void, env, ptr, ptr)
31
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/mve.decode
34
+++ b/target/arm/mve.decode
35
@@ -XXX,XX +XXX,XX @@
36
#
37
38
%qd 22:1 13:3
39
+%qm 5:1 1:3
40
41
&vldr_vstr rn qd imm p a w size l u
42
+&1op qd qm size
43
44
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
45
# Note that both Rn and Qd are 3 bits only (no D bit)
46
@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
47
48
+@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
49
+
50
# Vector loads and stores
51
52
# Widening loads and narrowing stores:
53
@@ -XXX,XX +XXX,XX @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
54
size=1 p=1
55
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
56
size=2 p=1
57
+
58
+# Vector miscellaneous
59
+
60
+VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
61
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/mve_helper.c
64
+++ b/target/arm/mve_helper.c
65
@@ -XXX,XX +XXX,XX @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
66
67
#undef DO_VLDR
68
#undef DO_VSTR
69
+
70
+/*
71
+ * The mergemask(D, R, M) macro performs the operation "*D = R" but
72
+ * storing only the bytes which correspond to 1 bits in M,
73
+ * leaving other bytes in *D unchanged. We use _Generic
74
+ * to select the correct implementation based on the type of D.
75
+ */
76
+
77
+static void mergemask_ub(uint8_t *d, uint8_t r, uint16_t mask)
78
+{
79
+ if (mask & 1) {
80
+ *d = r;
81
+ }
82
+}
83
+
84
+static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask)
85
+{
86
+ mergemask_ub((uint8_t *)d, r, mask);
87
+}
88
+
89
+static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask)
90
+{
91
+ uint16_t bmask = expand_pred_b_data[mask & 3];
92
+ *d = (*d & ~bmask) | (r & bmask);
93
+}
94
+
95
+static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask)
96
+{
97
+ mergemask_uh((uint16_t *)d, r, mask);
98
+}
99
+
100
+static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask)
101
+{
102
+ uint32_t bmask = expand_pred_b_data[mask & 0xf];
103
+ *d = (*d & ~bmask) | (r & bmask);
104
+}
105
+
106
+static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask)
107
+{
108
+ mergemask_uw((uint32_t *)d, r, mask);
109
+}
110
+
111
+static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask)
112
+{
113
+ uint64_t bmask = expand_pred_b_data[mask & 0xff];
114
+ *d = (*d & ~bmask) | (r & bmask);
115
+}
116
+
117
+static void mergemask_sq(int64_t *d, int64_t r, uint16_t mask)
118
+{
119
+ mergemask_uq((uint64_t *)d, r, mask);
120
+}
121
+
122
+#define mergemask(D, R, M) \
123
+ _Generic(D, \
124
+ uint8_t *: mergemask_ub, \
125
+ int8_t *: mergemask_sb, \
126
+ uint16_t *: mergemask_uh, \
127
+ int16_t *: mergemask_sh, \
128
+ uint32_t *: mergemask_uw, \
129
+ int32_t *: mergemask_sw, \
130
+ uint64_t *: mergemask_uq, \
131
+ int64_t *: mergemask_sq)(D, R, M)
132
+
133
+#define DO_1OP(OP, ESIZE, TYPE, FN) \
134
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
135
+ { \
136
+ TYPE *d = vd, *m = vm; \
137
+ uint16_t mask = mve_element_mask(env); \
138
+ unsigned e; \
139
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
140
+ mergemask(&d[H##ESIZE(e)], FN(m[H##ESIZE(e)]), mask); \
141
+ } \
142
+ mve_advance_vpt(env); \
143
+ }
144
+
145
+#define DO_CLZ_B(N) (clz32(N) - 24)
146
+#define DO_CLZ_H(N) (clz32(N) - 16)
147
+
148
+DO_1OP(vclzb, 1, uint8_t, DO_CLZ_B)
149
+DO_1OP(vclzh, 2, uint16_t, DO_CLZ_H)
150
+DO_1OP(vclzw, 4, uint32_t, clz32)
151
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/arm/translate-mve.c
154
+++ b/target/arm/translate-mve.c
155
@@ -XXX,XX +XXX,XX @@
156
#include "decode-mve.c.inc"
157
158
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
159
+typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
160
161
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
162
static inline long mve_qreg_offset(unsigned reg)
163
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
164
DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
165
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
166
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
167
+
168
+static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
169
+{
170
+ TCGv_ptr qd, qm;
171
+
172
+ if (!dc_isar_feature(aa32_mve, s) ||
173
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
174
+ !fn) {
175
+ return false;
176
+ }
177
+
178
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
179
+ return true;
180
+ }
181
+
182
+ qd = mve_qreg_ptr(a->qd);
183
+ qm = mve_qreg_ptr(a->qm);
184
+ fn(cpu_env, qd, qm);
185
+ tcg_temp_free_ptr(qd);
186
+ tcg_temp_free_ptr(qm);
187
+ mve_update_eci(s);
188
+ return true;
189
+}
190
+
191
+#define DO_1OP(INSN, FN) \
192
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
193
+ { \
194
+ static MVEGenOneOpFn * const fns[] = { \
195
+ gen_helper_mve_##FN##b, \
196
+ gen_helper_mve_##FN##h, \
197
+ gen_helper_mve_##FN##w, \
198
+ NULL, \
199
+ }; \
200
+ return do_1op(s, a, fns[a->size]); \
201
+ }
202
+
203
+DO_1OP(VCLZ, vclz)
204
--
205
2.20.1
206
207
diff view generated by jsdifflib
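For readers not familiar with C11 _Generic, which the mergemask() macro above relies on: it selects one of several functions at compile time based on the static type of its controlling expression. A standalone sketch of just that dispatch idea (unrelated to the actual QEMU helpers; the names here are invented for illustration):

#include <stdint.h>
#include <stdio.h>

static void store_u8(uint8_t *d, uint8_t r)    { *d = r; }
static void store_u16(uint16_t *d, uint16_t r) { *d = r; }

/* _Generic picks the callee from the pointer type of D at compile time */
#define store(D, R) _Generic((D),        \
        uint8_t * : store_u8,            \
        uint16_t *: store_u16)(D, R)

int main(void)
{
    uint8_t b;
    uint16_t h;
    store(&b, 0x12);    /* resolves to store_u8 */
    store(&h, 0x3456);  /* resolves to store_u16 */
    printf("%x %x\n", b, h);
    return 0;
}

This is why mergemask() can take a destination pointer of any element type and still end up in the right width-specific merge routine with no run-time overhead.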
Deleted patch
1
Implement the MVE VCLS insn.
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-5-peter.maydell@linaro.org
6
---
7
target/arm/helper-mve.h | 4 ++++
8
target/arm/mve.decode | 1 +
9
target/arm/mve_helper.c | 7 +++++++
10
target/arm/translate-mve.c | 1 +
11
4 files changed, 13 insertions(+)
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
16
+++ b/target/arm/helper-mve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
18
DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
19
DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
20
21
+DEF_HELPER_FLAGS_3(mve_vclsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
22
+DEF_HELPER_FLAGS_3(mve_vclsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
23
+DEF_HELPER_FLAGS_3(mve_vclsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
24
+
25
DEF_HELPER_FLAGS_3(mve_vclzb, TCG_CALL_NO_WG, void, env, ptr, ptr)
26
DEF_HELPER_FLAGS_3(mve_vclzh, TCG_CALL_NO_WG, void, env, ptr, ptr)
27
DEF_HELPER_FLAGS_3(mve_vclzw, TCG_CALL_NO_WG, void, env, ptr, ptr)
28
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/mve.decode
31
+++ b/target/arm/mve.decode
32
@@ -XXX,XX +XXX,XX @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
33
34
# Vector miscellaneous
35
36
+VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
37
VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
38
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/mve_helper.c
41
+++ b/target/arm/mve_helper.c
42
@@ -XXX,XX +XXX,XX @@ static void mergemask_sq(int64_t *d, int64_t r, uint16_t mask)
43
mve_advance_vpt(env); \
44
}
45
46
+#define DO_CLS_B(N) (clrsb32(N) - 24)
47
+#define DO_CLS_H(N) (clrsb32(N) - 16)
48
+
49
+DO_1OP(vclsb, 1, int8_t, DO_CLS_B)
50
+DO_1OP(vclsh, 2, int16_t, DO_CLS_H)
51
+DO_1OP(vclsw, 4, int32_t, clrsb32)
52
+
53
#define DO_CLZ_B(N) (clz32(N) - 24)
54
#define DO_CLZ_H(N) (clz32(N) - 16)
55
56
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/translate-mve.c
59
+++ b/target/arm/translate-mve.c
60
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
61
}
62
63
DO_1OP(VCLZ, vclz)
64
+DO_1OP(VCLS, vcls)
65
--
66
2.20.1
67
68
diff view generated by jsdifflib
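The "- 24" and "- 16" adjustments in DO_CLS_B/DO_CLS_H above (and in DO_CLZ_B/DO_CLZ_H in the previous patch) exist because the helper macros operate on the element value widened to 32 bits, which adds leading bits that are not really part of the element. A small illustrative sketch of the count-leading-zeros case, using the GCC/Clang builtin rather than QEMU's clz32()/clrsb32() wrappers (an assumption made purely for a self-contained example):

#include <stdint.h>
#include <assert.h>

/* clz of an 8-bit value: widening to 32 bits adds 24 leading zeros
 * that must be subtracted again -- the DO_CLZ_B pattern.
 * (__builtin_clz(0) is undefined, so handle zero explicitly;
 * QEMU's clz32() returns 32 for zero, which makes 32 - 24 = 8 fall
 * out naturally without a special case.)
 */
static int clz8(uint8_t x)
{
    return x ? __builtin_clz((uint32_t)x) - 24 : 8;
}

int main(void)
{
    assert(clz8(0x80) == 0);
    assert(clz8(0x01) == 7);
    assert(clz8(0x00) == 8);
    return 0;
}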
Deleted patch
1
Implement the MVE instructions VREV16, VREV32 and VREV64.
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-6-peter.maydell@linaro.org
6
---
7
target/arm/helper-mve.h | 7 +++++++
8
target/arm/mve.decode | 4 ++++
9
target/arm/mve_helper.c | 7 +++++++
10
target/arm/translate-mve.c | 33 +++++++++++++++++++++++++++++++++
11
4 files changed, 51 insertions(+)
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
16
+++ b/target/arm/helper-mve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vclsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
18
DEF_HELPER_FLAGS_3(mve_vclzb, TCG_CALL_NO_WG, void, env, ptr, ptr)
19
DEF_HELPER_FLAGS_3(mve_vclzh, TCG_CALL_NO_WG, void, env, ptr, ptr)
20
DEF_HELPER_FLAGS_3(mve_vclzw, TCG_CALL_NO_WG, void, env, ptr, ptr)
21
+
22
+DEF_HELPER_FLAGS_3(mve_vrev16b, TCG_CALL_NO_WG, void, env, ptr, ptr)
23
+DEF_HELPER_FLAGS_3(mve_vrev32b, TCG_CALL_NO_WG, void, env, ptr, ptr)
24
+DEF_HELPER_FLAGS_3(mve_vrev32h, TCG_CALL_NO_WG, void, env, ptr, ptr)
25
+DEF_HELPER_FLAGS_3(mve_vrev64b, TCG_CALL_NO_WG, void, env, ptr, ptr)
26
+DEF_HELPER_FLAGS_3(mve_vrev64h, TCG_CALL_NO_WG, void, env, ptr, ptr)
27
+DEF_HELPER_FLAGS_3(mve_vrev64w, TCG_CALL_NO_WG, void, env, ptr, ptr)
28
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/mve.decode
31
+++ b/target/arm/mve.decode
32
@@ -XXX,XX +XXX,XX @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
33
34
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
35
VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
36
+
37
+VREV16 1111 1111 1 . 11 .. 00 ... 0 0001 01 . 0 ... 0 @1op
38
+VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op
39
+VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op
40
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/mve_helper.c
43
+++ b/target/arm/mve_helper.c
44
@@ -XXX,XX +XXX,XX @@ DO_1OP(vclsw, 4, int32_t, clrsb32)
45
DO_1OP(vclzb, 1, uint8_t, DO_CLZ_B)
46
DO_1OP(vclzh, 2, uint16_t, DO_CLZ_H)
47
DO_1OP(vclzw, 4, uint32_t, clz32)
48
+
49
+DO_1OP(vrev16b, 2, uint16_t, bswap16)
50
+DO_1OP(vrev32b, 4, uint32_t, bswap32)
51
+DO_1OP(vrev32h, 4, uint32_t, hswap32)
52
+DO_1OP(vrev64b, 8, uint64_t, bswap64)
53
+DO_1OP(vrev64h, 8, uint64_t, hswap64)
54
+DO_1OP(vrev64w, 8, uint64_t, wswap64)
55
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-mve.c
58
+++ b/target/arm/translate-mve.c
59
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
60
61
DO_1OP(VCLZ, vclz)
62
DO_1OP(VCLS, vcls)
63
+
64
+static bool trans_VREV16(DisasContext *s, arg_1op *a)
65
+{
66
+ static MVEGenOneOpFn * const fns[] = {
67
+ gen_helper_mve_vrev16b,
68
+ NULL,
69
+ NULL,
70
+ NULL,
71
+ };
72
+ return do_1op(s, a, fns[a->size]);
73
+}
74
+
75
+static bool trans_VREV32(DisasContext *s, arg_1op *a)
76
+{
77
+ static MVEGenOneOpFn * const fns[] = {
78
+ gen_helper_mve_vrev32b,
79
+ gen_helper_mve_vrev32h,
80
+ NULL,
81
+ NULL,
82
+ };
83
+ return do_1op(s, a, fns[a->size]);
84
+}
85
+
86
+static bool trans_VREV64(DisasContext *s, arg_1op *a)
87
+{
88
+ static MVEGenOneOpFn * const fns[] = {
89
+ gen_helper_mve_vrev64b,
90
+ gen_helper_mve_vrev64h,
91
+ gen_helper_mve_vrev64w,
92
+ NULL,
93
+ };
94
+ return do_1op(s, a, fns[a->size]);
95
+}
96
--
97
2.20.1
98
99
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE VMVN(register) operation. Note that for
2
predication this operation is byte-by-byte.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210617121628.20116-7-peter.maydell@linaro.org
7
---
8
target/arm/helper-mve.h | 2 ++
9
target/arm/mve.decode | 3 +++
10
target/arm/mve_helper.c | 4 ++++
11
target/arm/translate-mve.c | 5 +++++
12
4 files changed, 14 insertions(+)
13
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
17
+++ b/target/arm/helper-mve.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vrev32h, TCG_CALL_NO_WG, void, env, ptr, ptr)
19
DEF_HELPER_FLAGS_3(mve_vrev64b, TCG_CALL_NO_WG, void, env, ptr, ptr)
20
DEF_HELPER_FLAGS_3(mve_vrev64h, TCG_CALL_NO_WG, void, env, ptr, ptr)
21
DEF_HELPER_FLAGS_3(mve_vrev64w, TCG_CALL_NO_WG, void, env, ptr, ptr)
22
+
23
+DEF_HELPER_FLAGS_3(mve_vmvn, TCG_CALL_NO_WG, void, env, ptr, ptr)
24
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/mve.decode
27
+++ b/target/arm/mve.decode
28
@@ -XXX,XX +XXX,XX @@
29
@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
30
31
@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
32
+@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
33
34
# Vector loads and stores
35
36
@@ -XXX,XX +XXX,XX @@ VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
37
VREV16 1111 1111 1 . 11 .. 00 ... 0 0001 01 . 0 ... 0 @1op
38
VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op
39
VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op
40
+
41
+VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz
42
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/mve_helper.c
45
+++ b/target/arm/mve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_1OP(vrev32h, 4, uint32_t, hswap32)
47
DO_1OP(vrev64b, 8, uint64_t, bswap64)
48
DO_1OP(vrev64h, 8, uint64_t, hswap64)
49
DO_1OP(vrev64w, 8, uint64_t, wswap64)
50
+
51
+#define DO_NOT(N) (~(N))
52
+
53
+DO_1OP(vmvn, 8, uint64_t, DO_NOT)
54
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/arm/translate-mve.c
57
+++ b/target/arm/translate-mve.c
58
@@ -XXX,XX +XXX,XX @@ static bool trans_VREV64(DisasContext *s, arg_1op *a)
59
};
60
return do_1op(s, a, fns[a->size]);
61
}
62
+
63
+static bool trans_VMVN(DisasContext *s, arg_1op *a)
64
+{
65
+ return do_1op(s, a, gen_helper_mve_vmvn);
66
+}
67
--
68
2.20.1
69
70
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE VABS functions (both integer and floating point).
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-8-peter.maydell@linaro.org
6
---
7
target/arm/helper-mve.h | 6 ++++++
8
target/arm/mve.decode | 3 +++
9
target/arm/mve_helper.c | 13 +++++++++++++
10
target/arm/translate-mve.c | 15 +++++++++++++++
11
4 files changed, 37 insertions(+)
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
16
+++ b/target/arm/helper-mve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vrev64h, TCG_CALL_NO_WG, void, env, ptr, ptr)
18
DEF_HELPER_FLAGS_3(mve_vrev64w, TCG_CALL_NO_WG, void, env, ptr, ptr)
19
20
DEF_HELPER_FLAGS_3(mve_vmvn, TCG_CALL_NO_WG, void, env, ptr, ptr)
21
+
22
+DEF_HELPER_FLAGS_3(mve_vabsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
23
+DEF_HELPER_FLAGS_3(mve_vabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
24
+DEF_HELPER_FLAGS_3(mve_vabsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
25
+DEF_HELPER_FLAGS_3(mve_vfabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
26
+DEF_HELPER_FLAGS_3(mve_vfabss, TCG_CALL_NO_WG, void, env, ptr, ptr)
27
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/mve.decode
30
+++ b/target/arm/mve.decode
31
@@ -XXX,XX +XXX,XX @@ VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op
32
VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op
33
34
VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz
35
+
36
+VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op
37
+VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op
38
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/mve_helper.c
41
+++ b/target/arm/mve_helper.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "exec/helper-proto.h"
44
#include "exec/cpu_ldst.h"
45
#include "exec/exec-all.h"
46
+#include "tcg/tcg.h"
47
48
static uint16_t mve_element_mask(CPUARMState *env)
49
{
50
@@ -XXX,XX +XXX,XX @@ DO_1OP(vrev64w, 8, uint64_t, wswap64)
51
#define DO_NOT(N) (~(N))
52
53
DO_1OP(vmvn, 8, uint64_t, DO_NOT)
54
+
55
+#define DO_ABS(N) ((N) < 0 ? -(N) : (N))
56
+#define DO_FABSH(N) ((N) & dup_const(MO_16, 0x7fff))
57
+#define DO_FABSS(N) ((N) & dup_const(MO_32, 0x7fffffff))
58
+
59
+DO_1OP(vabsb, 1, int8_t, DO_ABS)
60
+DO_1OP(vabsh, 2, int16_t, DO_ABS)
61
+DO_1OP(vabsw, 4, int32_t, DO_ABS)
62
+
63
+/* We can do these 64 bits at a time */
64
+DO_1OP(vfabsh, 8, uint64_t, DO_FABSH)
65
+DO_1OP(vfabss, 8, uint64_t, DO_FABSS)
66
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-mve.c
69
+++ b/target/arm/translate-mve.c
70
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
71
72
DO_1OP(VCLZ, vclz)
73
DO_1OP(VCLS, vcls)
74
+DO_1OP(VABS, vabs)
75
76
static bool trans_VREV16(DisasContext *s, arg_1op *a)
77
{
78
@@ -XXX,XX +XXX,XX @@ static bool trans_VMVN(DisasContext *s, arg_1op *a)
79
{
80
return do_1op(s, a, gen_helper_mve_vmvn);
81
}
82
+
83
+static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
84
+{
85
+ static MVEGenOneOpFn * const fns[] = {
86
+ NULL,
87
+ gen_helper_mve_vfabsh,
88
+ gen_helper_mve_vfabss,
89
+ NULL,
90
+ };
91
+ if (!dc_isar_feature(aa32_mve_fp, s)) {
92
+ return false;
93
+ }
94
+ return do_1op(s, a, fns[a->size]);
95
+}
96
--
97
2.20.1
98
99
diff view generated by jsdifflib
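The DO_FABSH/DO_FABSS definitions above work because, for IEEE half and single precision, absolute value is just clearing the sign bit, so four fp16 lanes (or two fp32 lanes) can be handled with one 64-bit AND. A short illustrative sketch of the idea, with a hand-rolled stand-in for dup_const() (names invented for illustration, not the QEMU implementation):

#include <stdint.h>

/* Replicate a 16-bit constant into all four 16-bit lanes of a 64-bit
 * word, roughly what dup_const(MO_16, x) produces.
 */
static uint64_t dup16(uint16_t x)
{
    return 0x0001000100010001ULL * x;
}

/* fabs on four packed fp16 values: clear each lane's sign bit */
static uint64_t fabs_h4(uint64_t lanes)
{
    return lanes & dup16(0x7fff);
}

/* fneg on four packed fp16 values: flip each lane's sign bit
 * (the same trick the following VNEG patch uses with XOR)
 */
static uint64_t fneg_h4(uint64_t lanes)
{
    return lanes ^ dup16(0x8000);
}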
Deleted patch
1
Implement the MVE VNEG insn (both integer and floating point forms).
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-9-peter.maydell@linaro.org
6
---
7
target/arm/helper-mve.h | 6 ++++++
8
target/arm/mve.decode | 2 ++
9
target/arm/mve_helper.c | 12 ++++++++++++
10
target/arm/translate-mve.c | 15 +++++++++++++++
11
4 files changed, 35 insertions(+)
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
16
+++ b/target/arm/helper-mve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
18
DEF_HELPER_FLAGS_3(mve_vabsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
19
DEF_HELPER_FLAGS_3(mve_vfabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
20
DEF_HELPER_FLAGS_3(mve_vfabss, TCG_CALL_NO_WG, void, env, ptr, ptr)
21
+
22
+DEF_HELPER_FLAGS_3(mve_vnegb, TCG_CALL_NO_WG, void, env, ptr, ptr)
23
+DEF_HELPER_FLAGS_3(mve_vnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
24
+DEF_HELPER_FLAGS_3(mve_vnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
25
+DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
26
+DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr)
27
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/mve.decode
30
+++ b/target/arm/mve.decode
31
@@ -XXX,XX +XXX,XX @@ VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz
32
33
VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op
34
VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op
35
+VNEG 1111 1111 1 . 11 .. 01 ... 0 0011 11 . 0 ... 0 @1op
36
+VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op
37
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/mve_helper.c
40
+++ b/target/arm/mve_helper.c
41
@@ -XXX,XX +XXX,XX @@ DO_1OP(vabsw, 4, int32_t, DO_ABS)
42
/* We can do these 64 bits at a time */
43
DO_1OP(vfabsh, 8, uint64_t, DO_FABSH)
44
DO_1OP(vfabss, 8, uint64_t, DO_FABSS)
45
+
46
+#define DO_NEG(N) (-(N))
47
+#define DO_FNEGH(N) ((N) ^ dup_const(MO_16, 0x8000))
48
+#define DO_FNEGS(N) ((N) ^ dup_const(MO_32, 0x80000000))
49
+
50
+DO_1OP(vnegb, 1, int8_t, DO_NEG)
51
+DO_1OP(vnegh, 2, int16_t, DO_NEG)
52
+DO_1OP(vnegw, 4, int32_t, DO_NEG)
53
+
54
+/* We can do these 64 bits at a time */
55
+DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH)
56
+DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
57
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate-mve.c
60
+++ b/target/arm/translate-mve.c
61
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
62
DO_1OP(VCLZ, vclz)
63
DO_1OP(VCLS, vcls)
64
DO_1OP(VABS, vabs)
65
+DO_1OP(VNEG, vneg)
66
67
static bool trans_VREV16(DisasContext *s, arg_1op *a)
68
{
69
@@ -XXX,XX +XXX,XX @@ static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
70
}
71
return do_1op(s, a, fns[a->size]);
72
}
73
+
74
+static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
75
+{
76
+ static MVEGenOneOpFn * const fns[] = {
77
+ NULL,
78
+ gen_helper_mve_vfnegh,
79
+ gen_helper_mve_vfnegs,
80
+ NULL,
81
+ };
82
+ if (!dc_isar_feature(aa32_mve_fp, s)) {
83
+ return false;
84
+ }
85
+ return do_1op(s, a, fns[a->size]);
86
+}
87
--
88
2.20.1
89
90
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE VDUP insn, which duplicates a value from
2
a general-purpose register into every lane of a vector
3
register (subject to predication).
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210617121628.20116-11-peter.maydell@linaro.org
8
---
9
target/arm/helper-mve.h | 2 ++
10
target/arm/mve.decode | 10 ++++++++++
11
target/arm/mve_helper.c | 16 ++++++++++++++++
12
target/arm/translate-mve.c | 27 +++++++++++++++++++++++++++
13
4 files changed, 55 insertions(+)
14
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper-mve.h
18
+++ b/target/arm/helper-mve.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
20
DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
21
DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
22
23
+DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
24
+
25
DEF_HELPER_FLAGS_3(mve_vclsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
26
DEF_HELPER_FLAGS_3(mve_vclsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
27
DEF_HELPER_FLAGS_3(mve_vclsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
28
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/mve.decode
31
+++ b/target/arm/mve.decode
32
@@ -XXX,XX +XXX,XX @@
33
34
%qd 22:1 13:3
35
%qm 5:1 1:3
36
+%qn 7:1 17:3
37
38
&vldr_vstr rn qd imm p a w size l u
39
&1op qd qm size
40
@@ -XXX,XX +XXX,XX @@ VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op
41
VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op
42
VNEG 1111 1111 1 . 11 .. 01 ... 0 0011 11 . 0 ... 0 @1op
43
VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op
44
+
45
+&vdup qd rt size
46
+# Qd is in the fields usually named Qn
47
+@vdup .... .... . . .. ... . rt:4 .... . . . . .... qd=%qn &vdup
48
+
49
+# B and E bits encode size, which we decode here to the usual size values
50
+VDUP 1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0
51
+VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1
52
+VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
53
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/mve_helper.c
56
+++ b/target/arm/mve_helper.c
57
@@ -XXX,XX +XXX,XX @@ static void mergemask_sq(int64_t *d, int64_t r, uint16_t mask)
58
uint64_t *: mergemask_uq, \
59
int64_t *: mergemask_sq)(D, R, M)
60
61
+void HELPER(mve_vdup)(CPUARMState *env, void *vd, uint32_t val)
62
+{
63
+ /*
64
+ * The generated code already replicated an 8 or 16 bit constant
65
+ * into the 32-bit value, so we only need to write the 32-bit
66
+ * value to all elements of the Qreg, allowing for predication.
67
+ */
68
+ uint32_t *d = vd;
69
+ uint16_t mask = mve_element_mask(env);
70
+ unsigned e;
71
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
72
+ mergemask(&d[H4(e)], val, mask);
73
+ }
74
+ mve_advance_vpt(env);
75
+}
76
+
77
#define DO_1OP(OP, ESIZE, TYPE, FN) \
78
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
79
{ \
80
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/arm/translate-mve.c
83
+++ b/target/arm/translate-mve.c
84
@@ -XXX,XX +XXX,XX @@ DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
85
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
86
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
87
88
+static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
89
+{
90
+ TCGv_ptr qd;
91
+ TCGv_i32 rt;
92
+
93
+ if (!dc_isar_feature(aa32_mve, s) ||
94
+ !mve_check_qreg_bank(s, a->qd)) {
95
+ return false;
96
+ }
97
+ if (a->rt == 13 || a->rt == 15) {
98
+ /* UNPREDICTABLE; we choose to UNDEF */
99
+ return false;
100
+ }
101
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
102
+ return true;
103
+ }
104
+
105
+ qd = mve_qreg_ptr(a->qd);
106
+ rt = load_reg(s, a->rt);
107
+ tcg_gen_dup_i32(a->size, rt, rt);
108
+ gen_helper_mve_vdup(cpu_env, qd, rt);
109
+ tcg_temp_free_ptr(qd);
110
+ tcg_temp_free_i32(rt);
111
+ mve_update_eci(s);
112
+ return true;
113
+}
114
+
115
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
116
{
117
TCGv_ptr qd, qm;
118
--
119
2.20.1
120
121
diff view generated by jsdifflib
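The comment in the mve_vdup helper above notes that the translated code has already replicated an 8- or 16-bit source value across a full 32-bit word before the helper writes it to every element. A sketch of what that replication step amounts to, using the usual size encoding from the decode above (0 = byte, 1 = halfword, 2 = word); this is plain C for illustration, not the TCG implementation:

#include <stdint.h>

/* Replicate the low 8 or 16 bits of 'val' across a 32-bit word. */
static uint32_t dup_to_32(uint32_t val, int size)
{
    switch (size) {
    case 0: /* byte */
        return (val & 0xff) * 0x01010101u;
    case 1: /* halfword */
        return (val & 0xffff) * 0x00010001u;
    default: /* word: already 32 bits wide */
        return val;
    }
}

With the value pre-replicated, the helper only ever has to write 32-bit chunks, which is why a single mve_vdup helper covers all three element sizes.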
Deleted patch
1
Implement the MVE vector logical operations operating
2
on two registers.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210617121628.20116-12-peter.maydell@linaro.org
7
---
8
target/arm/helper-mve.h | 6 ++++++
9
target/arm/mve.decode | 9 +++++++++
10
target/arm/mve_helper.c | 26 ++++++++++++++++++++++++++
11
target/arm/translate-mve.c | 37 +++++++++++++++++++++++++++++++++++++
12
4 files changed, 78 insertions(+)
13
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
17
+++ b/target/arm/helper-mve.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
19
DEF_HELPER_FLAGS_3(mve_vnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
20
DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
21
DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr)
22
+
23
+DEF_HELPER_FLAGS_4(mve_vand, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vorn, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_veor, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/mve.decode
31
+++ b/target/arm/mve.decode
32
@@ -XXX,XX +XXX,XX @@
33
34
&vldr_vstr rn qd imm p a w size l u
35
&1op qd qm size
36
+&2op qd qm qn size
37
38
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
39
# Note that both Rn and Qd are 3 bits only (no D bit)
40
@@ -XXX,XX +XXX,XX @@
41
42
@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
43
@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
44
+@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
45
46
# Vector loads and stores
47
48
@@ -XXX,XX +XXX,XX @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
49
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
50
size=2 p=1
51
52
+# Vector 2-op
53
+VAND 1110 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
54
+VBIC 1110 1111 0 . 01 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
55
+VORR 1110 1111 0 . 10 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
56
+VORN 1110 1111 0 . 11 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
57
+VEOR 1111 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
58
+
59
# Vector miscellaneous
60
61
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
62
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/mve_helper.c
65
+++ b/target/arm/mve_helper.c
66
@@ -XXX,XX +XXX,XX @@ DO_1OP(vnegw, 4, int32_t, DO_NEG)
67
/* We can do these 64 bits at a time */
68
DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH)
69
DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
70
+
71
+#define DO_2OP(OP, ESIZE, TYPE, FN) \
72
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
73
+ void *vd, void *vn, void *vm) \
74
+ { \
75
+ TYPE *d = vd, *n = vn, *m = vm; \
76
+ uint16_t mask = mve_element_mask(env); \
77
+ unsigned e; \
78
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
79
+ mergemask(&d[H##ESIZE(e)], \
80
+ FN(n[H##ESIZE(e)], m[H##ESIZE(e)]), mask); \
81
+ } \
82
+ mve_advance_vpt(env); \
83
+ }
84
+
85
+#define DO_AND(N, M) ((N) & (M))
86
+#define DO_BIC(N, M) ((N) & ~(M))
87
+#define DO_ORR(N, M) ((N) | (M))
88
+#define DO_ORN(N, M) ((N) | ~(M))
89
+#define DO_EOR(N, M) ((N) ^ (M))
90
+
91
+DO_2OP(vand, 8, uint64_t, DO_AND)
92
+DO_2OP(vbic, 8, uint64_t, DO_BIC)
93
+DO_2OP(vorr, 8, uint64_t, DO_ORR)
94
+DO_2OP(vorn, 8, uint64_t, DO_ORN)
95
+DO_2OP(veor, 8, uint64_t, DO_EOR)
96
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/arm/translate-mve.c
99
+++ b/target/arm/translate-mve.c
100
@@ -XXX,XX +XXX,XX @@
101
102
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
103
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
104
+typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
105
106
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
107
static inline long mve_qreg_offset(unsigned reg)
108
@@ -XXX,XX +XXX,XX @@ static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
109
}
110
return do_1op(s, a, fns[a->size]);
111
}
112
+
113
+static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
114
+{
115
+ TCGv_ptr qd, qn, qm;
116
+
117
+ if (!dc_isar_feature(aa32_mve, s) ||
118
+ !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
119
+ !fn) {
120
+ return false;
121
+ }
122
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
123
+ return true;
124
+ }
125
+
126
+ qd = mve_qreg_ptr(a->qd);
127
+ qn = mve_qreg_ptr(a->qn);
128
+ qm = mve_qreg_ptr(a->qm);
129
+ fn(cpu_env, qd, qn, qm);
130
+ tcg_temp_free_ptr(qd);
131
+ tcg_temp_free_ptr(qn);
132
+ tcg_temp_free_ptr(qm);
133
+ mve_update_eci(s);
134
+ return true;
135
+}
136
+
137
+#define DO_LOGIC(INSN, HELPER) \
138
+ static bool trans_##INSN(DisasContext *s, arg_2op *a) \
139
+ { \
140
+ return do_2op(s, a, HELPER); \
141
+ }
142
+
143
+DO_LOGIC(VAND, gen_helper_mve_vand)
144
+DO_LOGIC(VBIC, gen_helper_mve_vbic)
145
+DO_LOGIC(VORR, gen_helper_mve_vorr)
146
+DO_LOGIC(VORN, gen_helper_mve_vorn)
147
+DO_LOGIC(VEOR, gen_helper_mve_veor)
148
--
149
2.20.1
150
151
diff view generated by jsdifflib
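Pulling the last few patches together: a 2-operand MVE helper computes a full vector result but commits each byte of it only where the corresponding bit of the 16-bit predicate mask is set, since predication is byte-granular. A minimal sketch of that merge step for a byte-wise AND (illustrative only; the real helpers express this through the DO_2OP and mergemask() machinery shown earlier):

#include <stdint.h>

/* d, n, m are 16-byte MVE vectors; 'mask' has one bit per byte lane. */
static void vand_bytes_predicated(uint8_t d[16], const uint8_t n[16],
                                  const uint8_t m[16], uint16_t mask)
{
    for (int e = 0; e < 16; e++) {
        if (mask & (1u << e)) {
            d[e] = n[e] & m[e];  /* predicated lanes take the new result */
        }                        /* other lanes keep their previous value */
    }
}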
Implement the MVE VADD, VSUB and VMUL insns.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-13-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    | 12 ++++++++++++
 target/arm/mve.decode      |  5 +++++
 target/arm/mve_helper.c    | 14 ++++++++++++++
 target/arm/translate-mve.c | 16 ++++++++++++++++
 4 files changed, 47 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
 DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
 DEF_HELPER_FLAGS_4(mve_vorn, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
 DEF_HELPER_FLAGS_4(mve_veor, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vaddb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vaddw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vsubb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsubh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsubw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmulb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@

 @1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
 @1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
+@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
 @2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0

 # Vector loads and stores
@@ -XXX,XX +XXX,XX @@ VORR 1110 1111 0 . 10 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
 VORN 1110 1111 0 . 11 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
 VEOR 1111 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz

+VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
+VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
+VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
+
 # Vector miscellaneous

 VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
         mve_advance_vpt(env);                           \
     }

+/* provide unsigned 2-op helpers for all sizes */
+#define DO_2OP_U(OP, FN)                        \
+    DO_2OP(OP##b, 1, uint8_t, FN)               \
+    DO_2OP(OP##h, 2, uint16_t, FN)              \
+    DO_2OP(OP##w, 4, uint32_t, FN)
+
 #define DO_AND(N, M)  ((N) & (M))
 #define DO_BIC(N, M)  ((N) & ~(M))
 #define DO_ORR(N, M)  ((N) | (M))
@@ -XXX,XX +XXX,XX @@ DO_2OP(vbic, 8, uint64_t, DO_BIC)
 DO_2OP(vorr, 8, uint64_t, DO_ORR)
 DO_2OP(vorn, 8, uint64_t, DO_ORN)
 DO_2OP(veor, 8, uint64_t, DO_EOR)
+
+#define DO_ADD(N, M) ((N) + (M))
+#define DO_SUB(N, M) ((N) - (M))
+#define DO_MUL(N, M) ((N) * (M))
+
+DO_2OP_U(vadd, DO_ADD)
+DO_2OP_U(vsub, DO_SUB)
+DO_2OP_U(vmul, DO_MUL)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_LOGIC(VBIC, gen_helper_mve_vbic)
 DO_LOGIC(VORR, gen_helper_mve_vorr)
 DO_LOGIC(VORN, gen_helper_mve_vorn)
 DO_LOGIC(VEOR, gen_helper_mve_veor)
+
+#define DO_2OP(INSN, FN) \
+    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
+    {                                                           \
+        static MVEGenTwoOpFn * const fns[] = {                  \
+            gen_helper_mve_##FN##b,                             \
+            gen_helper_mve_##FN##h,                             \
+            gen_helper_mve_##FN##w,                             \
+            NULL,                                               \
+        };                                                      \
+        return do_2op(s, a, fns[a->size]);                      \
+    }
+
+DO_2OP(VADD, vadd)
+DO_2OP(VSUB, vsub)
+DO_2OP(VMUL, vmul)

Add the GPIO, watchdog, dual-timer and I2C devices to the mps3-an536
board. These are all simple devices that just need to be created and
wired up.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240206132931.38376-12-peter.maydell@linaro.org
---
 hw/arm/mps3r.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/hw/arm/mps3r.c b/hw/arm/mps3r.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/mps3r.c
+++ b/hw/arm/mps3r.c
@@ -XXX,XX +XXX,XX @@
 #include "sysemu/sysemu.h"
 #include "hw/boards.h"
 #include "hw/or-irq.h"
+#include "hw/qdev-clock.h"
 #include "hw/qdev-properties.h"
 #include "hw/arm/boot.h"
 #include "hw/arm/bsa.h"
 #include "hw/char/cmsdk-apb-uart.h"
+#include "hw/i2c/arm_sbcon_i2c.h"
 #include "hw/intc/arm_gicv3.h"
+#include "hw/misc/unimp.h"
+#include "hw/timer/cmsdk-apb-dualtimer.h"
+#include "hw/watchdog/cmsdk-apb-watchdog.h"

 /* Define the layout of RAM and ROM in a board */
 typedef struct RAMInfo {
@@ -XXX,XX +XXX,XX @@ struct MPS3RMachineState {
     CMSDKAPBUART uart[MPS3R_CPU_MAX + MPS3R_UART_MAX];
     OrIRQState cpu_uart_oflow[MPS3R_CPU_MAX];
     OrIRQState uart_oflow;
+    CMSDKAPBWatchdog watchdog;
+    CMSDKAPBDualTimer dualtimer;
+    ArmSbconI2CState i2c[5];
+    Clock *clk;
 };

 #define TYPE_MPS3R_MACHINE "mps3r"
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
     MemoryRegion *sysmem = get_system_memory();
     DeviceState *gicdev;

+    mms->clk = clock_new(OBJECT(machine), "CLK");
+    clock_set_hz(mms->clk, CLK_FRQ);
+
     for (const RAMInfo *ri = mmc->raminfo; ri->name; ri++) {
         MemoryRegion *mr = mr_for_raminfo(mms, ri);
         memory_region_add_subregion(sysmem, ri->base, mr);
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
                     qdev_get_gpio_in(gicdev, combirq));
     }

+    for (int i = 0; i < 4; i++) {
+        /* CMSDK GPIO controllers */
+        g_autofree char *s = g_strdup_printf("gpio%d", i);
+        create_unimplemented_device(s, 0xe0000000 + i * 0x1000, 0x1000);
+    }
+
+    object_initialize_child(OBJECT(mms), "watchdog", &mms->watchdog,
+                            TYPE_CMSDK_APB_WATCHDOG);
+    qdev_connect_clock_in(DEVICE(&mms->watchdog), "WDOGCLK", mms->clk);
+    sysbus_realize(SYS_BUS_DEVICE(&mms->watchdog), &error_fatal);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&mms->watchdog), 0,
+                       qdev_get_gpio_in(gicdev, 0));
+    sysbus_mmio_map(SYS_BUS_DEVICE(&mms->watchdog), 0, 0xe0100000);
+
+    object_initialize_child(OBJECT(mms), "dualtimer", &mms->dualtimer,
+                            TYPE_CMSDK_APB_DUALTIMER);
+    qdev_connect_clock_in(DEVICE(&mms->dualtimer), "TIMCLK", mms->clk);
+    sysbus_realize(SYS_BUS_DEVICE(&mms->dualtimer), &error_fatal);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&mms->dualtimer), 0,
+                       qdev_get_gpio_in(gicdev, 3));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&mms->dualtimer), 1,
+                       qdev_get_gpio_in(gicdev, 1));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&mms->dualtimer), 2,
+                       qdev_get_gpio_in(gicdev, 2));
+    sysbus_mmio_map(SYS_BUS_DEVICE(&mms->dualtimer), 0, 0xe0101000);
+
+    for (int i = 0; i < ARRAY_SIZE(mms->i2c); i++) {
+        static const hwaddr i2cbase[] = {0xe0102000, /* Touch */
+                                         0xe0103000, /* Audio */
+                                         0xe0107000, /* Shield0 */
+                                         0xe0108000, /* Shield1 */
+                                         0xe0109000}; /* DDR4 EEPROM */
+        g_autofree char *s = g_strdup_printf("i2c%d", i);
+
+        object_initialize_child(OBJECT(mms), s, &mms->i2c[i],
93
+ TYPE_ARM_SBCON_I2C);
94
+ sysbus_realize(SYS_BUS_DEVICE(&mms->i2c[i]), &error_fatal);
95
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->i2c[i]), 0, i2cbase[i]);
96
+ if (i != 2 && i != 3) {
97
+ /*
98
+ * internal-only bus: mark it full to avoid user-created
99
+ * i2c devices being plugged into it.
100
+ */
101
+ qbus_mark_full(qdev_get_child_bus(DEVICE(&mms->i2c[i]), "i2c"));
102
+ }
103
+ }
104
+
105
mms->bootinfo.ram_size = machine->ram_size;
106
mms->bootinfo.board_id = -1;
107
mms->bootinfo.loader_start = mmc->loader_start;
109
--
108
--
110
2.20.1
109
2.34.1
111
110
112
111
Deleted patch
Implement the MVE VMULH insn, which performs a vector
multiply and returns the high half of the result.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-14-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 7 +++++++
target/arm/mve.decode | 3 +++
target/arm/mve_helper.c | 26 ++++++++++++++++++++++++++
target/arm/translate-mve.c | 2 ++
4 files changed, 38 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vsubw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmulhsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op

+VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
+VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
+
# Vector miscellaneous

VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_2OP(veor, 8, uint64_t, DO_EOR)
DO_2OP_U(vadd, DO_ADD)
DO_2OP_U(vsub, DO_SUB)
DO_2OP_U(vmul, DO_MUL)
+
+/*
+ * Because the computation type is at least twice as large as required,
+ * these work for both signed and unsigned source types.
+ */
+static inline uint8_t do_mulh_b(int32_t n, int32_t m)
+{
+    return (n * m) >> 8;
+}
+
+static inline uint16_t do_mulh_h(int32_t n, int32_t m)
+{
+    return (n * m) >> 16;
+}
+
+static inline uint32_t do_mulh_w(int64_t n, int64_t m)
+{
+    return (n * m) >> 32;
+}
+
+DO_2OP(vmulhsb, 1, int8_t, do_mulh_b)
+DO_2OP(vmulhsh, 2, int16_t, do_mulh_h)
+DO_2OP(vmulhsw, 4, int32_t, do_mulh_w)
+DO_2OP(vmulhub, 1, uint8_t, do_mulh_b)
+DO_2OP(vmulhuh, 2, uint16_t, do_mulh_h)
+DO_2OP(vmulhuw, 4, uint32_t, do_mulh_w)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_LOGIC(VEOR, gen_helper_mve_veor)
DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
+DO_2OP(VMULH_S, vmulhs)
+DO_2OP(VMULH_U, vmulhu)
--
2.20.1
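For reference, each lane of VMULH does a widening multiply and keeps only the high half of the product. A minimal scalar sketch of the unsigned 8-bit case (illustrative only; the function name is invented, the real per-lane helpers are the do_mulh_* functions in the hunk above):

    /* One unsigned 8-bit lane of VMULH: 8x8->16 multiply, keep bits [15:8] */
    static inline uint8_t vmulh_u8_lane(uint8_t n, uint8_t m)
    {
        return (uint8_t)(((uint32_t)n * m) >> 8);
    }

The signed variants differ only in the element type; because the computation type is wider than the inputs, the same shift is correct for both.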
Deleted patch
Implement the MVE VRMULH insn, which performs a rounding multiply
and then returns the high half.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-15-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 7 +++++++
target/arm/mve.decode | 3 +++
target/arm/mve_helper.c | 22 ++++++++++++++++++++++
target/arm/translate-mve.c | 2 ++
4 files changed, 34 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrmulhsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op

+VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
+VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
+
# Vector miscellaneous

VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ static inline uint32_t do_mulh_w(int64_t n, int64_t m)
return (n * m) >> 32;
}

+static inline uint8_t do_rmulh_b(int32_t n, int32_t m)
+{
+    return (n * m + (1U << 7)) >> 8;
+}
+
+static inline uint16_t do_rmulh_h(int32_t n, int32_t m)
+{
+    return (n * m + (1U << 15)) >> 16;
+}
+
+static inline uint32_t do_rmulh_w(int64_t n, int64_t m)
+{
+    return (n * m + (1U << 31)) >> 32;
+}
+
DO_2OP(vmulhsb, 1, int8_t, do_mulh_b)
DO_2OP(vmulhsh, 2, int16_t, do_mulh_h)
DO_2OP(vmulhsw, 4, int32_t, do_mulh_w)
DO_2OP(vmulhub, 1, uint8_t, do_mulh_b)
DO_2OP(vmulhuh, 2, uint16_t, do_mulh_h)
DO_2OP(vmulhuw, 4, uint32_t, do_mulh_w)
+
+DO_2OP(vrmulhsb, 1, int8_t, do_rmulh_b)
+DO_2OP(vrmulhsh, 2, int16_t, do_rmulh_h)
+DO_2OP(vrmulhsw, 4, int32_t, do_rmulh_w)
+DO_2OP(vrmulhub, 1, uint8_t, do_rmulh_b)
+DO_2OP(vrmulhuh, 2, uint16_t, do_rmulh_h)
+DO_2OP(vrmulhuw, 4, uint32_t, do_rmulh_w)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
+DO_2OP(VRMULH_S, vrmulhs)
+DO_2OP(VRMULH_U, vrmulhu)
--
2.20.1
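The rounding form differs from VMULH only in adding half of the discarded low part before the shift. A scalar sketch of one unsigned 8-bit lane, mirroring do_rmulh_b above (name invented):

    static inline uint8_t vrmulh_u8_lane(uint8_t n, uint8_t m)
    {
        /* add 1 << 7 so truncation of the low byte rounds to nearest */
        return (uint8_t)(((uint32_t)n * m + (1u << 7)) >> 8);
    }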
Deleted patch
Implement the MVE VMAX and VMIN insns.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-16-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 14 ++++++++++++++
target/arm/mve.decode | 5 +++++
target/arm/mve_helper.c | 14 ++++++++++++++
target/arm/translate-mve.c | 4 ++++
4 files changed, 37 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vrmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vrmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vrmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vrmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmaxsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vminsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op

+VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
+VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
+VMIN_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
+VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
+
# Vector miscellaneous

VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
DO_2OP(OP##h, 2, uint16_t, FN) \
DO_2OP(OP##w, 4, uint32_t, FN)

+/* provide signed 2-op helpers for all sizes */
+#define DO_2OP_S(OP, FN) \
+    DO_2OP(OP##b, 1, int8_t, FN) \
+    DO_2OP(OP##h, 2, int16_t, FN) \
+    DO_2OP(OP##w, 4, int32_t, FN)
+
#define DO_AND(N, M) ((N) & (M))
#define DO_BIC(N, M) ((N) & ~(M))
#define DO_ORR(N, M) ((N) | (M))
@@ -XXX,XX +XXX,XX @@ DO_2OP(vrmulhsw, 4, int32_t, do_rmulh_w)
DO_2OP(vrmulhub, 1, uint8_t, do_rmulh_b)
DO_2OP(vrmulhuh, 2, uint16_t, do_rmulh_h)
DO_2OP(vrmulhuw, 4, uint32_t, do_rmulh_w)
+
+#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
+#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
+
+DO_2OP_S(vmaxs, DO_MAX)
+DO_2OP_U(vmaxu, DO_MAX)
+DO_2OP_S(vmins, DO_MIN)
+DO_2OP_U(vminu, DO_MIN)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
+DO_2OP(VMAX_S, vmaxs)
+DO_2OP(VMAX_U, vmaxu)
+DO_2OP(VMIN_S, vmins)
+DO_2OP(VMIN_U, vminu)
--
2.20.1
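Signedness for VMAX/VMIN comes entirely from the element type that DO_2OP_S or DO_2OP_U instantiates; per lane the operation is just a compare-and-select. A one-lane sketch for signed bytes (illustrative, name invented):

    static inline int8_t vmax_s8_lane(int8_t n, int8_t m)
    {
        return n >= m ? n : m;   /* the MIN variant returns the other operand */
    }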
Deleted patch
Implement the MVE VABD insn.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-17-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 7 +++++++
target/arm/mve.decode | 3 +++
target/arm/mve_helper.c | 5 +++++
target/arm/translate-mve.c | 2 ++
4 files changed, 17 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vminsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vminub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vminuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vminuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vabdsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
VMIN_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op

+VABD_S 111 0 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
+VABD_U 111 1 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
+
# Vector miscellaneous

VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_2OP_S(vmaxs, DO_MAX)
DO_2OP_U(vmaxu, DO_MAX)
DO_2OP_S(vmins, DO_MIN)
DO_2OP_U(vminu, DO_MIN)
+
+#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
+
+DO_2OP_S(vabds, DO_ABD)
+DO_2OP_U(vabdu, DO_ABD)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
+DO_2OP(VABD_S, vabds)
+DO_2OP(VABD_U, vabdu)
--
2.20.1
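Per lane, VABD is an absolute difference. A scalar sketch for unsigned bytes, matching the DO_ABD macro above (name invented; note that C's integer promotions mean the subtraction for 8- and 16-bit lanes happens in int and cannot wrap):

    static inline uint8_t vabd_u8_lane(uint8_t n, uint8_t m)
    {
        return n >= m ? n - m : m - n;
    }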
Deleted patch
1
Implement the MVE VMULL insn, which multiplies two single
2
width integer elements to produce a double width result.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210617121628.20116-19-peter.maydell@linaro.org
7
---
8
target/arm/helper-mve.h | 14 ++++++++++++++
9
target/arm/mve.decode | 5 +++++
10
target/arm/mve_helper.c | 34 ++++++++++++++++++++++++++++++++++
11
target/arm/translate-mve.c | 4 ++++
12
4 files changed, 57 insertions(+)
13
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
17
+++ b/target/arm/helper-mve.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vhsubsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
19
DEF_HELPER_FLAGS_4(mve_vhsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
20
DEF_HELPER_FLAGS_4(mve_vhsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
21
DEF_HELPER_FLAGS_4(mve_vhsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
22
+
23
+DEF_HELPER_FLAGS_4(mve_vmullbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
24
+DEF_HELPER_FLAGS_4(mve_vmullbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
25
+DEF_HELPER_FLAGS_4(mve_vmullbsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
26
+DEF_HELPER_FLAGS_4(mve_vmullbub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
27
+DEF_HELPER_FLAGS_4(mve_vmullbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
28
+DEF_HELPER_FLAGS_4(mve_vmullbuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
29
+
30
+DEF_HELPER_FLAGS_4(mve_vmulltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
31
+DEF_HELPER_FLAGS_4(mve_vmulltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
32
+DEF_HELPER_FLAGS_4(mve_vmulltsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
33
+DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
34
+DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
35
+DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
36
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/mve.decode
39
+++ b/target/arm/mve.decode
40
@@ -XXX,XX +XXX,XX @@ VHADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
41
VHSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
42
VHSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
43
44
+VMULL_BS 111 0 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
45
+VMULL_BU 111 1 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
46
+VMULL_TS 111 0 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
47
+VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
48
+
49
# Vector miscellaneous
50
51
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
52
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/mve_helper.c
55
+++ b/target/arm/mve_helper.c
56
@@ -XXX,XX +XXX,XX @@ DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
57
DO_2OP(OP##h, 2, int16_t, FN) \
58
DO_2OP(OP##w, 4, int32_t, FN)
59
60
+/*
61
+ * "Long" operations where two half-sized inputs (taken from either the
62
+ * top or the bottom of the input vector) produce a double-width result.
63
+ * Here ESIZE, TYPE are for the input, and LESIZE, LTYPE for the output.
64
+ */
65
+#define DO_2OP_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
66
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
67
+ { \
68
+ LTYPE *d = vd; \
69
+ TYPE *n = vn, *m = vm; \
70
+ uint16_t mask = mve_element_mask(env); \
71
+ unsigned le; \
72
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
73
+ LTYPE r = FN((LTYPE)n[H##ESIZE(le * 2 + TOP)], \
74
+ m[H##ESIZE(le * 2 + TOP)]); \
75
+ mergemask(&d[H##LESIZE(le)], r, mask); \
76
+ } \
77
+ mve_advance_vpt(env); \
78
+ }
79
+
80
#define DO_AND(N, M) ((N) & (M))
81
#define DO_BIC(N, M) ((N) & ~(M))
82
#define DO_ORR(N, M) ((N) | (M))
83
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vadd, DO_ADD)
84
DO_2OP_U(vsub, DO_SUB)
85
DO_2OP_U(vmul, DO_MUL)
86
87
+DO_2OP_L(vmullbsb, 0, 1, int8_t, 2, int16_t, DO_MUL)
88
+DO_2OP_L(vmullbsh, 0, 2, int16_t, 4, int32_t, DO_MUL)
89
+DO_2OP_L(vmullbsw, 0, 4, int32_t, 8, int64_t, DO_MUL)
90
+DO_2OP_L(vmullbub, 0, 1, uint8_t, 2, uint16_t, DO_MUL)
91
+DO_2OP_L(vmullbuh, 0, 2, uint16_t, 4, uint32_t, DO_MUL)
92
+DO_2OP_L(vmullbuw, 0, 4, uint32_t, 8, uint64_t, DO_MUL)
93
+
94
+DO_2OP_L(vmulltsb, 1, 1, int8_t, 2, int16_t, DO_MUL)
95
+DO_2OP_L(vmulltsh, 1, 2, int16_t, 4, int32_t, DO_MUL)
96
+DO_2OP_L(vmulltsw, 1, 4, int32_t, 8, int64_t, DO_MUL)
97
+DO_2OP_L(vmulltub, 1, 1, uint8_t, 2, uint16_t, DO_MUL)
98
+DO_2OP_L(vmulltuh, 1, 2, uint16_t, 4, uint32_t, DO_MUL)
99
+DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
100
+
101
/*
102
* Because the computation type is at least twice as large as required,
103
* these work for both signed and unsigned source types.
104
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/translate-mve.c
107
+++ b/target/arm/translate-mve.c
108
@@ -XXX,XX +XXX,XX @@ DO_2OP(VHADD_S, vhadds)
109
DO_2OP(VHADD_U, vhaddu)
110
DO_2OP(VHSUB_S, vhsubs)
111
DO_2OP(VHSUB_U, vhsubu)
112
+DO_2OP(VMULL_BS, vmullbs)
113
+DO_2OP(VMULL_BU, vmullbu)
114
+DO_2OP(VMULL_TS, vmullts)
115
+DO_2OP(VMULL_TU, vmulltu)
116
--
117
2.20.1
118
119
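The VMULL patch above produces double-width output lanes by multiplying single-width inputs taken from either the bottom or the top of each input element pair. A scalar sketch of one signed bottom-half lane (illustrative only, name invented):

    static inline int16_t vmull_bottom_s8_lane(int8_t n, int8_t m)
    {
        return (int16_t)(n * m);   /* integer promotion widens before the multiply */
    }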
Deleted patch
Implement the MVE insn VMLSLDAV, which multiplies source elements,
alternately adding and subtracting them, and accumulates into a
64-bit result in a pair of general purpose registers.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-21-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 5 +++++
target/arm/mve.decode | 2 ++
target/arm/mve_helper.c | 5 +++++
target/arm/translate-mve.c | 11 +++++++++++
4 files changed, 23 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmlaldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)

DEF_HELPER_FLAGS_4(mve_vmlaldavuh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
DEF_HELPER_FLAGS_4(mve_vmlaldavuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vmlsldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav
VMLALDAV_S 1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
VMLALDAV_U 1111 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
+
+VMLSLDAV 1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_LDAV(vmlaldavxsw, 4, int32_t, true, +=, +=)

DO_LDAV(vmlaldavuh, 2, uint16_t, false, +=, +=)
DO_LDAV(vmlaldavuw, 4, uint32_t, false, +=, +=)
+
+DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
+DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
+DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
+DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
};
return do_long_dual_acc(s, a, fns[a->size][a->x]);
}
+
+static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
+{
+    static MVEGenDualAccOpFn * const fns[4][2] = {
+        { NULL, NULL },
+        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
+        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
+        { NULL, NULL },
+    };
+    return do_long_dual_acc(s, a, fns[a->size][a->x]);
+}
--
2.20.1
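Ignoring predication, VMLSLDAV walks the two source vectors, multiplies corresponding elements, and alternately adds and subtracts the products into a 64-bit accumulator. A rough scalar model (illustrative; the names and the fixed 16-bit element type are assumptions for the example):

    static int64_t vmlsldav_model(const int16_t *n, const int16_t *m, int elems,
                                  int64_t acc)
    {
        for (int e = 0; e < elems; e++) {
            int64_t prod = (int64_t)n[e] * m[e];
            acc = (e & 1) ? acc - prod : acc + prod;  /* add even lanes, subtract odd */
        }
        return acc;
    }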
Deleted patch
1
Implement the MVE VRMLALDAVH and VRMLSLDAVH insns, which accumulate
2
the results of a rounded multiply of pairs of elements into a 72-bit
3
accumulator, returning the top 64 bits in a pair of general purpose
4
registers.
5
1
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210617121628.20116-22-peter.maydell@linaro.org
9
---
10
target/arm/helper-mve.h | 8 ++++++++
11
target/arm/mve.decode | 7 +++++++
12
target/arm/mve_helper.c | 37 +++++++++++++++++++++++++++++++++++++
13
target/arm/translate-mve.c | 24 ++++++++++++++++++++++++
14
4 files changed, 76 insertions(+)
15
16
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-mve.h
19
+++ b/target/arm/helper-mve.h
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmlsldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
21
DEF_HELPER_FLAGS_4(mve_vmlsldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
22
DEF_HELPER_FLAGS_4(mve_vmlsldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
23
DEF_HELPER_FLAGS_4(mve_vmlsldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
24
+
25
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
26
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
27
+
28
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
29
+
30
+DEF_HELPER_FLAGS_4(mve_vrmlsldavhsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
31
+DEF_HELPER_FLAGS_4(mve_vrmlsldavhxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
32
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/mve.decode
35
+++ b/target/arm/mve.decode
36
@@ -XXX,XX +XXX,XX @@ VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
37
38
@vmlaldav .... .... . ... ... . ... . .... .... qm:3 . \
39
qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav
40
+@vmlaldav_nosz .... .... . ... ... . ... . .... .... qm:3 . \
41
+ qn=%qn rdahi=%rdahi rdalo=%rdalo size=0 &vmlaldav
42
VMLALDAV_S 1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
43
VMLALDAV_U 1111 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav
44
45
VMLSLDAV 1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav
46
+
47
+VRMLALDAVH_S 1110 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_nosz
48
+VRMLALDAVH_U 1111 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_nosz
49
+
50
+VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_nosz
51
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/mve_helper.c
54
+++ b/target/arm/mve_helper.c
55
@@ -XXX,XX +XXX,XX @@
56
*/
57
58
#include "qemu/osdep.h"
59
+#include "qemu/int128.h"
60
#include "cpu.h"
61
#include "internals.h"
62
#include "vec_internal.h"
63
@@ -XXX,XX +XXX,XX @@ DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
64
DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
65
DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
66
DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
67
+
68
+/*
69
+ * Rounding multiply add long dual accumulate high: we must keep
70
+ * a 72-bit internal accumulator value and return the top 64 bits.
71
+ */
72
+#define DO_LDAVH(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC, TO128) \
73
+ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
74
+ void *vm, uint64_t a) \
75
+ { \
76
+ uint16_t mask = mve_element_mask(env); \
77
+ unsigned e; \
78
+ TYPE *n = vn, *m = vm; \
79
+ Int128 acc = int128_lshift(TO128(a), 8); \
80
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
81
+ if (mask & 1) { \
82
+ if (e & 1) { \
83
+ acc = ODDACC(acc, TO128(n[H##ESIZE(e - 1 * XCHG)] * \
84
+ m[H##ESIZE(e)])); \
85
+ } else { \
86
+ acc = EVENACC(acc, TO128(n[H##ESIZE(e + 1 * XCHG)] * \
87
+ m[H##ESIZE(e)])); \
88
+ } \
89
+ acc = int128_add(acc, 1 << 7); \
90
+ } \
91
+ } \
92
+ mve_advance_vpt(env); \
93
+ return int128_getlo(int128_rshift(acc, 8)); \
94
+ }
95
+
96
+DO_LDAVH(vrmlaldavhsw, 4, int32_t, false, int128_add, int128_add, int128_makes64)
97
+DO_LDAVH(vrmlaldavhxsw, 4, int32_t, true, int128_add, int128_add, int128_makes64)
98
+
99
+DO_LDAVH(vrmlaldavhuw, 4, uint32_t, false, int128_add, int128_add, int128_make64)
100
+
101
+DO_LDAVH(vrmlsldavhsw, 4, int32_t, false, int128_add, int128_sub, int128_makes64)
102
+DO_LDAVH(vrmlsldavhxsw, 4, int32_t, true, int128_add, int128_sub, int128_makes64)
103
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/translate-mve.c
106
+++ b/target/arm/translate-mve.c
107
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
108
};
109
return do_long_dual_acc(s, a, fns[a->size][a->x]);
110
}
111
+
112
+static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
113
+{
114
+ static MVEGenDualAccOpFn * const fns[] = {
115
+ gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
116
+ };
117
+ return do_long_dual_acc(s, a, fns[a->x]);
118
+}
119
+
120
+static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
121
+{
122
+ static MVEGenDualAccOpFn * const fns[] = {
123
+ gen_helper_mve_vrmlaldavhuw, NULL,
124
+ };
125
+ return do_long_dual_acc(s, a, fns[a->x]);
126
+}
127
+
128
+static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
129
+{
130
+ static MVEGenDualAccOpFn * const fns[] = {
131
+ gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
132
+ };
133
+ return do_long_dual_acc(s, a, fns[a->x]);
134
+}
135
--
136
2.20.1
137
138
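The VRMLALDAVH/VRMLSLDAVH patch above keeps a 72-bit running sum and returns its top 64 bits. A rough scalar model of the add-only case, using the compiler's 128-bit integer type as the wide intermediate (illustrative only; predication is ignored here):

    static int64_t vrmlaldavh_model(const int32_t *n, const int32_t *m, int elems,
                                    int64_t a)
    {
        __int128 acc = (__int128)a << 8;          /* incoming accumulator, scaled up */
        for (int e = 0; e < elems; e++) {
            acc += (__int128)((int64_t)n[e] * m[e]);
            acc += 1 << 7;                        /* per-element rounding constant */
        }
        return (int64_t)(acc >> 8);               /* top 64 bits of the 72-bit sum */
    }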
1
Implement the scalar form of the MVE VADD insn. This takes the
1
Add the remaining devices (or unimplemented-device stubs) for
2
scalar operand from a general purpose register.
2
this board: SPI controllers, SCC, FPGAIO, I2S, RTC, the
3
QSPI write-config block, and ethernet.
3
4
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Message-id: 20210617121628.20116-23-peter.maydell@linaro.org
7
Message-id: 20240206132931.38376-13-peter.maydell@linaro.org
7
---
8
---
8
target/arm/helper-mve.h | 4 ++++
9
hw/arm/mps3r.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
9
target/arm/mve.decode | 7 ++++++
10
1 file changed, 74 insertions(+)
10
target/arm/mve_helper.c | 22 +++++++++++++++++++
11
target/arm/translate-mve.c | 45 ++++++++++++++++++++++++++++++++++++++
12
4 files changed, 78 insertions(+)
13
11
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
12
diff --git a/hw/arm/mps3r.c b/hw/arm/mps3r.c
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
14
--- a/hw/arm/mps3r.c
17
+++ b/target/arm/helper-mve.h
15
+++ b/hw/arm/mps3r.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
16
@@ -XXX,XX +XXX,XX @@
19
DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
17
#include "hw/char/cmsdk-apb-uart.h"
20
DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
18
#include "hw/i2c/arm_sbcon_i2c.h"
21
19
#include "hw/intc/arm_gicv3.h"
22
+DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
20
+#include "hw/misc/mps2-scc.h"
23
+DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
+#include "hw/misc/mps2-fpgaio.h"
24
+DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
#include "hw/misc/unimp.h"
23
+#include "hw/net/lan9118.h"
24
+#include "hw/rtc/pl031.h"
25
+#include "hw/ssi/pl022.h"
26
#include "hw/timer/cmsdk-apb-dualtimer.h"
27
#include "hw/watchdog/cmsdk-apb-watchdog.h"
28
29
@@ -XXX,XX +XXX,XX @@ struct MPS3RMachineState {
30
CMSDKAPBWatchdog watchdog;
31
CMSDKAPBDualTimer dualtimer;
32
ArmSbconI2CState i2c[5];
33
+ PL022State spi[3];
34
+ MPS2SCC scc;
35
+ MPS2FPGAIO fpgaio;
36
+ UnimplementedDeviceState i2s_audio;
37
+ PL031State rtc;
38
Clock *clk;
39
};
40
41
@@ -XXX,XX +XXX,XX @@ static const RAMInfo an536_raminfo[] = {
42
}
43
};
44
45
+static const int an536_oscclk[] = {
46
+ 24000000, /* 24MHz reference for RTC and timers */
47
+ 50000000, /* 50MHz ACLK */
48
+ 50000000, /* 50MHz MCLK */
49
+ 50000000, /* 50MHz GPUCLK */
50
+ 24576000, /* 24.576MHz AUDCLK */
51
+ 23750000, /* 23.75MHz HDLCDCLK */
52
+ 100000000, /* 100MHz DDR4_REF_CLK */
53
+};
25
+
54
+
26
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
55
static MemoryRegion *mr_for_raminfo(MPS3RMachineState *mms,
27
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
56
const RAMInfo *raminfo)
28
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
57
{
29
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
58
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
30
index XXXXXXX..XXXXXXX 100644
59
MPS3RMachineClass *mmc = MPS3R_MACHINE_GET_CLASS(mms);
31
--- a/target/arm/mve.decode
60
MemoryRegion *sysmem = get_system_memory();
32
+++ b/target/arm/mve.decode
61
DeviceState *gicdev;
33
@@ -XXX,XX +XXX,XX @@
62
+ QList *oscclk;
34
&vldr_vstr rn qd imm p a w size l u
63
35
&1op qd qm size
64
mms->clk = clock_new(OBJECT(machine), "CLK");
36
&2op qd qm qn size
65
clock_set_hz(mms->clk, CLK_FRQ);
37
+&2scalar qd qn rm size
66
@@ -XXX,XX +XXX,XX @@ static void mps3r_common_init(MachineState *machine)
38
67
}
39
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
68
}
40
# Note that both Rn and Qd are 3 bits only (no D bit)
69
41
@@ -XXX,XX +XXX,XX @@
70
+ for (int i = 0; i < ARRAY_SIZE(mms->spi); i++) {
42
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
71
+ g_autofree char *s = g_strdup_printf("spi%d", i);
43
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
72
+ hwaddr baseaddr = 0xe0104000 + i * 0x1000;
44
45
+@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
46
+
73
+
47
# Vector loads and stores
74
+ object_initialize_child(OBJECT(mms), s, &mms->spi[i], TYPE_PL022);
48
75
+ sysbus_realize(SYS_BUS_DEVICE(&mms->spi[i]), &error_fatal);
49
# Widening loads and narrowing stores:
76
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->spi[i]), 0, baseaddr);
50
@@ -XXX,XX +XXX,XX @@ VRMLALDAVH_S 1110 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_no
77
+ sysbus_connect_irq(SYS_BUS_DEVICE(&mms->spi[i]), 0,
51
VRMLALDAVH_U 1111 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_nosz
78
+ qdev_get_gpio_in(gicdev, 22 + i));
52
53
VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_nosz
54
+
55
+# Scalar operations
56
+
57
+VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
58
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/mve_helper.c
61
+++ b/target/arm/mve_helper.c
62
@@ -XXX,XX +XXX,XX @@ DO_2OP_S(vhsubs, do_vhsub_s)
63
DO_2OP_U(vhsubu, do_vhsub_u)
64
65
66
+#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
67
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
68
+ uint32_t rm) \
69
+ { \
70
+ TYPE *d = vd, *n = vn; \
71
+ TYPE m = rm; \
72
+ uint16_t mask = mve_element_mask(env); \
73
+ unsigned e; \
74
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
75
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m), mask); \
76
+ } \
77
+ mve_advance_vpt(env); \
78
+ }
79
+ }
79
+
80
+
80
+/* provide unsigned 2-op scalar helpers for all sizes */
81
+ object_initialize_child(OBJECT(mms), "scc", &mms->scc, TYPE_MPS2_SCC);
81
+#define DO_2OP_SCALAR_U(OP, FN) \
82
+ qdev_prop_set_uint32(DEVICE(&mms->scc), "scc-cfg0", 0);
82
+ DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
83
+ qdev_prop_set_uint32(DEVICE(&mms->scc), "scc-cfg4", 0x2);
83
+ DO_2OP_SCALAR(OP##h, 2, uint16_t, FN) \
84
+ qdev_prop_set_uint32(DEVICE(&mms->scc), "scc-aid", 0x00200008);
84
+ DO_2OP_SCALAR(OP##w, 4, uint32_t, FN)
85
+ qdev_prop_set_uint32(DEVICE(&mms->scc), "scc-id", 0x41055360);
86
+ oscclk = qlist_new();
87
+ for (int i = 0; i < ARRAY_SIZE(an536_oscclk); i++) {
88
+ qlist_append_int(oscclk, an536_oscclk[i]);
89
+ }
90
+ qdev_prop_set_array(DEVICE(&mms->scc), "oscclk", oscclk);
91
+ sysbus_realize(SYS_BUS_DEVICE(&mms->scc), &error_fatal);
92
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->scc), 0, 0xe0200000);
85
+
93
+
86
+DO_2OP_SCALAR_U(vadd_scalar, DO_ADD)
94
+ create_unimplemented_device("i2s-audio", 0xe0201000, 0x1000);
87
+
95
+
88
/*
96
+ object_initialize_child(OBJECT(mms), "fpgaio", &mms->fpgaio,
89
* Multiply add long dual accumulate ops.
97
+ TYPE_MPS2_FPGAIO);
90
*/
98
+ qdev_prop_set_uint32(DEVICE(&mms->fpgaio), "prescale-clk", an536_oscclk[1]);
91
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
99
+ qdev_prop_set_uint32(DEVICE(&mms->fpgaio), "num-leds", 10);
92
index XXXXXXX..XXXXXXX 100644
100
+ qdev_prop_set_bit(DEVICE(&mms->fpgaio), "has-switches", true);
93
--- a/target/arm/translate-mve.c
101
+ qdev_prop_set_bit(DEVICE(&mms->fpgaio), "has-dbgctrl", false);
94
+++ b/target/arm/translate-mve.c
102
+ sysbus_realize(SYS_BUS_DEVICE(&mms->fpgaio), &error_fatal);
95
@@ -XXX,XX +XXX,XX @@
103
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->fpgaio), 0, 0xe0202000);
96
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
97
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
98
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
99
+typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
100
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
101
102
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
103
@@ -XXX,XX +XXX,XX @@ DO_2OP(VMULL_BU, vmullbu)
104
DO_2OP(VMULL_TS, vmullts)
105
DO_2OP(VMULL_TU, vmulltu)
106
107
+static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
108
+ MVEGenTwoOpScalarFn fn)
109
+{
110
+ TCGv_ptr qd, qn;
111
+ TCGv_i32 rm;
112
+
104
+
113
+ if (!dc_isar_feature(aa32_mve, s) ||
105
+ create_unimplemented_device("clcd", 0xe0209000, 0x1000);
114
+ !mve_check_qreg_bank(s, a->qd | a->qn) ||
115
+ !fn) {
116
+ return false;
117
+ }
118
+ if (a->rm == 13 || a->rm == 15) {
119
+ /* UNPREDICTABLE */
120
+ return false;
121
+ }
122
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
123
+ return true;
124
+ }
125
+
106
+
126
+ qd = mve_qreg_ptr(a->qd);
107
+ object_initialize_child(OBJECT(mms), "rtc", &mms->rtc, TYPE_PL031);
127
+ qn = mve_qreg_ptr(a->qn);
108
+ sysbus_realize(SYS_BUS_DEVICE(&mms->rtc), &error_fatal);
128
+ rm = load_reg(s, a->rm);
109
+ sysbus_mmio_map(SYS_BUS_DEVICE(&mms->rtc), 0, 0xe020a000);
129
+ fn(cpu_env, qd, qn, rm);
110
+ sysbus_connect_irq(SYS_BUS_DEVICE(&mms->rtc), 0,
130
+ tcg_temp_free_i32(rm);
111
+ qdev_get_gpio_in(gicdev, 4));
131
+ tcg_temp_free_ptr(qd);
132
+ tcg_temp_free_ptr(qn);
133
+ mve_update_eci(s);
134
+ return true;
135
+}
136
+
112
+
137
+#define DO_2OP_SCALAR(INSN, FN) \
113
+ /*
138
+ static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
114
+ * In hardware this is a LAN9220; the LAN9118 is software compatible
139
+ { \
115
+ * except that it doesn't support the checksum-offload feature.
140
+ static MVEGenTwoOpScalarFn * const fns[] = { \
116
+ */
141
+ gen_helper_mve_##FN##b, \
117
+ lan9118_init(0xe0300000,
142
+ gen_helper_mve_##FN##h, \
118
+ qdev_get_gpio_in(gicdev, 18));
143
+ gen_helper_mve_##FN##w, \
144
+ NULL, \
145
+ }; \
146
+ return do_2op_scalar(s, a, fns[a->size]); \
147
+ }
148
+
119
+
149
+DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
120
+ create_unimplemented_device("usb", 0xe0301000, 0x1000);
121
+ create_unimplemented_device("qspi-write-config", 0xe0600000, 0x1000);
150
+
122
+
151
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
123
mms->bootinfo.ram_size = machine->ram_size;
152
MVEGenDualAccOpFn *fn)
124
mms->bootinfo.board_id = -1;
153
{
125
mms->bootinfo.loader_start = mmc->loader_start;
154
--
126
--
155
2.20.1
127
2.34.1
156
128
157
129
Deleted patch
1
Implement the scalar variants of the MVE VHADD and VHSUB insns.
2
1
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210617121628.20116-25-peter.maydell@linaro.org
6
---
7
target/arm/helper-mve.h | 16 ++++++++++++++++
8
target/arm/mve.decode | 4 ++++
9
target/arm/mve_helper.c | 8 ++++++++
10
target/arm/translate-mve.c | 4 ++++
11
4 files changed, 32 insertions(+)
12
13
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper-mve.h
16
+++ b/target/arm/helper-mve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vmul_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
18
DEF_HELPER_FLAGS_4(mve_vmul_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_4(mve_vmul_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
20
21
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
24
+
25
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
+
29
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
+
33
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
+
37
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
38
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
39
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
40
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/mve.decode
43
+++ b/target/arm/mve.decode
44
@@ -XXX,XX +XXX,XX @@ VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_no
45
VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
46
VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
47
VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
48
+VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
49
+VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
50
+VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
51
+VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
52
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/mve_helper.c
55
+++ b/target/arm/mve_helper.c
56
@@ -XXX,XX +XXX,XX @@ DO_2OP_U(vhsubu, do_vhsub_u)
57
DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
58
DO_2OP_SCALAR(OP##h, 2, uint16_t, FN) \
59
DO_2OP_SCALAR(OP##w, 4, uint32_t, FN)
60
+#define DO_2OP_SCALAR_S(OP, FN) \
61
+ DO_2OP_SCALAR(OP##b, 1, int8_t, FN) \
62
+ DO_2OP_SCALAR(OP##h, 2, int16_t, FN) \
63
+ DO_2OP_SCALAR(OP##w, 4, int32_t, FN)
64
65
DO_2OP_SCALAR_U(vadd_scalar, DO_ADD)
66
DO_2OP_SCALAR_U(vsub_scalar, DO_SUB)
67
DO_2OP_SCALAR_U(vmul_scalar, DO_MUL)
68
+DO_2OP_SCALAR_S(vhadds_scalar, do_vhadd_s)
69
+DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
70
+DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
71
+DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
72
73
/*
74
* Multiply add long dual accumulate ops.
75
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/translate-mve.c
78
+++ b/target/arm/translate-mve.c
79
@@ -XXX,XX +XXX,XX @@ static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
80
DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
81
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
82
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
83
+DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
84
+DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
85
+DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
86
+DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
87
88
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
89
MVEGenDualAccOpFn *fn)
90
--
91
2.20.1
92
93
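The scalar VHADD/VHSUB patch above reuses the per-lane do_vhadd_*/do_vhsub_* helpers introduced by the earlier vector patch; the operation is a halving add or subtract, i.e. the exact sum or difference computed in a wider type and then shifted right by one. A sketch of the signed 8-bit halving add (name invented, assuming that generic semantics):

    static inline int8_t vhadd_s8_lane(int8_t n, int8_t m)
    {
        return (int8_t)(((int32_t)n + m) >> 1);   /* no overflow: sum is widened first */
    }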
Deleted patch
1
Implement the MVE VBRSR insn, which reverses a specified
2
number of bits in each element, setting the rest to zero.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210617121628.20116-26-peter.maydell@linaro.org
7
---
8
target/arm/helper-mve.h | 4 ++++
9
target/arm/mve.decode | 1 +
10
target/arm/mve_helper.c | 43 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-mve.c | 1 +
12
4 files changed, 49 insertions(+)
13
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-mve.h
17
+++ b/target/arm/helper-mve.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
22
+DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
+DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
25
+
26
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
27
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
28
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
29
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/mve.decode
32
+++ b/target/arm/mve.decode
33
@@ -XXX,XX +XXX,XX @@ VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
34
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
35
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
36
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
37
+VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
38
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/mve_helper.c
41
+++ b/target/arm/mve_helper.c
42
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
43
DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
44
DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
45
46
+static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
47
+{
48
+ m &= 0xff;
49
+ if (m == 0) {
50
+ return 0;
51
+ }
52
+ n = revbit8(n);
53
+ if (m < 8) {
54
+ n >>= 8 - m;
55
+ }
56
+ return n;
57
+}
58
+
59
+static inline uint32_t do_vbrsrh(uint32_t n, uint32_t m)
60
+{
61
+ m &= 0xff;
62
+ if (m == 0) {
63
+ return 0;
64
+ }
65
+ n = revbit16(n);
66
+ if (m < 16) {
67
+ n >>= 16 - m;
68
+ }
69
+ return n;
70
+}
71
+
72
+static inline uint32_t do_vbrsrw(uint32_t n, uint32_t m)
73
+{
74
+ m &= 0xff;
75
+ if (m == 0) {
76
+ return 0;
77
+ }
78
+ n = revbit32(n);
79
+ if (m < 32) {
80
+ n >>= 32 - m;
81
+ }
82
+ return n;
83
+}
84
+
85
+DO_2OP_SCALAR(vbrsrb, 1, uint8_t, do_vbrsrb)
86
+DO_2OP_SCALAR(vbrsrh, 2, uint16_t, do_vbrsrh)
87
+DO_2OP_SCALAR(vbrsrw, 4, uint32_t, do_vbrsrw)
88
+
89
/*
90
* Multiply add long dual accumulate ops.
91
*/
92
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/arm/translate-mve.c
95
+++ b/target/arm/translate-mve.c
96
@@ -XXX,XX +XXX,XX @@ DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
97
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
98
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
99
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
100
+DO_2OP_SCALAR(VBRSR, vbrsr)
101
102
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
103
MVEGenDualAccOpFn *fn)
104
--
105
2.20.1
106
107
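Per lane, VBRSR reverses the low m bits of the element and zeroes the rest, where m comes from the low byte of the scalar operand. A scalar sketch of the 8-bit case, mirroring do_vbrsrb above (name invented):

    static inline uint8_t vbrsr_u8_lane(uint8_t n, uint32_t m)
    {
        uint8_t r = 0;
        m &= 0xff;
        if (m == 0) {
            return 0;
        }
        for (int i = 0; i < 8; i++) {             /* bit-reverse the whole byte */
            r |= ((n >> i) & 1) << (7 - i);
        }
        return m < 8 ? (uint8_t)(r >> (8 - m)) : r;   /* keep only the low m bits */
    }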
Implement the MVE VPST insn, which sets the predicate mask
fields in the VPR to the immediate value encoded in the insn.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-27-peter.maydell@linaro.org
---
target/arm/mve.decode | 4 +++
target/arm/translate-mve.c | 59 ++++++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+)

diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+
+# Predicate operations
+%mask_22_13 22:1 13:3
+VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ static void mve_update_eci(DisasContext *s)
}
}

+static void mve_update_and_store_eci(DisasContext *s)
+{
+    /*
+     * For insns which don't call a helper function that will call
+     * mve_advance_vpt(), this version updates s->eci and also stores
+     * it out to the CPUState field.
+     */
+    if (s->eci) {
+        mve_update_eci(s);
+        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
+    }
+}
+
static bool mve_skip_first_beat(DisasContext *s)
{
/* Return true if PSR.ECI says we must skip the first beat of this insn */
@@ -XXX,XX +XXX,XX @@ static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
};
return do_long_dual_acc(s, a, fns[a->x]);
}
+
+static bool trans_VPST(DisasContext *s, arg_VPST *a)
+{
+    TCGv_i32 vpr;
+
+    /* mask == 0 is a "related encoding" */
+    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+    /*
+     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
+     * being adjacent fields in the register.
+     *
+     * This insn is not predicated, but it is subject to beat-wise
+     * execution, and the mask is updated on the odd-numbered beats.
+     * So if PSR.ECI says we should skip beat 1, we mustn't update the
+     * 01 mask field.
+     */
+    vpr = load_cpu_field(v7m.vpr);
+    switch (s->eci) {
+    case ECI_NONE:
+    case ECI_A0:
+        /* Update both 01 and 23 fields */
+        tcg_gen_deposit_i32(vpr, vpr,
+                            tcg_constant_i32(a->mask | (a->mask << 4)),
+                            R_V7M_VPR_MASK01_SHIFT,
+                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
+        break;
+    case ECI_A0A1:
+    case ECI_A0A1A2:
+    case ECI_A0A1A2B0:
+        /* Update only the 23 mask field */
+        tcg_gen_deposit_i32(vpr, vpr,
+                            tcg_constant_i32(a->mask),
+                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    store_cpu_field(vpr, v7m.vpr);
+    mve_update_and_store_eci(s);
+    return true;
+}
--
2.20.1

Add documentation for the mps3-an536 board type.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240206132931.38376-14-peter.maydell@linaro.org
---
docs/system/arm/mps2.rst | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/mps2.rst
+++ b/docs/system/arm/mps2.rst
@@ -XXX,XX +XXX,XX @@
-Arm MPS2 and MPS3 boards (``mps2-an385``, ``mps2-an386``, ``mps2-an500``, ``mps2-an505``, ``mps2-an511``, ``mps2-an521``, ``mps3-an524``, ``mps3-an547``)
-=========================================================================================================================================================
+Arm MPS2 and MPS3 boards (``mps2-an385``, ``mps2-an386``, ``mps2-an500``, ``mps2-an505``, ``mps2-an511``, ``mps2-an521``, ``mps3-an524``, ``mps3-an536``, ``mps3-an547``)
+=========================================================================================================================================================================

-These board models all use Arm M-profile CPUs.
+These board models use Arm M-profile or R-profile CPUs.

The Arm MPS2, MPS2+ and MPS3 dev boards are FPGA based (the 2+ has a
bigger FPGA but is otherwise the same as the 2; the 3 has a bigger
@@ -XXX,XX +XXX,XX @@ FPGA image.

QEMU models the following FPGA images:

+FPGA images using M-profile CPUs:
+
``mps2-an385``
Cortex-M3 as documented in Arm Application Note AN385
``mps2-an386``
@@ -XXX,XX +XXX,XX @@ QEMU models the following FPGA images:
``mps3-an547``
Cortex-M55 on an MPS3, as documented in Arm Application Note AN547

+FPGA images using R-profile CPUs:
+
+``mps3-an536``
+ Dual Cortex-R52 on an MPS3, as documented in Arm Application Note AN536
+
Differences between QEMU and real hardware:

- AN385/AN386 remapping of low 16K of memory to either ZBT SSRAM1 or to
@@ -XXX,XX +XXX,XX @@ Differences between QEMU and real hardware:
flash, but only as simple ROM, so attempting to rewrite the flash
from the guest will fail
- QEMU does not model the USB controller in MPS3 boards
+- AN536 does not support runtime control of CPU reset and halt via
+ the SCC CFG_REG0 register.
+- AN536 does not support enabling or disabling the flash and ATCM
+ interfaces via the SCC CFG_REG1 register.
+- AN536 does not support setting of the initial vector table
+ base address via the SCC CFG_REG6 and CFG_REG7 register config,
+ and does not provide a mechanism for specifying these values at
+ startup, so all guest images must be built to start from TCM
+ (i.e. to expect the interrupt vector base at 0 from reset).
+- AN536 defaults to only creating a single CPU; this is the equivalent
+ of the way the real FPGA image usually runs with the second Cortex-R52
+ held in halt via the initial SCC CFG_REG0 register setting. You can
+ create the second CPU with ``-smp 2``; both CPUs will then start
+ execution immediately on startup.
+
+Note that for the AN536 the first UART is accessible only by
+CPU0, and the second UART is accessible only by CPU1. The
+first UART accessible shared between both CPUs is the third
+UART. Guest software might therefore be built to use either
+the first UART or the third UART; if you don't see any output
+from the UART you are looking at, try one of the others.
+(Even if the AN536 machine is started with a single CPU and so
+no "CPU1-only UART", the UART numbering remains the same,
+with the third UART being the first of the shared ones.)

Machine-specific options
""""""""""""""""""""""""
--
2.34.1
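When no beats of the insn have already executed, trans_VPST above writes the 4-bit immediate into both the MASK01 and MASK23 fields of the VPR with a single deposit. A scalar sketch of that update (the field positions used here are assumptions for illustration, not the real R_V7M_VPR_* definitions):

    static uint32_t vpst_write_masks(uint32_t vpr, uint32_t mask)
    {
        const int mask01_shift = 16, mask23_shift = 20;   /* assumed bit positions */
        vpr &= ~((0xfu << mask01_shift) | (0xfu << mask23_shift));
        return vpr | (mask << mask01_shift) | (mask << mask23_shift);
    }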