1
The following changes since commit 1ea06abceec61b6f3ab33dadb0510b6e09fb61e2:
1
Couple of trivial fixes for rc3...
2
2
3
Merge remote-tracking branch 'remotes/berrange-gitlab/tags/misc-fixes-pull-request' into staging (2021-06-14 15:59:13 +0100)
3
The following changes since commit 20661b75ea6093f5e59079d00a778a972d6732c5:
4
5
Merge tag 'pull-ppc-20220404' of https://github.com/legoater/qemu into staging (2022-04-04 15:48:55 +0100)
4
6
5
are available in the Git repository at:
7
are available in the Git repository at:
6
8
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210615
9
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220405
8
10
9
for you to fetch changes up to c611c956c7fdce651e30687b1f5d19b4cab78b6a:
11
for you to fetch changes up to 80b952bb694a90f7e530d407b01066894e64a443:
10
12
11
include/qemu/int128.h: Add function to create Int128 from int64_t (2021-06-15 16:18:50 +0100)
13
docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation. (2022-04-05 09:29:28 +0100)
12
14
13
----------------------------------------------------------------
15
----------------------------------------------------------------
14
target-arm queue:
16
target-arm queue:
15
* hw/intc/arm_gicv3_cpuif: Tolerate spurious EOIR writes
17
* docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation.
16
* handle some UNALLOCATED decode cases correctly rather
18
* xlnx-bbram: hw/nvram: Fix uninitialized Error *
17
than asserting
18
* hw: virt: consider hw_compat_6_0
19
* hw/arm: add quanta-gbs-bmc machine
20
* hw/intc/armv7m_nvic: Remove stale comment
21
* arm, acpi: Remove dependency on presence of 'virt' board
22
* target/arm: Fix mte page crossing test
23
* hw/arm: quanta-q71l add pca954x muxes
24
* target/arm: First few parts of MVE support
25
19
26
----------------------------------------------------------------
20
----------------------------------------------------------------
27
Heinrich Schuchardt (1):
21
Pavel Pisa (1):
28
hw: virt: consider hw_compat_6_0
22
docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation.
29
23
30
Jean-Philippe Brucker (1):
24
Tong Ho (1):
31
hw/intc/arm_gicv3_cpuif: Tolerate spurious EOIR writes
25
xlnx-bbram: hw/nvram: Fix uninitialized Error *
32
26
33
Patrick Venture (5):
27
docs/system/devices/can.rst | 6 +++---
34
hw/arm: add quanta-gbs-bmc machine
28
hw/nvram/xlnx-bbram.c | 2 +-
35
hw/arm: quanta-gbs-bmc add i2c comments
29
2 files changed, 4 insertions(+), 4 deletions(-)
36
hw/arm: gsj add i2c comments
37
hw/arm: gsj add pca9548
38
hw/arm: quanta-q71l add pca954x muxes
39
40
Peter Maydell (17):
41
hw/intc/armv7m_nvic: Remove stale comment
42
hw/acpi: Provide stub version of acpi_ghes_record_errors()
43
hw/acpi: Provide function acpi_ghes_present()
44
target/arm: Use acpi_ghes_present() to see if we report ACPI memory errors
45
target/arm: Provide and use H8 and H1_8 macros
46
target/arm: Enable FPSCR.QC bit for MVE
47
target/arm: Handle VPR semantics in existing code
48
target/arm: Add handling for PSR.ECI/ICI
49
target/arm: Let vfp_access_check() handle late NOCP checks
50
target/arm: Implement MVE LCTP
51
target/arm: Implement MVE WLSTP insn
52
target/arm: Implement MVE DLSTP
53
target/arm: Implement MVE LETP insn
54
target/arm: Add framework for MVE decode
55
target/arm: Move expand_pred_b() data to vec_helper.c
56
bitops.h: Provide hswap32(), hswap64(), wswap64() swapping operations
57
include/qemu/int128.h: Add function to create Int128 from int64_t
58
59
Richard Henderson (4):
60
target/arm: Diagnose UNALLOCATED in disas_simd_two_reg_misc_fp16
61
target/arm: Remove fprintf from disas_simd_mod_imm
62
target/arm: Diagnose UNALLOCATED in disas_simd_three_reg_same_fp16
63
target/arm: Fix mte page crossing test
64
65
include/hw/acpi/ghes.h | 9 +
66
include/qemu/bitops.h | 29 +++
67
include/qemu/int128.h | 10 +
68
target/arm/translate-a32.h | 2 +
69
target/arm/translate.h | 9 +
70
target/arm/vec_internal.h | 9 +
71
target/arm/mve.decode | 20 ++
72
target/arm/t32.decode | 15 +-
73
hw/acpi/ghes-stub.c | 22 +++
74
hw/acpi/ghes.c | 17 ++
75
hw/arm/aspeed.c | 11 +-
76
hw/arm/npcm7xx_boards.c | 107 ++++++++++-
77
hw/arm/virt.c | 2 +
78
hw/intc/arm_gicv3_cpuif.c | 5 +-
79
hw/intc/armv7m_nvic.c | 6 -
80
target/arm/kvm64.c | 6 +-
81
target/arm/m_helper.c | 54 +++++-
82
target/arm/mte_helper.c | 2 +-
83
target/arm/sve_helper.c | 381 +++++++++++++-------------------------
84
target/arm/translate-a64.c | 87 +++++----
85
target/arm/translate-m-nocp.c | 16 +-
86
target/arm/translate-mve.c | 29 +++
87
target/arm/translate-vfp.c | 65 +++++--
88
target/arm/translate.c | 300 ++++++++++++++++++++++++++++--
89
target/arm/vec_helper.c | 116 +++++++++++-
90
target/arm/vfp_helper.c | 3 +-
91
tests/tcg/aarch64/mte-7.c | 31 ++++
92
hw/acpi/meson.build | 6 +-
93
hw/arm/Kconfig | 2 +
94
target/arm/meson.build | 2 +
95
tests/tcg/aarch64/Makefile.target | 2 +-
96
31 files changed, 1019 insertions(+), 356 deletions(-)
97
create mode 100644 target/arm/mve.decode
98
create mode 100644 hw/acpi/ghes-stub.c
99
create mode 100644 target/arm/translate-mve.c
100
create mode 100644 tests/tcg/aarch64/mte-7.c
101
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Tong Ho <tong.ho@xilinx.com>
2
2
3
This fprintf+assert has been in place since the beginning.
3
This adds required initialization of Error * variable.
4
It is prior to the fp_access_check, so we're still good to
5
raise sigill here.
6
4
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/381
5
Signed-off-by: Tong Ho <tong.ho@xilinx.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Message-id: 20210604183506.916654-2-richard.henderson@linaro.org
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
8
---
14
target/arm/translate-a64.c | 4 ++--
9
hw/nvram/xlnx-bbram.c | 2 +-
15
1 file changed, 2 insertions(+), 2 deletions(-)
10
1 file changed, 1 insertion(+), 1 deletion(-)
16
11
17
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
12
diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c
18
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate-a64.c
14
--- a/hw/nvram/xlnx-bbram.c
20
+++ b/target/arm/translate-a64.c
15
+++ b/hw/nvram/xlnx-bbram.c
21
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
16
@@ -XXX,XX +XXX,XX @@ static bool bbram_pgm_enabled(XlnxBBRam *s)
22
case 0x7f: /* FSQRT (vector) */
17
23
break;
18
static void bbram_bdrv_error(XlnxBBRam *s, int rc, gchar *detail)
24
default:
19
{
25
- fprintf(stderr, "%s: insn 0x%04x fpop 0x%2x\n", __func__, insn, fpop);
20
- Error *errp;
26
- g_assert_not_reached();
21
+ Error *errp = NULL;
27
+ unallocated_encoding(s);
22
28
+ return;
23
error_setg_errno(&errp, -rc, "%s: BBRAM backstore %s failed.",
29
}
24
blk_name(s->blk), detail);
30
31
32
--
25
--
33
2.20.1
26
2.25.1
34
35
diff view generated by jsdifflib
1
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
1
From: Pavel Pisa <pisa@cmp.felk.cvut.cz>
2
2
3
Commit 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access
3
Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
4
check logic") added an assert_not_reached() if the guest writes the EOIR
4
Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
5
register while no interrupt is active.
5
Message-id: 20220402204523.32643-1-pisa@cmp.felk.cvut.cz
6
7
It turns out some software does this: EDK2, in
8
GicV3ExitBootServicesEvent(), unconditionally write EOIR for all
9
interrupts that it manages. This now causes QEMU to abort when running
10
UEFI on a VM with GICv3. Although it is UNPREDICTABLE behavior and EDK2
11
does need fixing, the punishment seems a little harsh, especially since
12
icc_eoir_write() already tolerates writes of nonexistent interrupt
13
numbers. Display a guest error and tolerate spurious EOIR writes.
14
15
Fixes: 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access check logic")
16
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
17
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
18
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
19
Tested-by: Alex Bennée <alex.bennee@linaro.org>
20
Message-id: 20210604130352.1887560-1-jean-philippe@linaro.org
21
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
---
7
---
24
hw/intc/arm_gicv3_cpuif.c | 5 ++++-
8
docs/system/devices/can.rst | 6 +++---
25
1 file changed, 4 insertions(+), 1 deletion(-)
9
1 file changed, 3 insertions(+), 3 deletions(-)
26
10
27
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
11
diff --git a/docs/system/devices/can.rst b/docs/system/devices/can.rst
28
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
29
--- a/hw/intc/arm_gicv3_cpuif.c
13
--- a/docs/system/devices/can.rst
30
+++ b/hw/intc/arm_gicv3_cpuif.c
14
+++ b/docs/system/devices/can.rst
31
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ Links to other resources
32
16
(5) `GNU/Linux, CAN and CANopen in Real-time Control Applications Slides from LinuxDays 2017 (include updated RTLWS 2015 content) <https://www.linuxdays.cz/2017/video/Pavel_Pisa-CAN_canopen.pdf>`_
33
#include "qemu/osdep.h"
17
(6) `Linux SocketCAN utilities <https://github.com/linux-can/can-utils>`_
34
#include "qemu/bitops.h"
18
(7) `CTU CAN FD project including core VHDL design, Linux driver, test utilities etc. <https://gitlab.fel.cvut.cz/canbus/ctucanfd_ip_core>`_
35
+#include "qemu/log.h"
19
- (8) `CTU CAN FD Core Datasheet Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/Progdokum.pdf>`_
36
#include "qemu/main-loop.h"
20
- (9) `CTU CAN FD Core System Architecture Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/ctu_can_fd_architecture.pdf>`_
37
#include "trace.h"
21
- (10) `CTU CAN FD Driver Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/driver_doc/ctucanfd-driver.html>`_
38
#include "gicv3_internal.h"
22
+ (8) `CTU CAN FD Core Datasheet Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/Datasheet.pdf>`_
39
@@ -XXX,XX +XXX,XX @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri,
23
+ (9) `CTU CAN FD Core System Architecture Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/System_Architecture.pdf>`_
40
}
24
+ (10) `CTU CAN FD Driver Documentation <https://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/linux_driver/build/ctucanfd-driver.html>`_
41
break;
25
(11) `Integration with PCIe interfacing for Intel/Altera Cyclone IV based board <https://gitlab.fel.cvut.cz/canbus/pcie-ctu_can_fd>`_
42
default:
43
- g_assert_not_reached();
44
+ qemu_log_mask(LOG_GUEST_ERROR,
45
+ "%s: IRQ %d isn't active\n", __func__, irq);
46
+ return;
47
}
48
49
icc_drop_prio(cs, grp);
50
--
26
--
51
2.20.1
27
2.25.1
52
53
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
The default of this switch is truly unreachable.
4
The switch selector is 3 bits, and all 8 cases are present.
5
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Message-id: 20210604183506.916654-3-richard.henderson@linaro.org
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate-a64.c | 1 -
13
1 file changed, 1 deletion(-)
14
15
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.c
18
+++ b/target/arm/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
20
}
21
break;
22
default:
23
- fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
24
g_assert_not_reached();
25
}
26
27
--
28
2.20.1
29
30
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
This fprintf+assert has been in place since the beginning.
4
It is after to the fp_access_check, so we need to move the
5
check up. Fold that in to the pairwise filter.
6
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Message-id: 20210604183506.916654-4-richard.henderson@linaro.org
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
target/arm/translate-a64.c | 82 +++++++++++++++++++++++---------------
14
1 file changed, 50 insertions(+), 32 deletions(-)
15
16
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a64.c
19
+++ b/target/arm/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
21
*/
22
static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
23
{
24
- int opcode, fpopcode;
25
- int is_q, u, a, rm, rn, rd;
26
- int datasize, elements;
27
- int pass;
28
+ int opcode = extract32(insn, 11, 3);
29
+ int u = extract32(insn, 29, 1);
30
+ int a = extract32(insn, 23, 1);
31
+ int is_q = extract32(insn, 30, 1);
32
+ int rm = extract32(insn, 16, 5);
33
+ int rn = extract32(insn, 5, 5);
34
+ int rd = extract32(insn, 0, 5);
35
+ /*
36
+ * For these floating point ops, the U, a and opcode bits
37
+ * together indicate the operation.
38
+ */
39
+ int fpopcode = opcode | (a << 3) | (u << 4);
40
+ int datasize = is_q ? 128 : 64;
41
+ int elements = datasize / 16;
42
+ bool pairwise;
43
TCGv_ptr fpst;
44
- bool pairwise = false;
45
+ int pass;
46
+
47
+ switch (fpopcode) {
48
+ case 0x0: /* FMAXNM */
49
+ case 0x1: /* FMLA */
50
+ case 0x2: /* FADD */
51
+ case 0x3: /* FMULX */
52
+ case 0x4: /* FCMEQ */
53
+ case 0x6: /* FMAX */
54
+ case 0x7: /* FRECPS */
55
+ case 0x8: /* FMINNM */
56
+ case 0x9: /* FMLS */
57
+ case 0xa: /* FSUB */
58
+ case 0xe: /* FMIN */
59
+ case 0xf: /* FRSQRTS */
60
+ case 0x13: /* FMUL */
61
+ case 0x14: /* FCMGE */
62
+ case 0x15: /* FACGE */
63
+ case 0x17: /* FDIV */
64
+ case 0x1a: /* FABD */
65
+ case 0x1c: /* FCMGT */
66
+ case 0x1d: /* FACGT */
67
+ pairwise = false;
68
+ break;
69
+ case 0x10: /* FMAXNMP */
70
+ case 0x12: /* FADDP */
71
+ case 0x16: /* FMAXP */
72
+ case 0x18: /* FMINNMP */
73
+ case 0x1e: /* FMINP */
74
+ pairwise = true;
75
+ break;
76
+ default:
77
+ unallocated_encoding(s);
78
+ return;
79
+ }
80
81
if (!dc_isar_feature(aa64_fp16, s)) {
82
unallocated_encoding(s);
83
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
84
return;
85
}
86
87
- /* For these floating point ops, the U, a and opcode bits
88
- * together indicate the operation.
89
- */
90
- opcode = extract32(insn, 11, 3);
91
- u = extract32(insn, 29, 1);
92
- a = extract32(insn, 23, 1);
93
- is_q = extract32(insn, 30, 1);
94
- rm = extract32(insn, 16, 5);
95
- rn = extract32(insn, 5, 5);
96
- rd = extract32(insn, 0, 5);
97
-
98
- fpopcode = opcode | (a << 3) | (u << 4);
99
- datasize = is_q ? 128 : 64;
100
- elements = datasize / 16;
101
-
102
- switch (fpopcode) {
103
- case 0x10: /* FMAXNMP */
104
- case 0x12: /* FADDP */
105
- case 0x16: /* FMAXP */
106
- case 0x18: /* FMINNMP */
107
- case 0x1e: /* FMINP */
108
- pairwise = true;
109
- break;
110
- }
111
-
112
fpst = fpstatus_ptr(FPST_FPCR_F16);
113
114
if (pairwise) {
115
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
116
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
117
break;
118
default:
119
- fprintf(stderr, "%s: insn 0x%04x, fpop 0x%2x @ 0x%" PRIx64 "\n",
120
- __func__, insn, fpopcode, s->pc_curr);
121
g_assert_not_reached();
122
}
123
124
--
125
2.20.1
126
127
diff view generated by jsdifflib
Deleted patch
1
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
2
1
3
virt-6.0 must consider hw_compat_6_0.
4
5
Fixes: da7e13c00b59 ("hw: add compat machines for 6.1")
6
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
7
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
8
Message-id: 20210610183500.54207-1-xypron.glpk@gmx.de
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
hw/arm/virt.c | 2 ++
12
1 file changed, 2 insertions(+)
13
14
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/arm/virt.c
17
+++ b/hw/arm/virt.c
18
@@ -XXX,XX +XXX,XX @@ DEFINE_VIRT_MACHINE_AS_LATEST(6, 1)
19
20
static void virt_machine_6_0_options(MachineClass *mc)
21
{
22
+ virt_machine_6_1_options(mc);
23
+ compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
24
}
25
DEFINE_VIRT_MACHINE(6, 0)
26
27
--
28
2.20.1
29
30
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Venture <venture@google.com>
2
1
3
Adds initial quanta-gbs-bmc machine support.
4
5
Tested: Boots to userspace.
6
Signed-off-by: Patrick Venture <venture@google.com>
7
Reviewed-by: Brandon Kim <brandonkim@google.com>
8
Reviewed-by: Hao Wu <wuhaotsh@google.com>
9
Message-id: 20210608193605.2611114-2-venture@google.com
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
hw/arm/npcm7xx_boards.c | 33 +++++++++++++++++++++++++++++++++
14
1 file changed, 33 insertions(+)
15
16
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/arm/npcm7xx_boards.c
19
+++ b/hw/arm/npcm7xx_boards.c
20
@@ -XXX,XX +XXX,XX @@
21
22
#define NPCM750_EVB_POWER_ON_STRAPS 0x00001ff7
23
#define QUANTA_GSJ_POWER_ON_STRAPS 0x00001fff
24
+#define QUANTA_GBS_POWER_ON_STRAPS 0x000017ff
25
26
static const char npcm7xx_default_bootrom[] = "npcm7xx_bootrom.bin";
27
28
@@ -XXX,XX +XXX,XX @@ static void quanta_gsj_init(MachineState *machine)
29
npcm7xx_load_kernel(machine, soc);
30
}
31
32
+static void quanta_gbs_init(MachineState *machine)
33
+{
34
+ NPCM7xxState *soc;
35
+
36
+ soc = npcm7xx_create_soc(machine, QUANTA_GBS_POWER_ON_STRAPS);
37
+ npcm7xx_connect_dram(soc, machine->ram);
38
+ qdev_realize(DEVICE(soc), NULL, &error_fatal);
39
+
40
+ npcm7xx_load_bootrom(machine, soc);
41
+
42
+ npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f",
43
+ drive_get(IF_MTD, 0, 0));
44
+
45
+ npcm7xx_load_kernel(machine, soc);
46
+}
47
+
48
static void npcm7xx_set_soc_type(NPCM7xxMachineClass *nmc, const char *type)
49
{
50
NPCM7xxClass *sc = NPCM7XX_CLASS(object_class_by_name(type));
51
@@ -XXX,XX +XXX,XX @@ static void gsj_machine_class_init(ObjectClass *oc, void *data)
52
mc->default_ram_size = 512 * MiB;
53
};
54
55
+static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data)
56
+{
57
+ NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc);
58
+ MachineClass *mc = MACHINE_CLASS(oc);
59
+
60
+ npcm7xx_set_soc_type(nmc, TYPE_NPCM730);
61
+
62
+ mc->desc = "Quanta GBS (Cortex-A9)";
63
+ mc->init = quanta_gbs_init;
64
+ mc->default_ram_size = 1 * GiB;
65
+}
66
+
67
static const TypeInfo npcm7xx_machine_types[] = {
68
{
69
.name = TYPE_NPCM7XX_MACHINE,
70
@@ -XXX,XX +XXX,XX @@ static const TypeInfo npcm7xx_machine_types[] = {
71
.name = MACHINE_TYPE_NAME("quanta-gsj"),
72
.parent = TYPE_NPCM7XX_MACHINE,
73
.class_init = gsj_machine_class_init,
74
+ }, {
75
+ .name = MACHINE_TYPE_NAME("quanta-gbs-bmc"),
76
+ .parent = TYPE_NPCM7XX_MACHINE,
77
+ .class_init = gbs_bmc_machine_class_init,
78
},
79
};
80
81
--
82
2.20.1
83
84
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Venture <venture@google.com>
2
1
3
Add a comment and i2c method that describes the board layout.
4
5
Tested: firmware booted to userspace.
6
Signed-off-by: Patrick Venture <venture@google.com>
7
Reviewed-by: Brandon Kim <brandonkim@google.com>
8
Reviewed-by: Hao Wu <wuhaotsh@google.com>
9
Message-id: 20210608193605.2611114-3-venture@google.com
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
hw/arm/npcm7xx_boards.c | 60 +++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 60 insertions(+)
14
15
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/arm/npcm7xx_boards.c
18
+++ b/hw/arm/npcm7xx_boards.c
19
@@ -XXX,XX +XXX,XX @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc)
20
npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1);
21
}
22
23
+static void quanta_gbs_i2c_init(NPCM7xxState *soc)
24
+{
25
+ /*
26
+ * i2c-0:
27
+ * pca9546@71
28
+ *
29
+ * i2c-1:
30
+ * pca9535@24
31
+ * pca9535@20
32
+ * pca9535@21
33
+ * pca9535@22
34
+ * pca9535@23
35
+ * pca9535@25
36
+ * pca9535@26
37
+ *
38
+ * i2c-2:
39
+ * sbtsi@4c
40
+ *
41
+ * i2c-5:
42
+ * atmel,24c64@50 mb_fru
43
+ * pca9546@71
44
+ * - channel 0: max31725@54
45
+ * - channel 1: max31725@55
46
+ * - channel 2: max31725@5d
47
+ * atmel,24c64@51 fan_fru
48
+ * - channel 3: atmel,24c64@52 hsbp_fru
49
+ *
50
+ * i2c-6:
51
+ * pca9545@73
52
+ *
53
+ * i2c-7:
54
+ * pca9545@72
55
+ *
56
+ * i2c-8:
57
+ * adi,adm1272@10
58
+ *
59
+ * i2c-9:
60
+ * pca9546@71
61
+ * - channel 0: isil,isl68137@60
62
+ * - channel 1: isil,isl68137@61
63
+ * - channel 2: isil,isl68137@63
64
+ * - channel 3: isil,isl68137@45
65
+ *
66
+ * i2c-10:
67
+ * pca9545@71
68
+ *
69
+ * i2c-11:
70
+ * pca9545@76
71
+ *
72
+ * i2c-12:
73
+ * maxim,max34451@4e
74
+ * isil,isl68137@5d
75
+ * isil,isl68137@5e
76
+ *
77
+ * i2c-14:
78
+ * pca9545@70
79
+ */
80
+}
81
+
82
static void npcm750_evb_init(MachineState *machine)
83
{
84
NPCM7xxState *soc;
85
@@ -XXX,XX +XXX,XX @@ static void quanta_gbs_init(MachineState *machine)
86
npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f",
87
drive_get(IF_MTD, 0, 0));
88
89
+ quanta_gbs_i2c_init(soc);
90
npcm7xx_load_kernel(machine, soc);
91
}
92
93
--
94
2.20.1
95
96
diff view generated by jsdifflib
Deleted patch
1
In commit da6d674e509f0939b we split the NVIC code out from the GIC.
2
This allowed us to specify the NVIC's default value for the num-irq
3
property (64) in the usual way in its property list, and we deleted
4
the previous hack where we updated the value in the state struct in
5
the instance init function. Remove a stale comment about that hack
6
which we forgot to delete at that time.
7
1
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20210614161243.14211-1-peter.maydell@linaro.org
12
---
13
hw/intc/armv7m_nvic.c | 6 ------
14
1 file changed, 6 deletions(-)
15
16
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/intc/armv7m_nvic.c
19
+++ b/hw/intc/armv7m_nvic.c
20
@@ -XXX,XX +XXX,XX @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
21
22
static void armv7m_nvic_instance_init(Object *obj)
23
{
24
- /* We have a different default value for the num-irq property
25
- * than our superclass. This function runs after qdev init
26
- * has set the defaults from the Property array and before
27
- * any user-specified property setting, so just modify the
28
- * value in the GICState struct.
29
- */
30
DeviceState *dev = DEVICE(obj);
31
NVICState *nvic = NVIC(obj);
32
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
33
--
34
2.20.1
35
36
diff view generated by jsdifflib
Deleted patch
1
Generic code in target/arm wants to call acpi_ghes_record_errors();
2
provide a stub version so that we don't fail to link when
3
CONFIG_ACPI_APEI is not set. This requires us to add a new
4
ghes-stub.c file to contain it and the meson.build mechanics
5
to use it when appropriate.
6
1
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
10
Message-id: 20210603171259.27962-2-peter.maydell@linaro.org
11
---
12
hw/acpi/ghes-stub.c | 17 +++++++++++++++++
13
hw/acpi/meson.build | 6 +++---
14
2 files changed, 20 insertions(+), 3 deletions(-)
15
create mode 100644 hw/acpi/ghes-stub.c
16
17
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/hw/acpi/ghes-stub.c
22
@@ -XXX,XX +XXX,XX @@
23
+/*
24
+ * Support for generating APEI tables and recording CPER for Guests:
25
+ * stub functions.
26
+ *
27
+ * Copyright (c) 2021 Linaro, Ltd
28
+ *
29
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
30
+ * See the COPYING file in the top-level directory.
31
+ */
32
+
33
+#include "qemu/osdep.h"
34
+#include "hw/acpi/ghes.h"
35
+
36
+int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
37
+{
38
+ return -1;
39
+}
40
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
41
index XXXXXXX..XXXXXXX 100644
42
--- a/hw/acpi/meson.build
43
+++ b/hw/acpi/meson.build
44
@@ -XXX,XX +XXX,XX @@ acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c'))
45
acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c'))
46
acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c'))
47
acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c'))
48
-acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'))
49
+acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false:('ghes-stub.c'))
50
acpi_ss.add(when: 'CONFIG_ACPI_X86', if_true: files('core.c', 'piix4.c', 'pcihp.c'), if_false: files('acpi-stub.c'))
51
acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c'))
52
acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c'))
53
acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c'))
54
acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c'))
55
-softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c'))
56
+softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c'))
57
softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
58
softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c',
59
- 'acpi-x86-stub.c', 'ipmi-stub.c'))
60
+ 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c'))
61
--
62
2.20.1
63
64
diff view generated by jsdifflib
Deleted patch
1
Allow code elsewhere in the system to check whether the ACPI GHES
2
table is present, so it can determine whether it is OK to try to
3
record an error by calling acpi_ghes_record_errors().
4
1
5
(We don't need to migrate the new 'present' field in AcpiGhesState,
6
because it is set once at system initialization and doesn't change.)
7
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
11
Message-id: 20210603171259.27962-3-peter.maydell@linaro.org
12
---
13
include/hw/acpi/ghes.h | 9 +++++++++
14
hw/acpi/ghes-stub.c | 5 +++++
15
hw/acpi/ghes.c | 17 +++++++++++++++++
16
3 files changed, 31 insertions(+)
17
18
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/acpi/ghes.h
21
+++ b/include/hw/acpi/ghes.h
22
@@ -XXX,XX +XXX,XX @@ enum {
23
24
typedef struct AcpiGhesState {
25
uint64_t ghes_addr_le;
26
+ bool present; /* True if GHES is present at all on this board */
27
} AcpiGhesState;
28
29
void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
30
@@ -XXX,XX +XXX,XX @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
31
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
32
GArray *hardware_errors);
33
int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
34
+
35
+/**
36
+ * acpi_ghes_present: Report whether ACPI GHES table is present
37
+ *
38
+ * Returns: true if the system has an ACPI GHES table and it is
39
+ * safe to call acpi_ghes_record_errors() to record a memory error.
40
+ */
41
+bool acpi_ghes_present(void);
42
#endif
43
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/hw/acpi/ghes-stub.c
46
+++ b/hw/acpi/ghes-stub.c
47
@@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
48
{
49
return -1;
50
}
51
+
52
+bool acpi_ghes_present(void)
53
+{
54
+ return false;
55
+}
56
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/hw/acpi/ghes.c
59
+++ b/hw/acpi/ghes.c
60
@@ -XXX,XX +XXX,XX @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
61
/* Create a read-write fw_cfg file for Address */
62
fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
63
NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
64
+
65
+ ags->present = true;
66
}
67
68
int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
69
@@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
70
71
return ret;
72
}
73
+
74
+bool acpi_ghes_present(void)
75
+{
76
+ AcpiGedState *acpi_ged_state;
77
+ AcpiGhesState *ags;
78
+
79
+ acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
80
+ NULL));
81
+
82
+ if (!acpi_ged_state) {
83
+ return false;
84
+ }
85
+ ags = &acpi_ged_state->ghes_state;
86
+ return ags->present;
87
+}
88
--
89
2.20.1
90
91
diff view generated by jsdifflib
Deleted patch
1
The virt_is_acpi_enabled() function is specific to the virt board, as
2
is the check for its 'ras' property. Use the new acpi_ghes_present()
3
function to check whether we should report memory errors via
4
acpi_ghes_record_errors().
5
1
6
This avoids a link error if QEMU was built without support for the
7
virt board, and provides a mechanism that can be used by any future
8
board models that want to add ACPI memory error reporting support
9
(they only need to call acpi_ghes_add_fw_cfg()).
10
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
14
Message-id: 20210603171259.27962-4-peter.maydell@linaro.org
15
---
16
target/arm/kvm64.c | 6 +-----
17
1 file changed, 1 insertion(+), 5 deletions(-)
18
19
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/kvm64.c
22
+++ b/target/arm/kvm64.c
23
@@ -XXX,XX +XXX,XX @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
24
{
25
ram_addr_t ram_addr;
26
hwaddr paddr;
27
- Object *obj = qdev_get_machine();
28
- VirtMachineState *vms = VIRT_MACHINE(obj);
29
- bool acpi_enabled = virt_is_acpi_enabled(vms);
30
31
assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
32
33
- if (acpi_enabled && addr &&
34
- object_property_get_bool(obj, "ras", NULL)) {
35
+ if (acpi_ghes_present() && addr) {
36
ram_addr = qemu_ram_addr_from_host(addr);
37
if (ram_addr != RAM_ADDR_INVALID &&
38
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
39
--
40
2.20.1
41
42
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
The test was off-by-one, because tag_last points to the
4
last byte of the tag to check, thus tag_last - prev_page
5
will equal TARGET_PAGE_SIZE when we use the first byte
6
of the next page.
7
8
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/403
9
Reported-by: Peter Collingbourne <pcc@google.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20210612195707.840217-1-richard.henderson@linaro.org
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
target/arm/mte_helper.c | 2 +-
16
tests/tcg/aarch64/mte-7.c | 31 +++++++++++++++++++++++++++++++
17
tests/tcg/aarch64/Makefile.target | 2 +-
18
3 files changed, 33 insertions(+), 2 deletions(-)
19
create mode 100644 tests/tcg/aarch64/mte-7.c
20
21
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/mte_helper.c
24
+++ b/target/arm/mte_helper.c
25
@@ -XXX,XX +XXX,XX @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
26
prev_page = ptr & TARGET_PAGE_MASK;
27
next_page = prev_page + TARGET_PAGE_SIZE;
28
29
- if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) {
30
+ if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
31
/* Memory access stays on one page. */
32
tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1;
33
mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
34
diff --git a/tests/tcg/aarch64/mte-7.c b/tests/tcg/aarch64/mte-7.c
35
new file mode 100644
36
index XXXXXXX..XXXXXXX
37
--- /dev/null
38
+++ b/tests/tcg/aarch64/mte-7.c
39
@@ -XXX,XX +XXX,XX @@
40
+/*
41
+ * Memory tagging, unaligned access crossing pages.
42
+ * https://gitlab.com/qemu-project/qemu/-/issues/403
43
+ *
44
+ * Copyright (c) 2021 Linaro Ltd
45
+ * SPDX-License-Identifier: GPL-2.0-or-later
46
+ */
47
+
48
+#include "mte.h"
49
+
50
+int main(int ac, char **av)
51
+{
52
+ void *p;
53
+
54
+ enable_mte(PR_MTE_TCF_SYNC);
55
+ p = alloc_mte_mem(2 * 0x1000);
56
+
57
+ /* Tag the pointer. */
58
+ p = (void *)((unsigned long)p | (1ul << 56));
59
+
60
+ /* Store tag in sequential granules. */
61
+ asm("stg %0, [%0]" : : "r"(p + 0x0ff0));
62
+ asm("stg %0, [%0]" : : "r"(p + 0x1000));
63
+
64
+ /*
65
+ * Perform an unaligned store with tag 1 crossing the pages.
66
+ * Failure dies with SIGSEGV.
67
+ */
68
+ asm("str %0, [%0]" : : "r"(p + 0x0ffc));
69
+ return 0;
70
+}
71
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tests/tcg/aarch64/Makefile.target
74
+++ b/tests/tcg/aarch64/Makefile.target
75
@@ -XXX,XX +XXX,XX @@ AARCH64_TESTS += bti-2
76
77
# MTE Tests
78
ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_ARMV8_MTE),)
79
-AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6
80
+AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7
81
mte-%: CFLAGS += -march=armv8.5-a+memtag
82
endif
83
84
--
85
2.20.1
86
87
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Venture <venture@google.com>
2
1
3
Adds comments to the board init to identify missing i2c devices.
4
5
Signed-off-by: Patrick Venture <venture@google.com>
6
Reviewed-by: Hao Wu <wuhaotsh@google.com>
7
Reviewed-by: Joel Stanley <joel@jms.id.au>
8
Message-id: 20210608202522.2677850-2-venture@google.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
hw/arm/npcm7xx_boards.c | 16 +++++++++++++++-
12
1 file changed, 15 insertions(+), 1 deletion(-)
13
14
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/arm/npcm7xx_boards.c
17
+++ b/hw/arm/npcm7xx_boards.c
18
@@ -XXX,XX +XXX,XX @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
19
at24c_eeprom_init(soc, 9, 0x55, 8192);
20
at24c_eeprom_init(soc, 10, 0x55, 8192);
21
22
- /* TODO: Add additional i2c devices. */
23
+ /*
24
+ * i2c-11:
25
+ * - power-brick@36: delta,dps800
26
+ * - hotswap@15: ti,lm5066i
27
+ */
28
+
29
+ /*
30
+ * i2c-12:
31
+ * - ucd90160@6b
32
+ */
33
+
34
+ /*
35
+ * i2c-15:
36
+ * - pca9548@75
37
+ */
38
}
39
40
static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc)
41
--
42
2.20.1
43
44
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Venture <venture@google.com>
2
1
3
Tested: Quanta-gsj firmware booted.
4
5
i2c /dev entries driver
6
I2C init bus 1 freq 100000
7
I2C init bus 2 freq 100000
8
I2C init bus 3 freq 100000
9
I2C init bus 4 freq 100000
10
I2C init bus 8 freq 100000
11
I2C init bus 9 freq 100000
12
at24 9-0055: 8192 byte 24c64 EEPROM, writable, 1 bytes/write
13
I2C init bus 10 freq 100000
14
at24 10-0055: 8192 byte 24c64 EEPROM, writable, 1 bytes/write
15
I2C init bus 12 freq 100000
16
I2C init bus 15 freq 100000
17
i2c i2c-15: Added multiplexed i2c bus 16
18
i2c i2c-15: Added multiplexed i2c bus 17
19
i2c i2c-15: Added multiplexed i2c bus 18
20
i2c i2c-15: Added multiplexed i2c bus 19
21
i2c i2c-15: Added multiplexed i2c bus 20
22
i2c i2c-15: Added multiplexed i2c bus 21
23
i2c i2c-15: Added multiplexed i2c bus 22
24
i2c i2c-15: Added multiplexed i2c bus 23
25
pca954x 15-0075: registered 8 multiplexed busses for I2C switch pca9548
26
27
Signed-off-by: Patrick Venture <venture@google.com>
28
Reviewed-by: Hao Wu <wuhaotsh@google.com>
29
Reviewed-by: Joel Stanley <joel@jms.id.au>
30
Message-id: 20210608202522.2677850-3-venture@google.com
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
---
33
hw/arm/npcm7xx_boards.c | 6 ++----
34
hw/arm/Kconfig | 1 +
35
2 files changed, 3 insertions(+), 4 deletions(-)
36
37
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/arm/npcm7xx_boards.c
40
+++ b/hw/arm/npcm7xx_boards.c
41
@@ -XXX,XX +XXX,XX @@
42
43
#include "hw/arm/npcm7xx.h"
44
#include "hw/core/cpu.h"
45
+#include "hw/i2c/i2c_mux_pca954x.h"
46
#include "hw/i2c/smbus_eeprom.h"
47
#include "hw/loader.h"
48
#include "hw/qdev-core.h"
49
@@ -XXX,XX +XXX,XX @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
50
* - ucd90160@6b
51
*/
52
53
- /*
54
- * i2c-15:
55
- * - pca9548@75
56
- */
57
+ i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 15), "pca9548", 0x75);
58
}
59
60
static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc)
61
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
62
index XXXXXXX..XXXXXXX 100644
63
--- a/hw/arm/Kconfig
64
+++ b/hw/arm/Kconfig
65
@@ -XXX,XX +XXX,XX @@ config NPCM7XX
66
select SERIAL
67
select SSI
68
select UNIMP
69
+ select PCA954X
70
71
config FSL_IMX25
72
bool
73
--
74
2.20.1
75
76
diff view generated by jsdifflib
Deleted patch
1
From: Patrick Venture <venture@google.com>
2
1
3
Adds the pca954x muxes expected.
4
5
Tested: Booted quanta-q71l image to userspace.
6
Signed-off-by: Patrick Venture <venture@google.com>
7
Reviewed-by: Hao Wu <wuhaotsh@google.com>
8
Reviewed-by: Joel Stanley <joel@jms.id.au>
9
Reviewed-by: Cédric Le Goater <clg@kaod.org>
10
Message-id: 20210608202522.2677850-4-venture@google.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
hw/arm/aspeed.c | 11 ++++++++---
14
hw/arm/Kconfig | 1 +
15
2 files changed, 9 insertions(+), 3 deletions(-)
16
17
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/arm/aspeed.c
20
+++ b/hw/arm/aspeed.c
21
@@ -XXX,XX +XXX,XX @@
22
#include "hw/arm/boot.h"
23
#include "hw/arm/aspeed.h"
24
#include "hw/arm/aspeed_soc.h"
25
+#include "hw/i2c/i2c_mux_pca954x.h"
26
#include "hw/i2c/smbus_eeprom.h"
27
#include "hw/misc/pca9552.h"
28
#include "hw/misc/tmp105.h"
29
@@ -XXX,XX +XXX,XX @@ static void quanta_q71l_bmc_i2c_init(AspeedMachineState *bmc)
30
/* TODO: i2c-1: Add Frontpanel FRU eeprom@57 24c64 */
31
/* TODO: Add Memory Riser i2c mux and eeproms. */
32
33
- /* TODO: i2c-2: pca9546@74 */
34
- /* TODO: i2c-2: pca9548@77 */
35
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9546", 0x74);
36
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9548", 0x77);
37
+
38
/* TODO: i2c-3: Add BIOS FRU eeprom@56 24c64 */
39
- /* TODO: i2c-7: Add pca9546@70 */
40
+
41
+ /* i2c-7 */
42
+ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9546", 0x70);
43
/* - i2c@0: pmbus@59 */
44
/* - i2c@1: pmbus@58 */
45
/* - i2c@2: pmbus@58 */
46
/* - i2c@3: pmbus@59 */
47
+
48
/* TODO: i2c-7: Add PDB FRU eeprom@52 */
49
/* TODO: i2c-8: Add BMC FRU eeprom@50 */
50
}
51
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
52
index XXXXXXX..XXXXXXX 100644
53
--- a/hw/arm/Kconfig
54
+++ b/hw/arm/Kconfig
55
@@ -XXX,XX +XXX,XX @@ config ASPEED_SOC
56
select PCA9552
57
select SERIAL
58
select SMBUS_EEPROM
59
+ select PCA954X
60
select SSI
61
select SSI_M25P80
62
select TMP105
63
--
64
2.20.1
65
66
diff view generated by jsdifflib
Deleted patch
1
Currently we provide Hn and H1_n macros for accessing the correct
2
data within arrays of vector elements of size 1, 2 and 4, accounting
3
for host endianness. We don't provide any macros for elements of
4
size 8 because there the host endianness doesn't matter. However,
5
this does result in awkwardness where we need to pass empty arguments
6
to macros, because checkpatch complains about them. The empty
7
argument is a little confusing for humans to read as well.
8
1
9
Add H8() and H1_8() macros and use them where we were previously
10
passing empty arguments to macros.
11
12
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
Message-id: 20210614151007.4545-2-peter.maydell@linaro.org
17
Message-id: 20210610132505.5827-1-peter.maydell@linaro.org
18
---
19
target/arm/vec_internal.h | 8 +-
20
target/arm/sve_helper.c | 258 +++++++++++++++++++-------------------
21
target/arm/vec_helper.c | 14 +--
22
3 files changed, 143 insertions(+), 137 deletions(-)
23
24
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/vec_internal.h
27
+++ b/target/arm/vec_internal.h
28
@@ -XXX,XX +XXX,XX @@
29
#define H2(x) (x)
30
#define H4(x) (x)
31
#endif
32
-
33
+/*
34
+ * Access to 64-bit elements isn't host-endian dependent; we provide H8
35
+ * and H1_8 so that when a function is being generated from a macro we
36
+ * can pass these rather than an empty macro argument, for clarity.
37
+ */
38
+#define H8(x) (x)
39
+#define H1_8(x) (x)
40
41
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
42
{
43
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/sve_helper.c
46
+++ b/target/arm/sve_helper.c
47
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
48
49
DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_h, float16, H1_2, float16_add)
50
DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_s, float32, H1_4, float32_add)
51
-DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_d, float64, , float64_add)
52
+DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_d, float64, H1_8, float64_add)
53
54
DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_h, float16, H1_2, float16_maxnum)
55
DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_s, float32, H1_4, float32_maxnum)
56
-DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_d, float64, , float64_maxnum)
57
+DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_d, float64, H1_8, float64_maxnum)
58
59
DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_h, float16, H1_2, float16_minnum)
60
DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_s, float32, H1_4, float32_minnum)
61
-DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_d, float64, , float64_minnum)
62
+DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_d, float64, H1_8, float64_minnum)
63
64
DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_h, float16, H1_2, float16_max)
65
DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_s, float32, H1_4, float32_max)
66
-DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_d, float64, , float64_max)
67
+DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_d, float64, H1_8, float64_max)
68
69
DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_h, float16, H1_2, float16_min)
70
DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_s, float32, H1_4, float32_min)
71
-DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_d, float64, , float64_min)
72
+DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_d, float64, H1_8, float64_min)
73
74
#undef DO_ZPZZ_PAIR_FP
75
76
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
77
78
DO_ZZZ_TB(sve2_saddl_h, int16_t, int8_t, H1_2, H1, DO_ADD)
79
DO_ZZZ_TB(sve2_saddl_s, int32_t, int16_t, H1_4, H1_2, DO_ADD)
80
-DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, , H1_4, DO_ADD)
81
+DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, H1_8, H1_4, DO_ADD)
82
83
DO_ZZZ_TB(sve2_ssubl_h, int16_t, int8_t, H1_2, H1, DO_SUB)
84
DO_ZZZ_TB(sve2_ssubl_s, int32_t, int16_t, H1_4, H1_2, DO_SUB)
85
-DO_ZZZ_TB(sve2_ssubl_d, int64_t, int32_t, , H1_4, DO_SUB)
86
+DO_ZZZ_TB(sve2_ssubl_d, int64_t, int32_t, H1_8, H1_4, DO_SUB)
87
88
DO_ZZZ_TB(sve2_sabdl_h, int16_t, int8_t, H1_2, H1, DO_ABD)
89
DO_ZZZ_TB(sve2_sabdl_s, int32_t, int16_t, H1_4, H1_2, DO_ABD)
90
-DO_ZZZ_TB(sve2_sabdl_d, int64_t, int32_t, , H1_4, DO_ABD)
91
+DO_ZZZ_TB(sve2_sabdl_d, int64_t, int32_t, H1_8, H1_4, DO_ABD)
92
93
DO_ZZZ_TB(sve2_uaddl_h, uint16_t, uint8_t, H1_2, H1, DO_ADD)
94
DO_ZZZ_TB(sve2_uaddl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD)
95
-DO_ZZZ_TB(sve2_uaddl_d, uint64_t, uint32_t, , H1_4, DO_ADD)
96
+DO_ZZZ_TB(sve2_uaddl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD)
97
98
DO_ZZZ_TB(sve2_usubl_h, uint16_t, uint8_t, H1_2, H1, DO_SUB)
99
DO_ZZZ_TB(sve2_usubl_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB)
100
-DO_ZZZ_TB(sve2_usubl_d, uint64_t, uint32_t, , H1_4, DO_SUB)
101
+DO_ZZZ_TB(sve2_usubl_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB)
102
103
DO_ZZZ_TB(sve2_uabdl_h, uint16_t, uint8_t, H1_2, H1, DO_ABD)
104
DO_ZZZ_TB(sve2_uabdl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD)
105
-DO_ZZZ_TB(sve2_uabdl_d, uint64_t, uint32_t, , H1_4, DO_ABD)
106
+DO_ZZZ_TB(sve2_uabdl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD)
107
108
DO_ZZZ_TB(sve2_smull_zzz_h, int16_t, int8_t, H1_2, H1, DO_MUL)
109
DO_ZZZ_TB(sve2_smull_zzz_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
110
-DO_ZZZ_TB(sve2_smull_zzz_d, int64_t, int32_t, , H1_4, DO_MUL)
111
+DO_ZZZ_TB(sve2_smull_zzz_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
112
113
DO_ZZZ_TB(sve2_umull_zzz_h, uint16_t, uint8_t, H1_2, H1, DO_MUL)
114
DO_ZZZ_TB(sve2_umull_zzz_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
115
-DO_ZZZ_TB(sve2_umull_zzz_d, uint64_t, uint32_t, , H1_4, DO_MUL)
116
+DO_ZZZ_TB(sve2_umull_zzz_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
117
118
/* Note that the multiply cannot overflow, but the doubling can. */
119
static inline int16_t do_sqdmull_h(int16_t n, int16_t m)
120
@@ -XXX,XX +XXX,XX @@ static inline int64_t do_sqdmull_d(int64_t n, int64_t m)
121
122
DO_ZZZ_TB(sve2_sqdmull_zzz_h, int16_t, int8_t, H1_2, H1, do_sqdmull_h)
123
DO_ZZZ_TB(sve2_sqdmull_zzz_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s)
124
-DO_ZZZ_TB(sve2_sqdmull_zzz_d, int64_t, int32_t, , H1_4, do_sqdmull_d)
125
+DO_ZZZ_TB(sve2_sqdmull_zzz_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d)
126
127
#undef DO_ZZZ_TB
128
129
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
130
131
DO_ZZZ_WTB(sve2_saddw_h, int16_t, int8_t, H1_2, H1, DO_ADD)
132
DO_ZZZ_WTB(sve2_saddw_s, int32_t, int16_t, H1_4, H1_2, DO_ADD)
133
-DO_ZZZ_WTB(sve2_saddw_d, int64_t, int32_t, , H1_4, DO_ADD)
134
+DO_ZZZ_WTB(sve2_saddw_d, int64_t, int32_t, H1_8, H1_4, DO_ADD)
135
136
DO_ZZZ_WTB(sve2_ssubw_h, int16_t, int8_t, H1_2, H1, DO_SUB)
137
DO_ZZZ_WTB(sve2_ssubw_s, int32_t, int16_t, H1_4, H1_2, DO_SUB)
138
-DO_ZZZ_WTB(sve2_ssubw_d, int64_t, int32_t, , H1_4, DO_SUB)
139
+DO_ZZZ_WTB(sve2_ssubw_d, int64_t, int32_t, H1_8, H1_4, DO_SUB)
140
141
DO_ZZZ_WTB(sve2_uaddw_h, uint16_t, uint8_t, H1_2, H1, DO_ADD)
142
DO_ZZZ_WTB(sve2_uaddw_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD)
143
-DO_ZZZ_WTB(sve2_uaddw_d, uint64_t, uint32_t, , H1_4, DO_ADD)
144
+DO_ZZZ_WTB(sve2_uaddw_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD)
145
146
DO_ZZZ_WTB(sve2_usubw_h, uint16_t, uint8_t, H1_2, H1, DO_SUB)
147
DO_ZZZ_WTB(sve2_usubw_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB)
148
-DO_ZZZ_WTB(sve2_usubw_d, uint64_t, uint32_t, , H1_4, DO_SUB)
149
+DO_ZZZ_WTB(sve2_usubw_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB)
150
151
#undef DO_ZZZ_WTB
152
153
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
154
DO_ZZZ_NTB(sve2_eoril_b, uint8_t, H1, DO_EOR)
155
DO_ZZZ_NTB(sve2_eoril_h, uint16_t, H1_2, DO_EOR)
156
DO_ZZZ_NTB(sve2_eoril_s, uint32_t, H1_4, DO_EOR)
157
-DO_ZZZ_NTB(sve2_eoril_d, uint64_t, , DO_EOR)
158
+DO_ZZZ_NTB(sve2_eoril_d, uint64_t, H1_8, DO_EOR)
159
160
#undef DO_ZZZ_NTB
161
162
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
163
164
DO_ZZZW_ACC(sve2_sabal_h, int16_t, int8_t, H1_2, H1, DO_ABD)
165
DO_ZZZW_ACC(sve2_sabal_s, int32_t, int16_t, H1_4, H1_2, DO_ABD)
166
-DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, , H1_4, DO_ABD)
167
+DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, H1_8, H1_4, DO_ABD)
168
169
DO_ZZZW_ACC(sve2_uabal_h, uint16_t, uint8_t, H1_2, H1, DO_ABD)
170
DO_ZZZW_ACC(sve2_uabal_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD)
171
-DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, , H1_4, DO_ABD)
172
+DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD)
173
174
DO_ZZZW_ACC(sve2_smlal_zzzw_h, int16_t, int8_t, H1_2, H1, DO_MUL)
175
DO_ZZZW_ACC(sve2_smlal_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
176
-DO_ZZZW_ACC(sve2_smlal_zzzw_d, int64_t, int32_t, , H1_4, DO_MUL)
177
+DO_ZZZW_ACC(sve2_smlal_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
178
179
DO_ZZZW_ACC(sve2_umlal_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_MUL)
180
DO_ZZZW_ACC(sve2_umlal_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
181
-DO_ZZZW_ACC(sve2_umlal_zzzw_d, uint64_t, uint32_t, , H1_4, DO_MUL)
182
+DO_ZZZW_ACC(sve2_umlal_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
183
184
#define DO_NMUL(N, M) -(N * M)
185
186
DO_ZZZW_ACC(sve2_smlsl_zzzw_h, int16_t, int8_t, H1_2, H1, DO_NMUL)
187
DO_ZZZW_ACC(sve2_smlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_NMUL)
188
-DO_ZZZW_ACC(sve2_smlsl_zzzw_d, int64_t, int32_t, , H1_4, DO_NMUL)
189
+DO_ZZZW_ACC(sve2_smlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_NMUL)
190
191
DO_ZZZW_ACC(sve2_umlsl_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_NMUL)
192
DO_ZZZW_ACC(sve2_umlsl_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_NMUL)
193
-DO_ZZZW_ACC(sve2_umlsl_zzzw_d, uint64_t, uint32_t, , H1_4, DO_NMUL)
194
+DO_ZZZW_ACC(sve2_umlsl_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_NMUL)
195
196
#undef DO_ZZZW_ACC
197
198
@@ -XXX,XX +XXX,XX @@ DO_SQDMLAL(sve2_sqdmlal_zzzw_h, int16_t, int8_t, H1_2, H1,
199
do_sqdmull_h, DO_SQADD_H)
200
DO_SQDMLAL(sve2_sqdmlal_zzzw_s, int32_t, int16_t, H1_4, H1_2,
201
do_sqdmull_s, DO_SQADD_S)
202
-DO_SQDMLAL(sve2_sqdmlal_zzzw_d, int64_t, int32_t, , H1_4,
203
+DO_SQDMLAL(sve2_sqdmlal_zzzw_d, int64_t, int32_t, H1_8, H1_4,
204
do_sqdmull_d, do_sqadd_d)
205
206
DO_SQDMLAL(sve2_sqdmlsl_zzzw_h, int16_t, int8_t, H1_2, H1,
207
do_sqdmull_h, DO_SQSUB_H)
208
DO_SQDMLAL(sve2_sqdmlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2,
209
do_sqdmull_s, DO_SQSUB_S)
210
-DO_SQDMLAL(sve2_sqdmlsl_zzzw_d, int64_t, int32_t, , H1_4,
211
+DO_SQDMLAL(sve2_sqdmlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4,
212
do_sqdmull_d, do_sqsub_d)
213
214
#undef DO_SQDMLAL
215
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
216
DO_CMLA_FUNC(sve2_cmla_zzzz_b, uint8_t, H1, DO_CMLA)
217
DO_CMLA_FUNC(sve2_cmla_zzzz_h, uint16_t, H2, DO_CMLA)
218
DO_CMLA_FUNC(sve2_cmla_zzzz_s, uint32_t, H4, DO_CMLA)
219
-DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, , DO_CMLA)
220
+DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, H8, DO_CMLA)
221
222
#define DO_SQRDMLAH_B(N, M, A, S) \
223
do_sqrdmlah_b(N, M, A, S, true)
224
@@ -XXX,XX +XXX,XX @@ DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, , DO_CMLA)
225
DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_b, int8_t, H1, DO_SQRDMLAH_B)
226
DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_h, int16_t, H2, DO_SQRDMLAH_H)
227
DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_s, int32_t, H4, DO_SQRDMLAH_S)
228
-DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_d, int64_t, , DO_SQRDMLAH_D)
229
+DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_d, int64_t, H8, DO_SQRDMLAH_D)
230
231
#define DO_CMLA_IDX_FUNC(NAME, TYPE, H, OP) \
232
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
233
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
234
235
DO_ZZXZ(sve2_sqrdmlah_idx_h, int16_t, H2, DO_SQRDMLAH_H)
236
DO_ZZXZ(sve2_sqrdmlah_idx_s, int32_t, H4, DO_SQRDMLAH_S)
237
-DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, , DO_SQRDMLAH_D)
238
+DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, H8, DO_SQRDMLAH_D)
239
240
#define DO_SQRDMLSH_H(N, M, A) \
241
({ uint32_t discard; do_sqrdmlah_h(N, M, A, true, true, &discard); })
242
@@ -XXX,XX +XXX,XX @@ DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, , DO_SQRDMLAH_D)
243
244
DO_ZZXZ(sve2_sqrdmlsh_idx_h, int16_t, H2, DO_SQRDMLSH_H)
245
DO_ZZXZ(sve2_sqrdmlsh_idx_s, int32_t, H4, DO_SQRDMLSH_S)
246
-DO_ZZXZ(sve2_sqrdmlsh_idx_d, int64_t, , DO_SQRDMLSH_D)
247
+DO_ZZXZ(sve2_sqrdmlsh_idx_d, int64_t, H8, DO_SQRDMLSH_D)
248
249
#undef DO_ZZXZ
250
251
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
252
#define DO_MLA(N, M, A) (A + N * M)
253
254
DO_ZZXW(sve2_smlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLA)
255
-DO_ZZXW(sve2_smlal_idx_d, int64_t, int32_t, , H1_4, DO_MLA)
256
+DO_ZZXW(sve2_smlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLA)
257
DO_ZZXW(sve2_umlal_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLA)
258
-DO_ZZXW(sve2_umlal_idx_d, uint64_t, uint32_t, , H1_4, DO_MLA)
259
+DO_ZZXW(sve2_umlal_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLA)
260
261
#define DO_MLS(N, M, A) (A - N * M)
262
263
DO_ZZXW(sve2_smlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLS)
264
-DO_ZZXW(sve2_smlsl_idx_d, int64_t, int32_t, , H1_4, DO_MLS)
265
+DO_ZZXW(sve2_smlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLS)
266
DO_ZZXW(sve2_umlsl_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLS)
267
-DO_ZZXW(sve2_umlsl_idx_d, uint64_t, uint32_t, , H1_4, DO_MLS)
268
+DO_ZZXW(sve2_umlsl_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLS)
269
270
#define DO_SQDMLAL_S(N, M, A) DO_SQADD_S(A, do_sqdmull_s(N, M))
271
#define DO_SQDMLAL_D(N, M, A) do_sqadd_d(A, do_sqdmull_d(N, M))
272
273
DO_ZZXW(sve2_sqdmlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLAL_S)
274
-DO_ZZXW(sve2_sqdmlal_idx_d, int64_t, int32_t, , H1_4, DO_SQDMLAL_D)
275
+DO_ZZXW(sve2_sqdmlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLAL_D)
276
277
#define DO_SQDMLSL_S(N, M, A) DO_SQSUB_S(A, do_sqdmull_s(N, M))
278
#define DO_SQDMLSL_D(N, M, A) do_sqsub_d(A, do_sqdmull_d(N, M))
279
280
DO_ZZXW(sve2_sqdmlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLSL_S)
281
-DO_ZZXW(sve2_sqdmlsl_idx_d, int64_t, int32_t, , H1_4, DO_SQDMLSL_D)
282
+DO_ZZXW(sve2_sqdmlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLSL_D)
283
284
#undef DO_MLA
285
#undef DO_MLS
286
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
287
}
288
289
DO_ZZX(sve2_sqdmull_idx_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s)
290
-DO_ZZX(sve2_sqdmull_idx_d, int64_t, int32_t, , H1_4, do_sqdmull_d)
291
+DO_ZZX(sve2_sqdmull_idx_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d)
292
293
DO_ZZX(sve2_smull_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
294
-DO_ZZX(sve2_smull_idx_d, int64_t, int32_t, , H1_4, DO_MUL)
295
+DO_ZZX(sve2_smull_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
296
297
DO_ZZX(sve2_umull_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
298
-DO_ZZX(sve2_umull_idx_d, uint64_t, uint32_t, , H1_4, DO_MUL)
299
+DO_ZZX(sve2_umull_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
300
301
#undef DO_ZZX
302
303
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
304
DO_CADD(sve2_cadd_b, int8_t, H1, DO_ADD, DO_SUB)
305
DO_CADD(sve2_cadd_h, int16_t, H1_2, DO_ADD, DO_SUB)
306
DO_CADD(sve2_cadd_s, int32_t, H1_4, DO_ADD, DO_SUB)
307
-DO_CADD(sve2_cadd_d, int64_t, , DO_ADD, DO_SUB)
308
+DO_CADD(sve2_cadd_d, int64_t, H1_8, DO_ADD, DO_SUB)
309
310
DO_CADD(sve2_sqcadd_b, int8_t, H1, DO_SQADD_B, DO_SQSUB_B)
311
DO_CADD(sve2_sqcadd_h, int16_t, H1_2, DO_SQADD_H, DO_SQSUB_H)
312
DO_CADD(sve2_sqcadd_s, int32_t, H1_4, DO_SQADD_S, DO_SQSUB_S)
313
-DO_CADD(sve2_sqcadd_d, int64_t, , do_sqadd_d, do_sqsub_d)
314
+DO_CADD(sve2_sqcadd_d, int64_t, H1_8, do_sqadd_d, do_sqsub_d)
315
316
#undef DO_CADD
317
318
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
319
320
DO_ZZI_SHLL(sve2_sshll_h, int16_t, int8_t, H1_2, H1)
321
DO_ZZI_SHLL(sve2_sshll_s, int32_t, int16_t, H1_4, H1_2)
322
-DO_ZZI_SHLL(sve2_sshll_d, int64_t, int32_t, , H1_4)
323
+DO_ZZI_SHLL(sve2_sshll_d, int64_t, int32_t, H1_8, H1_4)
324
325
DO_ZZI_SHLL(sve2_ushll_h, uint16_t, uint8_t, H1_2, H1)
326
DO_ZZI_SHLL(sve2_ushll_s, uint32_t, uint16_t, H1_4, H1_2)
327
-DO_ZZI_SHLL(sve2_ushll_d, uint64_t, uint32_t, , H1_4)
328
+DO_ZZI_SHLL(sve2_ushll_d, uint64_t, uint32_t, H1_8, H1_4)
329
330
#undef DO_ZZI_SHLL
331
332
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_shrnb_d, uint64_t, uint32_t, DO_SHR)
333
334
DO_SHRNT(sve2_shrnt_h, uint16_t, uint8_t, H1_2, H1, DO_SHR)
335
DO_SHRNT(sve2_shrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_SHR)
336
-DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, , H1_4, DO_SHR)
337
+DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_SHR)
338
339
DO_SHRNB(sve2_rshrnb_h, uint16_t, uint8_t, do_urshr)
340
DO_SHRNB(sve2_rshrnb_s, uint32_t, uint16_t, do_urshr)
341
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_rshrnb_d, uint64_t, uint32_t, do_urshr)
342
343
DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr)
344
DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr)
345
-DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, , H1_4, do_urshr)
346
+DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr)
347
348
#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX)
349
#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX)
350
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqshrunb_d, int64_t, uint32_t, DO_SQSHRUN_D)
351
352
DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H)
353
DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S)
354
-DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, , H1_4, DO_SQSHRUN_D)
355
+DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D)
356
357
#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX)
358
#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX)
359
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqrshrunb_d, int64_t, uint32_t, DO_SQRSHRUN_D)
360
361
DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H)
362
DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S)
363
-DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, , H1_4, DO_SQRSHRUN_D)
364
+DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D)
365
366
#define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX)
367
#define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX)
368
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqshrnb_d, int64_t, uint32_t, DO_SQSHRN_D)
369
370
DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H)
371
DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S)
372
-DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, , H1_4, DO_SQSHRN_D)
373
+DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D)
374
375
#define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX)
376
#define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX)
377
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqrshrnb_d, int64_t, uint32_t, DO_SQRSHRN_D)
378
379
DO_SHRNT(sve2_sqrshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRN_H)
380
DO_SHRNT(sve2_sqrshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRN_S)
381
-DO_SHRNT(sve2_sqrshrnt_d, int64_t, uint32_t, , H1_4, DO_SQRSHRN_D)
382
+DO_SHRNT(sve2_sqrshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRN_D)
383
384
#define DO_UQSHRN_H(x, sh) MIN(x >> sh, UINT8_MAX)
385
#define DO_UQSHRN_S(x, sh) MIN(x >> sh, UINT16_MAX)
386
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_uqshrnb_d, uint64_t, uint32_t, DO_UQSHRN_D)
387
388
DO_SHRNT(sve2_uqshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQSHRN_H)
389
DO_SHRNT(sve2_uqshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQSHRN_S)
390
-DO_SHRNT(sve2_uqshrnt_d, uint64_t, uint32_t, , H1_4, DO_UQSHRN_D)
391
+DO_SHRNT(sve2_uqshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQSHRN_D)
392
393
#define DO_UQRSHRN_H(x, sh) MIN(do_urshr(x, sh), UINT8_MAX)
394
#define DO_UQRSHRN_S(x, sh) MIN(do_urshr(x, sh), UINT16_MAX)
395
@@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_uqrshrnb_d, uint64_t, uint32_t, DO_UQRSHRN_D)
396
397
DO_SHRNT(sve2_uqrshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQRSHRN_H)
398
DO_SHRNT(sve2_uqrshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQRSHRN_S)
399
-DO_SHRNT(sve2_uqrshrnt_d, uint64_t, uint32_t, , H1_4, DO_UQRSHRN_D)
400
+DO_SHRNT(sve2_uqrshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQRSHRN_D)
401
402
#undef DO_SHRNB
403
#undef DO_SHRNT
404
@@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_addhnb_d, uint64_t, uint32_t, 32, DO_ADDHN)
405
406
DO_BINOPNT(sve2_addhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_ADDHN)
407
DO_BINOPNT(sve2_addhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_ADDHN)
408
-DO_BINOPNT(sve2_addhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_ADDHN)
409
+DO_BINOPNT(sve2_addhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_ADDHN)
410
411
DO_BINOPNB(sve2_raddhnb_h, uint16_t, uint8_t, 8, DO_RADDHN)
412
DO_BINOPNB(sve2_raddhnb_s, uint32_t, uint16_t, 16, DO_RADDHN)
413
@@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_raddhnb_d, uint64_t, uint32_t, 32, DO_RADDHN)
414
415
DO_BINOPNT(sve2_raddhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RADDHN)
416
DO_BINOPNT(sve2_raddhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RADDHN)
417
-DO_BINOPNT(sve2_raddhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_RADDHN)
418
+DO_BINOPNT(sve2_raddhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RADDHN)
419
420
DO_BINOPNB(sve2_subhnb_h, uint16_t, uint8_t, 8, DO_SUBHN)
421
DO_BINOPNB(sve2_subhnb_s, uint32_t, uint16_t, 16, DO_SUBHN)
422
@@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_subhnb_d, uint64_t, uint32_t, 32, DO_SUBHN)
423
424
DO_BINOPNT(sve2_subhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_SUBHN)
425
DO_BINOPNT(sve2_subhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_SUBHN)
426
-DO_BINOPNT(sve2_subhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_SUBHN)
427
+DO_BINOPNT(sve2_subhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_SUBHN)
428
429
DO_BINOPNB(sve2_rsubhnb_h, uint16_t, uint8_t, 8, DO_RSUBHN)
430
DO_BINOPNB(sve2_rsubhnb_s, uint32_t, uint16_t, 16, DO_RSUBHN)
431
@@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_rsubhnb_d, uint64_t, uint32_t, 32, DO_RSUBHN)
432
433
DO_BINOPNT(sve2_rsubhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RSUBHN)
434
DO_BINOPNT(sve2_rsubhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RSUBHN)
435
-DO_BINOPNT(sve2_rsubhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_RSUBHN)
436
+DO_BINOPNT(sve2_rsubhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RSUBHN)
437
438
#undef DO_RSUBHN
439
#undef DO_SUBHN
440
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
441
DO_INSR(sve_insr_b, uint8_t, H1)
442
DO_INSR(sve_insr_h, uint16_t, H1_2)
443
DO_INSR(sve_insr_s, uint32_t, H1_4)
444
-DO_INSR(sve_insr_d, uint64_t, )
445
+DO_INSR(sve_insr_d, uint64_t, H1_8)
446
447
#undef DO_INSR
448
449
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_tbx_##SUFF)(void *vd, void *vn, void *vm, uint32_t desc) \
450
DO_TB(b, uint8_t, H1)
451
DO_TB(h, uint16_t, H2)
452
DO_TB(s, uint32_t, H4)
453
-DO_TB(d, uint64_t, )
454
+DO_TB(d, uint64_t, H8)
455
456
#undef DO_TB
457
458
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
459
460
DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
461
DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
462
-DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4)
463
+DO_UNPK(sve_sunpk_d, int64_t, int32_t, H8, H4)
464
465
DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
466
DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
467
-DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4)
468
+DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, H8, H4)
469
470
#undef DO_UNPK
471
472
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
473
DO_ZIP(sve_zip_b, uint8_t, H1)
474
DO_ZIP(sve_zip_h, uint16_t, H1_2)
475
DO_ZIP(sve_zip_s, uint32_t, H1_4)
476
-DO_ZIP(sve_zip_d, uint64_t, )
477
+DO_ZIP(sve_zip_d, uint64_t, H1_8)
478
DO_ZIP(sve2_zip_q, Int128, )
479
480
#define DO_UZP(NAME, TYPE, H) \
481
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
482
DO_UZP(sve_uzp_b, uint8_t, H1)
483
DO_UZP(sve_uzp_h, uint16_t, H1_2)
484
DO_UZP(sve_uzp_s, uint32_t, H1_4)
485
-DO_UZP(sve_uzp_d, uint64_t, )
486
+DO_UZP(sve_uzp_d, uint64_t, H1_8)
487
DO_UZP(sve2_uzp_q, Int128, )
488
489
#define DO_TRN(NAME, TYPE, H) \
490
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
491
DO_TRN(sve_trn_b, uint8_t, H1)
492
DO_TRN(sve_trn_h, uint16_t, H1_2)
493
DO_TRN(sve_trn_s, uint32_t, H1_4)
494
-DO_TRN(sve_trn_d, uint64_t, )
495
+DO_TRN(sve_trn_d, uint64_t, H1_8)
496
DO_TRN(sve2_trn_q, Int128, )
497
498
#undef DO_ZIP
499
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
500
#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \
501
DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
502
#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \
503
- DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull)
504
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_8, 0x0101010101010101ull)
505
506
DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==)
507
DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==)
508
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
509
#define DO_CMP_PPZI_S(NAME, TYPE, OP) \
510
DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
511
#define DO_CMP_PPZI_D(NAME, TYPE, OP) \
512
- DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull)
513
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_8, 0x0101010101010101ull)
514
515
DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==)
516
DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==)
517
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
518
519
DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
520
DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
521
-DO_REDUCE(sve_faddv_d, float64, , add, float64_zero)
522
+DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
523
524
/* Identity is floatN_default_nan, without the function call. */
525
DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
526
DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
527
-DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL)
528
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
529
530
DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
531
DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
532
-DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL)
533
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
534
535
DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
536
DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
537
-DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity)
538
+DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
539
540
DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
541
DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
542
-DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity))
543
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
544
545
#undef DO_REDUCE
546
547
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
548
549
DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add)
550
DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add)
551
-DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add)
552
+DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add)
553
554
DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub)
555
DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub)
556
-DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub)
557
+DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub)
558
559
DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul)
560
DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul)
561
-DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul)
562
+DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul)
563
564
DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div)
565
DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div)
566
-DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div)
567
+DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div)
568
569
DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min)
570
DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min)
571
-DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min)
572
+DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min)
573
574
DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
575
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
576
-DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max)
577
+DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
578
579
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
580
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
581
-DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum)
582
+DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
583
584
DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum)
585
DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum)
586
-DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum)
587
+DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum)
588
589
static inline float16 abd_h(float16 a, float16 b, float_status *s)
590
{
591
@@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
592
593
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
594
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
595
-DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d)
596
+DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
597
598
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
599
{
600
@@ -XXX,XX +XXX,XX @@ static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
601
602
DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn)
603
DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn)
604
-DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d)
605
+DO_ZPZZ_FP(sve_fscalbn_d, int64_t, H1_8, scalbn_d)
606
607
DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh)
608
DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs)
609
-DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd)
610
+DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd)
611
612
#undef DO_ZPZZ_FP
613
614
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \
615
616
DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add)
617
DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add)
618
-DO_ZPZS_FP(sve_fadds_d, float64, , float64_add)
619
+DO_ZPZS_FP(sve_fadds_d, float64, H1_8, float64_add)
620
621
DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub)
622
DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub)
623
-DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub)
624
+DO_ZPZS_FP(sve_fsubs_d, float64, H1_8, float64_sub)
625
626
DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul)
627
DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul)
628
-DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul)
629
+DO_ZPZS_FP(sve_fmuls_d, float64, H1_8, float64_mul)
630
631
static inline float16 subr_h(float16 a, float16 b, float_status *s)
632
{
633
@@ -XXX,XX +XXX,XX @@ static inline float64 subr_d(float64 a, float64 b, float_status *s)
634
635
DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h)
636
DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s)
637
-DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d)
638
+DO_ZPZS_FP(sve_fsubrs_d, float64, H1_8, subr_d)
639
640
DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum)
641
DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum)
642
-DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum)
643
+DO_ZPZS_FP(sve_fmaxnms_d, float64, H1_8, float64_maxnum)
644
645
DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum)
646
DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum)
647
-DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum)
648
+DO_ZPZS_FP(sve_fminnms_d, float64, H1_8, float64_minnum)
649
650
DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max)
651
DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max)
652
-DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max)
653
+DO_ZPZS_FP(sve_fmaxs_d, float64, H1_8, float64_max)
654
655
DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
656
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
657
-DO_ZPZS_FP(sve_fmins_d, float64, , float64_min)
658
+DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
659
660
/* Fully general two-operand expander, controlled by a predicate,
661
* With the extra float_status parameter.
662
@@ -XXX,XX +XXX,XX @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s)
663
DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16)
664
DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32)
665
DO_ZPZ_FP(sve_bfcvt, uint32_t, H1_4, float32_to_bfloat16)
666
-DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16)
667
-DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64)
668
-DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32)
669
-DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64)
670
+DO_ZPZ_FP(sve_fcvt_dh, uint64_t, H1_8, sve_f64_to_f16)
671
+DO_ZPZ_FP(sve_fcvt_hd, uint64_t, H1_8, sve_f16_to_f64)
672
+DO_ZPZ_FP(sve_fcvt_ds, uint64_t, H1_8, float64_to_float32)
673
+DO_ZPZ_FP(sve_fcvt_sd, uint64_t, H1_8, float32_to_float64)
674
675
DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz)
676
DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh)
677
DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs)
678
-DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz)
679
-DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz)
680
-DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd)
681
-DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz)
682
+DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, H1_8, vfp_float16_to_int64_rtz)
683
+DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, H1_8, vfp_float32_to_int64_rtz)
684
+DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, H1_8, helper_vfp_tosizd)
685
+DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, H1_8, vfp_float64_to_int64_rtz)
686
687
DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz)
688
DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh)
689
DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs)
690
-DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz)
691
-DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz)
692
-DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd)
693
-DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz)
694
+DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, H1_8, vfp_float16_to_uint64_rtz)
695
+DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, H1_8, vfp_float32_to_uint64_rtz)
696
+DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, H1_8, helper_vfp_touizd)
697
+DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, H1_8, vfp_float64_to_uint64_rtz)
698
699
DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth)
700
DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints)
701
-DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd)
702
+DO_ZPZ_FP(sve_frint_d, uint64_t, H1_8, helper_rintd)
703
704
DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int)
705
DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int)
706
-DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int)
707
+DO_ZPZ_FP(sve_frintx_d, uint64_t, H1_8, float64_round_to_int)
708
709
DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16)
710
DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32)
711
-DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64)
712
+DO_ZPZ_FP(sve_frecpx_d, uint64_t, H1_8, helper_frecpx_f64)
713
714
DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt)
715
DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt)
716
-DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt)
717
+DO_ZPZ_FP(sve_fsqrt_d, uint64_t, H1_8, float64_sqrt)
718
719
DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16)
720
DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16)
721
DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32)
722
-DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64)
723
-DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16)
724
-DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32)
725
-DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64)
726
+DO_ZPZ_FP(sve_scvt_sd, uint64_t, H1_8, int32_to_float64)
727
+DO_ZPZ_FP(sve_scvt_dh, uint64_t, H1_8, int64_to_float16)
728
+DO_ZPZ_FP(sve_scvt_ds, uint64_t, H1_8, int64_to_float32)
729
+DO_ZPZ_FP(sve_scvt_dd, uint64_t, H1_8, int64_to_float64)
730
731
DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16)
732
DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16)
733
DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32)
734
-DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64)
735
-DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16)
736
-DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32)
737
-DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64)
738
+DO_ZPZ_FP(sve_ucvt_sd, uint64_t, H1_8, uint32_to_float64)
739
+DO_ZPZ_FP(sve_ucvt_dh, uint64_t, H1_8, uint64_to_float16)
740
+DO_ZPZ_FP(sve_ucvt_ds, uint64_t, H1_8, uint64_to_float32)
741
+DO_ZPZ_FP(sve_ucvt_dd, uint64_t, H1_8, uint64_to_float64)
742
743
static int16_t do_float16_logb_as_int(float16 a, float_status *s)
744
{
745
@@ -XXX,XX +XXX,XX @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s)
746
747
DO_ZPZ_FP(flogb_h, float16, H1_2, do_float16_logb_as_int)
748
DO_ZPZ_FP(flogb_s, float32, H1_4, do_float32_logb_as_int)
749
-DO_ZPZ_FP(flogb_d, float64, , do_float64_logb_as_int)
750
+DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
751
752
#undef DO_ZPZ_FP
753
754
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
755
#define DO_FPCMP_PPZZ_S(NAME, OP) \
756
DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP)
757
#define DO_FPCMP_PPZZ_D(NAME, OP) \
758
- DO_FPCMP_PPZZ(NAME##_d, float64, , OP)
759
+ DO_FPCMP_PPZZ(NAME##_d, float64, H1_8, OP)
760
761
#define DO_FPCMP_PPZZ_ALL(NAME, OP) \
762
DO_FPCMP_PPZZ_H(NAME, OP) \
763
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, \
764
#define DO_FPCMP_PPZ0_S(NAME, OP) \
765
DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP)
766
#define DO_FPCMP_PPZ0_D(NAME, OP) \
767
- DO_FPCMP_PPZ0(NAME##_d, float64, , OP)
768
+ DO_FPCMP_PPZ0(NAME##_d, float64, H1_8, OP)
769
770
#define DO_FPCMP_PPZ0_ALL(NAME, OP) \
771
DO_FPCMP_PPZ0_H(NAME, OP) \
772
@@ -XXX,XX +XXX,XX @@ DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
773
DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
774
DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
775
DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
776
-DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t)
777
-DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t)
778
+DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t)
779
+DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t)
780
781
#define DO_ST_PRIM_1(NAME, H, TE, TM) \
782
DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \
783
@@ -XXX,XX +XXX,XX @@ DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t)
784
DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t)
785
DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t)
786
DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t)
787
-DO_ST_PRIM_1(bd, , uint64_t, uint8_t)
788
+DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t)
789
790
#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \
791
DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \
792
@@ -XXX,XX +XXX,XX @@ DO_ST_PRIM_1(bd, , uint64_t, uint8_t)
793
DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw)
794
DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw)
795
DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw)
796
-DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw)
797
-DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw)
798
+DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw)
799
+DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw)
800
801
DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw)
802
DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw)
803
-DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw)
804
+DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw)
805
806
DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl)
807
-DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl)
808
-DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl)
809
+DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl)
810
+DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl)
811
812
DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl)
813
-DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl)
814
+DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
815
816
-DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq)
817
-DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq)
818
+DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
819
+DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
820
821
#undef DO_LD_TLB
822
#undef DO_ST_TLB
823
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
824
825
DO_FCVTNT(sve_bfcvtnt, uint32_t, uint16_t, H1_4, H1_2, float32_to_bfloat16)
826
DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16)
827
-DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, , H1_4, float64_to_float32)
828
+DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32)
829
830
#define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \
831
void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
832
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
833
}
834
835
DO_FCVTLT(sve2_fcvtlt_hs, uint32_t, uint16_t, H1_4, H1_2, sve_f16_to_f32)
836
-DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, , H1_4, float32_to_float64)
837
+DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64)
838
839
#undef DO_FCVTLT
840
#undef DO_FCVTNT
841
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
842
index XXXXXXX..XXXXXXX 100644
843
--- a/target/arm/vec_helper.c
844
+++ b/target/arm/vec_helper.c
845
@@ -XXX,XX +XXX,XX @@ DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4)
846
DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4)
847
DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4)
848
DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4)
849
-DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, )
850
-DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, )
851
+DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8)
852
+DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8)
853
854
void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
855
void *vfpst, uint32_t desc)
856
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
857
858
DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2)
859
DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4)
860
-DO_MUL_IDX(gvec_mul_idx_d, uint64_t, )
861
+DO_MUL_IDX(gvec_mul_idx_d, uint64_t, H8)
862
863
#undef DO_MUL_IDX
864
865
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
866
867
DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2)
868
DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4)
869
-DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, )
870
+DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, H8)
871
872
DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2)
873
DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4)
874
-DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
875
+DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
876
877
#undef DO_MLA_IDX
878
879
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
880
881
DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
882
DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
883
-DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, )
884
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8)
885
886
/*
887
* Non-fused multiply-accumulate operations, for Neon. NB that unlike
888
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
889
890
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
891
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
892
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
893
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
894
895
#undef DO_FMLA_IDX
896
897
--
898
2.20.1
899
900
diff view generated by jsdifflib
Deleted patch
1
MVE has an FPSCR.QC bit similar to the A-profile Neon one; when MVE
2
is implemented make the bit writeable, both in the generic "load and
3
store FPSCR" helper functions and in the code for handling the NZCVQC
4
sysreg which we had previously left as "TODO when we implement MVE".
5
1
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210614151007.4545-3-peter.maydell@linaro.org
9
---
10
target/arm/translate-vfp.c | 30 +++++++++++++++++++++---------
11
target/arm/vfp_helper.c | 3 ++-
12
2 files changed, 23 insertions(+), 10 deletions(-)
13
14
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-vfp.c
17
+++ b/target/arm/translate-vfp.c
18
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
19
{
20
TCGv_i32 fpscr;
21
tmp = loadfn(s, opaque);
22
- /*
23
- * TODO: when we implement MVE, write the QC bit.
24
- * For non-MVE, QC is RES0.
25
- */
26
+ if (dc_isar_feature(aa32_mve, s)) {
27
+ /* QC is only present for MVE; otherwise RES0 */
28
+ TCGv_i32 qc = tcg_temp_new_i32();
29
+ tcg_gen_andi_i32(qc, tmp, FPCR_QC);
30
+ /*
31
+ * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
32
+ * here writing the same value into all elements is simplest.
33
+ */
34
+ tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
35
+ 16, 16, qc);
36
+ }
37
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
38
fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
39
tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
40
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
41
break;
42
}
43
44
+ if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
45
+ /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
46
+ regno = QEMU_VFP_FPSCR_NZCV;
47
+ }
48
+
49
switch (regno) {
50
case ARM_VFP_FPSCR:
51
tmp = tcg_temp_new_i32();
52
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
53
storefn(s, opaque, tmp);
54
break;
55
case ARM_VFP_FPSCR_NZCVQC:
56
- /*
57
- * TODO: MVE has a QC bit, which we probably won't store
58
- * in the xregs[] field. For non-MVE, where QC is RES0,
59
- * we can just fall through to the FPSCR_NZCV case.
60
- */
61
+ tmp = tcg_temp_new_i32();
62
+ gen_helper_vfp_get_fpscr(tmp, cpu_env);
63
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
64
+ storefn(s, opaque, tmp);
65
+ break;
66
case QEMU_VFP_FPSCR_NZCV:
67
/*
68
* Read just NZCV; this is a special case to avoid the
69
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/vfp_helper.c
72
+++ b/target/arm/vfp_helper.c
73
@@ -XXX,XX +XXX,XX @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
74
FPCR_LTPSIZE_LENGTH);
75
}
76
77
- if (arm_feature(env, ARM_FEATURE_NEON)) {
78
+ if (arm_feature(env, ARM_FEATURE_NEON) ||
79
+ cpu_isar_feature(aa32_mve, cpu)) {
80
/*
81
* The bit we set within fpscr_q is arbitrary; the register as a
82
* whole being zero/non-zero is what counts.
83
--
84
2.20.1
85
86
diff view generated by jsdifflib
Deleted patch
1
When MVE is supported, the VPR register has a place on the exception
2
stack frame in a previously reserved slot just above the FPSCR.
3
It must also be zeroed in various situations when we invalidate
4
FPU context.
5
1
6
Update the code which handles the stack frames (exception entry and
7
exit code, VLLDM, and VLSTM) to save/restore VPR.
8
9
Update code which invalidates FP registers (mostly also exception
10
entry and exit code, but also VSCCLRM and the code in
11
full_vfp_access_check() that corresponds to the ExecuteFPCheck()
12
pseudocode) to zero VPR.
13
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
Message-id: 20210614151007.4545-4-peter.maydell@linaro.org
17
---
18
target/arm/m_helper.c | 54 +++++++++++++++++++++++++++++------
19
target/arm/translate-m-nocp.c | 5 +++-
20
target/arm/translate-vfp.c | 9 ++++--
21
3 files changed, 57 insertions(+), 11 deletions(-)
22
23
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/m_helper.c
26
+++ b/target/arm/m_helper.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
28
uint32_t shi = extract64(dn, 32, 32);
29
30
if (i >= 16) {
31
- faddr += 8; /* skip the slot for the FPSCR */
32
+ faddr += 8; /* skip the slot for the FPSCR/VPR */
33
}
34
stacked_ok = stacked_ok &&
35
v7m_stack_write(cpu, faddr, slo, mmu_idx, STACK_LAZYFP) &&
36
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
37
stacked_ok = stacked_ok &&
38
v7m_stack_write(cpu, fpcar + 0x40,
39
vfp_get_fpscr(env), mmu_idx, STACK_LAZYFP);
40
+ if (cpu_isar_feature(aa32_mve, cpu)) {
41
+ stacked_ok = stacked_ok &&
42
+ v7m_stack_write(cpu, fpcar + 0x44,
43
+ env->v7m.vpr, mmu_idx, STACK_LAZYFP);
44
+ }
45
}
46
47
/*
48
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
49
env->v7m.fpccr[is_secure] &= ~R_V7M_FPCCR_LSPACT_MASK;
50
51
if (ts) {
52
- /* Clear s0 to s31 and the FPSCR */
53
+ /* Clear s0 to s31 and the FPSCR and VPR */
54
int i;
55
56
for (i = 0; i < 32; i += 2) {
57
*aa32_vfp_dreg(env, i / 2) = 0;
58
}
59
vfp_set_fpscr(env, 0);
60
+ if (cpu_isar_feature(aa32_mve, cpu)) {
61
+ env->v7m.vpr = 0;
62
+ }
63
}
64
/*
65
- * Otherwise s0 to s15 and FPSCR are UNKNOWN; we choose to leave them
66
+ * Otherwise s0 to s15, FPSCR and VPR are UNKNOWN; we choose to leave them
67
* unchanged.
68
*/
69
}
70
@@ -XXX,XX +XXX,XX @@ static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
71
void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
72
{
73
/* fptr is the value of Rn, the frame pointer we store the FP regs to */
74
+ ARMCPU *cpu = env_archcpu(env);
75
bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
76
bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK;
77
uintptr_t ra = GETPC();
78
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
79
cpu_stl_data_ra(env, faddr + 4, shi, ra);
80
}
81
cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra);
82
+ if (cpu_isar_feature(aa32_mve, cpu)) {
83
+ cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra);
84
+ }
85
86
/*
87
- * If TS is 0 then s0 to s15 and FPSCR are UNKNOWN; we choose to
88
+ * If TS is 0 then s0 to s15, FPSCR and VPR are UNKNOWN; we choose to
89
* leave them unchanged, matching our choice in v7m_preserve_fp_state.
90
*/
91
if (ts) {
92
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
93
*aa32_vfp_dreg(env, i / 2) = 0;
94
}
95
vfp_set_fpscr(env, 0);
96
+ if (cpu_isar_feature(aa32_mve, cpu)) {
97
+ env->v7m.vpr = 0;
98
+ }
99
}
100
} else {
101
v7m_update_fpccr(env, fptr, false);
102
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
103
104
void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
105
{
106
+ ARMCPU *cpu = env_archcpu(env);
107
uintptr_t ra = GETPC();
108
109
/* fptr is the value of Rn, the frame pointer we load the FP regs from */
110
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
111
uint32_t faddr = fptr + 4 * i;
112
113
if (i >= 16) {
114
- faddr += 8; /* skip the slot for the FPSCR */
115
+ faddr += 8; /* skip the slot for the FPSCR and VPR */
116
}
117
118
slo = cpu_ldl_data_ra(env, faddr, ra);
119
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
120
}
121
fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra);
122
vfp_set_fpscr(env, fpscr);
123
+ if (cpu_isar_feature(aa32_mve, cpu)) {
124
+ env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra);
125
+ }
126
}
127
128
env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
129
@@ -XXX,XX +XXX,XX @@ static bool v7m_push_stack(ARMCPU *cpu)
130
uint32_t shi = extract64(dn, 32, 32);
131
132
if (i >= 16) {
133
- faddr += 8; /* skip the slot for the FPSCR */
134
+ faddr += 8; /* skip the slot for the FPSCR and VPR */
135
}
136
stacked_ok = stacked_ok &&
137
v7m_stack_write(cpu, faddr, slo,
138
@@ -XXX,XX +XXX,XX @@ static bool v7m_push_stack(ARMCPU *cpu)
139
stacked_ok = stacked_ok &&
140
v7m_stack_write(cpu, frameptr + 0x60,
141
vfp_get_fpscr(env), mmu_idx, STACK_NORMAL);
142
+ if (cpu_isar_feature(aa32_mve, cpu)) {
143
+ stacked_ok = stacked_ok &&
144
+ v7m_stack_write(cpu, frameptr + 0x64,
145
+ env->v7m.vpr, mmu_idx, STACK_NORMAL);
146
+ }
147
if (cpacr_pass) {
148
for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) {
149
*aa32_vfp_dreg(env, i / 2) = 0;
150
}
151
vfp_set_fpscr(env, 0);
152
+ if (cpu_isar_feature(aa32_mve, cpu)) {
153
+ env->v7m.vpr = 0;
154
+ }
155
}
156
} else {
157
/* Lazy stacking enabled, save necessary info to stack later */
158
@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
159
v7m_exception_taken(cpu, excret, true, false);
160
}
161
}
162
- /* Clear s0..s15 and FPSCR; TODO also VPR when MVE is implemented */
163
+ /* Clear s0..s15, FPSCR and VPR */
164
int i;
165
166
for (i = 0; i < 16; i += 2) {
167
*aa32_vfp_dreg(env, i / 2) = 0;
168
}
169
vfp_set_fpscr(env, 0);
170
+ if (cpu_isar_feature(aa32_mve, cpu)) {
171
+ env->v7m.vpr = 0;
172
+ }
173
}
174
}
175
176
@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
177
uint32_t faddr = frameptr + 0x20 + 4 * i;
178
179
if (i >= 16) {
180
- faddr += 8; /* Skip the slot for the FPSCR */
181
+ faddr += 8; /* Skip the slot for the FPSCR and VPR */
182
}
183
184
pop_ok = pop_ok &&
185
@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
186
if (pop_ok) {
187
vfp_set_fpscr(env, fpscr);
188
}
189
+ if (cpu_isar_feature(aa32_mve, cpu)) {
190
+ pop_ok = pop_ok &&
191
+ v7m_stack_read(cpu, &env->v7m.vpr,
192
+ frameptr + 0x64, mmu_idx);
193
+ }
194
if (!pop_ok) {
195
/*
196
* These regs are 0 if security extension present;
197
@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
198
*aa32_vfp_dreg(env, i / 2) = 0;
199
}
200
vfp_set_fpscr(env, 0);
201
+ if (cpu_isar_feature(aa32_mve, cpu)) {
202
+ env->v7m.vpr = 0;
203
+ }
204
}
205
}
206
}
207
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
208
index XXXXXXX..XXXXXXX 100644
209
--- a/target/arm/translate-m-nocp.c
210
+++ b/target/arm/translate-m-nocp.c
211
@@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
212
btmreg++;
213
}
214
assert(btmreg == topreg + 1);
215
- /* TODO: when MVE is implemented, zero VPR here */
216
+ if (dc_isar_feature(aa32_mve, s)) {
217
+ TCGv_i32 z32 = tcg_const_i32(0);
218
+ store_cpu_field(z32, v7m.vpr);
219
+ }
220
return true;
221
}
222
223
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/target/arm/translate-vfp.c
226
+++ b/target/arm/translate-vfp.c
227
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
228
229
if (s->v7m_new_fp_ctxt_needed) {
230
/*
231
- * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
232
- * and the FPSCR.
233
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
234
+ * the FPSCR, and VPR.
235
*/
236
TCGv_i32 control, fpscr;
237
uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
238
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
239
fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
240
gen_helper_vfp_set_fpscr(cpu_env, fpscr);
241
tcg_temp_free_i32(fpscr);
242
+ if (dc_isar_feature(aa32_mve, s)) {
243
+ TCGv_i32 z32 = tcg_const_i32(0);
244
+ store_cpu_field(z32, v7m.vpr);
245
+ }
246
+
247
/*
248
* We don't need to arrange to end the TB, because the only
249
* parts of FPSCR which we cache in the TB flags are the VECLEN
250
--
251
2.20.1
252
253
diff view generated by jsdifflib
Deleted patch
1
On A-profile, PSR bits [15:10][26:25] are always the IT state bits.
2
On M-profile, some of the reserved encodings of the IT state are used
3
to instead indicate partial progress through instructions that were
4
interrupted partway through by an exception and can be resumed.
5
1
6
These resumable instructions fall into two categories:
7
8
(1) load/store multiple instructions, where these bits are called
9
"ICI" and specify the register in the ldm/stm list where execution
10
should resume. (Specifically: LDM, STM, VLDM, VSTM, VLLDM, VLSTM,
11
CLRM, VSCCLRM.)
12
13
(2) MVE instructions subject to beatwise execution, where these bits
14
are called "ECI" and specify which beats in this and possibly also
15
the following MVE insn have been executed.
16
17
There are also a few insns (LE, LETP, and BKPT) which do not use the
18
ICI/ECI bits but must leave them alone.
19
20
Otherwise, we should raise an INVSTATE UsageFault for any attempt to
21
execute an insn with non-zero ICI/ECI bits.
22
23
So far we have been able to ignore ECI/ICI, because the architecture
24
allows the IMPDEF choice of "always restart load/store multiple from
25
the beginning regardless of ICI state", so the only thing we have
26
been missing is that we don't raise the INVSTATE fault for bad guest
27
code. However, MVE requires that we honour ECI bits and do not
28
rexecute beats of an insn that have already been executed.
29
30
Add the support in the decoder for handling ECI/ICI:
31
* identify the ECI/ICI case in the CONDEXEC TB flags
32
* when a load/store multiple insn succeeds, it updates the ECI/ICI
33
state (both in DisasContext and in the CPU state), and sets a flag
34
to say that the ECI/ICI state was handled
35
* if we find that the insn we just decoded did not handle the
36
ECI/ICI state, we delete all the code that we just generated for
37
it and instead emit the code to raise the INVFAULT. This allows
38
us to avoid having to update every non-MVE non-LDM/STM insn to
39
make it check for "is ECI/ICI set?".
40
41
We continue with our existing IMPDEF choice of not caring about the
42
ICI state for the load/store multiples and simply restarting them
43
from the beginning. Because we don't allow interrupts in the middle
44
of an insn, the only way we would see this state is if the guest set
45
ICI manually on return from an exception handler, so it's a corner
46
case which doesn't merit optimisation.
47
48
ICI update for LDM/STM is simple -- it always zeroes the state. ECI
49
update for MVE beatwise insns will be a little more complex, since
50
the ECI state may include information for the following insn.
51
52
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
53
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
54
Message-id: 20210614151007.4545-5-peter.maydell@linaro.org
55
---
56
target/arm/translate-a32.h | 1 +
57
target/arm/translate.h | 9 +++
58
target/arm/translate-m-nocp.c | 11 ++++
59
target/arm/translate-vfp.c | 6 ++
60
target/arm/translate.c | 111 ++++++++++++++++++++++++++++++++--
61
5 files changed, 133 insertions(+), 5 deletions(-)
62
63
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/translate-a32.h
66
+++ b/target/arm/translate-a32.h
67
@@ -XXX,XX +XXX,XX @@ long vfp_reg_offset(bool dp, unsigned reg);
68
long neon_full_reg_offset(unsigned reg);
69
long neon_element_offset(int reg, int element, MemOp memop);
70
void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
71
+void clear_eci_state(DisasContext *s);
72
73
static inline TCGv_i32 load_cpu_offset(int offset)
74
{
75
diff --git a/target/arm/translate.h b/target/arm/translate.h
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/translate.h
78
+++ b/target/arm/translate.h
79
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
80
/* Thumb-2 conditional execution bits. */
81
int condexec_mask;
82
int condexec_cond;
83
+ /* M-profile ECI/ICI exception-continuable instruction state */
84
+ int eci;
85
+ /*
86
+ * trans_ functions for insns which are continuable should set this true
87
+ * after decode (ie after any UNDEF checks)
88
+ */
89
+ bool eci_handled;
90
+ /* TCG op to rewind to if this turns out to be an invalid ECI state */
91
+ TCGOp *insn_eci_rewind;
92
int thumb;
93
int sctlr_b;
94
MemOp be_data;
95
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/target/arm/translate-m-nocp.c
98
+++ b/target/arm/translate-m-nocp.c
99
@@ -XXX,XX +XXX,XX @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
100
unallocated_encoding(s);
101
return true;
102
}
103
+
104
+ s->eci_handled = true;
105
+
106
/* If no fpu, NOP. */
107
if (!dc_isar_feature(aa32_vfp, s)) {
108
+ clear_eci_state(s);
109
return true;
110
}
111
112
@@ -XXX,XX +XXX,XX @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
113
}
114
tcg_temp_free_i32(fptr);
115
116
+ clear_eci_state(s);
117
+
118
/* End the TB, because we have updated FP control bits */
119
s->base.is_jmp = DISAS_UPDATE_EXIT;
120
return true;
121
@@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
122
return true;
123
}
124
125
+ s->eci_handled = true;
126
+
127
if (!dc_isar_feature(aa32_vfp_simd, s)) {
128
/* NOP if we have neither FP nor MVE */
129
+ clear_eci_state(s);
130
return true;
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
134
TCGv_i32 z32 = tcg_const_i32(0);
135
store_cpu_field(z32, v7m.vpr);
136
}
137
+
138
+ clear_eci_state(s);
139
return true;
140
}
141
142
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/target/arm/translate-vfp.c
145
+++ b/target/arm/translate-vfp.c
146
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
147
return false;
148
}
149
150
+ s->eci_handled = true;
151
+
152
if (!vfp_access_check(s)) {
153
return true;
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
156
tcg_temp_free_i32(addr);
157
}
158
159
+ clear_eci_state(s);
160
return true;
161
}
162
163
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
164
return false;
165
}
166
167
+ s->eci_handled = true;
168
+
169
if (!vfp_access_check(s)) {
170
return true;
171
}
172
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
173
tcg_temp_free_i32(addr);
174
}
175
176
+ clear_eci_state(s);
177
return true;
178
}
179
180
diff --git a/target/arm/translate.c b/target/arm/translate.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/target/arm/translate.c
183
+++ b/target/arm/translate.c
184
@@ -XXX,XX +XXX,XX @@ static inline bool is_singlestepping(DisasContext *s)
185
return s->base.singlestep_enabled || s->ss_active;
186
}
187
188
+void clear_eci_state(DisasContext *s)
189
+{
190
+ /*
191
+ * Clear any ECI/ICI state: used when a load multiple/store
192
+ * multiple insn executes.
193
+ */
194
+ if (s->eci) {
195
+ TCGv_i32 tmp = tcg_const_i32(0);
196
+ store_cpu_field(tmp, condexec_bits);
197
+ s->eci = 0;
198
+ }
199
+}
200
+
201
static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
202
{
203
TCGv_i32 tmp1 = tcg_temp_new_i32();
204
@@ -XXX,XX +XXX,XX @@ static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
205
if (!ENABLE_ARCH_5) {
206
return false;
207
}
208
+ /* BKPT is OK with ECI set and leaves it untouched */
209
+ s->eci_handled = true;
210
if (arm_dc_feature(s, ARM_FEATURE_M) &&
211
semihosting_enabled() &&
212
#ifndef CONFIG_USER_ONLY
213
@@ -XXX,XX +XXX,XX @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
214
return true;
215
}
216
217
+ s->eci_handled = true;
218
+
219
addr = op_addr_block_pre(s, a, n);
220
mem_idx = get_mem_index(s);
221
222
@@ -XXX,XX +XXX,XX @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
223
}
224
225
op_addr_block_post(s, a, addr, n);
226
+ clear_eci_state(s);
227
return true;
228
}
229
230
@@ -XXX,XX +XXX,XX @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
231
return true;
232
}
233
234
+ s->eci_handled = true;
235
+
236
addr = op_addr_block_pre(s, a, n);
237
mem_idx = get_mem_index(s);
238
loaded_base = false;
239
@@ -XXX,XX +XXX,XX @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
240
/* Must exit loop to check un-masked IRQs */
241
s->base.is_jmp = DISAS_EXIT;
242
}
243
+ clear_eci_state(s);
244
return true;
245
}
246
247
@@ -XXX,XX +XXX,XX @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
248
return false;
249
}
250
251
+ s->eci_handled = true;
252
+
253
zero = tcg_const_i32(0);
254
for (i = 0; i < 15; i++) {
255
if (extract32(a->list, i, 1)) {
256
@@ -XXX,XX +XXX,XX @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
257
tcg_temp_free_i32(maskreg);
258
}
259
tcg_temp_free_i32(zero);
260
+ clear_eci_state(s);
261
return true;
262
}
263
264
@@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a)
265
return false;
266
}
267
268
+ /* LE/LETP is OK with ECI set and leaves it untouched */
269
+ s->eci_handled = true;
270
+
271
if (!a->f) {
272
/* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
273
arm_gen_condlabel(s);
274
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
275
dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
276
dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
277
condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
278
- dc->condexec_mask = (condexec & 0xf) << 1;
279
- dc->condexec_cond = condexec >> 4;
280
+ /*
281
+ * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
282
+ * is always the IT bits. On M-profile, some of the reserved encodings
283
+ * of IT are used instead to indicate either ICI or ECI, which
284
+ * indicate partial progress of a restartable insn that was interrupted
285
+ * partway through by an exception:
286
+ * * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
287
+ * * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
288
+ * In all cases CONDEXEC == 0 means "not in IT block or restartable
289
+ * insn, behave normally".
290
+ */
291
+ dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
292
+ dc->eci_handled = false;
293
+ dc->insn_eci_rewind = NULL;
294
+ if (condexec & 0xf) {
295
+ dc->condexec_mask = (condexec & 0xf) << 1;
296
+ dc->condexec_cond = condexec >> 4;
297
+ } else {
298
+ if (arm_feature(env, ARM_FEATURE_M)) {
299
+ dc->eci = condexec >> 4;
300
+ }
301
+ }
302
303
core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
304
dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
305
@@ -XXX,XX +XXX,XX @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
306
static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
307
{
308
DisasContext *dc = container_of(dcbase, DisasContext, base);
309
+ /*
310
+ * The ECI/ICI bits share PSR bits with the IT bits, so we
311
+ * need to reconstitute the bits from the split-out DisasContext
312
+ * fields here.
313
+ */
314
+ uint32_t condexec_bits;
315
316
- tcg_gen_insn_start(dc->base.pc_next,
317
- (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
318
- 0);
319
+ if (dc->eci) {
320
+ condexec_bits = dc->eci << 4;
321
+ } else {
322
+ condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
323
+ }
324
+ tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0);
325
dc->insn_start = tcg_last_op();
326
}
327
328
@@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
329
}
330
dc->insn = insn;
331
332
+ if (dc->eci) {
333
+ /*
334
+ * For M-profile continuable instructions, ECI/ICI handling
335
+ * falls into these cases:
336
+ * - interrupt-continuable instructions
337
+ * These are the various load/store multiple insns (both
338
+ * integer and fp). The ICI bits indicate the register
339
+ * where the load/store can resume. We make the IMPDEF
340
+ * choice to always do "instruction restart", ie ignore
341
+ * the ICI value and always execute the ldm/stm from the
342
+ * start. So all we need to do is zero PSR.ICI if the
343
+ * insn executes.
344
+ * - MVE instructions subject to beat-wise execution
345
+ * Here the ECI bits indicate which beats have already been
346
+ * executed, and we must honour this. Each insn of this
347
+ * type will handle it correctly. We will update PSR.ECI
348
+ * in the helper function for the insn (some ECI values
349
+ * mean that the following insn also has been partially
350
+ * executed).
351
+ * - Special cases which don't advance ECI
352
+ * The insns LE, LETP and BKPT leave the ECI/ICI state
353
+ * bits untouched.
354
+ * - all other insns (the common case)
355
+ * Non-zero ECI/ICI means an INVSTATE UsageFault.
356
+ * We place a rewind-marker here. Insns in the previous
357
+ * three categories will set a flag in the DisasContext.
358
+ * If the flag isn't set after we call disas_thumb_insn()
359
+ * or disas_thumb2_insn() then we know we have a "some other
360
+ * insn" case. We will rewind to the marker (ie throwing away
361
+ * all the generated code) and instead emit "take exception".
362
+ */
363
+ dc->insn_eci_rewind = tcg_last_op();
364
+ }
365
+
366
if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
367
uint32_t cond = dc->condexec_cond;
368
369
@@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
370
}
371
}
372
373
+ if (dc->eci && !dc->eci_handled) {
374
+ /*
375
+ * Insn wasn't valid for ECI/ICI at all: undo what we
376
+ * just generated and instead emit an exception
377
+ */
378
+ tcg_remove_ops_after(dc->insn_eci_rewind);
379
+ dc->condjmp = 0;
380
+ gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
381
+ default_exception_el(dc));
382
+ }
383
+
384
arm_post_translate_insn(dc);
385
386
/* Thumb is a variable-length ISA. Stop translation when the next insn
387
--
388
2.20.1
389
390
diff view generated by jsdifflib
Deleted patch
1
In commit a3494d4671797c we reworked the M-profile handling of its
2
checks for when the NOCP exception should be raised because the FPU
3
is disabled, so that (in line with the architecture) the NOCP check
4
is done early over a large range of the encoding space, and takes
5
precedence over UNDEF exceptions. As part of this, we removed the
6
code from full_vfp_access_check() which raised an exception there for
7
M-profile with the FPU disabled, because it was no longer reachable.
8
1
9
For MVE, some instructions which are outside the "coprocessor space"
10
region of the encoding space must nonetheless do "is the FPU enabled"
11
checks and possibly raise a NOCP exception. (In particular this
12
covers the MVE-specific low-overhead branch insns LCTP, DLSTP and
13
WLSTP.) To support these insns, reinstate the code in
14
full_vfp_access_check(), so that their trans functions can call
15
vfp_access_check() and get the correct behaviour.
16
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
Message-id: 20210614151007.4545-6-peter.maydell@linaro.org
20
---
21
target/arm/translate-vfp.c | 20 +++++++++++++++-----
22
1 file changed, 15 insertions(+), 5 deletions(-)
23
24
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/translate-vfp.c
27
+++ b/target/arm/translate-vfp.c
28
@@ -XXX,XX +XXX,XX @@ static void gen_preserve_fp_state(DisasContext *s)
29
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
30
{
31
if (s->fp_excp_el) {
32
- /* M-profile handled this earlier, in disas_m_nocp() */
33
- assert (!arm_dc_feature(s, ARM_FEATURE_M));
34
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
35
- syn_fp_access_trap(1, 0xe, false),
36
- s->fp_excp_el);
37
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
38
+ /*
39
+ * M-profile mostly catches the "FPU disabled" case early, in
40
+ * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
41
+ * which do coprocessor-checks are outside the large ranges of
42
+ * the encoding space handled by the patterns in m-nocp.decode,
43
+ * and for them we may need to raise NOCP here.
44
+ */
45
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
46
+ syn_uncategorized(), s->fp_excp_el);
47
+ } else {
48
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
49
+ syn_fp_access_trap(1, 0xe, false),
50
+ s->fp_excp_el);
51
+ }
52
return false;
53
}
54
55
--
56
2.20.1
57
58
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE LCTP instruction.
2
1
3
We put its decode and implementation with the other
4
low-overhead-branch insns because although it is only present if MVE
5
is implemented it is logically in the same group as the other LOB
6
insns.
7
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20210614151007.4545-7-peter.maydell@linaro.org
11
---
12
target/arm/t32.decode | 2 ++
13
target/arm/translate.c | 24 ++++++++++++++++++++++++
14
2 files changed, 26 insertions(+)
15
16
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/t32.decode
19
+++ b/target/arm/t32.decode
20
@@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24
21
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001
22
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm
23
LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
24
+
25
+ LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
26
]
27
}
28
diff --git a/target/arm/translate.c b/target/arm/translate.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate.c
31
+++ b/target/arm/translate.c
32
@@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a)
33
return true;
34
}
35
36
+static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
37
+{
38
+ /*
39
+ * M-profile Loop Clear with Tail Predication. Since our implementation
40
+ * doesn't cache branch information, all we need to do is reset
41
+ * FPSCR.LTPSIZE to 4.
42
+ */
43
+ TCGv_i32 ltpsize;
44
+
45
+ if (!dc_isar_feature(aa32_lob, s) ||
46
+ !dc_isar_feature(aa32_mve, s)) {
47
+ return false;
48
+ }
49
+
50
+ if (!vfp_access_check(s)) {
51
+ return true;
52
+ }
53
+
54
+ ltpsize = tcg_const_i32(4);
55
+ store_cpu_field(ltpsize, v7m.ltpsize);
56
+ return true;
57
+}
58
+
59
+
60
static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
61
{
62
TCGv_i32 addr, tmp;
63
--
64
2.20.1
65
66
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE WLSTP insn; this is like the existing WLS insn,
2
except that it specifies a size value which is used to set
3
FPSCR.LTPSIZE.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210614151007.4545-8-peter.maydell@linaro.org
8
---
9
target/arm/t32.decode | 8 ++++++--
10
target/arm/translate.c | 37 ++++++++++++++++++++++++++++++++++++-
11
2 files changed, 42 insertions(+), 3 deletions(-)
12
13
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/t32.decode
16
+++ b/target/arm/t32.decode
17
@@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24
18
%lob_imm 1:10 11:1 !function=times_2
19
20
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001
21
- WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm
22
- LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
23
+ WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
24
+ {
25
+ LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
26
+ # This is WLSTP
27
+ WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
28
+ }
29
30
LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
31
]
32
diff --git a/target/arm/translate.c b/target/arm/translate.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/translate.c
35
+++ b/target/arm/translate.c
36
@@ -XXX,XX +XXX,XX @@ static bool trans_WLS(DisasContext *s, arg_WLS *a)
37
return false;
38
}
39
if (a->rn == 13 || a->rn == 15) {
40
- /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
41
+ /*
42
+ * For WLSTP rn == 15 is a related encoding (LE); the
43
+ * other cases caught by this condition are all
44
+ * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
45
+ */
46
return false;
47
}
48
if (s->condexec_mask) {
49
@@ -XXX,XX +XXX,XX @@ static bool trans_WLS(DisasContext *s, arg_WLS *a)
50
*/
51
return false;
52
}
53
+ if (a->size != 4) {
54
+ /* WLSTP */
55
+ if (!dc_isar_feature(aa32_mve, s)) {
56
+ return false;
57
+ }
58
+ /*
59
+ * We need to check that the FPU is enabled here, but mustn't
60
+ * call vfp_access_check() to do that because we don't want to
61
+ * do the lazy state preservation in the "loop count is zero" case.
62
+ * Do the check-and-raise-exception by hand.
63
+ */
64
+ if (s->fp_excp_el) {
65
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
66
+ syn_uncategorized(), s->fp_excp_el);
67
+ return true;
68
+ }
69
+ }
70
+
71
nextlabel = gen_new_label();
72
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
73
tmp = load_reg(s, a->rn);
74
store_reg(s, 14, tmp);
75
+ if (a->size != 4) {
76
+ /*
77
+ * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
78
+ * lazy state preservation, new FP context creation, etc,
79
+ * that vfp_access_check() does. We know that the actual
80
+ * access check will succeed (ie it won't generate code that
81
+ * throws an exception) because we did that check by hand earlier.
82
+ */
83
+ bool ok = vfp_access_check(s);
84
+ assert(ok);
85
+ tmp = tcg_const_i32(a->size);
86
+ store_cpu_field(tmp, v7m.ltpsize);
87
+ }
88
gen_jmp_tb(s, s->base.pc_next, 1);
89
90
gen_set_label(nextlabel);
91
--
92
2.20.1
93
94
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE DLSTP insn; this is like the existing DLS
2
insn, except that it must do an FPU access check and it
3
sets LTPSIZE to the value specified in the insn.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210614151007.4545-9-peter.maydell@linaro.org
8
---
9
target/arm/t32.decode | 9 ++++++---
10
target/arm/translate.c | 23 +++++++++++++++++++++--
11
2 files changed, 27 insertions(+), 5 deletions(-)
12
13
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/t32.decode
16
+++ b/target/arm/t32.decode
17
@@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24
18
# LE and WLS immediate
19
%lob_imm 1:10 11:1 !function=times_2
20
21
- DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001
22
+ DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
23
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
24
{
25
LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
26
# This is WLSTP
27
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
28
}
29
-
30
- LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
31
+ {
32
+ LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
33
+ # This is DLSTP
34
+ DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001
35
+ }
36
]
37
}
38
diff --git a/target/arm/translate.c b/target/arm/translate.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/translate.c
41
+++ b/target/arm/translate.c
42
@@ -XXX,XX +XXX,XX @@ static bool trans_DLS(DisasContext *s, arg_DLS *a)
43
return false;
44
}
45
if (a->rn == 13 || a->rn == 15) {
46
- /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
47
+ /*
48
+ * For DLSTP rn == 15 is a related encoding (LCTP); the
49
+ * other cases caught by this condition are all
50
+ * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
51
+ */
52
return false;
53
}
54
55
- /* Not a while loop, no tail predication: just set LR to the count */
56
+ if (a->size != 4) {
57
+ /* DLSTP */
58
+ if (!dc_isar_feature(aa32_mve, s)) {
59
+ return false;
60
+ }
61
+ if (!vfp_access_check(s)) {
62
+ return true;
63
+ }
64
+ }
65
+
66
+ /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
67
tmp = load_reg(s, a->rn);
68
store_reg(s, 14, tmp);
69
+ if (a->size != 4) {
70
+ /* DLSTP: set FPSCR.LTPSIZE */
71
+ tmp = tcg_const_i32(a->size);
72
+ store_cpu_field(tmp, v7m.ltpsize);
73
+ }
74
return true;
75
}
76
77
--
78
2.20.1
79
80
diff view generated by jsdifflib
Deleted patch
1
Implement the MVE LETP insn. This is like the existing LE loop-end
2
insn, but it must perform an FPU-enabled check, and on loop-exit it
3
resets LTPSIZE to 4.
4
1
5
To accommodate the requirement to do something on loop-exit, we drop
6
the use of condlabel and instead manage both the TB exits manually,
7
in the same way we already do in trans_WLS().
8
9
The other MVE-specific change to the LE insn is that we must raise an
10
INVSTATE UsageFault insn if LTPSIZE is not 4.
11
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20210614151007.4545-10-peter.maydell@linaro.org
15
---
16
target/arm/t32.decode | 2 +-
17
target/arm/translate.c | 104 +++++++++++++++++++++++++++++++++++++----
18
2 files changed, 97 insertions(+), 9 deletions(-)
19
20
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/t32.decode
23
+++ b/target/arm/t32.decode
24
@@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24
25
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
26
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
27
{
28
- LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
29
+ LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
30
# This is WLSTP
31
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
32
}
33
diff --git a/target/arm/translate.c b/target/arm/translate.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/translate.c
36
+++ b/target/arm/translate.c
37
@@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a)
38
* any faster.
39
*/
40
TCGv_i32 tmp;
41
+ TCGLabel *loopend;
42
+ bool fpu_active;
43
44
if (!dc_isar_feature(aa32_lob, s)) {
45
return false;
46
}
47
+ if (a->f && a->tp) {
48
+ return false;
49
+ }
50
+ if (s->condexec_mask) {
51
+ /*
52
+ * LE in an IT block is CONSTRAINED UNPREDICTABLE;
53
+ * we choose to UNDEF, because otherwise our use of
54
+ * gen_goto_tb(1) would clash with the use of TB exit 1
55
+ * in the dc->condjmp condition-failed codepath in
56
+ * arm_tr_tb_stop() and we'd get an assertion.
57
+ */
58
+ return false;
59
+ }
60
+ if (a->tp) {
61
+ /* LETP */
62
+ if (!dc_isar_feature(aa32_mve, s)) {
63
+ return false;
64
+ }
65
+ if (!vfp_access_check(s)) {
66
+ s->eci_handled = true;
67
+ return true;
68
+ }
69
+ }
70
71
/* LE/LETP is OK with ECI set and leaves it untouched */
72
s->eci_handled = true;
73
74
- if (!a->f) {
75
- /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
76
- arm_gen_condlabel(s);
77
- tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
78
- /* Decrement LR */
79
- tmp = load_reg(s, 14);
80
- tcg_gen_addi_i32(tmp, tmp, -1);
81
- store_reg(s, 14, tmp);
82
+ /*
83
+ * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
84
+ * UsageFault exception for the LE insn in that case. Note that we
85
+ * are not directly checking FPSCR.LTPSIZE but instead check the
86
+ * pseudocode LTPSIZE() function, which returns 4 if the FPU is
87
+ * not currently active (ie ActiveFPState() returns false). We
88
+ * can identify not-active purely from our TB state flags, as the
89
+ * FPU is active only if:
90
+ * the FPU is enabled
91
+ * AND lazy state preservation is not active
92
+ * AND we do not need a new fp context (this is the ASPEN/FPCA check)
93
+ *
94
+ * Usually we don't need to care about this distinction between
95
+ * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
96
+ * will either take an exception or clear the conditions that make
97
+ * the FPU not active. But LE is an unusual case of a non-FP insn
98
+ * that looks at LTPSIZE.
99
+ */
100
+ fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
101
+
102
+ if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
103
+ /* Need to do a runtime check for LTPSIZE != 4 */
104
+ TCGLabel *skipexc = gen_new_label();
105
+ tmp = load_cpu_field(v7m.ltpsize);
106
+ tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
107
+ tcg_temp_free_i32(tmp);
108
+ gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
109
+ default_exception_el(s));
110
+ gen_set_label(skipexc);
111
+ }
112
+
113
+ if (a->f) {
114
+ /* Loop-forever: just jump back to the loop start */
115
+ gen_jmp(s, read_pc(s) - a->imm);
116
+ return true;
117
+ }
118
+
119
+ /*
120
+ * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
121
+ * For LE, we know at this point that LTPSIZE must be 4 and the
122
+ * loop decrement value is 1. For LETP we need to calculate the decrement
123
+ * value from LTPSIZE.
124
+ */
125
+ loopend = gen_new_label();
126
+ if (!a->tp) {
127
+ tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
128
+ tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
129
+ } else {
130
+ /*
131
+ * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
132
+ * so that decr stays live after the brcondi.
133
+ */
134
+ TCGv_i32 decr = tcg_temp_local_new_i32();
135
+ TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
136
+ tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
137
+ tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
138
+ tcg_temp_free_i32(ltpsize);
139
+
140
+ tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
141
+
142
+ tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
143
+ tcg_temp_free_i32(decr);
144
}
145
/* Jump back to the loop start */
146
gen_jmp(s, read_pc(s) - a->imm);
147
+
148
+ gen_set_label(loopend);
149
+ if (a->tp) {
150
+ /* Exits from tail-pred loops must reset LTPSIZE to 4 */
151
+ tmp = tcg_const_i32(4);
152
+ store_cpu_field(tmp, v7m.ltpsize);
153
+ }
154
+ /* End TB, continuing to following insn */
155
+ gen_jmp_tb(s, s->base.pc_next, 1);
156
return true;
157
}
158
159
--
160
2.20.1
161
162
diff view generated by jsdifflib
Deleted patch
1
Add the framework for decoding MVE insns, with the necessary new
2
files and the meson.build rules, but no actual content yet.
3
1
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210614151007.4545-11-peter.maydell@linaro.org
7
---
8
target/arm/translate-a32.h | 1 +
9
target/arm/mve.decode | 20 ++++++++++++++++++++
10
target/arm/translate-mve.c | 29 +++++++++++++++++++++++++++++
11
target/arm/translate.c | 1 +
12
target/arm/meson.build | 2 ++
13
5 files changed, 53 insertions(+)
14
create mode 100644 target/arm/mve.decode
15
create mode 100644 target/arm/translate-mve.c
16
17
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate-a32.h
20
+++ b/target/arm/translate-a32.h
21
@@ -XXX,XX +XXX,XX @@
22
23
/* Prototypes for autogenerated disassembler functions */
24
bool disas_m_nocp(DisasContext *dc, uint32_t insn);
25
+bool disas_mve(DisasContext *dc, uint32_t insn);
26
bool disas_vfp(DisasContext *s, uint32_t insn);
27
bool disas_vfp_uncond(DisasContext *s, uint32_t insn);
28
bool disas_neon_dp(DisasContext *s, uint32_t insn);
29
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
30
new file mode 100644
31
index XXXXXXX..XXXXXXX
32
--- /dev/null
33
+++ b/target/arm/mve.decode
34
@@ -XXX,XX +XXX,XX @@
35
+# M-profile MVE instruction descriptions
36
+#
37
+# Copyright (c) 2021 Linaro, Ltd
38
+#
39
+# This library is free software; you can redistribute it and/or
40
+# modify it under the terms of the GNU Lesser General Public
41
+# License as published by the Free Software Foundation; either
42
+# version 2.1 of the License, or (at your option) any later version.
43
+#
44
+# This library is distributed in the hope that it will be useful,
45
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
46
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
47
+# Lesser General Public License for more details.
48
+#
49
+# You should have received a copy of the GNU Lesser General Public
50
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
51
+
52
+#
53
+# This file is processed by scripts/decodetree.py
54
+#
55
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
56
new file mode 100644
57
index XXXXXXX..XXXXXXX
58
--- /dev/null
59
+++ b/target/arm/translate-mve.c
60
@@ -XXX,XX +XXX,XX @@
61
+/*
62
+ * ARM translation: M-profile MVE instructions
63
+ *
64
+ * Copyright (c) 2021 Linaro, Ltd.
65
+ *
66
+ * This library is free software; you can redistribute it and/or
67
+ * modify it under the terms of the GNU Lesser General Public
68
+ * License as published by the Free Software Foundation; either
69
+ * version 2.1 of the License, or (at your option) any later version.
70
+ *
71
+ * This library is distributed in the hope that it will be useful,
72
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
73
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
74
+ * Lesser General Public License for more details.
75
+ *
76
+ * You should have received a copy of the GNU Lesser General Public
77
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
78
+ */
79
+
80
+#include "qemu/osdep.h"
81
+#include "tcg/tcg-op.h"
82
+#include "tcg/tcg-op-gvec.h"
83
+#include "exec/exec-all.h"
84
+#include "exec/gen-icount.h"
85
+#include "translate.h"
86
+#include "translate-a32.h"
87
+
88
+/* Include the generated decoder */
89
+#include "decode-mve.c.inc"
90
diff --git a/target/arm/translate.c b/target/arm/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/translate.c
93
+++ b/target/arm/translate.c
94
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
95
if (disas_t32(s, insn) ||
96
disas_vfp_uncond(s, insn) ||
97
disas_neon_shared(s, insn) ||
98
+ disas_mve(s, insn) ||
99
((insn >> 28) == 0xe && disas_vfp(s, insn))) {
100
return;
101
}
102
diff --git a/target/arm/meson.build b/target/arm/meson.build
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/arm/meson.build
105
+++ b/target/arm/meson.build
106
@@ -XXX,XX +XXX,XX @@ gen = [
107
decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'),
108
decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'),
109
decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'),
110
+ decodetree.process('mve.decode', extra_args: '--decode=disas_mve'),
111
decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'),
112
decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'),
113
decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'),
114
@@ -XXX,XX +XXX,XX @@ arm_ss.add(files(
115
'tlb_helper.c',
116
'translate.c',
117
'translate-m-nocp.c',
118
+ 'translate-mve.c',
119
'translate-neon.c',
120
'translate-vfp.c',
121
'vec_helper.c',
122
--
123
2.20.1
124
125
diff view generated by jsdifflib
Deleted patch
1
For MVE, we want to re-use the large data table from expand_pred_b().
2
Move the data table to vec_helper.c so it is no longer in an SVE
3
specific source file.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210614151007.4545-14-peter.maydell@linaro.org
8
---
9
target/arm/vec_internal.h | 3 ++
10
target/arm/sve_helper.c | 103 ++------------------------------------
11
target/arm/vec_helper.c | 102 +++++++++++++++++++++++++++++++++++++
12
3 files changed, 109 insertions(+), 99 deletions(-)
13
14
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/vec_internal.h
17
+++ b/target/arm/vec_internal.h
18
@@ -XXX,XX +XXX,XX @@
19
#define H8(x) (x)
20
#define H1_8(x) (x)
21
22
+/* Data for expanding active predicate bits to bytes, for byte elements. */
23
+extern const uint64_t expand_pred_b_data[256];
24
+
25
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
26
{
27
uint64_t *d = vd + opr_sz;
28
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/sve_helper.c
31
+++ b/target/arm/sve_helper.c
32
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
33
return flags;
34
}
35
36
-/* Expand active predicate bits to bytes, for byte elements.
37
- * for (i = 0; i < 256; ++i) {
38
- * unsigned long m = 0;
39
- * for (j = 0; j < 8; j++) {
40
- * if ((i >> j) & 1) {
41
- * m |= 0xfful << (j << 3);
42
- * }
43
- * }
44
- * printf("0x%016lx,\n", m);
45
- * }
46
+/*
47
+ * Expand active predicate bits to bytes, for byte elements.
48
+ * (The data table itself is in vec_helper.c as MVE also needs it.)
49
*/
50
static inline uint64_t expand_pred_b(uint8_t byte)
51
{
52
- static const uint64_t word[256] = {
53
- 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00,
54
- 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff,
55
- 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000,
56
- 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff,
57
- 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00,
58
- 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff,
59
- 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000,
60
- 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff,
61
- 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00,
62
- 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff,
63
- 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000,
64
- 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff,
65
- 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00,
66
- 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff,
67
- 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000,
68
- 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff,
69
- 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00,
70
- 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff,
71
- 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000,
72
- 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff,
73
- 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00,
74
- 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff,
75
- 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000,
76
- 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff,
77
- 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00,
78
- 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff,
79
- 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000,
80
- 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff,
81
- 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
82
- 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff,
83
- 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000,
84
- 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff,
85
- 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00,
86
- 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff,
87
- 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000,
88
- 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff,
89
- 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00,
90
- 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff,
91
- 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000,
92
- 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff,
93
- 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00,
94
- 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff,
95
- 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000,
96
- 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff,
97
- 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00,
98
- 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff,
99
- 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000,
100
- 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff,
101
- 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00,
102
- 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff,
103
- 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000,
104
- 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff,
105
- 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00,
106
- 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff,
107
- 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000,
108
- 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff,
109
- 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00,
110
- 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff,
111
- 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000,
112
- 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff,
113
- 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00,
114
- 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff,
115
- 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000,
116
- 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff,
117
- 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00,
118
- 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff,
119
- 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000,
120
- 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff,
121
- 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00,
122
- 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff,
123
- 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000,
124
- 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff,
125
- 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00,
126
- 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff,
127
- 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000,
128
- 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff,
129
- 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00,
130
- 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff,
131
- 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000,
132
- 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff,
133
- 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00,
134
- 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff,
135
- 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000,
136
- 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff,
137
- 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00,
138
- 0xffffffffffffffff,
139
- };
140
- return word[byte];
141
+ return expand_pred_b_data[byte];
142
}
143
144
/* Similarly for half-word elements.
145
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/target/arm/vec_helper.c
148
+++ b/target/arm/vec_helper.c
149
@@ -XXX,XX +XXX,XX @@
150
#include "qemu/int128.h"
151
#include "vec_internal.h"
152
153
+/*
154
+ * Data for expanding active predicate bits to bytes, for byte elements.
155
+ *
156
+ * for (i = 0; i < 256; ++i) {
157
+ * unsigned long m = 0;
158
+ * for (j = 0; j < 8; j++) {
159
+ * if ((i >> j) & 1) {
160
+ * m |= 0xfful << (j << 3);
161
+ * }
162
+ * }
163
+ * printf("0x%016lx,\n", m);
164
+ * }
165
+ */
166
+const uint64_t expand_pred_b_data[256] = {
167
+ 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00,
168
+ 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff,
169
+ 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000,
170
+ 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff,
171
+ 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00,
172
+ 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff,
173
+ 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000,
174
+ 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff,
175
+ 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00,
176
+ 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff,
177
+ 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000,
178
+ 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff,
179
+ 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00,
180
+ 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff,
181
+ 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000,
182
+ 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff,
183
+ 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00,
184
+ 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff,
185
+ 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000,
186
+ 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff,
187
+ 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00,
188
+ 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff,
189
+ 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000,
190
+ 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff,
191
+ 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00,
192
+ 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff,
193
+ 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000,
194
+ 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff,
195
+ 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
196
+ 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff,
197
+ 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000,
198
+ 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff,
199
+ 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00,
200
+ 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff,
201
+ 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000,
202
+ 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff,
203
+ 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00,
204
+ 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff,
205
+ 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000,
206
+ 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff,
207
+ 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00,
208
+ 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff,
209
+ 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000,
210
+ 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff,
211
+ 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00,
212
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff,
213
+ 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000,
214
+ 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff,
215
+ 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00,
216
+ 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff,
217
+ 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000,
218
+ 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff,
219
+ 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00,
220
+ 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff,
221
+ 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000,
222
+ 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff,
223
+ 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00,
224
+ 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff,
225
+ 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000,
226
+ 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff,
227
+ 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00,
228
+ 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff,
229
+ 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000,
230
+ 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff,
231
+ 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00,
232
+ 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff,
233
+ 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000,
234
+ 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff,
235
+ 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00,
236
+ 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff,
237
+ 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000,
238
+ 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff,
239
+ 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00,
240
+ 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff,
241
+ 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000,
242
+ 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff,
243
+ 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00,
244
+ 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff,
245
+ 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000,
246
+ 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff,
247
+ 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00,
248
+ 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff,
249
+ 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000,
250
+ 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff,
251
+ 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00,
252
+ 0xffffffffffffffff,
253
+};
254
+
255
/* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */
256
int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3,
257
bool neg, bool round)
258
--
259
2.20.1
260
261
diff view generated by jsdifflib
Deleted patch
1
Currently the ARM SVE helper code defines locally some utility
2
functions for swapping 16-bit halfwords within 32-bit or 64-bit
3
values and for swapping 32-bit words within 64-bit values,
4
parallel to the byte-swapping bswap16/32/64 functions.
5
1
6
We want these also for the ARM MVE code, and they're potentially
7
generally useful for other targets, so move them to bitops.h.
8
(We don't put them in bswap.h with the bswap* functions because
9
they are implemented in terms of the rotate operations also
10
defined in bitops.h, and including bitops.h from bswap.h seems
11
better avoided.)
12
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
16
Message-id: 20210614151007.4545-17-peter.maydell@linaro.org
17
---
18
include/qemu/bitops.h | 29 +++++++++++++++++++++++++++++
19
target/arm/sve_helper.c | 20 --------------------
20
2 files changed, 29 insertions(+), 20 deletions(-)
21
22
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/bitops.h
25
+++ b/include/qemu/bitops.h
26
@@ -XXX,XX +XXX,XX @@ static inline uint64_t ror64(uint64_t word, unsigned int shift)
27
return (word >> shift) | (word << ((64 - shift) & 63));
28
}
29
30
+/**
31
+ * hswap32 - swap 16-bit halfwords within a 32-bit value
32
+ * @h: value to swap
33
+ */
34
+static inline uint32_t hswap32(uint32_t h)
35
+{
36
+ return rol32(h, 16);
37
+}
38
+
39
+/**
40
+ * hswap64 - swap 16-bit halfwords within a 64-bit value
41
+ * @h: value to swap
42
+ */
43
+static inline uint64_t hswap64(uint64_t h)
44
+{
45
+ uint64_t m = 0x0000ffff0000ffffull;
46
+ h = rol64(h, 32);
47
+ return ((h & m) << 16) | ((h >> 16) & m);
48
+}
49
+
50
+/**
51
+ * wswap64 - swap 32-bit words within a 64-bit value
52
+ * @h: value to swap
53
+ */
54
+static inline uint64_t wswap64(uint64_t h)
55
+{
56
+ return rol64(h, 32);
57
+}
58
+
59
/**
60
* extract32:
61
* @value: the value to extract the bit field from
62
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/sve_helper.c
65
+++ b/target/arm/sve_helper.c
66
@@ -XXX,XX +XXX,XX @@ static inline uint64_t expand_pred_s(uint8_t byte)
67
return word[byte & 0x11];
68
}
69
70
-/* Swap 16-bit words within a 32-bit word. */
71
-static inline uint32_t hswap32(uint32_t h)
72
-{
73
- return rol32(h, 16);
74
-}
75
-
76
-/* Swap 16-bit words within a 64-bit word. */
77
-static inline uint64_t hswap64(uint64_t h)
78
-{
79
- uint64_t m = 0x0000ffff0000ffffull;
80
- h = rol64(h, 32);
81
- return ((h & m) << 16) | ((h >> 16) & m);
82
-}
83
-
84
-/* Swap 32-bit words within a 64-bit word. */
85
-static inline uint64_t wswap64(uint64_t h)
86
-{
87
- return rol64(h, 32);
88
-}
89
-
90
#define LOGICAL_PPPP(NAME, FUNC) \
91
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
92
{ \
93
--
94
2.20.1
95
96
diff view generated by jsdifflib
Deleted patch
1
int128_make64() creates an Int128 from an unsigned 64 bit value; add
2
a function int128_makes64() creating an Int128 from a signed 64 bit
3
value.
4
1
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Message-id: 20210614151007.4545-34-peter.maydell@linaro.org
9
---
10
include/qemu/int128.h | 10 ++++++++++
11
1 file changed, 10 insertions(+)
12
13
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/qemu/int128.h
16
+++ b/include/qemu/int128.h
17
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_make64(uint64_t a)
18
return a;
19
}
20
21
+static inline Int128 int128_makes64(int64_t a)
22
+{
23
+ return a;
24
+}
25
+
26
static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
27
{
28
return (__uint128_t)hi << 64 | lo;
29
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_make64(uint64_t a)
30
return (Int128) { a, 0 };
31
}
32
33
+static inline Int128 int128_makes64(int64_t a)
34
+{
35
+ return (Int128) { a, a >> 63 };
36
+}
37
+
38
static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
39
{
40
return (Int128) { lo, hi };
41
--
42
2.20.1
43
44
diff view generated by jsdifflib