1 | The following changes since commit 1ea06abceec61b6f3ab33dadb0510b6e09fb61e2: | 1 | Couple of trivial fixes for rc3... |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/berrange-gitlab/tags/misc-fixes-pull-request' into staging (2021-06-14 15:59:13 +0100) | 3 | The following changes since commit 20661b75ea6093f5e59079d00a778a972d6732c5: |
4 | |||
5 | Merge tag 'pull-ppc-20220404' of https://github.com/legoater/qemu into staging (2022-04-04 15:48:55 +0100) | ||
4 | 6 | ||
5 | are available in the Git repository at: | 7 | are available in the Git repository at: |
6 | 8 | ||
7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210615 | 9 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220405 |
8 | 10 | ||
9 | for you to fetch changes up to c611c956c7fdce651e30687b1f5d19b4cab78b6a: | 11 | for you to fetch changes up to 80b952bb694a90f7e530d407b01066894e64a443: |
10 | 12 | ||
11 | include/qemu/int128.h: Add function to create Int128 from int64_t (2021-06-15 16:18:50 +0100) | 13 | docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation. (2022-04-05 09:29:28 +0100) |
12 | 14 | ||
13 | ---------------------------------------------------------------- | 15 | ---------------------------------------------------------------- |
14 | target-arm queue: | 16 | target-arm queue: |
15 | * hw/intc/arm_gicv3_cpuif: Tolerate spurious EOIR writes | 17 | * docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation. |
16 | * handle some UNALLOCATED decode cases correctly rather | 18 | * xlnx-bbram: hw/nvram: Fix uninitialized Error * |
17 | than asserting | ||
18 | * hw: virt: consider hw_compat_6_0 | ||
19 | * hw/arm: add quanta-gbs-bmc machine | ||
20 | * hw/intc/armv7m_nvic: Remove stale comment | ||
21 | * arm, acpi: Remove dependency on presence of 'virt' board | ||
22 | * target/arm: Fix mte page crossing test | ||
23 | * hw/arm: quanta-q71l add pca954x muxes | ||
24 | * target/arm: First few parts of MVE support | ||
25 | 19 | ||
26 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
27 | Heinrich Schuchardt (1): | 21 | Pavel Pisa (1): |
28 | hw: virt: consider hw_compat_6_0 | 22 | docs/system/devices/can.rst: correct links to CTU CAN FD IP core documentation. |
29 | 23 | ||
30 | Jean-Philippe Brucker (1): | 24 | Tong Ho (1): |
31 | hw/intc/arm_gicv3_cpuif: Tolerate spurious EOIR writes | 25 | xlnx-bbram: hw/nvram: Fix uninitialized Error * |
32 | 26 | ||
33 | Patrick Venture (5): | 27 | docs/system/devices/can.rst | 6 +++--- |
34 | hw/arm: add quanta-gbs-bmc machine | 28 | hw/nvram/xlnx-bbram.c | 2 +- |
35 | hw/arm: quanta-gbs-bmc add i2c comments | 29 | 2 files changed, 4 insertions(+), 4 deletions(-) |
36 | hw/arm: gsj add i2c comments | ||
37 | hw/arm: gsj add pca9548 | ||
38 | hw/arm: quanta-q71l add pca954x muxes | ||
39 | |||
40 | Peter Maydell (17): | ||
41 | hw/intc/armv7m_nvic: Remove stale comment | ||
42 | hw/acpi: Provide stub version of acpi_ghes_record_errors() | ||
43 | hw/acpi: Provide function acpi_ghes_present() | ||
44 | target/arm: Use acpi_ghes_present() to see if we report ACPI memory errors | ||
45 | target/arm: Provide and use H8 and H1_8 macros | ||
46 | target/arm: Enable FPSCR.QC bit for MVE | ||
47 | target/arm: Handle VPR semantics in existing code | ||
48 | target/arm: Add handling for PSR.ECI/ICI | ||
49 | target/arm: Let vfp_access_check() handle late NOCP checks | ||
50 | target/arm: Implement MVE LCTP | ||
51 | target/arm: Implement MVE WLSTP insn | ||
52 | target/arm: Implement MVE DLSTP | ||
53 | target/arm: Implement MVE LETP insn | ||
54 | target/arm: Add framework for MVE decode | ||
55 | target/arm: Move expand_pred_b() data to vec_helper.c | ||
56 | bitops.h: Provide hswap32(), hswap64(), wswap64() swapping operations | ||
57 | include/qemu/int128.h: Add function to create Int128 from int64_t | ||
58 | |||
59 | Richard Henderson (4): | ||
60 | target/arm: Diagnose UNALLOCATED in disas_simd_two_reg_misc_fp16 | ||
61 | target/arm: Remove fprintf from disas_simd_mod_imm | ||
62 | target/arm: Diagnose UNALLOCATED in disas_simd_three_reg_same_fp16 | ||
63 | target/arm: Fix mte page crossing test | ||
64 | |||
65 | include/hw/acpi/ghes.h | 9 + | ||
66 | include/qemu/bitops.h | 29 +++ | ||
67 | include/qemu/int128.h | 10 + | ||
68 | target/arm/translate-a32.h | 2 + | ||
69 | target/arm/translate.h | 9 + | ||
70 | target/arm/vec_internal.h | 9 + | ||
71 | target/arm/mve.decode | 20 ++ | ||
72 | target/arm/t32.decode | 15 +- | ||
73 | hw/acpi/ghes-stub.c | 22 +++ | ||
74 | hw/acpi/ghes.c | 17 ++ | ||
75 | hw/arm/aspeed.c | 11 +- | ||
76 | hw/arm/npcm7xx_boards.c | 107 ++++++++++- | ||
77 | hw/arm/virt.c | 2 + | ||
78 | hw/intc/arm_gicv3_cpuif.c | 5 +- | ||
79 | hw/intc/armv7m_nvic.c | 6 - | ||
80 | target/arm/kvm64.c | 6 +- | ||
81 | target/arm/m_helper.c | 54 +++++- | ||
82 | target/arm/mte_helper.c | 2 +- | ||
83 | target/arm/sve_helper.c | 381 +++++++++++++------------------------- | ||
84 | target/arm/translate-a64.c | 87 +++++---- | ||
85 | target/arm/translate-m-nocp.c | 16 +- | ||
86 | target/arm/translate-mve.c | 29 +++ | ||
87 | target/arm/translate-vfp.c | 65 +++++-- | ||
88 | target/arm/translate.c | 300 ++++++++++++++++++++++++++++-- | ||
89 | target/arm/vec_helper.c | 116 +++++++++++- | ||
90 | target/arm/vfp_helper.c | 3 +- | ||
91 | tests/tcg/aarch64/mte-7.c | 31 ++++ | ||
92 | hw/acpi/meson.build | 6 +- | ||
93 | hw/arm/Kconfig | 2 + | ||
94 | target/arm/meson.build | 2 + | ||
95 | tests/tcg/aarch64/Makefile.target | 2 +- | ||
96 | 31 files changed, 1019 insertions(+), 356 deletions(-) | ||
97 | create mode 100644 target/arm/mve.decode | ||
98 | create mode 100644 hw/acpi/ghes-stub.c | ||
99 | create mode 100644 target/arm/translate-mve.c | ||
100 | create mode 100644 tests/tcg/aarch64/mte-7.c | ||
101 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Tong Ho <tong.ho@xilinx.com> |
---|---|---|---|
2 | 2 | ||
3 | This fprintf+assert has been in place since the beginning. | 3 | This adds required initialization of Error * variable. |
4 | It is prior to the fp_access_check, so we're still good to | ||
5 | raise sigill here. | ||
6 | 4 | ||
7 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/381 | 5 | Signed-off-by: Tong Ho <tong.ho@xilinx.com> |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com> |
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Message-id: 20210604183506.916654-2-richard.henderson@linaro.org | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 8 | --- |
14 | target/arm/translate-a64.c | 4 ++-- | 9 | hw/nvram/xlnx-bbram.c | 2 +- |
15 | 1 file changed, 2 insertions(+), 2 deletions(-) | 10 | 1 file changed, 1 insertion(+), 1 deletion(-) |
16 | 11 | ||
17 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | 12 | diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c |
18 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/translate-a64.c | 14 | --- a/hw/nvram/xlnx-bbram.c |
20 | +++ b/target/arm/translate-a64.c | 15 | +++ b/hw/nvram/xlnx-bbram.c |
21 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) | 16 | @@ -XXX,XX +XXX,XX @@ static bool bbram_pgm_enabled(XlnxBBRam *s) |
22 | case 0x7f: /* FSQRT (vector) */ | 17 | |
23 | break; | 18 | static void bbram_bdrv_error(XlnxBBRam *s, int rc, gchar *detail) |
24 | default: | 19 | { |
25 | - fprintf(stderr, "%s: insn 0x%04x fpop 0x%2x\n", __func__, insn, fpop); | 20 | - Error *errp; |
26 | - g_assert_not_reached(); | 21 | + Error *errp = NULL; |
27 | + unallocated_encoding(s); | 22 | |
28 | + return; | 23 | error_setg_errno(&errp, -rc, "%s: BBRAM backstore %s failed.", |
29 | } | 24 | blk_name(s->blk), detail); |
30 | |||
31 | |||
32 | -- | 25 | -- |
33 | 2.20.1 | 26 | 2.25.1 |
34 | |||
35 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | From: Pavel Pisa <pisa@cmp.felk.cvut.cz> |
---|---|---|---|
2 | 2 | ||
3 | Commit 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access | 3 | Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz> |
4 | check logic") added an assert_not_reached() if the guest writes the EOIR | 4 | Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com> |
5 | register while no interrupt is active. | 5 | Message-id: 20220402204523.32643-1-pisa@cmp.felk.cvut.cz |
6 | |||
7 | It turns out some software does this: EDK2, in | ||
8 | GicV3ExitBootServicesEvent(), unconditionally writes EOIR for all | ||
9 | interrupts that it manages. This now causes QEMU to abort when running | ||
10 | UEFI on a VM with GICv3. Although it is UNPREDICTABLE behavior and EDK2 | ||
11 | does need fixing, the punishment seems a little harsh, especially since | ||
12 | icc_eoir_write() already tolerates writes of nonexistent interrupt | ||
13 | numbers. Display a guest error and tolerate spurious EOIR writes. | ||
14 | |||
15 | Fixes: 382c7160d1cd ("hw/intc/arm_gicv3_cpuif: Fix EOIR write access check logic") | ||
16 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
17 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
18 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
19 | Tested-by: Alex Bennée <alex.bennee@linaro.org> | ||
20 | Message-id: 20210604130352.1887560-1-jean-philippe@linaro.org | ||
21 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
22 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
23 | --- | 7 | --- |
24 | hw/intc/arm_gicv3_cpuif.c | 5 ++++- | 8 | docs/system/devices/can.rst | 6 +++--- |
25 | 1 file changed, 4 insertions(+), 1 deletion(-) | 9 | 1 file changed, 3 insertions(+), 3 deletions(-) |
26 | 10 | ||
27 | diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c | 11 | diff --git a/docs/system/devices/can.rst b/docs/system/devices/can.rst |
28 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/hw/intc/arm_gicv3_cpuif.c | 13 | --- a/docs/system/devices/can.rst |
30 | +++ b/hw/intc/arm_gicv3_cpuif.c | 14 | +++ b/docs/system/devices/can.rst |
31 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ Links to other resources |
32 | 16 | (5) `GNU/Linux, CAN and CANopen in Real-time Control Applications Slides from LinuxDays 2017 (include updated RTLWS 2015 content) <https://www.linuxdays.cz/2017/video/Pavel_Pisa-CAN_canopen.pdf>`_ | |
33 | #include "qemu/osdep.h" | 17 | (6) `Linux SocketCAN utilities <https://github.com/linux-can/can-utils>`_ |
34 | #include "qemu/bitops.h" | 18 | (7) `CTU CAN FD project including core VHDL design, Linux driver, test utilities etc. <https://gitlab.fel.cvut.cz/canbus/ctucanfd_ip_core>`_ |
35 | +#include "qemu/log.h" | 19 | - (8) `CTU CAN FD Core Datasheet Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/Progdokum.pdf>`_ |
36 | #include "qemu/main-loop.h" | 20 | - (9) `CTU CAN FD Core System Architecture Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/ctu_can_fd_architecture.pdf>`_ |
37 | #include "trace.h" | 21 | - (10) `CTU CAN FD Driver Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/driver_doc/ctucanfd-driver.html>`_ |
38 | #include "gicv3_internal.h" | 22 | + (8) `CTU CAN FD Core Datasheet Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/Datasheet.pdf>`_ |
39 | @@ -XXX,XX +XXX,XX @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, | 23 | + (9) `CTU CAN FD Core System Architecture Documentation <http://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/System_Architecture.pdf>`_ |
40 | } | 24 | + (10) `CTU CAN FD Driver Documentation <https://canbus.pages.fel.cvut.cz/ctucanfd_ip_core/doc/linux_driver/build/ctucanfd-driver.html>`_ |
41 | break; | 25 | (11) `Integration with PCIe interfacing for Intel/Altera Cyclone IV based board <https://gitlab.fel.cvut.cz/canbus/pcie-ctu_can_fd>`_ |
42 | default: | ||
43 | - g_assert_not_reached(); | ||
44 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
45 | + "%s: IRQ %d isn't active\n", __func__, irq); | ||
46 | + return; | ||
47 | } | ||
48 | |||
49 | icc_drop_prio(cs, grp); | ||
50 | -- | 26 | -- |
51 | 2.20.1 | 27 | 2.25.1 |
52 | |||
53 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | The default of this switch is truly unreachable. | ||
4 | The switch selector is 3 bits, and all 8 cases are present. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Message-id: 20210604183506.916654-3-richard.henderson@linaro.org | ||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/translate-a64.c | 1 - | ||
13 | 1 file changed, 1 deletion(-) | ||
14 | |||
15 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/translate-a64.c | ||
18 | +++ b/target/arm/translate-a64.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) | ||
20 | } | ||
21 | break; | ||
22 | default: | ||
23 | - fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1); | ||
24 | g_assert_not_reached(); | ||
25 | } | ||
26 | |||
27 | -- | ||
28 | 2.20.1 | ||
29 | |||
30 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | This fprintf+assert has been in place since the beginning. | ||
4 | It is after the fp_access_check, so we need to move the | ||
5 | check up. Fold that in to the pairwise filter. | ||
6 | |||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Message-id: 20210604183506.916654-4-richard.henderson@linaro.org | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | target/arm/translate-a64.c | 82 +++++++++++++++++++++++--------------- | ||
14 | 1 file changed, 50 insertions(+), 32 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/translate-a64.c | ||
19 | +++ b/target/arm/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) | ||
21 | */ | ||
22 | static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) | ||
23 | { | ||
24 | - int opcode, fpopcode; | ||
25 | - int is_q, u, a, rm, rn, rd; | ||
26 | - int datasize, elements; | ||
27 | - int pass; | ||
28 | + int opcode = extract32(insn, 11, 3); | ||
29 | + int u = extract32(insn, 29, 1); | ||
30 | + int a = extract32(insn, 23, 1); | ||
31 | + int is_q = extract32(insn, 30, 1); | ||
32 | + int rm = extract32(insn, 16, 5); | ||
33 | + int rn = extract32(insn, 5, 5); | ||
34 | + int rd = extract32(insn, 0, 5); | ||
35 | + /* | ||
36 | + * For these floating point ops, the U, a and opcode bits | ||
37 | + * together indicate the operation. | ||
38 | + */ | ||
39 | + int fpopcode = opcode | (a << 3) | (u << 4); | ||
40 | + int datasize = is_q ? 128 : 64; | ||
41 | + int elements = datasize / 16; | ||
42 | + bool pairwise; | ||
43 | TCGv_ptr fpst; | ||
44 | - bool pairwise = false; | ||
45 | + int pass; | ||
46 | + | ||
47 | + switch (fpopcode) { | ||
48 | + case 0x0: /* FMAXNM */ | ||
49 | + case 0x1: /* FMLA */ | ||
50 | + case 0x2: /* FADD */ | ||
51 | + case 0x3: /* FMULX */ | ||
52 | + case 0x4: /* FCMEQ */ | ||
53 | + case 0x6: /* FMAX */ | ||
54 | + case 0x7: /* FRECPS */ | ||
55 | + case 0x8: /* FMINNM */ | ||
56 | + case 0x9: /* FMLS */ | ||
57 | + case 0xa: /* FSUB */ | ||
58 | + case 0xe: /* FMIN */ | ||
59 | + case 0xf: /* FRSQRTS */ | ||
60 | + case 0x13: /* FMUL */ | ||
61 | + case 0x14: /* FCMGE */ | ||
62 | + case 0x15: /* FACGE */ | ||
63 | + case 0x17: /* FDIV */ | ||
64 | + case 0x1a: /* FABD */ | ||
65 | + case 0x1c: /* FCMGT */ | ||
66 | + case 0x1d: /* FACGT */ | ||
67 | + pairwise = false; | ||
68 | + break; | ||
69 | + case 0x10: /* FMAXNMP */ | ||
70 | + case 0x12: /* FADDP */ | ||
71 | + case 0x16: /* FMAXP */ | ||
72 | + case 0x18: /* FMINNMP */ | ||
73 | + case 0x1e: /* FMINP */ | ||
74 | + pairwise = true; | ||
75 | + break; | ||
76 | + default: | ||
77 | + unallocated_encoding(s); | ||
78 | + return; | ||
79 | + } | ||
80 | |||
81 | if (!dc_isar_feature(aa64_fp16, s)) { | ||
82 | unallocated_encoding(s); | ||
83 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | - /* For these floating point ops, the U, a and opcode bits | ||
88 | - * together indicate the operation. | ||
89 | - */ | ||
90 | - opcode = extract32(insn, 11, 3); | ||
91 | - u = extract32(insn, 29, 1); | ||
92 | - a = extract32(insn, 23, 1); | ||
93 | - is_q = extract32(insn, 30, 1); | ||
94 | - rm = extract32(insn, 16, 5); | ||
95 | - rn = extract32(insn, 5, 5); | ||
96 | - rd = extract32(insn, 0, 5); | ||
97 | - | ||
98 | - fpopcode = opcode | (a << 3) | (u << 4); | ||
99 | - datasize = is_q ? 128 : 64; | ||
100 | - elements = datasize / 16; | ||
101 | - | ||
102 | - switch (fpopcode) { | ||
103 | - case 0x10: /* FMAXNMP */ | ||
104 | - case 0x12: /* FADDP */ | ||
105 | - case 0x16: /* FMAXP */ | ||
106 | - case 0x18: /* FMINNMP */ | ||
107 | - case 0x1e: /* FMINP */ | ||
108 | - pairwise = true; | ||
109 | - break; | ||
110 | - } | ||
111 | - | ||
112 | fpst = fpstatus_ptr(FPST_FPCR_F16); | ||
113 | |||
114 | if (pairwise) { | ||
115 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) | ||
116 | gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); | ||
117 | break; | ||
118 | default: | ||
119 | - fprintf(stderr, "%s: insn 0x%04x, fpop 0x%2x @ 0x%" PRIx64 "\n", | ||
120 | - __func__, insn, fpopcode, s->pc_curr); | ||
121 | g_assert_not_reached(); | ||
122 | } | ||
123 | |||
124 | -- | ||
125 | 2.20.1 | ||
126 | |||
127 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Heinrich Schuchardt <xypron.glpk@gmx.de> | ||
2 | 1 | ||
3 | virt-6.0 must consider hw_compat_6_0. | ||
4 | |||
5 | Fixes: da7e13c00b59 ("hw: add compat machines for 6.1") | ||
6 | Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de> | ||
7 | Reviewed-by: Cornelia Huck <cohuck@redhat.com> | ||
8 | Message-id: 20210610183500.54207-1-xypron.glpk@gmx.de | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | hw/arm/virt.c | 2 ++ | ||
12 | 1 file changed, 2 insertions(+) | ||
13 | |||
14 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/arm/virt.c | ||
17 | +++ b/hw/arm/virt.c | ||
18 | @@ -XXX,XX +XXX,XX @@ DEFINE_VIRT_MACHINE_AS_LATEST(6, 1) | ||
19 | |||
20 | static void virt_machine_6_0_options(MachineClass *mc) | ||
21 | { | ||
22 | + virt_machine_6_1_options(mc); | ||
23 | + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); | ||
24 | } | ||
25 | DEFINE_VIRT_MACHINE(6, 0) | ||
26 | |||
27 | -- | ||
28 | 2.20.1 | ||
29 | |||
30 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Patrick Venture <venture@google.com> | ||
2 | 1 | ||
3 | Adds initial quanta-gbs-bmc machine support. | ||
4 | |||
5 | Tested: Boots to userspace. | ||
6 | Signed-off-by: Patrick Venture <venture@google.com> | ||
7 | Reviewed-by: Brandon Kim <brandonkim@google.com> | ||
8 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
9 | Message-id: 20210608193605.2611114-2-venture@google.com | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | hw/arm/npcm7xx_boards.c | 33 +++++++++++++++++++++++++++++++++ | ||
14 | 1 file changed, 33 insertions(+) | ||
15 | |||
16 | diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/hw/arm/npcm7xx_boards.c | ||
19 | +++ b/hw/arm/npcm7xx_boards.c | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | |||
22 | #define NPCM750_EVB_POWER_ON_STRAPS 0x00001ff7 | ||
23 | #define QUANTA_GSJ_POWER_ON_STRAPS 0x00001fff | ||
24 | +#define QUANTA_GBS_POWER_ON_STRAPS 0x000017ff | ||
25 | |||
26 | static const char npcm7xx_default_bootrom[] = "npcm7xx_bootrom.bin"; | ||
27 | |||
28 | @@ -XXX,XX +XXX,XX @@ static void quanta_gsj_init(MachineState *machine) | ||
29 | npcm7xx_load_kernel(machine, soc); | ||
30 | } | ||
31 | |||
32 | +static void quanta_gbs_init(MachineState *machine) | ||
33 | +{ | ||
34 | + NPCM7xxState *soc; | ||
35 | + | ||
36 | + soc = npcm7xx_create_soc(machine, QUANTA_GBS_POWER_ON_STRAPS); | ||
37 | + npcm7xx_connect_dram(soc, machine->ram); | ||
38 | + qdev_realize(DEVICE(soc), NULL, &error_fatal); | ||
39 | + | ||
40 | + npcm7xx_load_bootrom(machine, soc); | ||
41 | + | ||
42 | + npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", | ||
43 | + drive_get(IF_MTD, 0, 0)); | ||
44 | + | ||
45 | + npcm7xx_load_kernel(machine, soc); | ||
46 | +} | ||
47 | + | ||
48 | static void npcm7xx_set_soc_type(NPCM7xxMachineClass *nmc, const char *type) | ||
49 | { | ||
50 | NPCM7xxClass *sc = NPCM7XX_CLASS(object_class_by_name(type)); | ||
51 | @@ -XXX,XX +XXX,XX @@ static void gsj_machine_class_init(ObjectClass *oc, void *data) | ||
52 | mc->default_ram_size = 512 * MiB; | ||
53 | }; | ||
54 | |||
55 | +static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data) | ||
56 | +{ | ||
57 | + NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc); | ||
58 | + MachineClass *mc = MACHINE_CLASS(oc); | ||
59 | + | ||
60 | + npcm7xx_set_soc_type(nmc, TYPE_NPCM730); | ||
61 | + | ||
62 | + mc->desc = "Quanta GBS (Cortex-A9)"; | ||
63 | + mc->init = quanta_gbs_init; | ||
64 | + mc->default_ram_size = 1 * GiB; | ||
65 | +} | ||
66 | + | ||
67 | static const TypeInfo npcm7xx_machine_types[] = { | ||
68 | { | ||
69 | .name = TYPE_NPCM7XX_MACHINE, | ||
70 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo npcm7xx_machine_types[] = { | ||
71 | .name = MACHINE_TYPE_NAME("quanta-gsj"), | ||
72 | .parent = TYPE_NPCM7XX_MACHINE, | ||
73 | .class_init = gsj_machine_class_init, | ||
74 | + }, { | ||
75 | + .name = MACHINE_TYPE_NAME("quanta-gbs-bmc"), | ||
76 | + .parent = TYPE_NPCM7XX_MACHINE, | ||
77 | + .class_init = gbs_bmc_machine_class_init, | ||
78 | }, | ||
79 | }; | ||
80 | |||
81 | -- | ||
82 | 2.20.1 | ||
83 | |||
84 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Patrick Venture <venture@google.com> | ||
2 | 1 | ||
3 | Add a comment and i2c method that describes the board layout. | ||
4 | |||
5 | Tested: firmware booted to userspace. | ||
6 | Signed-off-by: Patrick Venture <venture@google.com> | ||
7 | Reviewed-by: Brandon Kim <brandonkim@google.com> | ||
8 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
9 | Message-id: 20210608193605.2611114-3-venture@google.com | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | hw/arm/npcm7xx_boards.c | 60 +++++++++++++++++++++++++++++++++++++++++ | ||
13 | 1 file changed, 60 insertions(+) | ||
14 | |||
15 | diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/arm/npcm7xx_boards.c | ||
18 | +++ b/hw/arm/npcm7xx_boards.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) | ||
20 | npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1); | ||
21 | } | ||
22 | |||
23 | +static void quanta_gbs_i2c_init(NPCM7xxState *soc) | ||
24 | +{ | ||
25 | + /* | ||
26 | + * i2c-0: | ||
27 | + * pca9546@71 | ||
28 | + * | ||
29 | + * i2c-1: | ||
30 | + * pca9535@24 | ||
31 | + * pca9535@20 | ||
32 | + * pca9535@21 | ||
33 | + * pca9535@22 | ||
34 | + * pca9535@23 | ||
35 | + * pca9535@25 | ||
36 | + * pca9535@26 | ||
37 | + * | ||
38 | + * i2c-2: | ||
39 | + * sbtsi@4c | ||
40 | + * | ||
41 | + * i2c-5: | ||
42 | + * atmel,24c64@50 mb_fru | ||
43 | + * pca9546@71 | ||
44 | + * - channel 0: max31725@54 | ||
45 | + * - channel 1: max31725@55 | ||
46 | + * - channel 2: max31725@5d | ||
47 | + * atmel,24c64@51 fan_fru | ||
48 | + * - channel 3: atmel,24c64@52 hsbp_fru | ||
49 | + * | ||
50 | + * i2c-6: | ||
51 | + * pca9545@73 | ||
52 | + * | ||
53 | + * i2c-7: | ||
54 | + * pca9545@72 | ||
55 | + * | ||
56 | + * i2c-8: | ||
57 | + * adi,adm1272@10 | ||
58 | + * | ||
59 | + * i2c-9: | ||
60 | + * pca9546@71 | ||
61 | + * - channel 0: isil,isl68137@60 | ||
62 | + * - channel 1: isil,isl68137@61 | ||
63 | + * - channel 2: isil,isl68137@63 | ||
64 | + * - channel 3: isil,isl68137@45 | ||
65 | + * | ||
66 | + * i2c-10: | ||
67 | + * pca9545@71 | ||
68 | + * | ||
69 | + * i2c-11: | ||
70 | + * pca9545@76 | ||
71 | + * | ||
72 | + * i2c-12: | ||
73 | + * maxim,max34451@4e | ||
74 | + * isil,isl68137@5d | ||
75 | + * isil,isl68137@5e | ||
76 | + * | ||
77 | + * i2c-14: | ||
78 | + * pca9545@70 | ||
79 | + */ | ||
80 | +} | ||
81 | + | ||
82 | static void npcm750_evb_init(MachineState *machine) | ||
83 | { | ||
84 | NPCM7xxState *soc; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void quanta_gbs_init(MachineState *machine) | ||
86 | npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", | ||
87 | drive_get(IF_MTD, 0, 0)); | ||
88 | |||
89 | + quanta_gbs_i2c_init(soc); | ||
90 | npcm7xx_load_kernel(machine, soc); | ||
91 | } | ||
92 | |||
93 | -- | ||
94 | 2.20.1 | ||
95 | |||
96 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In commit da6d674e509f0939b we split the NVIC code out from the GIC. | ||
2 | This allowed us to specify the NVIC's default value for the num-irq | ||
3 | property (64) in the usual way in its property list, and we deleted | ||
4 | the previous hack where we updated the value in the state struct in | ||
5 | the instance init function. Remove a stale comment about that hack | ||
6 | which we forgot to delete at that time. | ||
7 | 1 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20210614161243.14211-1-peter.maydell@linaro.org | ||
12 | --- | ||
13 | hw/intc/armv7m_nvic.c | 6 ------ | ||
14 | 1 file changed, 6 deletions(-) | ||
15 | |||
16 | diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/hw/intc/armv7m_nvic.c | ||
19 | +++ b/hw/intc/armv7m_nvic.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) | ||
21 | |||
22 | static void armv7m_nvic_instance_init(Object *obj) | ||
23 | { | ||
24 | - /* We have a different default value for the num-irq property | ||
25 | - * than our superclass. This function runs after qdev init | ||
26 | - * has set the defaults from the Property array and before | ||
27 | - * any user-specified property setting, so just modify the | ||
28 | - * value in the GICState struct. | ||
29 | - */ | ||
30 | DeviceState *dev = DEVICE(obj); | ||
31 | NVICState *nvic = NVIC(obj); | ||
32 | SysBusDevice *sbd = SYS_BUS_DEVICE(obj); | ||
33 | -- | ||
34 | 2.20.1 | ||
35 | |||
36 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Generic code in target/arm wants to call acpi_ghes_record_errors(); | ||
2 | provide a stub version so that we don't fail to link when | ||
3 | CONFIG_ACPI_APEI is not set. This requires us to add a new | ||
4 | ghes-stub.c file to contain it and the meson.build mechanics | ||
5 | to use it when appropriate. | ||
6 | 1 | ||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com> | ||
10 | Message-id: 20210603171259.27962-2-peter.maydell@linaro.org | ||
11 | --- | ||
12 | hw/acpi/ghes-stub.c | 17 +++++++++++++++++ | ||
13 | hw/acpi/meson.build | 6 +++--- | ||
14 | 2 files changed, 20 insertions(+), 3 deletions(-) | ||
15 | create mode 100644 hw/acpi/ghes-stub.c | ||
16 | |||
17 | diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c | ||
18 | new file mode 100644 | ||
19 | index XXXXXXX..XXXXXXX | ||
20 | --- /dev/null | ||
21 | +++ b/hw/acpi/ghes-stub.c | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | +/* | ||
24 | + * Support for generating APEI tables and recording CPER for Guests: | ||
25 | + * stub functions. | ||
26 | + * | ||
27 | + * Copyright (c) 2021 Linaro, Ltd | ||
28 | + * | ||
29 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
30 | + * See the COPYING file in the top-level directory. | ||
31 | + */ | ||
32 | + | ||
33 | +#include "qemu/osdep.h" | ||
34 | +#include "hw/acpi/ghes.h" | ||
35 | + | ||
36 | +int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) | ||
37 | +{ | ||
38 | + return -1; | ||
39 | +} | ||
40 | diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/hw/acpi/meson.build | ||
43 | +++ b/hw/acpi/meson.build | ||
44 | @@ -XXX,XX +XXX,XX @@ acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) | ||
45 | acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) | ||
46 | acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) | ||
47 | acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) | ||
48 | -acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c')) | ||
49 | +acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false:('ghes-stub.c')) | ||
50 | acpi_ss.add(when: 'CONFIG_ACPI_X86', if_true: files('core.c', 'piix4.c', 'pcihp.c'), if_false: files('acpi-stub.c')) | ||
51 | acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c')) | ||
52 | acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) | ||
53 | acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) | ||
54 | acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c')) | ||
55 | -softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c')) | ||
56 | +softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c')) | ||
57 | softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss) | ||
58 | softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c', | ||
59 | - 'acpi-x86-stub.c', 'ipmi-stub.c')) | ||
60 | + 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c')) | ||
61 | -- | ||
62 | 2.20.1 | ||
63 | |||
64 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Allow code elsewhere in the system to check whether the ACPI GHES | ||
2 | table is present, so it can determine whether it is OK to try to | ||
3 | record an error by calling acpi_ghes_record_errors(). | ||
4 | 1 | ||
5 | (We don't need to migrate the new 'present' field in AcpiGhesState, | ||
6 | because it is set once at system initialization and doesn't change.) | ||
7 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com> | ||
11 | Message-id: 20210603171259.27962-3-peter.maydell@linaro.org | ||
12 | --- | ||
13 | include/hw/acpi/ghes.h | 9 +++++++++ | ||
14 | hw/acpi/ghes-stub.c | 5 +++++ | ||
15 | hw/acpi/ghes.c | 17 +++++++++++++++++ | ||
16 | 3 files changed, 31 insertions(+) | ||
17 | |||
18 | diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/hw/acpi/ghes.h | ||
21 | +++ b/include/hw/acpi/ghes.h | ||
22 | @@ -XXX,XX +XXX,XX @@ enum { | ||
23 | |||
24 | typedef struct AcpiGhesState { | ||
25 | uint64_t ghes_addr_le; | ||
26 | + bool present; /* True if GHES is present at all on this board */ | ||
27 | } AcpiGhesState; | ||
28 | |||
29 | void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); | ||
30 | @@ -XXX,XX +XXX,XX @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker, | ||
31 | void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, | ||
32 | GArray *hardware_errors); | ||
33 | int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); | ||
34 | + | ||
35 | +/** | ||
36 | + * acpi_ghes_present: Report whether ACPI GHES table is present | ||
37 | + * | ||
38 | + * Returns: true if the system has an ACPI GHES table and it is | ||
39 | + * safe to call acpi_ghes_record_errors() to record a memory error. | ||
40 | + */ | ||
41 | +bool acpi_ghes_present(void); | ||
42 | #endif | ||
43 | diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/hw/acpi/ghes-stub.c | ||
46 | +++ b/hw/acpi/ghes-stub.c | ||
47 | @@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) | ||
48 | { | ||
49 | return -1; | ||
50 | } | ||
51 | + | ||
52 | +bool acpi_ghes_present(void) | ||
53 | +{ | ||
54 | + return false; | ||
55 | +} | ||
56 | diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/hw/acpi/ghes.c | ||
59 | +++ b/hw/acpi/ghes.c | ||
60 | @@ -XXX,XX +XXX,XX @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, | ||
61 | /* Create a read-write fw_cfg file for Address */ | ||
62 | fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, | ||
63 | NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); | ||
64 | + | ||
65 | + ags->present = true; | ||
66 | } | ||
67 | |||
68 | int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) | ||
69 | @@ -XXX,XX +XXX,XX @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) | ||
70 | |||
71 | return ret; | ||
72 | } | ||
73 | + | ||
74 | +bool acpi_ghes_present(void) | ||
75 | +{ | ||
76 | + AcpiGedState *acpi_ged_state; | ||
77 | + AcpiGhesState *ags; | ||
78 | + | ||
79 | + acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, | ||
80 | + NULL)); | ||
81 | + | ||
82 | + if (!acpi_ged_state) { | ||
83 | + return false; | ||
84 | + } | ||
85 | + ags = &acpi_ged_state->ghes_state; | ||
86 | + return ags->present; | ||
87 | +} | ||
88 | -- | ||
89 | 2.20.1 | ||
90 | |||
91 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The virt_is_acpi_enabled() function is specific to the virt board, as | ||
2 | is the check for its 'ras' property. Use the new acpi_ghes_present() | ||
3 | function to check whether we should report memory errors via | ||
4 | acpi_ghes_record_errors(). | ||
5 | 1 | ||
6 | This avoids a link error if QEMU was built without support for the | ||
7 | virt board, and provides a mechanism that can be used by any future | ||
8 | board models that want to add ACPI memory error reporting support | ||
9 | (they only need to call acpi_ghes_add_fw_cfg()). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com> | ||
14 | Message-id: 20210603171259.27962-4-peter.maydell@linaro.org | ||
15 | --- | ||
16 | target/arm/kvm64.c | 6 +----- | ||
17 | 1 file changed, 1 insertion(+), 5 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/kvm64.c | ||
22 | +++ b/target/arm/kvm64.c | ||
23 | @@ -XXX,XX +XXX,XX @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) | ||
24 | { | ||
25 | ram_addr_t ram_addr; | ||
26 | hwaddr paddr; | ||
27 | - Object *obj = qdev_get_machine(); | ||
28 | - VirtMachineState *vms = VIRT_MACHINE(obj); | ||
29 | - bool acpi_enabled = virt_is_acpi_enabled(vms); | ||
30 | |||
31 | assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); | ||
32 | |||
33 | - if (acpi_enabled && addr && | ||
34 | - object_property_get_bool(obj, "ras", NULL)) { | ||
35 | + if (acpi_ghes_present() && addr) { | ||
36 | ram_addr = qemu_ram_addr_from_host(addr); | ||
37 | if (ram_addr != RAM_ADDR_INVALID && | ||
38 | kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { | ||
39 | -- | ||
40 | 2.20.1 | ||
41 | |||
42 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | The test was off-by-one, because tag_last points to the | ||
4 | last byte of the tag to check, thus tag_last - prev_page | ||
5 | will equal TARGET_PAGE_SIZE when we use the first byte | ||
6 | of the next page. | ||
7 | |||
8 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/403 | ||
9 | Reported-by: Peter Collingbourne <pcc@google.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20210612195707.840217-1-richard.henderson@linaro.org | ||
12 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | target/arm/mte_helper.c | 2 +- | ||
16 | tests/tcg/aarch64/mte-7.c | 31 +++++++++++++++++++++++++++++++ | ||
17 | tests/tcg/aarch64/Makefile.target | 2 +- | ||
18 | 3 files changed, 33 insertions(+), 2 deletions(-) | ||
19 | create mode 100644 tests/tcg/aarch64/mte-7.c | ||
20 | |||
21 | diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/target/arm/mte_helper.c | ||
24 | +++ b/target/arm/mte_helper.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, | ||
26 | prev_page = ptr & TARGET_PAGE_MASK; | ||
27 | next_page = prev_page + TARGET_PAGE_SIZE; | ||
28 | |||
29 | - if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) { | ||
30 | + if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) { | ||
31 | /* Memory access stays on one page. */ | ||
32 | tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; | ||
33 | mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1, | ||
34 | diff --git a/tests/tcg/aarch64/mte-7.c b/tests/tcg/aarch64/mte-7.c | ||
35 | new file mode 100644 | ||
36 | index XXXXXXX..XXXXXXX | ||
37 | --- /dev/null | ||
38 | +++ b/tests/tcg/aarch64/mte-7.c | ||
39 | @@ -XXX,XX +XXX,XX @@ | ||
40 | +/* | ||
41 | + * Memory tagging, unaligned access crossing pages. | ||
42 | + * https://gitlab.com/qemu-project/qemu/-/issues/403 | ||
43 | + * | ||
44 | + * Copyright (c) 2021 Linaro Ltd | ||
45 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
46 | + */ | ||
47 | + | ||
48 | +#include "mte.h" | ||
49 | + | ||
50 | +int main(int ac, char **av) | ||
51 | +{ | ||
52 | + void *p; | ||
53 | + | ||
54 | + enable_mte(PR_MTE_TCF_SYNC); | ||
55 | + p = alloc_mte_mem(2 * 0x1000); | ||
56 | + | ||
57 | + /* Tag the pointer. */ | ||
58 | + p = (void *)((unsigned long)p | (1ul << 56)); | ||
59 | + | ||
60 | + /* Store tag in sequential granules. */ | ||
61 | + asm("stg %0, [%0]" : : "r"(p + 0x0ff0)); | ||
62 | + asm("stg %0, [%0]" : : "r"(p + 0x1000)); | ||
63 | + | ||
64 | + /* | ||
65 | + * Perform an unaligned store with tag 1 crossing the pages. | ||
66 | + * Failure dies with SIGSEGV. | ||
67 | + */ | ||
68 | + asm("str %0, [%0]" : : "r"(p + 0x0ffc)); | ||
69 | + return 0; | ||
70 | +} | ||
71 | diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/tests/tcg/aarch64/Makefile.target | ||
74 | +++ b/tests/tcg/aarch64/Makefile.target | ||
75 | @@ -XXX,XX +XXX,XX @@ AARCH64_TESTS += bti-2 | ||
76 | |||
77 | # MTE Tests | ||
78 | ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_ARMV8_MTE),) | ||
79 | -AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 | ||
80 | +AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7 | ||
81 | mte-%: CFLAGS += -march=armv8.5-a+memtag | ||
82 | endif | ||
83 | |||
84 | -- | ||
85 | 2.20.1 | ||
86 | |||
87 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Patrick Venture <venture@google.com> | ||
2 | 1 | ||
3 | Adds comments to the board init to identify missing i2c devices. | ||
4 | |||
5 | Signed-off-by: Patrick Venture <venture@google.com> | ||
6 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
7 | Reviewed-by: Joel Stanley <joel@jms.id.au> | ||
8 | Message-id: 20210608202522.2677850-2-venture@google.com | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | hw/arm/npcm7xx_boards.c | 16 +++++++++++++++- | ||
12 | 1 file changed, 15 insertions(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/arm/npcm7xx_boards.c | ||
17 | +++ b/hw/arm/npcm7xx_boards.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc) | ||
19 | at24c_eeprom_init(soc, 9, 0x55, 8192); | ||
20 | at24c_eeprom_init(soc, 10, 0x55, 8192); | ||
21 | |||
22 | - /* TODO: Add additional i2c devices. */ | ||
23 | + /* | ||
24 | + * i2c-11: | ||
25 | + * - power-brick@36: delta,dps800 | ||
26 | + * - hotswap@15: ti,lm5066i | ||
27 | + */ | ||
28 | + | ||
29 | + /* | ||
30 | + * i2c-12: | ||
31 | + * - ucd90160@6b | ||
32 | + */ | ||
33 | + | ||
34 | + /* | ||
35 | + * i2c-15: | ||
36 | + * - pca9548@75 | ||
37 | + */ | ||
38 | } | ||
39 | |||
40 | static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) | ||
41 | -- | ||
42 | 2.20.1 | ||
43 | |||
44 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Patrick Venture <venture@google.com> | ||
2 | 1 | ||
3 | Tested: Quanta-gsj firmware booted. | ||
4 | |||
5 | i2c /dev entries driver | ||
6 | I2C init bus 1 freq 100000 | ||
7 | I2C init bus 2 freq 100000 | ||
8 | I2C init bus 3 freq 100000 | ||
9 | I2C init bus 4 freq 100000 | ||
10 | I2C init bus 8 freq 100000 | ||
11 | I2C init bus 9 freq 100000 | ||
12 | at24 9-0055: 8192 byte 24c64 EEPROM, writable, 1 bytes/write | ||
13 | I2C init bus 10 freq 100000 | ||
14 | at24 10-0055: 8192 byte 24c64 EEPROM, writable, 1 bytes/write | ||
15 | I2C init bus 12 freq 100000 | ||
16 | I2C init bus 15 freq 100000 | ||
17 | i2c i2c-15: Added multiplexed i2c bus 16 | ||
18 | i2c i2c-15: Added multiplexed i2c bus 17 | ||
19 | i2c i2c-15: Added multiplexed i2c bus 18 | ||
20 | i2c i2c-15: Added multiplexed i2c bus 19 | ||
21 | i2c i2c-15: Added multiplexed i2c bus 20 | ||
22 | i2c i2c-15: Added multiplexed i2c bus 21 | ||
23 | i2c i2c-15: Added multiplexed i2c bus 22 | ||
24 | i2c i2c-15: Added multiplexed i2c bus 23 | ||
25 | pca954x 15-0075: registered 8 multiplexed busses for I2C switch pca9548 | ||
26 | |||
27 | Signed-off-by: Patrick Venture <venture@google.com> | ||
28 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
29 | Reviewed-by: Joel Stanley <joel@jms.id.au> | ||
30 | Message-id: 20210608202522.2677850-3-venture@google.com | ||
31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
32 | --- | ||
33 | hw/arm/npcm7xx_boards.c | 6 ++---- | ||
34 | hw/arm/Kconfig | 1 + | ||
35 | 2 files changed, 3 insertions(+), 4 deletions(-) | ||
36 | |||
37 | diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/hw/arm/npcm7xx_boards.c | ||
40 | +++ b/hw/arm/npcm7xx_boards.c | ||
41 | @@ -XXX,XX +XXX,XX @@ | ||
42 | |||
43 | #include "hw/arm/npcm7xx.h" | ||
44 | #include "hw/core/cpu.h" | ||
45 | +#include "hw/i2c/i2c_mux_pca954x.h" | ||
46 | #include "hw/i2c/smbus_eeprom.h" | ||
47 | #include "hw/loader.h" | ||
48 | #include "hw/qdev-core.h" | ||
49 | @@ -XXX,XX +XXX,XX @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc) | ||
50 | * - ucd90160@6b | ||
51 | */ | ||
52 | |||
53 | - /* | ||
54 | - * i2c-15: | ||
55 | - * - pca9548@75 | ||
56 | - */ | ||
57 | + i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 15), "pca9548", 0x75); | ||
58 | } | ||
59 | |||
60 | static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) | ||
61 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/hw/arm/Kconfig | ||
64 | +++ b/hw/arm/Kconfig | ||
65 | @@ -XXX,XX +XXX,XX @@ config NPCM7XX | ||
66 | select SERIAL | ||
67 | select SSI | ||
68 | select UNIMP | ||
69 | + select PCA954X | ||
70 | |||
71 | config FSL_IMX25 | ||
72 | bool | ||
73 | -- | ||
74 | 2.20.1 | ||
75 | |||
76 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Patrick Venture <venture@google.com> | ||
2 | 1 | ||
3 | Adds the pca954x muxes expected. | ||
4 | |||
5 | Tested: Booted quanta-q71l image to userspace. | ||
6 | Signed-off-by: Patrick Venture <venture@google.com> | ||
7 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
8 | Reviewed-by: Joel Stanley <joel@jms.id.au> | ||
9 | Reviewed-by: Cédric Le Goater <clg@kaod.org> | ||
10 | Message-id: 20210608202522.2677850-4-venture@google.com | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | hw/arm/aspeed.c | 11 ++++++++--- | ||
14 | hw/arm/Kconfig | 1 + | ||
15 | 2 files changed, 9 insertions(+), 3 deletions(-) | ||
16 | |||
17 | diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/hw/arm/aspeed.c | ||
20 | +++ b/hw/arm/aspeed.c | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | #include "hw/arm/boot.h" | ||
23 | #include "hw/arm/aspeed.h" | ||
24 | #include "hw/arm/aspeed_soc.h" | ||
25 | +#include "hw/i2c/i2c_mux_pca954x.h" | ||
26 | #include "hw/i2c/smbus_eeprom.h" | ||
27 | #include "hw/misc/pca9552.h" | ||
28 | #include "hw/misc/tmp105.h" | ||
29 | @@ -XXX,XX +XXX,XX @@ static void quanta_q71l_bmc_i2c_init(AspeedMachineState *bmc) | ||
30 | /* TODO: i2c-1: Add Frontpanel FRU eeprom@57 24c64 */ | ||
31 | /* TODO: Add Memory Riser i2c mux and eeproms. */ | ||
32 | |||
33 | - /* TODO: i2c-2: pca9546@74 */ | ||
34 | - /* TODO: i2c-2: pca9548@77 */ | ||
35 | + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9546", 0x74); | ||
36 | + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9548", 0x77); | ||
37 | + | ||
38 | /* TODO: i2c-3: Add BIOS FRU eeprom@56 24c64 */ | ||
39 | - /* TODO: i2c-7: Add pca9546@70 */ | ||
40 | + | ||
41 | + /* i2c-7 */ | ||
42 | + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9546", 0x70); | ||
43 | /* - i2c@0: pmbus@59 */ | ||
44 | /* - i2c@1: pmbus@58 */ | ||
45 | /* - i2c@2: pmbus@58 */ | ||
46 | /* - i2c@3: pmbus@59 */ | ||
47 | + | ||
48 | /* TODO: i2c-7: Add PDB FRU eeprom@52 */ | ||
49 | /* TODO: i2c-8: Add BMC FRU eeprom@50 */ | ||
50 | } | ||
51 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/hw/arm/Kconfig | ||
54 | +++ b/hw/arm/Kconfig | ||
55 | @@ -XXX,XX +XXX,XX @@ config ASPEED_SOC | ||
56 | select PCA9552 | ||
57 | select SERIAL | ||
58 | select SMBUS_EEPROM | ||
59 | + select PCA954X | ||
60 | select SSI | ||
61 | select SSI_M25P80 | ||
62 | select TMP105 | ||
63 | -- | ||
64 | 2.20.1 | ||
65 | |||
66 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Currently we provide Hn and H1_n macros for accessing the correct | ||
2 | data within arrays of vector elements of size 1, 2 and 4, accounting | ||
3 | for host endianness. We don't provide any macros for elements of | ||
4 | size 8 because there the host endianness doesn't matter. However, | ||
5 | this does result in awkwardness where we need to pass empty arguments | ||
6 | to macros, because checkpatch complains about them. The empty | ||
7 | argument is a little confusing for humans to read as well. | ||
8 | 1 | ||
9 | Add H8() and H1_8() macros and use them where we were previously | ||
10 | passing empty arguments to macros. | ||
11 | |||
12 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Message-id: 20210614151007.4545-2-peter.maydell@linaro.org | ||
17 | Message-id: 20210610132505.5827-1-peter.maydell@linaro.org | ||
18 | --- | ||
19 | target/arm/vec_internal.h | 8 +- | ||
20 | target/arm/sve_helper.c | 258 +++++++++++++++++++------------------- | ||
21 | target/arm/vec_helper.c | 14 +-- | ||
22 | 3 files changed, 143 insertions(+), 137 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/vec_internal.h | ||
27 | +++ b/target/arm/vec_internal.h | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | #define H2(x) (x) | ||
30 | #define H4(x) (x) | ||
31 | #endif | ||
32 | - | ||
33 | +/* | ||
34 | + * Access to 64-bit elements isn't host-endian dependent; we provide H8 | ||
35 | + * and H1_8 so that when a function is being generated from a macro we | ||
36 | + * can pass these rather than an empty macro argument, for clarity. | ||
37 | + */ | ||
38 | +#define H8(x) (x) | ||
39 | +#define H1_8(x) (x) | ||
40 | |||
41 | static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) | ||
42 | { | ||
43 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/arm/sve_helper.c | ||
46 | +++ b/target/arm/sve_helper.c | ||
47 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ | ||
48 | |||
49 | DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_h, float16, H1_2, float16_add) | ||
50 | DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_s, float32, H1_4, float32_add) | ||
51 | -DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_d, float64, , float64_add) | ||
52 | +DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_d, float64, H1_8, float64_add) | ||
53 | |||
54 | DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_h, float16, H1_2, float16_maxnum) | ||
55 | DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_s, float32, H1_4, float32_maxnum) | ||
56 | -DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_d, float64, , float64_maxnum) | ||
57 | +DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_d, float64, H1_8, float64_maxnum) | ||
58 | |||
59 | DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_h, float16, H1_2, float16_minnum) | ||
60 | DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_s, float32, H1_4, float32_minnum) | ||
61 | -DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_d, float64, , float64_minnum) | ||
62 | +DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_d, float64, H1_8, float64_minnum) | ||
63 | |||
64 | DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_h, float16, H1_2, float16_max) | ||
65 | DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_s, float32, H1_4, float32_max) | ||
66 | -DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_d, float64, , float64_max) | ||
67 | +DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_d, float64, H1_8, float64_max) | ||
68 | |||
69 | DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_h, float16, H1_2, float16_min) | ||
70 | DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_s, float32, H1_4, float32_min) | ||
71 | -DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_d, float64, , float64_min) | ||
72 | +DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_d, float64, H1_8, float64_min) | ||
73 | |||
74 | #undef DO_ZPZZ_PAIR_FP | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
77 | |||
78 | DO_ZZZ_TB(sve2_saddl_h, int16_t, int8_t, H1_2, H1, DO_ADD) | ||
79 | DO_ZZZ_TB(sve2_saddl_s, int32_t, int16_t, H1_4, H1_2, DO_ADD) | ||
80 | -DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, , H1_4, DO_ADD) | ||
81 | +DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, H1_8, H1_4, DO_ADD) | ||
82 | |||
83 | DO_ZZZ_TB(sve2_ssubl_h, int16_t, int8_t, H1_2, H1, DO_SUB) | ||
84 | DO_ZZZ_TB(sve2_ssubl_s, int32_t, int16_t, H1_4, H1_2, DO_SUB) | ||
85 | -DO_ZZZ_TB(sve2_ssubl_d, int64_t, int32_t, , H1_4, DO_SUB) | ||
86 | +DO_ZZZ_TB(sve2_ssubl_d, int64_t, int32_t, H1_8, H1_4, DO_SUB) | ||
87 | |||
88 | DO_ZZZ_TB(sve2_sabdl_h, int16_t, int8_t, H1_2, H1, DO_ABD) | ||
89 | DO_ZZZ_TB(sve2_sabdl_s, int32_t, int16_t, H1_4, H1_2, DO_ABD) | ||
90 | -DO_ZZZ_TB(sve2_sabdl_d, int64_t, int32_t, , H1_4, DO_ABD) | ||
91 | +DO_ZZZ_TB(sve2_sabdl_d, int64_t, int32_t, H1_8, H1_4, DO_ABD) | ||
92 | |||
93 | DO_ZZZ_TB(sve2_uaddl_h, uint16_t, uint8_t, H1_2, H1, DO_ADD) | ||
94 | DO_ZZZ_TB(sve2_uaddl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD) | ||
95 | -DO_ZZZ_TB(sve2_uaddl_d, uint64_t, uint32_t, , H1_4, DO_ADD) | ||
96 | +DO_ZZZ_TB(sve2_uaddl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD) | ||
97 | |||
98 | DO_ZZZ_TB(sve2_usubl_h, uint16_t, uint8_t, H1_2, H1, DO_SUB) | ||
99 | DO_ZZZ_TB(sve2_usubl_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB) | ||
100 | -DO_ZZZ_TB(sve2_usubl_d, uint64_t, uint32_t, , H1_4, DO_SUB) | ||
101 | +DO_ZZZ_TB(sve2_usubl_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB) | ||
102 | |||
103 | DO_ZZZ_TB(sve2_uabdl_h, uint16_t, uint8_t, H1_2, H1, DO_ABD) | ||
104 | DO_ZZZ_TB(sve2_uabdl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD) | ||
105 | -DO_ZZZ_TB(sve2_uabdl_d, uint64_t, uint32_t, , H1_4, DO_ABD) | ||
106 | +DO_ZZZ_TB(sve2_uabdl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD) | ||
107 | |||
108 | DO_ZZZ_TB(sve2_smull_zzz_h, int16_t, int8_t, H1_2, H1, DO_MUL) | ||
109 | DO_ZZZ_TB(sve2_smull_zzz_s, int32_t, int16_t, H1_4, H1_2, DO_MUL) | ||
110 | -DO_ZZZ_TB(sve2_smull_zzz_d, int64_t, int32_t, , H1_4, DO_MUL) | ||
111 | +DO_ZZZ_TB(sve2_smull_zzz_d, int64_t, int32_t, H1_8, H1_4, DO_MUL) | ||
112 | |||
113 | DO_ZZZ_TB(sve2_umull_zzz_h, uint16_t, uint8_t, H1_2, H1, DO_MUL) | ||
114 | DO_ZZZ_TB(sve2_umull_zzz_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL) | ||
115 | -DO_ZZZ_TB(sve2_umull_zzz_d, uint64_t, uint32_t, , H1_4, DO_MUL) | ||
116 | +DO_ZZZ_TB(sve2_umull_zzz_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL) | ||
117 | |||
118 | /* Note that the multiply cannot overflow, but the doubling can. */ | ||
119 | static inline int16_t do_sqdmull_h(int16_t n, int16_t m) | ||
120 | @@ -XXX,XX +XXX,XX @@ static inline int64_t do_sqdmull_d(int64_t n, int64_t m) | ||
121 | |||
122 | DO_ZZZ_TB(sve2_sqdmull_zzz_h, int16_t, int8_t, H1_2, H1, do_sqdmull_h) | ||
123 | DO_ZZZ_TB(sve2_sqdmull_zzz_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s) | ||
124 | -DO_ZZZ_TB(sve2_sqdmull_zzz_d, int64_t, int32_t, , H1_4, do_sqdmull_d) | ||
125 | +DO_ZZZ_TB(sve2_sqdmull_zzz_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d) | ||
126 | |||
127 | #undef DO_ZZZ_TB | ||
128 | |||
129 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
130 | |||
131 | DO_ZZZ_WTB(sve2_saddw_h, int16_t, int8_t, H1_2, H1, DO_ADD) | ||
132 | DO_ZZZ_WTB(sve2_saddw_s, int32_t, int16_t, H1_4, H1_2, DO_ADD) | ||
133 | -DO_ZZZ_WTB(sve2_saddw_d, int64_t, int32_t, , H1_4, DO_ADD) | ||
134 | +DO_ZZZ_WTB(sve2_saddw_d, int64_t, int32_t, H1_8, H1_4, DO_ADD) | ||
135 | |||
136 | DO_ZZZ_WTB(sve2_ssubw_h, int16_t, int8_t, H1_2, H1, DO_SUB) | ||
137 | DO_ZZZ_WTB(sve2_ssubw_s, int32_t, int16_t, H1_4, H1_2, DO_SUB) | ||
138 | -DO_ZZZ_WTB(sve2_ssubw_d, int64_t, int32_t, , H1_4, DO_SUB) | ||
139 | +DO_ZZZ_WTB(sve2_ssubw_d, int64_t, int32_t, H1_8, H1_4, DO_SUB) | ||
140 | |||
141 | DO_ZZZ_WTB(sve2_uaddw_h, uint16_t, uint8_t, H1_2, H1, DO_ADD) | ||
142 | DO_ZZZ_WTB(sve2_uaddw_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD) | ||
143 | -DO_ZZZ_WTB(sve2_uaddw_d, uint64_t, uint32_t, , H1_4, DO_ADD) | ||
144 | +DO_ZZZ_WTB(sve2_uaddw_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD) | ||
145 | |||
146 | DO_ZZZ_WTB(sve2_usubw_h, uint16_t, uint8_t, H1_2, H1, DO_SUB) | ||
147 | DO_ZZZ_WTB(sve2_usubw_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB) | ||
148 | -DO_ZZZ_WTB(sve2_usubw_d, uint64_t, uint32_t, , H1_4, DO_SUB) | ||
149 | +DO_ZZZ_WTB(sve2_usubw_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB) | ||
150 | |||
151 | #undef DO_ZZZ_WTB | ||
152 | |||
153 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
154 | DO_ZZZ_NTB(sve2_eoril_b, uint8_t, H1, DO_EOR) | ||
155 | DO_ZZZ_NTB(sve2_eoril_h, uint16_t, H1_2, DO_EOR) | ||
156 | DO_ZZZ_NTB(sve2_eoril_s, uint32_t, H1_4, DO_EOR) | ||
157 | -DO_ZZZ_NTB(sve2_eoril_d, uint64_t, , DO_EOR) | ||
158 | +DO_ZZZ_NTB(sve2_eoril_d, uint64_t, H1_8, DO_EOR) | ||
159 | |||
160 | #undef DO_ZZZ_NTB | ||
161 | |||
162 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
163 | |||
164 | DO_ZZZW_ACC(sve2_sabal_h, int16_t, int8_t, H1_2, H1, DO_ABD) | ||
165 | DO_ZZZW_ACC(sve2_sabal_s, int32_t, int16_t, H1_4, H1_2, DO_ABD) | ||
166 | -DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, , H1_4, DO_ABD) | ||
167 | +DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, H1_8, H1_4, DO_ABD) | ||
168 | |||
169 | DO_ZZZW_ACC(sve2_uabal_h, uint16_t, uint8_t, H1_2, H1, DO_ABD) | ||
170 | DO_ZZZW_ACC(sve2_uabal_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD) | ||
171 | -DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, , H1_4, DO_ABD) | ||
172 | +DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD) | ||
173 | |||
174 | DO_ZZZW_ACC(sve2_smlal_zzzw_h, int16_t, int8_t, H1_2, H1, DO_MUL) | ||
175 | DO_ZZZW_ACC(sve2_smlal_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_MUL) | ||
176 | -DO_ZZZW_ACC(sve2_smlal_zzzw_d, int64_t, int32_t, , H1_4, DO_MUL) | ||
177 | +DO_ZZZW_ACC(sve2_smlal_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_MUL) | ||
178 | |||
179 | DO_ZZZW_ACC(sve2_umlal_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_MUL) | ||
180 | DO_ZZZW_ACC(sve2_umlal_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL) | ||
181 | -DO_ZZZW_ACC(sve2_umlal_zzzw_d, uint64_t, uint32_t, , H1_4, DO_MUL) | ||
182 | +DO_ZZZW_ACC(sve2_umlal_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL) | ||
183 | |||
184 | #define DO_NMUL(N, M) -(N * M) | ||
185 | |||
186 | DO_ZZZW_ACC(sve2_smlsl_zzzw_h, int16_t, int8_t, H1_2, H1, DO_NMUL) | ||
187 | DO_ZZZW_ACC(sve2_smlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_NMUL) | ||
188 | -DO_ZZZW_ACC(sve2_smlsl_zzzw_d, int64_t, int32_t, , H1_4, DO_NMUL) | ||
189 | +DO_ZZZW_ACC(sve2_smlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_NMUL) | ||
190 | |||
191 | DO_ZZZW_ACC(sve2_umlsl_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_NMUL) | ||
192 | DO_ZZZW_ACC(sve2_umlsl_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_NMUL) | ||
193 | -DO_ZZZW_ACC(sve2_umlsl_zzzw_d, uint64_t, uint32_t, , H1_4, DO_NMUL) | ||
194 | +DO_ZZZW_ACC(sve2_umlsl_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_NMUL) | ||
195 | |||
196 | #undef DO_ZZZW_ACC | ||
197 | |||
198 | @@ -XXX,XX +XXX,XX @@ DO_SQDMLAL(sve2_sqdmlal_zzzw_h, int16_t, int8_t, H1_2, H1, | ||
199 | do_sqdmull_h, DO_SQADD_H) | ||
200 | DO_SQDMLAL(sve2_sqdmlal_zzzw_s, int32_t, int16_t, H1_4, H1_2, | ||
201 | do_sqdmull_s, DO_SQADD_S) | ||
202 | -DO_SQDMLAL(sve2_sqdmlal_zzzw_d, int64_t, int32_t, , H1_4, | ||
203 | +DO_SQDMLAL(sve2_sqdmlal_zzzw_d, int64_t, int32_t, H1_8, H1_4, | ||
204 | do_sqdmull_d, do_sqadd_d) | ||
205 | |||
206 | DO_SQDMLAL(sve2_sqdmlsl_zzzw_h, int16_t, int8_t, H1_2, H1, | ||
207 | do_sqdmull_h, DO_SQSUB_H) | ||
208 | DO_SQDMLAL(sve2_sqdmlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2, | ||
209 | do_sqdmull_s, DO_SQSUB_S) | ||
210 | -DO_SQDMLAL(sve2_sqdmlsl_zzzw_d, int64_t, int32_t, , H1_4, | ||
211 | +DO_SQDMLAL(sve2_sqdmlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4, | ||
212 | do_sqdmull_d, do_sqsub_d) | ||
213 | |||
214 | #undef DO_SQDMLAL | ||
215 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
216 | DO_CMLA_FUNC(sve2_cmla_zzzz_b, uint8_t, H1, DO_CMLA) | ||
217 | DO_CMLA_FUNC(sve2_cmla_zzzz_h, uint16_t, H2, DO_CMLA) | ||
218 | DO_CMLA_FUNC(sve2_cmla_zzzz_s, uint32_t, H4, DO_CMLA) | ||
219 | -DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, , DO_CMLA) | ||
220 | +DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, H8, DO_CMLA) | ||
221 | |||
222 | #define DO_SQRDMLAH_B(N, M, A, S) \ | ||
223 | do_sqrdmlah_b(N, M, A, S, true) | ||
224 | @@ -XXX,XX +XXX,XX @@ DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, , DO_CMLA) | ||
225 | DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_b, int8_t, H1, DO_SQRDMLAH_B) | ||
226 | DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_h, int16_t, H2, DO_SQRDMLAH_H) | ||
227 | DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_s, int32_t, H4, DO_SQRDMLAH_S) | ||
228 | -DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_d, int64_t, , DO_SQRDMLAH_D) | ||
229 | +DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_d, int64_t, H8, DO_SQRDMLAH_D) | ||
230 | |||
231 | #define DO_CMLA_IDX_FUNC(NAME, TYPE, H, OP) \ | ||
232 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
233 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
234 | |||
235 | DO_ZZXZ(sve2_sqrdmlah_idx_h, int16_t, H2, DO_SQRDMLAH_H) | ||
236 | DO_ZZXZ(sve2_sqrdmlah_idx_s, int32_t, H4, DO_SQRDMLAH_S) | ||
237 | -DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, , DO_SQRDMLAH_D) | ||
238 | +DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, H8, DO_SQRDMLAH_D) | ||
239 | |||
240 | #define DO_SQRDMLSH_H(N, M, A) \ | ||
241 | ({ uint32_t discard; do_sqrdmlah_h(N, M, A, true, true, &discard); }) | ||
242 | @@ -XXX,XX +XXX,XX @@ DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, , DO_SQRDMLAH_D) | ||
243 | |||
244 | DO_ZZXZ(sve2_sqrdmlsh_idx_h, int16_t, H2, DO_SQRDMLSH_H) | ||
245 | DO_ZZXZ(sve2_sqrdmlsh_idx_s, int32_t, H4, DO_SQRDMLSH_S) | ||
246 | -DO_ZZXZ(sve2_sqrdmlsh_idx_d, int64_t, , DO_SQRDMLSH_D) | ||
247 | +DO_ZZXZ(sve2_sqrdmlsh_idx_d, int64_t, H8, DO_SQRDMLSH_D) | ||
248 | |||
249 | #undef DO_ZZXZ | ||
250 | |||
251 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
252 | #define DO_MLA(N, M, A) (A + N * M) | ||
253 | |||
254 | DO_ZZXW(sve2_smlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLA) | ||
255 | -DO_ZZXW(sve2_smlal_idx_d, int64_t, int32_t, , H1_4, DO_MLA) | ||
256 | +DO_ZZXW(sve2_smlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLA) | ||
257 | DO_ZZXW(sve2_umlal_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLA) | ||
258 | -DO_ZZXW(sve2_umlal_idx_d, uint64_t, uint32_t, , H1_4, DO_MLA) | ||
259 | +DO_ZZXW(sve2_umlal_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLA) | ||
260 | |||
261 | #define DO_MLS(N, M, A) (A - N * M) | ||
262 | |||
263 | DO_ZZXW(sve2_smlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLS) | ||
264 | -DO_ZZXW(sve2_smlsl_idx_d, int64_t, int32_t, , H1_4, DO_MLS) | ||
265 | +DO_ZZXW(sve2_smlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLS) | ||
266 | DO_ZZXW(sve2_umlsl_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLS) | ||
267 | -DO_ZZXW(sve2_umlsl_idx_d, uint64_t, uint32_t, , H1_4, DO_MLS) | ||
268 | +DO_ZZXW(sve2_umlsl_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLS) | ||
269 | |||
270 | #define DO_SQDMLAL_S(N, M, A) DO_SQADD_S(A, do_sqdmull_s(N, M)) | ||
271 | #define DO_SQDMLAL_D(N, M, A) do_sqadd_d(A, do_sqdmull_d(N, M)) | ||
272 | |||
273 | DO_ZZXW(sve2_sqdmlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLAL_S) | ||
274 | -DO_ZZXW(sve2_sqdmlal_idx_d, int64_t, int32_t, , H1_4, DO_SQDMLAL_D) | ||
275 | +DO_ZZXW(sve2_sqdmlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLAL_D) | ||
276 | |||
277 | #define DO_SQDMLSL_S(N, M, A) DO_SQSUB_S(A, do_sqdmull_s(N, M)) | ||
278 | #define DO_SQDMLSL_D(N, M, A) do_sqsub_d(A, do_sqdmull_d(N, M)) | ||
279 | |||
280 | DO_ZZXW(sve2_sqdmlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLSL_S) | ||
281 | -DO_ZZXW(sve2_sqdmlsl_idx_d, int64_t, int32_t, , H1_4, DO_SQDMLSL_D) | ||
282 | +DO_ZZXW(sve2_sqdmlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLSL_D) | ||
283 | |||
284 | #undef DO_MLA | ||
285 | #undef DO_MLS | ||
286 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
287 | } | ||
288 | |||
289 | DO_ZZX(sve2_sqdmull_idx_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s) | ||
290 | -DO_ZZX(sve2_sqdmull_idx_d, int64_t, int32_t, , H1_4, do_sqdmull_d) | ||
291 | +DO_ZZX(sve2_sqdmull_idx_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d) | ||
292 | |||
293 | DO_ZZX(sve2_smull_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MUL) | ||
294 | -DO_ZZX(sve2_smull_idx_d, int64_t, int32_t, , H1_4, DO_MUL) | ||
295 | +DO_ZZX(sve2_smull_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MUL) | ||
296 | |||
297 | DO_ZZX(sve2_umull_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL) | ||
298 | -DO_ZZX(sve2_umull_idx_d, uint64_t, uint32_t, , H1_4, DO_MUL) | ||
299 | +DO_ZZX(sve2_umull_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL) | ||
300 | |||
301 | #undef DO_ZZX | ||
302 | |||
303 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
304 | DO_CADD(sve2_cadd_b, int8_t, H1, DO_ADD, DO_SUB) | ||
305 | DO_CADD(sve2_cadd_h, int16_t, H1_2, DO_ADD, DO_SUB) | ||
306 | DO_CADD(sve2_cadd_s, int32_t, H1_4, DO_ADD, DO_SUB) | ||
307 | -DO_CADD(sve2_cadd_d, int64_t, , DO_ADD, DO_SUB) | ||
308 | +DO_CADD(sve2_cadd_d, int64_t, H1_8, DO_ADD, DO_SUB) | ||
309 | |||
310 | DO_CADD(sve2_sqcadd_b, int8_t, H1, DO_SQADD_B, DO_SQSUB_B) | ||
311 | DO_CADD(sve2_sqcadd_h, int16_t, H1_2, DO_SQADD_H, DO_SQSUB_H) | ||
312 | DO_CADD(sve2_sqcadd_s, int32_t, H1_4, DO_SQADD_S, DO_SQSUB_S) | ||
313 | -DO_CADD(sve2_sqcadd_d, int64_t, , do_sqadd_d, do_sqsub_d) | ||
314 | +DO_CADD(sve2_sqcadd_d, int64_t, H1_8, do_sqadd_d, do_sqsub_d) | ||
315 | |||
316 | #undef DO_CADD | ||
317 | |||
318 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ | ||
319 | |||
320 | DO_ZZI_SHLL(sve2_sshll_h, int16_t, int8_t, H1_2, H1) | ||
321 | DO_ZZI_SHLL(sve2_sshll_s, int32_t, int16_t, H1_4, H1_2) | ||
322 | -DO_ZZI_SHLL(sve2_sshll_d, int64_t, int32_t, , H1_4) | ||
323 | +DO_ZZI_SHLL(sve2_sshll_d, int64_t, int32_t, H1_8, H1_4) | ||
324 | |||
325 | DO_ZZI_SHLL(sve2_ushll_h, uint16_t, uint8_t, H1_2, H1) | ||
326 | DO_ZZI_SHLL(sve2_ushll_s, uint32_t, uint16_t, H1_4, H1_2) | ||
327 | -DO_ZZI_SHLL(sve2_ushll_d, uint64_t, uint32_t, , H1_4) | ||
328 | +DO_ZZI_SHLL(sve2_ushll_d, uint64_t, uint32_t, H1_8, H1_4) | ||
329 | |||
330 | #undef DO_ZZI_SHLL | ||
331 | |||
332 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_shrnb_d, uint64_t, uint32_t, DO_SHR) | ||
333 | |||
334 | DO_SHRNT(sve2_shrnt_h, uint16_t, uint8_t, H1_2, H1, DO_SHR) | ||
335 | DO_SHRNT(sve2_shrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_SHR) | ||
336 | -DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, , H1_4, DO_SHR) | ||
337 | +DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_SHR) | ||
338 | |||
339 | DO_SHRNB(sve2_rshrnb_h, uint16_t, uint8_t, do_urshr) | ||
340 | DO_SHRNB(sve2_rshrnb_s, uint32_t, uint16_t, do_urshr) | ||
341 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_rshrnb_d, uint64_t, uint32_t, do_urshr) | ||
342 | |||
343 | DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr) | ||
344 | DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr) | ||
345 | -DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, , H1_4, do_urshr) | ||
346 | +DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr) | ||
347 | |||
348 | #define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX) | ||
349 | #define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX) | ||
350 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqshrunb_d, int64_t, uint32_t, DO_SQSHRUN_D) | ||
351 | |||
352 | DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H) | ||
353 | DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S) | ||
354 | -DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, , H1_4, DO_SQSHRUN_D) | ||
355 | +DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D) | ||
356 | |||
357 | #define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX) | ||
358 | #define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX) | ||
359 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqrshrunb_d, int64_t, uint32_t, DO_SQRSHRUN_D) | ||
360 | |||
361 | DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H) | ||
362 | DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S) | ||
363 | -DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, , H1_4, DO_SQRSHRUN_D) | ||
364 | +DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D) | ||
365 | |||
366 | #define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX) | ||
367 | #define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX) | ||
368 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqshrnb_d, int64_t, uint32_t, DO_SQSHRN_D) | ||
369 | |||
370 | DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H) | ||
371 | DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S) | ||
372 | -DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, , H1_4, DO_SQSHRN_D) | ||
373 | +DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D) | ||
374 | |||
375 | #define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX) | ||
376 | #define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX) | ||
377 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_sqrshrnb_d, int64_t, uint32_t, DO_SQRSHRN_D) | ||
378 | |||
379 | DO_SHRNT(sve2_sqrshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRN_H) | ||
380 | DO_SHRNT(sve2_sqrshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRN_S) | ||
381 | -DO_SHRNT(sve2_sqrshrnt_d, int64_t, uint32_t, , H1_4, DO_SQRSHRN_D) | ||
382 | +DO_SHRNT(sve2_sqrshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRN_D) | ||
383 | |||
384 | #define DO_UQSHRN_H(x, sh) MIN(x >> sh, UINT8_MAX) | ||
385 | #define DO_UQSHRN_S(x, sh) MIN(x >> sh, UINT16_MAX) | ||
386 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_uqshrnb_d, uint64_t, uint32_t, DO_UQSHRN_D) | ||
387 | |||
388 | DO_SHRNT(sve2_uqshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQSHRN_H) | ||
389 | DO_SHRNT(sve2_uqshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQSHRN_S) | ||
390 | -DO_SHRNT(sve2_uqshrnt_d, uint64_t, uint32_t, , H1_4, DO_UQSHRN_D) | ||
391 | +DO_SHRNT(sve2_uqshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQSHRN_D) | ||
392 | |||
393 | #define DO_UQRSHRN_H(x, sh) MIN(do_urshr(x, sh), UINT8_MAX) | ||
394 | #define DO_UQRSHRN_S(x, sh) MIN(do_urshr(x, sh), UINT16_MAX) | ||
395 | @@ -XXX,XX +XXX,XX @@ DO_SHRNB(sve2_uqrshrnb_d, uint64_t, uint32_t, DO_UQRSHRN_D) | ||
396 | |||
397 | DO_SHRNT(sve2_uqrshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQRSHRN_H) | ||
398 | DO_SHRNT(sve2_uqrshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQRSHRN_S) | ||
399 | -DO_SHRNT(sve2_uqrshrnt_d, uint64_t, uint32_t, , H1_4, DO_UQRSHRN_D) | ||
400 | +DO_SHRNT(sve2_uqrshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQRSHRN_D) | ||
401 | |||
402 | #undef DO_SHRNB | ||
403 | #undef DO_SHRNT | ||
404 | @@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_addhnb_d, uint64_t, uint32_t, 32, DO_ADDHN) | ||
405 | |||
406 | DO_BINOPNT(sve2_addhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_ADDHN) | ||
407 | DO_BINOPNT(sve2_addhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_ADDHN) | ||
408 | -DO_BINOPNT(sve2_addhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_ADDHN) | ||
409 | +DO_BINOPNT(sve2_addhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_ADDHN) | ||
410 | |||
411 | DO_BINOPNB(sve2_raddhnb_h, uint16_t, uint8_t, 8, DO_RADDHN) | ||
412 | DO_BINOPNB(sve2_raddhnb_s, uint32_t, uint16_t, 16, DO_RADDHN) | ||
413 | @@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_raddhnb_d, uint64_t, uint32_t, 32, DO_RADDHN) | ||
414 | |||
415 | DO_BINOPNT(sve2_raddhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RADDHN) | ||
416 | DO_BINOPNT(sve2_raddhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RADDHN) | ||
417 | -DO_BINOPNT(sve2_raddhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_RADDHN) | ||
418 | +DO_BINOPNT(sve2_raddhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RADDHN) | ||
419 | |||
420 | DO_BINOPNB(sve2_subhnb_h, uint16_t, uint8_t, 8, DO_SUBHN) | ||
421 | DO_BINOPNB(sve2_subhnb_s, uint32_t, uint16_t, 16, DO_SUBHN) | ||
422 | @@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_subhnb_d, uint64_t, uint32_t, 32, DO_SUBHN) | ||
423 | |||
424 | DO_BINOPNT(sve2_subhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_SUBHN) | ||
425 | DO_BINOPNT(sve2_subhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_SUBHN) | ||
426 | -DO_BINOPNT(sve2_subhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_SUBHN) | ||
427 | +DO_BINOPNT(sve2_subhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_SUBHN) | ||
428 | |||
429 | DO_BINOPNB(sve2_rsubhnb_h, uint16_t, uint8_t, 8, DO_RSUBHN) | ||
430 | DO_BINOPNB(sve2_rsubhnb_s, uint32_t, uint16_t, 16, DO_RSUBHN) | ||
431 | @@ -XXX,XX +XXX,XX @@ DO_BINOPNB(sve2_rsubhnb_d, uint64_t, uint32_t, 32, DO_RSUBHN) | ||
432 | |||
433 | DO_BINOPNT(sve2_rsubhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RSUBHN) | ||
434 | DO_BINOPNT(sve2_rsubhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RSUBHN) | ||
435 | -DO_BINOPNT(sve2_rsubhnt_d, uint64_t, uint32_t, 32, , H1_4, DO_RSUBHN) | ||
436 | +DO_BINOPNT(sve2_rsubhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RSUBHN) | ||
437 | |||
438 | #undef DO_RSUBHN | ||
439 | #undef DO_SUBHN | ||
440 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ | ||
441 | DO_INSR(sve_insr_b, uint8_t, H1) | ||
442 | DO_INSR(sve_insr_h, uint16_t, H1_2) | ||
443 | DO_INSR(sve_insr_s, uint32_t, H1_4) | ||
444 | -DO_INSR(sve_insr_d, uint64_t, ) | ||
445 | +DO_INSR(sve_insr_d, uint64_t, H1_8) | ||
446 | |||
447 | #undef DO_INSR | ||
448 | |||
449 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_tbx_##SUFF)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
450 | DO_TB(b, uint8_t, H1) | ||
451 | DO_TB(h, uint16_t, H2) | ||
452 | DO_TB(s, uint32_t, H4) | ||
453 | -DO_TB(d, uint64_t, ) | ||
454 | +DO_TB(d, uint64_t, H8) | ||
455 | |||
456 | #undef DO_TB | ||
457 | |||
458 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ | ||
459 | |||
460 | DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) | ||
461 | DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) | ||
462 | -DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4) | ||
463 | +DO_UNPK(sve_sunpk_d, int64_t, int32_t, H8, H4) | ||
464 | |||
465 | DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1) | ||
466 | DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2) | ||
467 | -DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) | ||
468 | +DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, H8, H4) | ||
469 | |||
470 | #undef DO_UNPK | ||
471 | |||
472 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
473 | DO_ZIP(sve_zip_b, uint8_t, H1) | ||
474 | DO_ZIP(sve_zip_h, uint16_t, H1_2) | ||
475 | DO_ZIP(sve_zip_s, uint32_t, H1_4) | ||
476 | -DO_ZIP(sve_zip_d, uint64_t, ) | ||
477 | +DO_ZIP(sve_zip_d, uint64_t, H1_8) | ||
478 | DO_ZIP(sve2_zip_q, Int128, ) | ||
479 | |||
480 | #define DO_UZP(NAME, TYPE, H) \ | ||
481 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
482 | DO_UZP(sve_uzp_b, uint8_t, H1) | ||
483 | DO_UZP(sve_uzp_h, uint16_t, H1_2) | ||
484 | DO_UZP(sve_uzp_s, uint32_t, H1_4) | ||
485 | -DO_UZP(sve_uzp_d, uint64_t, ) | ||
486 | +DO_UZP(sve_uzp_d, uint64_t, H1_8) | ||
487 | DO_UZP(sve2_uzp_q, Int128, ) | ||
488 | |||
489 | #define DO_TRN(NAME, TYPE, H) \ | ||
490 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
491 | DO_TRN(sve_trn_b, uint8_t, H1) | ||
492 | DO_TRN(sve_trn_h, uint16_t, H1_2) | ||
493 | DO_TRN(sve_trn_s, uint32_t, H1_4) | ||
494 | -DO_TRN(sve_trn_d, uint64_t, ) | ||
495 | +DO_TRN(sve_trn_d, uint64_t, H1_8) | ||
496 | DO_TRN(sve2_trn_q, Int128, ) | ||
497 | |||
498 | #undef DO_ZIP | ||
499 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ | ||
500 | #define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ | ||
501 | DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) | ||
502 | #define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ | ||
503 | - DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) | ||
504 | + DO_CMP_PPZZ(NAME, TYPE, OP, H1_8, 0x0101010101010101ull) | ||
505 | |||
506 | DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) | ||
507 | DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) | ||
508 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ | ||
509 | #define DO_CMP_PPZI_S(NAME, TYPE, OP) \ | ||
510 | DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) | ||
511 | #define DO_CMP_PPZI_D(NAME, TYPE, OP) \ | ||
512 | - DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) | ||
513 | + DO_CMP_PPZI(NAME, TYPE, OP, H1_8, 0x0101010101010101ull) | ||
514 | |||
515 | DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) | ||
516 | DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) | ||
517 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ | ||
518 | |||
519 | DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
520 | DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
521 | -DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) | ||
522 | +DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
523 | |||
524 | /* Identity is floatN_default_nan, without the function call. */ | ||
525 | DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
526 | DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
527 | -DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) | ||
528 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
529 | |||
530 | DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
531 | DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
532 | -DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) | ||
533 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
534 | |||
535 | DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
536 | DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
537 | -DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) | ||
538 | +DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
539 | |||
540 | DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
541 | DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
542 | -DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) | ||
543 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
544 | |||
545 | #undef DO_REDUCE | ||
546 | |||
547 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ | ||
548 | |||
549 | DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) | ||
550 | DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) | ||
551 | -DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) | ||
552 | +DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add) | ||
553 | |||
554 | DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) | ||
555 | DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) | ||
556 | -DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) | ||
557 | +DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub) | ||
558 | |||
559 | DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) | ||
560 | DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) | ||
561 | -DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) | ||
562 | +DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul) | ||
563 | |||
564 | DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) | ||
565 | DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) | ||
566 | -DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) | ||
567 | +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div) | ||
568 | |||
569 | DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) | ||
570 | DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) | ||
571 | -DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) | ||
572 | +DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min) | ||
573 | |||
574 | DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
575 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
576 | -DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) | ||
577 | +DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
578 | |||
579 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
580 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
581 | -DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) | ||
582 | +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
583 | |||
584 | DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) | ||
585 | DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) | ||
586 | -DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) | ||
587 | +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum) | ||
588 | |||
589 | static inline float16 abd_h(float16 a, float16 b, float_status *s) | ||
590 | { | ||
591 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
592 | |||
593 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
594 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
595 | -DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) | ||
596 | +DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
597 | |||
598 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
599 | { | ||
600 | @@ -XXX,XX +XXX,XX @@ static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
601 | |||
602 | DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) | ||
603 | DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) | ||
604 | -DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) | ||
605 | +DO_ZPZZ_FP(sve_fscalbn_d, int64_t, H1_8, scalbn_d) | ||
606 | |||
607 | DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) | ||
608 | DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) | ||
609 | -DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) | ||
610 | +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd) | ||
611 | |||
612 | #undef DO_ZPZZ_FP | ||
613 | |||
614 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ | ||
615 | |||
616 | DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) | ||
617 | DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) | ||
618 | -DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) | ||
619 | +DO_ZPZS_FP(sve_fadds_d, float64, H1_8, float64_add) | ||
620 | |||
621 | DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) | ||
622 | DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) | ||
623 | -DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) | ||
624 | +DO_ZPZS_FP(sve_fsubs_d, float64, H1_8, float64_sub) | ||
625 | |||
626 | DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) | ||
627 | DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) | ||
628 | -DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) | ||
629 | +DO_ZPZS_FP(sve_fmuls_d, float64, H1_8, float64_mul) | ||
630 | |||
631 | static inline float16 subr_h(float16 a, float16 b, float_status *s) | ||
632 | { | ||
633 | @@ -XXX,XX +XXX,XX @@ static inline float64 subr_d(float64 a, float64 b, float_status *s) | ||
634 | |||
635 | DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) | ||
636 | DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) | ||
637 | -DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) | ||
638 | +DO_ZPZS_FP(sve_fsubrs_d, float64, H1_8, subr_d) | ||
639 | |||
640 | DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) | ||
641 | DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) | ||
642 | -DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) | ||
643 | +DO_ZPZS_FP(sve_fmaxnms_d, float64, H1_8, float64_maxnum) | ||
644 | |||
645 | DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) | ||
646 | DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) | ||
647 | -DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) | ||
648 | +DO_ZPZS_FP(sve_fminnms_d, float64, H1_8, float64_minnum) | ||
649 | |||
650 | DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) | ||
651 | DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) | ||
652 | -DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) | ||
653 | +DO_ZPZS_FP(sve_fmaxs_d, float64, H1_8, float64_max) | ||
654 | |||
655 | DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
656 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
657 | -DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) | ||
658 | +DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
659 | |||
660 | /* Fully general two-operand expander, controlled by a predicate, | ||
661 | * With the extra float_status parameter. | ||
662 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) | ||
663 | DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) | ||
664 | DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) | ||
665 | DO_ZPZ_FP(sve_bfcvt, uint32_t, H1_4, float32_to_bfloat16) | ||
666 | -DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) | ||
667 | -DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) | ||
668 | -DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) | ||
669 | -DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) | ||
670 | +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, H1_8, sve_f64_to_f16) | ||
671 | +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, H1_8, sve_f16_to_f64) | ||
672 | +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, H1_8, float64_to_float32) | ||
673 | +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, H1_8, float32_to_float64) | ||
674 | |||
675 | DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) | ||
676 | DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) | ||
677 | DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) | ||
678 | -DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) | ||
679 | -DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) | ||
680 | -DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) | ||
681 | -DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz) | ||
682 | +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, H1_8, vfp_float16_to_int64_rtz) | ||
683 | +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, H1_8, vfp_float32_to_int64_rtz) | ||
684 | +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, H1_8, helper_vfp_tosizd) | ||
685 | +DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, H1_8, vfp_float64_to_int64_rtz) | ||
686 | |||
687 | DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) | ||
688 | DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) | ||
689 | DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) | ||
690 | -DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) | ||
691 | -DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) | ||
692 | -DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) | ||
693 | -DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) | ||
694 | +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, H1_8, vfp_float16_to_uint64_rtz) | ||
695 | +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, H1_8, vfp_float32_to_uint64_rtz) | ||
696 | +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, H1_8, helper_vfp_touizd) | ||
697 | +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, H1_8, vfp_float64_to_uint64_rtz) | ||
698 | |||
699 | DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) | ||
700 | DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) | ||
701 | -DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) | ||
702 | +DO_ZPZ_FP(sve_frint_d, uint64_t, H1_8, helper_rintd) | ||
703 | |||
704 | DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) | ||
705 | DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) | ||
706 | -DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) | ||
707 | +DO_ZPZ_FP(sve_frintx_d, uint64_t, H1_8, float64_round_to_int) | ||
708 | |||
709 | DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) | ||
710 | DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) | ||
711 | -DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) | ||
712 | +DO_ZPZ_FP(sve_frecpx_d, uint64_t, H1_8, helper_frecpx_f64) | ||
713 | |||
714 | DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) | ||
715 | DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) | ||
716 | -DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) | ||
717 | +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, H1_8, float64_sqrt) | ||
718 | |||
719 | DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) | ||
720 | DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) | ||
721 | DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) | ||
722 | -DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) | ||
723 | -DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) | ||
724 | -DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) | ||
725 | -DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) | ||
726 | +DO_ZPZ_FP(sve_scvt_sd, uint64_t, H1_8, int32_to_float64) | ||
727 | +DO_ZPZ_FP(sve_scvt_dh, uint64_t, H1_8, int64_to_float16) | ||
728 | +DO_ZPZ_FP(sve_scvt_ds, uint64_t, H1_8, int64_to_float32) | ||
729 | +DO_ZPZ_FP(sve_scvt_dd, uint64_t, H1_8, int64_to_float64) | ||
730 | |||
731 | DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) | ||
732 | DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) | ||
733 | DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) | ||
734 | -DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) | ||
735 | -DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) | ||
736 | -DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) | ||
737 | -DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) | ||
738 | +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, H1_8, uint32_to_float64) | ||
739 | +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, H1_8, uint64_to_float16) | ||
740 | +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, H1_8, uint64_to_float32) | ||
741 | +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, H1_8, uint64_to_float64) | ||
742 | |||
743 | static int16_t do_float16_logb_as_int(float16 a, float_status *s) | ||
744 | { | ||
745 | @@ -XXX,XX +XXX,XX @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s) | ||
746 | |||
747 | DO_ZPZ_FP(flogb_h, float16, H1_2, do_float16_logb_as_int) | ||
748 | DO_ZPZ_FP(flogb_s, float32, H1_4, do_float32_logb_as_int) | ||
749 | -DO_ZPZ_FP(flogb_d, float64, , do_float64_logb_as_int) | ||
750 | +DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
751 | |||
752 | #undef DO_ZPZ_FP | ||
753 | |||
754 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ | ||
755 | #define DO_FPCMP_PPZZ_S(NAME, OP) \ | ||
756 | DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) | ||
757 | #define DO_FPCMP_PPZZ_D(NAME, OP) \ | ||
758 | - DO_FPCMP_PPZZ(NAME##_d, float64, , OP) | ||
759 | + DO_FPCMP_PPZZ(NAME##_d, float64, H1_8, OP) | ||
760 | |||
761 | #define DO_FPCMP_PPZZ_ALL(NAME, OP) \ | ||
762 | DO_FPCMP_PPZZ_H(NAME, OP) \ | ||
763 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, \ | ||
764 | #define DO_FPCMP_PPZ0_S(NAME, OP) \ | ||
765 | DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) | ||
766 | #define DO_FPCMP_PPZ0_D(NAME, OP) \ | ||
767 | - DO_FPCMP_PPZ0(NAME##_d, float64, , OP) | ||
768 | + DO_FPCMP_PPZ0(NAME##_d, float64, H1_8, OP) | ||
769 | |||
770 | #define DO_FPCMP_PPZ0_ALL(NAME, OP) \ | ||
771 | DO_FPCMP_PPZ0_H(NAME, OP) \ | ||
772 | @@ -XXX,XX +XXX,XX @@ DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) | ||
773 | DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) | ||
774 | DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) | ||
775 | DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) | ||
776 | -DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) | ||
777 | -DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) | ||
778 | +DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t) | ||
779 | +DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t) | ||
780 | |||
781 | #define DO_ST_PRIM_1(NAME, H, TE, TM) \ | ||
782 | DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ | ||
783 | @@ -XXX,XX +XXX,XX @@ DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) | ||
784 | DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) | ||
785 | DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) | ||
786 | DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) | ||
787 | -DO_ST_PRIM_1(bd, , uint64_t, uint8_t) | ||
788 | +DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t) | ||
789 | |||
790 | #define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ | ||
791 | DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ | ||
792 | @@ -XXX,XX +XXX,XX @@ DO_ST_PRIM_1(bd, , uint64_t, uint8_t) | ||
793 | DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) | ||
794 | DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) | ||
795 | DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) | ||
796 | -DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) | ||
797 | -DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) | ||
798 | +DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw) | ||
799 | +DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw) | ||
800 | |||
801 | DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) | ||
802 | DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) | ||
803 | -DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) | ||
804 | +DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw) | ||
805 | |||
806 | DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) | ||
807 | -DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) | ||
808 | -DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) | ||
809 | +DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl) | ||
810 | +DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl) | ||
811 | |||
812 | DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) | ||
813 | -DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) | ||
814 | +DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) | ||
815 | |||
816 | -DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) | ||
817 | -DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) | ||
818 | +DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) | ||
819 | +DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) | ||
820 | |||
821 | #undef DO_LD_TLB | ||
822 | #undef DO_ST_TLB | ||
823 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ | ||
824 | |||
825 | DO_FCVTNT(sve_bfcvtnt, uint32_t, uint16_t, H1_4, H1_2, float32_to_bfloat16) | ||
826 | DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16) | ||
827 | -DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, , H1_4, float64_to_float32) | ||
828 | +DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32) | ||
829 | |||
830 | #define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \ | ||
831 | void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ | ||
832 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ | ||
833 | } | ||
834 | |||
835 | DO_FCVTLT(sve2_fcvtlt_hs, uint32_t, uint16_t, H1_4, H1_2, sve_f16_to_f32) | ||
836 | -DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, , H1_4, float32_to_float64) | ||
837 | +DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64) | ||
838 | |||
839 | #undef DO_FCVTLT | ||
840 | #undef DO_FCVTNT | ||
841 | diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c | ||
842 | index XXXXXXX..XXXXXXX 100644 | ||
843 | --- a/target/arm/vec_helper.c | ||
844 | +++ b/target/arm/vec_helper.c | ||
845 | @@ -XXX,XX +XXX,XX @@ DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4) | ||
846 | DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4) | ||
847 | DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4) | ||
848 | DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4) | ||
849 | -DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, ) | ||
850 | -DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, ) | ||
851 | +DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8) | ||
852 | +DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8) | ||
853 | |||
854 | void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
855 | void *vfpst, uint32_t desc) | ||
856 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ | ||
857 | |||
858 | DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2) | ||
859 | DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4) | ||
860 | -DO_MUL_IDX(gvec_mul_idx_d, uint64_t, ) | ||
861 | +DO_MUL_IDX(gvec_mul_idx_d, uint64_t, H8) | ||
862 | |||
863 | #undef DO_MUL_IDX | ||
864 | |||
865 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
866 | |||
867 | DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2) | ||
868 | DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4) | ||
869 | -DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, ) | ||
870 | +DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, H8) | ||
871 | |||
872 | DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2) | ||
873 | DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4) | ||
874 | -DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, ) | ||
875 | +DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8) | ||
876 | |||
877 | #undef DO_MLA_IDX | ||
878 | |||
879 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ | ||
880 | |||
881 | DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2) | ||
882 | DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4) | ||
883 | -DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, ) | ||
884 | +DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8) | ||
885 | |||
886 | /* | ||
887 | * Non-fused multiply-accumulate operations, for Neon. NB that unlike | ||
888 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
889 | |||
890 | DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
891 | DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
892 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, ) | ||
893 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
894 | |||
895 | #undef DO_FMLA_IDX | ||
896 | |||
897 | -- | ||
898 | 2.20.1 | ||
899 | |||
900 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | MVE has an FPSCR.QC bit similar to the A-profile Neon one; when MVE | ||
2 | is implemented make the bit writeable, both in the generic "load and | ||
3 | store FPSCR" helper functions and in the code for handling the NZCVQC | ||
4 | sysreg which we had previously left as "TODO when we implement MVE". | ||
5 | 1 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20210614151007.4545-3-peter.maydell@linaro.org | ||
9 | --- | ||
10 | target/arm/translate-vfp.c | 30 +++++++++++++++++++++--------- | ||
11 | target/arm/vfp_helper.c | 3 ++- | ||
12 | 2 files changed, 23 insertions(+), 10 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/translate-vfp.c | ||
17 | +++ b/target/arm/translate-vfp.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, | ||
19 | { | ||
20 | TCGv_i32 fpscr; | ||
21 | tmp = loadfn(s, opaque); | ||
22 | - /* | ||
23 | - * TODO: when we implement MVE, write the QC bit. | ||
24 | - * For non-MVE, QC is RES0. | ||
25 | - */ | ||
26 | + if (dc_isar_feature(aa32_mve, s)) { | ||
27 | + /* QC is only present for MVE; otherwise RES0 */ | ||
28 | + TCGv_i32 qc = tcg_temp_new_i32(); | ||
29 | + tcg_gen_andi_i32(qc, tmp, FPCR_QC); | ||
30 | + /* | ||
31 | + * The 4 vfp.qc[] fields need only be "zero" vs "non-zero"; | ||
32 | + * here writing the same value into all elements is simplest. | ||
33 | + */ | ||
34 | + tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc), | ||
35 | + 16, 16, qc); | ||
36 | + } | ||
37 | tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); | ||
38 | fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); | ||
39 | tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK); | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, | ||
41 | break; | ||
42 | } | ||
43 | |||
44 | + if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) { | ||
45 | + /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */ | ||
46 | + regno = QEMU_VFP_FPSCR_NZCV; | ||
47 | + } | ||
48 | + | ||
49 | switch (regno) { | ||
50 | case ARM_VFP_FPSCR: | ||
51 | tmp = tcg_temp_new_i32(); | ||
52 | @@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, | ||
53 | storefn(s, opaque, tmp); | ||
54 | break; | ||
55 | case ARM_VFP_FPSCR_NZCVQC: | ||
56 | - /* | ||
57 | - * TODO: MVE has a QC bit, which we probably won't store | ||
58 | - * in the xregs[] field. For non-MVE, where QC is RES0, | ||
59 | - * we can just fall through to the FPSCR_NZCV case. | ||
60 | - */ | ||
61 | + tmp = tcg_temp_new_i32(); | ||
62 | + gen_helper_vfp_get_fpscr(tmp, cpu_env); | ||
63 | + tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK); | ||
64 | + storefn(s, opaque, tmp); | ||
65 | + break; | ||
66 | case QEMU_VFP_FPSCR_NZCV: | ||
67 | /* | ||
68 | * Read just NZCV; this is a special case to avoid the | ||
69 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/arm/vfp_helper.c | ||
72 | +++ b/target/arm/vfp_helper.c | ||
73 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) | ||
74 | FPCR_LTPSIZE_LENGTH); | ||
75 | } | ||
76 | |||
77 | - if (arm_feature(env, ARM_FEATURE_NEON)) { | ||
78 | + if (arm_feature(env, ARM_FEATURE_NEON) || | ||
79 | + cpu_isar_feature(aa32_mve, cpu)) { | ||
80 | /* | ||
81 | * The bit we set within fpscr_q is arbitrary; the register as a | ||
82 | * whole being zero/non-zero is what counts. | ||
83 | -- | ||
84 | 2.20.1 | ||
85 | |||
86 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When MVE is supported, the VPR register has a place on the exception | ||
2 | stack frame in a previously reserved slot just above the FPSCR. | ||
3 | It must also be zeroed in various situations when we invalidate | ||
4 | FPU context. | ||
5 | 1 | ||
6 | Update the code which handles the stack frames (exception entry and | ||
7 | exit code, VLLDM, and VLSTM) to save/restore VPR. | ||
8 | |||
9 | Update code which invalidates FP registers (mostly also exception | ||
10 | entry and exit code, but also VSCCLRM and the code in | ||
11 | full_vfp_access_check() that corresponds to the ExecuteFPCheck() | ||
12 | pseudocode) to zero VPR. | ||
13 | |||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Message-id: 20210614151007.4545-4-peter.maydell@linaro.org | ||
17 | --- | ||
18 | target/arm/m_helper.c | 54 +++++++++++++++++++++++++++++------ | ||
19 | target/arm/translate-m-nocp.c | 5 +++- | ||
20 | target/arm/translate-vfp.c | 9 ++++-- | ||
21 | 3 files changed, 57 insertions(+), 11 deletions(-) | ||
22 | |||
23 | diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/m_helper.c | ||
26 | +++ b/target/arm/m_helper.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) | ||
28 | uint32_t shi = extract64(dn, 32, 32); | ||
29 | |||
30 | if (i >= 16) { | ||
31 | - faddr += 8; /* skip the slot for the FPSCR */ | ||
32 | + faddr += 8; /* skip the slot for the FPSCR/VPR */ | ||
33 | } | ||
34 | stacked_ok = stacked_ok && | ||
35 | v7m_stack_write(cpu, faddr, slo, mmu_idx, STACK_LAZYFP) && | ||
36 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) | ||
37 | stacked_ok = stacked_ok && | ||
38 | v7m_stack_write(cpu, fpcar + 0x40, | ||
39 | vfp_get_fpscr(env), mmu_idx, STACK_LAZYFP); | ||
40 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
41 | + stacked_ok = stacked_ok && | ||
42 | + v7m_stack_write(cpu, fpcar + 0x44, | ||
43 | + env->v7m.vpr, mmu_idx, STACK_LAZYFP); | ||
44 | + } | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) | ||
49 | env->v7m.fpccr[is_secure] &= ~R_V7M_FPCCR_LSPACT_MASK; | ||
50 | |||
51 | if (ts) { | ||
52 | - /* Clear s0 to s31 and the FPSCR */ | ||
53 | + /* Clear s0 to s31 and the FPSCR and VPR */ | ||
54 | int i; | ||
55 | |||
56 | for (i = 0; i < 32; i += 2) { | ||
57 | *aa32_vfp_dreg(env, i / 2) = 0; | ||
58 | } | ||
59 | vfp_set_fpscr(env, 0); | ||
60 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
61 | + env->v7m.vpr = 0; | ||
62 | + } | ||
63 | } | ||
64 | /* | ||
65 | - * Otherwise s0 to s15 and FPSCR are UNKNOWN; we choose to leave them | ||
66 | + * Otherwise s0 to s15, FPSCR and VPR are UNKNOWN; we choose to leave them | ||
67 | * unchanged. | ||
68 | */ | ||
69 | } | ||
70 | @@ -XXX,XX +XXX,XX @@ static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr, | ||
71 | void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) | ||
72 | { | ||
73 | /* fptr is the value of Rn, the frame pointer we store the FP regs to */ | ||
74 | + ARMCPU *cpu = env_archcpu(env); | ||
75 | bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; | ||
76 | bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK; | ||
77 | uintptr_t ra = GETPC(); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) | ||
79 | cpu_stl_data_ra(env, faddr + 4, shi, ra); | ||
80 | } | ||
81 | cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra); | ||
82 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
83 | + cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra); | ||
84 | + } | ||
85 | |||
86 | /* | ||
87 | - * If TS is 0 then s0 to s15 and FPSCR are UNKNOWN; we choose to | ||
88 | + * If TS is 0 then s0 to s15, FPSCR and VPR are UNKNOWN; we choose to | ||
89 | * leave them unchanged, matching our choice in v7m_preserve_fp_state. | ||
90 | */ | ||
91 | if (ts) { | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) | ||
93 | *aa32_vfp_dreg(env, i / 2) = 0; | ||
94 | } | ||
95 | vfp_set_fpscr(env, 0); | ||
96 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
97 | + env->v7m.vpr = 0; | ||
98 | + } | ||
99 | } | ||
100 | } else { | ||
101 | v7m_update_fpccr(env, fptr, false); | ||
102 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) | ||
103 | |||
104 | void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) | ||
105 | { | ||
106 | + ARMCPU *cpu = env_archcpu(env); | ||
107 | uintptr_t ra = GETPC(); | ||
108 | |||
109 | /* fptr is the value of Rn, the frame pointer we load the FP regs from */ | ||
110 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) | ||
111 | uint32_t faddr = fptr + 4 * i; | ||
112 | |||
113 | if (i >= 16) { | ||
114 | - faddr += 8; /* skip the slot for the FPSCR */ | ||
115 | + faddr += 8; /* skip the slot for the FPSCR and VPR */ | ||
116 | } | ||
117 | |||
118 | slo = cpu_ldl_data_ra(env, faddr, ra); | ||
119 | @@ -XXX,XX +XXX,XX @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) | ||
120 | } | ||
121 | fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra); | ||
122 | vfp_set_fpscr(env, fpscr); | ||
123 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
124 | + env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra); | ||
125 | + } | ||
126 | } | ||
127 | |||
128 | env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK; | ||
129 | @@ -XXX,XX +XXX,XX @@ static bool v7m_push_stack(ARMCPU *cpu) | ||
130 | uint32_t shi = extract64(dn, 32, 32); | ||
131 | |||
132 | if (i >= 16) { | ||
133 | - faddr += 8; /* skip the slot for the FPSCR */ | ||
134 | + faddr += 8; /* skip the slot for the FPSCR and VPR */ | ||
135 | } | ||
136 | stacked_ok = stacked_ok && | ||
137 | v7m_stack_write(cpu, faddr, slo, | ||
138 | @@ -XXX,XX +XXX,XX @@ static bool v7m_push_stack(ARMCPU *cpu) | ||
139 | stacked_ok = stacked_ok && | ||
140 | v7m_stack_write(cpu, frameptr + 0x60, | ||
141 | vfp_get_fpscr(env), mmu_idx, STACK_NORMAL); | ||
142 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
143 | + stacked_ok = stacked_ok && | ||
144 | + v7m_stack_write(cpu, frameptr + 0x64, | ||
145 | + env->v7m.vpr, mmu_idx, STACK_NORMAL); | ||
146 | + } | ||
147 | if (cpacr_pass) { | ||
148 | for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) { | ||
149 | *aa32_vfp_dreg(env, i / 2) = 0; | ||
150 | } | ||
151 | vfp_set_fpscr(env, 0); | ||
152 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
153 | + env->v7m.vpr = 0; | ||
154 | + } | ||
155 | } | ||
156 | } else { | ||
157 | /* Lazy stacking enabled, save necessary info to stack later */ | ||
158 | @@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu) | ||
159 | v7m_exception_taken(cpu, excret, true, false); | ||
160 | } | ||
161 | } | ||
162 | - /* Clear s0..s15 and FPSCR; TODO also VPR when MVE is implemented */ | ||
163 | + /* Clear s0..s15, FPSCR and VPR */ | ||
164 | int i; | ||
165 | |||
166 | for (i = 0; i < 16; i += 2) { | ||
167 | *aa32_vfp_dreg(env, i / 2) = 0; | ||
168 | } | ||
169 | vfp_set_fpscr(env, 0); | ||
170 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
171 | + env->v7m.vpr = 0; | ||
172 | + } | ||
173 | } | ||
174 | } | ||
175 | |||
176 | @@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu) | ||
177 | uint32_t faddr = frameptr + 0x20 + 4 * i; | ||
178 | |||
179 | if (i >= 16) { | ||
180 | - faddr += 8; /* Skip the slot for the FPSCR */ | ||
181 | + faddr += 8; /* Skip the slot for the FPSCR and VPR */ | ||
182 | } | ||
183 | |||
184 | pop_ok = pop_ok && | ||
185 | @@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu) | ||
186 | if (pop_ok) { | ||
187 | vfp_set_fpscr(env, fpscr); | ||
188 | } | ||
189 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
190 | + pop_ok = pop_ok && | ||
191 | + v7m_stack_read(cpu, &env->v7m.vpr, | ||
192 | + frameptr + 0x64, mmu_idx); | ||
193 | + } | ||
194 | if (!pop_ok) { | ||
195 | /* | ||
196 | * These regs are 0 if security extension present; | ||
197 | @@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu) | ||
198 | *aa32_vfp_dreg(env, i / 2) = 0; | ||
199 | } | ||
200 | vfp_set_fpscr(env, 0); | ||
201 | + if (cpu_isar_feature(aa32_mve, cpu)) { | ||
202 | + env->v7m.vpr = 0; | ||
203 | + } | ||
204 | } | ||
205 | } | ||
206 | } | ||
207 | diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c | ||
208 | index XXXXXXX..XXXXXXX 100644 | ||
209 | --- a/target/arm/translate-m-nocp.c | ||
210 | +++ b/target/arm/translate-m-nocp.c | ||
211 | @@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) | ||
212 | btmreg++; | ||
213 | } | ||
214 | assert(btmreg == topreg + 1); | ||
215 | - /* TODO: when MVE is implemented, zero VPR here */ | ||
216 | + if (dc_isar_feature(aa32_mve, s)) { | ||
217 | + TCGv_i32 z32 = tcg_const_i32(0); | ||
218 | + store_cpu_field(z32, v7m.vpr); | ||
219 | + } | ||
220 | return true; | ||
221 | } | ||
222 | |||
223 | diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/target/arm/translate-vfp.c | ||
226 | +++ b/target/arm/translate-vfp.c | ||
227 | @@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) | ||
228 | |||
229 | if (s->v7m_new_fp_ctxt_needed) { | ||
230 | /* | ||
231 | - * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA | ||
232 | - * and the FPSCR. | ||
233 | + * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA, | ||
234 | + * the FPSCR, and VPR. | ||
235 | */ | ||
236 | TCGv_i32 control, fpscr; | ||
237 | uint32_t bits = R_V7M_CONTROL_FPCA_MASK; | ||
238 | @@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) | ||
239 | fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]); | ||
240 | gen_helper_vfp_set_fpscr(cpu_env, fpscr); | ||
241 | tcg_temp_free_i32(fpscr); | ||
242 | + if (dc_isar_feature(aa32_mve, s)) { | ||
243 | + TCGv_i32 z32 = tcg_const_i32(0); | ||
244 | + store_cpu_field(z32, v7m.vpr); | ||
245 | + } | ||
246 | + | ||
247 | /* | ||
248 | * We don't need to arrange to end the TB, because the only | ||
249 | * parts of FPSCR which we cache in the TB flags are the VECLEN | ||
250 | -- | ||
251 | 2.20.1 | ||
252 | |||
253 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | On A-profile, PSR bits [15:10][26:25] are always the IT state bits. | ||
2 | On M-profile, some of the reserved encodings of the IT state are used | ||
3 | to instead indicate partial progress through instructions that were | ||
4 | interrupted partway through by an exception and can be resumed. | ||
5 | 1 | ||
6 | These resumable instructions fall into two categories: | ||
7 | |||
8 | (1) load/store multiple instructions, where these bits are called | ||
9 | "ICI" and specify the register in the ldm/stm list where execution | ||
10 | should resume. (Specifically: LDM, STM, VLDM, VSTM, VLLDM, VLSTM, | ||
11 | CLRM, VSCCLRM.) | ||
12 | |||
13 | (2) MVE instructions subject to beatwise execution, where these bits | ||
14 | are called "ECI" and specify which beats in this and possibly also | ||
15 | the following MVE insn have been executed. | ||
16 | |||
17 | There are also a few insns (LE, LETP, and BKPT) which do not use the | ||
18 | ICI/ECI bits but must leave them alone. | ||
19 | |||
20 | Otherwise, we should raise an INVSTATE UsageFault for any attempt to | ||
21 | execute an insn with non-zero ICI/ECI bits. | ||
22 | |||
23 | So far we have been able to ignore ECI/ICI, because the architecture | ||
24 | allows the IMPDEF choice of "always restart load/store multiple from | ||
25 | the beginning regardless of ICI state", so the only thing we have | ||
26 | been missing is that we don't raise the INVSTATE fault for bad guest | ||
27 | code. However, MVE requires that we honour ECI bits and do not | ||
28 | re-execute beats of an insn that have already been executed. | ||
29 | |||
30 | Add the support in the decoder for handling ECI/ICI: | ||
31 | * identify the ECI/ICI case in the CONDEXEC TB flags | ||
32 | * when a load/store multiple insn succeeds, it updates the ECI/ICI | ||
33 | state (both in DisasContext and in the CPU state), and sets a flag | ||
34 | to say that the ECI/ICI state was handled | ||
35 | * if we find that the insn we just decoded did not handle the | ||
36 | ECI/ICI state, we delete all the code that we just generated for | ||
37 | it and instead emit the code to raise the INVSTATE UsageFault. This allows | ||
38 | us to avoid having to update every non-MVE non-LDM/STM insn to | ||
39 | make it check for "is ECI/ICI set?". | ||
40 | |||
41 | We continue with our existing IMPDEF choice of not caring about the | ||
42 | ICI state for the load/store multiples and simply restarting them | ||
43 | from the beginning. Because we don't allow interrupts in the middle | ||
44 | of an insn, the only way we would see this state is if the guest set | ||
45 | ICI manually on return from an exception handler, so it's a corner | ||
46 | case which doesn't merit optimisation. | ||
47 | |||
48 | ICI update for LDM/STM is simple -- it always zeroes the state. ECI | ||
49 | update for MVE beatwise insns will be a little more complex, since | ||
50 | the ECI state may include information for the following insn. | ||
51 | |||
52 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
53 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
54 | Message-id: 20210614151007.4545-5-peter.maydell@linaro.org | ||
55 | --- | ||
56 | target/arm/translate-a32.h | 1 + | ||
57 | target/arm/translate.h | 9 +++ | ||
58 | target/arm/translate-m-nocp.c | 11 ++++ | ||
59 | target/arm/translate-vfp.c | 6 ++ | ||
60 | target/arm/translate.c | 111 ++++++++++++++++++++++++++++++++-- | ||
61 | 5 files changed, 133 insertions(+), 5 deletions(-) | ||
62 | |||
63 | diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/translate-a32.h | ||
66 | +++ b/target/arm/translate-a32.h | ||
67 | @@ -XXX,XX +XXX,XX @@ long vfp_reg_offset(bool dp, unsigned reg); | ||
68 | long neon_full_reg_offset(unsigned reg); | ||
69 | long neon_element_offset(int reg, int element, MemOp memop); | ||
70 | void gen_rev16(TCGv_i32 dest, TCGv_i32 var); | ||
71 | +void clear_eci_state(DisasContext *s); | ||
72 | |||
73 | static inline TCGv_i32 load_cpu_offset(int offset) | ||
74 | { | ||
75 | diff --git a/target/arm/translate.h b/target/arm/translate.h | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/arm/translate.h | ||
78 | +++ b/target/arm/translate.h | ||
79 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
80 | /* Thumb-2 conditional execution bits. */ | ||
81 | int condexec_mask; | ||
82 | int condexec_cond; | ||
83 | + /* M-profile ECI/ICI exception-continuable instruction state */ | ||
84 | + int eci; | ||
85 | + /* | ||
86 | + * trans_ functions for insns which are continuable should set this true | ||
87 | + * after decode (ie after any UNDEF checks) | ||
88 | + */ | ||
89 | + bool eci_handled; | ||
90 | + /* TCG op to rewind to if this turns out to be an invalid ECI state */ | ||
91 | + TCGOp *insn_eci_rewind; | ||
92 | int thumb; | ||
93 | int sctlr_b; | ||
94 | MemOp be_data; | ||
95 | diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/target/arm/translate-m-nocp.c | ||
98 | +++ b/target/arm/translate-m-nocp.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) | ||
100 | unallocated_encoding(s); | ||
101 | return true; | ||
102 | } | ||
103 | + | ||
104 | + s->eci_handled = true; | ||
105 | + | ||
106 | /* If no fpu, NOP. */ | ||
107 | if (!dc_isar_feature(aa32_vfp, s)) { | ||
108 | + clear_eci_state(s); | ||
109 | return true; | ||
110 | } | ||
111 | |||
112 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) | ||
113 | } | ||
114 | tcg_temp_free_i32(fptr); | ||
115 | |||
116 | + clear_eci_state(s); | ||
117 | + | ||
118 | /* End the TB, because we have updated FP control bits */ | ||
119 | s->base.is_jmp = DISAS_UPDATE_EXIT; | ||
120 | return true; | ||
121 | @@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) | ||
122 | return true; | ||
123 | } | ||
124 | |||
125 | + s->eci_handled = true; | ||
126 | + | ||
127 | if (!dc_isar_feature(aa32_vfp_simd, s)) { | ||
128 | /* NOP if we have neither FP nor MVE */ | ||
129 | + clear_eci_state(s); | ||
130 | return true; | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) | ||
134 | TCGv_i32 z32 = tcg_const_i32(0); | ||
135 | store_cpu_field(z32, v7m.vpr); | ||
136 | } | ||
137 | + | ||
138 | + clear_eci_state(s); | ||
139 | return true; | ||
140 | } | ||
141 | |||
142 | diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/target/arm/translate-vfp.c | ||
145 | +++ b/target/arm/translate-vfp.c | ||
146 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) | ||
147 | return false; | ||
148 | } | ||
149 | |||
150 | + s->eci_handled = true; | ||
151 | + | ||
152 | if (!vfp_access_check(s)) { | ||
153 | return true; | ||
154 | } | ||
155 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) | ||
156 | tcg_temp_free_i32(addr); | ||
157 | } | ||
158 | |||
159 | + clear_eci_state(s); | ||
160 | return true; | ||
161 | } | ||
162 | |||
163 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) | ||
164 | return false; | ||
165 | } | ||
166 | |||
167 | + s->eci_handled = true; | ||
168 | + | ||
169 | if (!vfp_access_check(s)) { | ||
170 | return true; | ||
171 | } | ||
172 | @@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) | ||
173 | tcg_temp_free_i32(addr); | ||
174 | } | ||
175 | |||
176 | + clear_eci_state(s); | ||
177 | return true; | ||
178 | } | ||
179 | |||
180 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
181 | index XXXXXXX..XXXXXXX 100644 | ||
182 | --- a/target/arm/translate.c | ||
183 | +++ b/target/arm/translate.c | ||
184 | @@ -XXX,XX +XXX,XX @@ static inline bool is_singlestepping(DisasContext *s) | ||
185 | return s->base.singlestep_enabled || s->ss_active; | ||
186 | } | ||
187 | |||
188 | +void clear_eci_state(DisasContext *s) | ||
189 | +{ | ||
190 | + /* | ||
191 | + * Clear any ECI/ICI state: used when a load multiple/store | ||
192 | + * multiple insn executes. | ||
193 | + */ | ||
194 | + if (s->eci) { | ||
195 | + TCGv_i32 tmp = tcg_const_i32(0); | ||
196 | + store_cpu_field(tmp, condexec_bits); | ||
197 | + s->eci = 0; | ||
198 | + } | ||
199 | +} | ||
200 | + | ||
201 | static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b) | ||
202 | { | ||
203 | TCGv_i32 tmp1 = tcg_temp_new_i32(); | ||
204 | @@ -XXX,XX +XXX,XX @@ static bool trans_BKPT(DisasContext *s, arg_BKPT *a) | ||
205 | if (!ENABLE_ARCH_5) { | ||
206 | return false; | ||
207 | } | ||
208 | + /* BKPT is OK with ECI set and leaves it untouched */ | ||
209 | + s->eci_handled = true; | ||
210 | if (arm_dc_feature(s, ARM_FEATURE_M) && | ||
211 | semihosting_enabled() && | ||
212 | #ifndef CONFIG_USER_ONLY | ||
213 | @@ -XXX,XX +XXX,XX @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) | ||
214 | return true; | ||
215 | } | ||
216 | |||
217 | + s->eci_handled = true; | ||
218 | + | ||
219 | addr = op_addr_block_pre(s, a, n); | ||
220 | mem_idx = get_mem_index(s); | ||
221 | |||
222 | @@ -XXX,XX +XXX,XX @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) | ||
223 | } | ||
224 | |||
225 | op_addr_block_post(s, a, addr, n); | ||
226 | + clear_eci_state(s); | ||
227 | return true; | ||
228 | } | ||
229 | |||
230 | @@ -XXX,XX +XXX,XX @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) | ||
231 | return true; | ||
232 | } | ||
233 | |||
234 | + s->eci_handled = true; | ||
235 | + | ||
236 | addr = op_addr_block_pre(s, a, n); | ||
237 | mem_idx = get_mem_index(s); | ||
238 | loaded_base = false; | ||
239 | @@ -XXX,XX +XXX,XX @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) | ||
240 | /* Must exit loop to check un-masked IRQs */ | ||
241 | s->base.is_jmp = DISAS_EXIT; | ||
242 | } | ||
243 | + clear_eci_state(s); | ||
244 | return true; | ||
245 | } | ||
246 | |||
247 | @@ -XXX,XX +XXX,XX @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a) | ||
248 | return false; | ||
249 | } | ||
250 | |||
251 | + s->eci_handled = true; | ||
252 | + | ||
253 | zero = tcg_const_i32(0); | ||
254 | for (i = 0; i < 15; i++) { | ||
255 | if (extract32(a->list, i, 1)) { | ||
256 | @@ -XXX,XX +XXX,XX @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a) | ||
257 | tcg_temp_free_i32(maskreg); | ||
258 | } | ||
259 | tcg_temp_free_i32(zero); | ||
260 | + clear_eci_state(s); | ||
261 | return true; | ||
262 | } | ||
263 | |||
264 | @@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a) | ||
265 | return false; | ||
266 | } | ||
267 | |||
268 | + /* LE/LETP is OK with ECI set and leaves it untouched */ | ||
269 | + s->eci_handled = true; | ||
270 | + | ||
271 | if (!a->f) { | ||
272 | /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */ | ||
273 | arm_gen_condlabel(s); | ||
274 | @@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
275 | dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB); | ||
276 | dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; | ||
277 | condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC); | ||
278 | - dc->condexec_mask = (condexec & 0xf) << 1; | ||
279 | - dc->condexec_cond = condexec >> 4; | ||
280 | + /* | ||
281 | + * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this | ||
282 | + * is always the IT bits. On M-profile, some of the reserved encodings | ||
283 | + * of IT are used instead to indicate either ICI or ECI, which | ||
284 | + * indicate partial progress of a restartable insn that was interrupted | ||
285 | + * partway through by an exception: | ||
286 | + * * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits | ||
287 | + * * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits | ||
288 | + * In all cases CONDEXEC == 0 means "not in IT block or restartable | ||
289 | + * insn, behave normally". | ||
290 | + */ | ||
291 | + dc->eci = dc->condexec_mask = dc->condexec_cond = 0; | ||
292 | + dc->eci_handled = false; | ||
293 | + dc->insn_eci_rewind = NULL; | ||
294 | + if (condexec & 0xf) { | ||
295 | + dc->condexec_mask = (condexec & 0xf) << 1; | ||
296 | + dc->condexec_cond = condexec >> 4; | ||
297 | + } else { | ||
298 | + if (arm_feature(env, ARM_FEATURE_M)) { | ||
299 | + dc->eci = condexec >> 4; | ||
300 | + } | ||
301 | + } | ||
302 | |||
303 | core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); | ||
304 | dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx); | ||
305 | @@ -XXX,XX +XXX,XX @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) | ||
306 | static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) | ||
307 | { | ||
308 | DisasContext *dc = container_of(dcbase, DisasContext, base); | ||
309 | + /* | ||
310 | + * The ECI/ICI bits share PSR bits with the IT bits, so we | ||
311 | + * need to reconstitute the bits from the split-out DisasContext | ||
312 | + * fields here. | ||
313 | + */ | ||
314 | + uint32_t condexec_bits; | ||
315 | |||
316 | - tcg_gen_insn_start(dc->base.pc_next, | ||
317 | - (dc->condexec_cond << 4) | (dc->condexec_mask >> 1), | ||
318 | - 0); | ||
319 | + if (dc->eci) { | ||
320 | + condexec_bits = dc->eci << 4; | ||
321 | + } else { | ||
322 | + condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); | ||
323 | + } | ||
324 | + tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0); | ||
325 | dc->insn_start = tcg_last_op(); | ||
326 | } | ||
327 | |||
328 | @@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) | ||
329 | } | ||
330 | dc->insn = insn; | ||
331 | |||
332 | + if (dc->eci) { | ||
333 | + /* | ||
334 | + * For M-profile continuable instructions, ECI/ICI handling | ||
335 | + * falls into these cases: | ||
336 | + * - interrupt-continuable instructions | ||
337 | + * These are the various load/store multiple insns (both | ||
338 | + * integer and fp). The ICI bits indicate the register | ||
339 | + * where the load/store can resume. We make the IMPDEF | ||
340 | + * choice to always do "instruction restart", ie ignore | ||
341 | + * the ICI value and always execute the ldm/stm from the | ||
342 | + * start. So all we need to do is zero PSR.ICI if the | ||
343 | + * insn executes. | ||
344 | + * - MVE instructions subject to beat-wise execution | ||
345 | + * Here the ECI bits indicate which beats have already been | ||
346 | + * executed, and we must honour this. Each insn of this | ||
347 | + * type will handle it correctly. We will update PSR.ECI | ||
348 | + * in the helper function for the insn (some ECI values | ||
349 | + * mean that the following insn also has been partially | ||
350 | + * executed). | ||
351 | + * - Special cases which don't advance ECI | ||
352 | + * The insns LE, LETP and BKPT leave the ECI/ICI state | ||
353 | + * bits untouched. | ||
354 | + * - all other insns (the common case) | ||
355 | + * Non-zero ECI/ICI means an INVSTATE UsageFault. | ||
356 | + * We place a rewind-marker here. Insns in the previous | ||
357 | + * three categories will set a flag in the DisasContext. | ||
358 | + * If the flag isn't set after we call disas_thumb_insn() | ||
359 | + * or disas_thumb2_insn() then we know we have a "some other | ||
360 | + * insn" case. We will rewind to the marker (ie throwing away | ||
361 | + * all the generated code) and instead emit "take exception". | ||
362 | + */ | ||
363 | + dc->insn_eci_rewind = tcg_last_op(); | ||
364 | + } | ||
365 | + | ||
366 | if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) { | ||
367 | uint32_t cond = dc->condexec_cond; | ||
368 | |||
369 | @@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) | ||
370 | } | ||
371 | } | ||
372 | |||
373 | + if (dc->eci && !dc->eci_handled) { | ||
374 | + /* | ||
375 | + * Insn wasn't valid for ECI/ICI at all: undo what we | ||
376 | + * just generated and instead emit an exception | ||
377 | + */ | ||
378 | + tcg_remove_ops_after(dc->insn_eci_rewind); | ||
379 | + dc->condjmp = 0; | ||
380 | + gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE, syn_uncategorized(), | ||
381 | + default_exception_el(dc)); | ||
382 | + } | ||
383 | + | ||
384 | arm_post_translate_insn(dc); | ||
385 | |||
386 | /* Thumb is a variable-length ISA. Stop translation when the next insn | ||
387 | -- | ||
388 | 2.20.1 | ||
389 | |||
390 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In commit a3494d4671797c we reworked how M-profile handles the | ||
2 | checks for when the NOCP exception should be raised because the FPU | ||
3 | is disabled, so that (in line with the architecture) the NOCP check | ||
4 | is done early over a large range of the encoding space, and takes | ||
5 | precedence over UNDEF exceptions. As part of this, we removed the | ||
6 | code from full_vfp_access_check() which raised an exception there for | ||
7 | M-profile with the FPU disabled, because it was no longer reachable. | ||
8 | 1 | ||
9 | For MVE, some instructions which are outside the "coprocessor space" | ||
10 | region of the encoding space must nonetheless do "is the FPU enabled" | ||
11 | checks and possibly raise a NOCP exception. (In particular this | ||
12 | covers the MVE-specific low-overhead branch insns LCTP, DLSTP and | ||
13 | WLSTP.) To support these insns, reinstate the code in | ||
14 | full_vfp_access_check(), so that their trans functions can call | ||
15 | vfp_access_check() and get the correct behaviour. | ||
16 | |||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | Message-id: 20210614151007.4545-6-peter.maydell@linaro.org | ||
20 | --- | ||
21 | target/arm/translate-vfp.c | 20 +++++++++++++++----- | ||
22 | 1 file changed, 15 insertions(+), 5 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/translate-vfp.c | ||
27 | +++ b/target/arm/translate-vfp.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static void gen_preserve_fp_state(DisasContext *s) | ||
29 | static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) | ||
30 | { | ||
31 | if (s->fp_excp_el) { | ||
32 | - /* M-profile handled this earlier, in disas_m_nocp() */ | ||
33 | - assert (!arm_dc_feature(s, ARM_FEATURE_M)); | ||
34 | - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | ||
35 | - syn_fp_access_trap(1, 0xe, false), | ||
36 | - s->fp_excp_el); | ||
37 | + if (arm_dc_feature(s, ARM_FEATURE_M)) { | ||
38 | + /* | ||
39 | + * M-profile mostly catches the "FPU disabled" case early, in | ||
40 | + * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP) | ||
41 | + * which do coprocessor-checks are outside the large ranges of | ||
42 | + * the encoding space handled by the patterns in m-nocp.decode, | ||
43 | + * and for them we may need to raise NOCP here. | ||
44 | + */ | ||
45 | + gen_exception_insn(s, s->pc_curr, EXCP_NOCP, | ||
46 | + syn_uncategorized(), s->fp_excp_el); | ||
47 | + } else { | ||
48 | + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | ||
49 | + syn_fp_access_trap(1, 0xe, false), | ||
50 | + s->fp_excp_el); | ||
51 | + } | ||
52 | return false; | ||
53 | } | ||
54 | |||
55 | -- | ||
56 | 2.20.1 | ||
57 | |||
58 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the MVE LCTP instruction. | ||
2 | 1 | ||
3 | We put its decode and implementation with the other | ||
4 | low-overhead-branch insns because although it is only present if MVE | ||
5 | is implemented it is logically in the same group as the other LOB | ||
6 | insns. | ||
7 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20210614151007.4545-7-peter.maydell@linaro.org | ||
11 | --- | ||
12 | target/arm/t32.decode | 2 ++ | ||
13 | target/arm/translate.c | 24 ++++++++++++++++++++++++ | ||
14 | 2 files changed, 26 insertions(+) | ||
15 | |||
16 | diff --git a/target/arm/t32.decode b/target/arm/t32.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/t32.decode | ||
19 | +++ b/target/arm/t32.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24 | ||
21 | DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 | ||
22 | WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm | ||
23 | LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm | ||
24 | + | ||
25 | + LCTP 1111 0 0000 000 1111 1110 0000 0000 0001 | ||
26 | ] | ||
27 | } | ||
28 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/translate.c | ||
31 | +++ b/target/arm/translate.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a) | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | +static bool trans_LCTP(DisasContext *s, arg_LCTP *a) | ||
37 | +{ | ||
38 | + /* | ||
39 | + * M-profile Loop Clear with Tail Predication. Since our implementation | ||
40 | + * doesn't cache branch information, all we need to do is reset | ||
41 | + * FPSCR.LTPSIZE to 4. | ||
42 | + */ | ||
43 | + TCGv_i32 ltpsize; | ||
44 | + | ||
45 | + if (!dc_isar_feature(aa32_lob, s) || | ||
46 | + !dc_isar_feature(aa32_mve, s)) { | ||
47 | + return false; | ||
48 | + } | ||
49 | + | ||
50 | + if (!vfp_access_check(s)) { | ||
51 | + return true; | ||
52 | + } | ||
53 | + | ||
54 | + ltpsize = tcg_const_i32(4); | ||
55 | + store_cpu_field(ltpsize, v7m.ltpsize); | ||
56 | + return true; | ||
57 | +} | ||
58 | + | ||
59 | + | ||
60 | static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) | ||
61 | { | ||
62 | TCGv_i32 addr, tmp; | ||
63 | -- | ||
64 | 2.20.1 | ||
65 | |||
66 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the MVE WLSTP insn; this is like the existing WLS insn, | ||
2 | except that it specifies a size value which is used to set | ||
3 | FPSCR.LTPSIZE. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20210614151007.4545-8-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/t32.decode | 8 ++++++-- | ||
10 | target/arm/translate.c | 37 ++++++++++++++++++++++++++++++++++++- | ||
11 | 2 files changed, 42 insertions(+), 3 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/t32.decode b/target/arm/t32.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/t32.decode | ||
16 | +++ b/target/arm/t32.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24 | ||
18 | %lob_imm 1:10 11:1 !function=times_2 | ||
19 | |||
20 | DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 | ||
21 | - WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm | ||
22 | - LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm | ||
23 | + WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4 | ||
24 | + { | ||
25 | + LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm | ||
26 | + # This is WLSTP | ||
27 | + WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm | ||
28 | + } | ||
29 | |||
30 | LCTP 1111 0 0000 000 1111 1110 0000 0000 0001 | ||
31 | ] | ||
32 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/translate.c | ||
35 | +++ b/target/arm/translate.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) | ||
37 | return false; | ||
38 | } | ||
39 | if (a->rn == 13 || a->rn == 15) { | ||
40 | - /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ | ||
41 | + /* | ||
42 | + * For WLSTP rn == 15 is a related encoding (LE); the | ||
43 | + * other cases caught by this condition are all | ||
44 | + * CONSTRAINED UNPREDICTABLE: we choose to UNDEF | ||
45 | + */ | ||
46 | return false; | ||
47 | } | ||
48 | if (s->condexec_mask) { | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) | ||
50 | */ | ||
51 | return false; | ||
52 | } | ||
53 | + if (a->size != 4) { | ||
54 | + /* WLSTP */ | ||
55 | + if (!dc_isar_feature(aa32_mve, s)) { | ||
56 | + return false; | ||
57 | + } | ||
58 | + /* | ||
59 | + * We need to check that the FPU is enabled here, but mustn't | ||
60 | + * call vfp_access_check() to do that because we don't want to | ||
61 | + * do the lazy state preservation in the "loop count is zero" case. | ||
62 | + * Do the check-and-raise-exception by hand. | ||
63 | + */ | ||
64 | + if (s->fp_excp_el) { | ||
65 | + gen_exception_insn(s, s->pc_curr, EXCP_NOCP, | ||
66 | + syn_uncategorized(), s->fp_excp_el); | ||
67 | + return true; | ||
68 | + } | ||
69 | + } | ||
70 | + | ||
71 | nextlabel = gen_new_label(); | ||
72 | tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel); | ||
73 | tmp = load_reg(s, a->rn); | ||
74 | store_reg(s, 14, tmp); | ||
75 | + if (a->size != 4) { | ||
76 | + /* | ||
77 | + * WLSTP: set FPSCR.LTPSIZE. This requires that we do the | ||
78 | + * lazy state preservation, new FP context creation, etc, | ||
79 | + * that vfp_access_check() does. We know that the actual | ||
80 | + * access check will succeed (ie it won't generate code that | ||
81 | + * throws an exception) because we did that check by hand earlier. | ||
82 | + */ | ||
83 | + bool ok = vfp_access_check(s); | ||
84 | + assert(ok); | ||
85 | + tmp = tcg_const_i32(a->size); | ||
86 | + store_cpu_field(tmp, v7m.ltpsize); | ||
87 | + } | ||
88 | gen_jmp_tb(s, s->base.pc_next, 1); | ||
89 | |||
90 | gen_set_label(nextlabel); | ||
91 | -- | ||
92 | 2.20.1 | ||
93 | |||
94 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the MVE DLSTP insn; this is like the existing DLS | ||
2 | insn, except that it must do an FPU access check and it | ||
3 | sets LTPSIZE to the value specified in the insn. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20210614151007.4545-9-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/t32.decode | 9 ++++++--- | ||
10 | target/arm/translate.c | 23 +++++++++++++++++++++-- | ||
11 | 2 files changed, 27 insertions(+), 5 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/t32.decode b/target/arm/t32.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/t32.decode | ||
16 | +++ b/target/arm/t32.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24 | ||
18 | # LE and WLS immediate | ||
19 | %lob_imm 1:10 11:1 !function=times_2 | ||
20 | |||
21 | - DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 | ||
22 | + DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4 | ||
23 | WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4 | ||
24 | { | ||
25 | LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm | ||
26 | # This is WLSTP | ||
27 | WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm | ||
28 | } | ||
29 | - | ||
30 | - LCTP 1111 0 0000 000 1111 1110 0000 0000 0001 | ||
31 | + { | ||
32 | + LCTP 1111 0 0000 000 1111 1110 0000 0000 0001 | ||
33 | + # This is DLSTP | ||
34 | + DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001 | ||
35 | + } | ||
36 | ] | ||
37 | } | ||
38 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/translate.c | ||
41 | +++ b/target/arm/translate.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool trans_DLS(DisasContext *s, arg_DLS *a) | ||
43 | return false; | ||
44 | } | ||
45 | if (a->rn == 13 || a->rn == 15) { | ||
46 | - /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ | ||
47 | + /* | ||
48 | + * For DLSTP rn == 15 is a related encoding (LCTP); the | ||
49 | + * other cases caught by this condition are all | ||
50 | + * CONSTRAINED UNPREDICTABLE: we choose to UNDEF | ||
51 | + */ | ||
52 | return false; | ||
53 | } | ||
54 | |||
55 | - /* Not a while loop, no tail predication: just set LR to the count */ | ||
56 | + if (a->size != 4) { | ||
57 | + /* DLSTP */ | ||
58 | + if (!dc_isar_feature(aa32_mve, s)) { | ||
59 | + return false; | ||
60 | + } | ||
61 | + if (!vfp_access_check(s)) { | ||
62 | + return true; | ||
63 | + } | ||
64 | + } | ||
65 | + | ||
66 | + /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */ | ||
67 | tmp = load_reg(s, a->rn); | ||
68 | store_reg(s, 14, tmp); | ||
69 | + if (a->size != 4) { | ||
70 | + /* DLSTP: set FPSCR.LTPSIZE */ | ||
71 | + tmp = tcg_const_i32(a->size); | ||
72 | + store_cpu_field(tmp, v7m.ltpsize); | ||
73 | + } | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | -- | ||
78 | 2.20.1 | ||
79 | |||
80 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the MVE LETP insn. This is like the existing LE loop-end | ||
2 | insn, but it must perform an FPU-enabled check, and on loop-exit it | ||
3 | resets LTPSIZE to 4. | ||
4 | 1 | ||
5 | To accommodate the requirement to do something on loop-exit, we drop | ||
6 | the use of condlabel and instead manage both the TB exits manually, | ||
7 | in the same way we already do in trans_WLS(). | ||
8 | |||
9 | The other MVE-specific change to the LE insn is that we must raise an | ||
10 | INVSTATE UsageFault exception if LTPSIZE is not 4. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20210614151007.4545-10-peter.maydell@linaro.org | ||
15 | --- | ||
16 | target/arm/t32.decode | 2 +- | ||
17 | target/arm/translate.c | 104 +++++++++++++++++++++++++++++++++++++---- | ||
18 | 2 files changed, 97 insertions(+), 9 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/t32.decode b/target/arm/t32.decode | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/t32.decode | ||
23 | +++ b/target/arm/t32.decode | ||
24 | @@ -XXX,XX +XXX,XX @@ BL 1111 0. .......... 11.1 ............ @branch24 | ||
25 | DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4 | ||
26 | WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4 | ||
27 | { | ||
28 | - LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm | ||
29 | + LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm | ||
30 | # This is WLSTP | ||
31 | WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm | ||
32 | } | ||
33 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/translate.c | ||
36 | +++ b/target/arm/translate.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a) | ||
38 | * any faster. | ||
39 | */ | ||
40 | TCGv_i32 tmp; | ||
41 | + TCGLabel *loopend; | ||
42 | + bool fpu_active; | ||
43 | |||
44 | if (!dc_isar_feature(aa32_lob, s)) { | ||
45 | return false; | ||
46 | } | ||
47 | + if (a->f && a->tp) { | ||
48 | + return false; | ||
49 | + } | ||
50 | + if (s->condexec_mask) { | ||
51 | + /* | ||
52 | + * LE in an IT block is CONSTRAINED UNPREDICTABLE; | ||
53 | + * we choose to UNDEF, because otherwise our use of | ||
54 | + * gen_goto_tb(1) would clash with the use of TB exit 1 | ||
55 | + * in the dc->condjmp condition-failed codepath in | ||
56 | + * arm_tr_tb_stop() and we'd get an assertion. | ||
57 | + */ | ||
58 | + return false; | ||
59 | + } | ||
60 | + if (a->tp) { | ||
61 | + /* LETP */ | ||
62 | + if (!dc_isar_feature(aa32_mve, s)) { | ||
63 | + return false; | ||
64 | + } | ||
65 | + if (!vfp_access_check(s)) { | ||
66 | + s->eci_handled = true; | ||
67 | + return true; | ||
68 | + } | ||
69 | + } | ||
70 | |||
71 | /* LE/LETP is OK with ECI set and leaves it untouched */ | ||
72 | s->eci_handled = true; | ||
73 | |||
74 | - if (!a->f) { | ||
75 | - /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */ | ||
76 | - arm_gen_condlabel(s); | ||
77 | - tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel); | ||
78 | - /* Decrement LR */ | ||
79 | - tmp = load_reg(s, 14); | ||
80 | - tcg_gen_addi_i32(tmp, tmp, -1); | ||
81 | - store_reg(s, 14, tmp); | ||
82 | + /* | ||
83 | + * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE | ||
84 | + * UsageFault exception for the LE insn in that case. Note that we | ||
85 | + * are not directly checking FPSCR.LTPSIZE but instead check the | ||
86 | + * pseudocode LTPSIZE() function, which returns 4 if the FPU is | ||
87 | + * not currently active (ie ActiveFPState() returns false). We | ||
88 | + * can identify not-active purely from our TB state flags, as the | ||
89 | + * FPU is active only if: | ||
90 | + * the FPU is enabled | ||
91 | + * AND lazy state preservation is not active | ||
92 | + * AND we do not need a new fp context (this is the ASPEN/FPCA check) | ||
93 | + * | ||
94 | + * Usually we don't need to care about this distinction between | ||
95 | + * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check() | ||
96 | + * will either take an exception or clear the conditions that make | ||
97 | + * the FPU not active. But LE is an unusual case of a non-FP insn | ||
98 | + * that looks at LTPSIZE. | ||
99 | + */ | ||
100 | + fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed; | ||
101 | + | ||
102 | + if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) { | ||
103 | + /* Need to do a runtime check for LTPSIZE != 4 */ | ||
104 | + TCGLabel *skipexc = gen_new_label(); | ||
105 | + tmp = load_cpu_field(v7m.ltpsize); | ||
106 | + tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc); | ||
107 | + tcg_temp_free_i32(tmp); | ||
108 | + gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(), | ||
109 | + default_exception_el(s)); | ||
110 | + gen_set_label(skipexc); | ||
111 | + } | ||
112 | + | ||
113 | + if (a->f) { | ||
114 | + /* Loop-forever: just jump back to the loop start */ | ||
115 | + gen_jmp(s, read_pc(s) - a->imm); | ||
116 | + return true; | ||
117 | + } | ||
118 | + | ||
119 | + /* | ||
120 | + * Not loop-forever. If LR <= loop-decrement-value this is the last loop. | ||
121 | + * For LE, we know at this point that LTPSIZE must be 4 and the | ||
122 | + * loop decrement value is 1. For LETP we need to calculate the decrement | ||
123 | + * value from LTPSIZE. | ||
124 | + */ | ||
125 | + loopend = gen_new_label(); | ||
126 | + if (!a->tp) { | ||
127 | + tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend); | ||
128 | + tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1); | ||
129 | + } else { | ||
130 | + /* | ||
131 | + * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local | ||
132 | + * so that decr stays live after the brcondi. | ||
133 | + */ | ||
134 | + TCGv_i32 decr = tcg_temp_local_new_i32(); | ||
135 | + TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize); | ||
136 | + tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize); | ||
137 | + tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr); | ||
138 | + tcg_temp_free_i32(ltpsize); | ||
139 | + | ||
140 | + tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend); | ||
141 | + | ||
142 | + tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr); | ||
143 | + tcg_temp_free_i32(decr); | ||
144 | } | ||
145 | /* Jump back to the loop start */ | ||
146 | gen_jmp(s, read_pc(s) - a->imm); | ||
147 | + | ||
148 | + gen_set_label(loopend); | ||
149 | + if (a->tp) { | ||
150 | + /* Exits from tail-pred loops must reset LTPSIZE to 4 */ | ||
151 | + tmp = tcg_const_i32(4); | ||
152 | + store_cpu_field(tmp, v7m.ltpsize); | ||
153 | + } | ||
154 | + /* End TB, continuing to following insn */ | ||
155 | + gen_jmp_tb(s, s->base.pc_next, 1); | ||
156 | return true; | ||
157 | } | ||
158 | |||
159 | -- | ||
160 | 2.20.1 | ||
161 | |||
162 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add the framework for decoding MVE insns, with the necessary new | ||
2 | files and the meson.build rules, but no actual content yet. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-id: 20210614151007.4545-11-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/translate-a32.h | 1 + | ||
9 | target/arm/mve.decode | 20 ++++++++++++++++++++ | ||
10 | target/arm/translate-mve.c | 29 +++++++++++++++++++++++++++++ | ||
11 | target/arm/translate.c | 1 + | ||
12 | target/arm/meson.build | 2 ++ | ||
13 | 5 files changed, 53 insertions(+) | ||
14 | create mode 100644 target/arm/mve.decode | ||
15 | create mode 100644 target/arm/translate-mve.c | ||
16 | |||
17 | diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/translate-a32.h | ||
20 | +++ b/target/arm/translate-a32.h | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | |||
23 | /* Prototypes for autogenerated disassembler functions */ | ||
24 | bool disas_m_nocp(DisasContext *dc, uint32_t insn); | ||
25 | +bool disas_mve(DisasContext *dc, uint32_t insn); | ||
26 | bool disas_vfp(DisasContext *s, uint32_t insn); | ||
27 | bool disas_vfp_uncond(DisasContext *s, uint32_t insn); | ||
28 | bool disas_neon_dp(DisasContext *s, uint32_t insn); | ||
29 | diff --git a/target/arm/mve.decode b/target/arm/mve.decode | ||
30 | new file mode 100644 | ||
31 | index XXXXXXX..XXXXXXX | ||
32 | --- /dev/null | ||
33 | +++ b/target/arm/mve.decode | ||
34 | @@ -XXX,XX +XXX,XX @@ | ||
35 | +# M-profile MVE instruction descriptions | ||
36 | +# | ||
37 | +# Copyright (c) 2021 Linaro, Ltd | ||
38 | +# | ||
39 | +# This library is free software; you can redistribute it and/or | ||
40 | +# modify it under the terms of the GNU Lesser General Public | ||
41 | +# License as published by the Free Software Foundation; either | ||
42 | +# version 2.1 of the License, or (at your option) any later version. | ||
43 | +# | ||
44 | +# This library is distributed in the hope that it will be useful, | ||
45 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
46 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
47 | +# Lesser General Public License for more details. | ||
48 | +# | ||
49 | +# You should have received a copy of the GNU Lesser General Public | ||
50 | +# License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
51 | + | ||
52 | +# | ||
53 | +# This file is processed by scripts/decodetree.py | ||
54 | +# | ||
55 | diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c | ||
56 | new file mode 100644 | ||
57 | index XXXXXXX..XXXXXXX | ||
58 | --- /dev/null | ||
59 | +++ b/target/arm/translate-mve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ | ||
61 | +/* | ||
62 | + * ARM translation: M-profile MVE instructions | ||
63 | + * | ||
64 | + * Copyright (c) 2021 Linaro, Ltd. | ||
65 | + * | ||
66 | + * This library is free software; you can redistribute it and/or | ||
67 | + * modify it under the terms of the GNU Lesser General Public | ||
68 | + * License as published by the Free Software Foundation; either | ||
69 | + * version 2.1 of the License, or (at your option) any later version. | ||
70 | + * | ||
71 | + * This library is distributed in the hope that it will be useful, | ||
72 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
73 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
74 | + * Lesser General Public License for more details. | ||
75 | + * | ||
76 | + * You should have received a copy of the GNU Lesser General Public | ||
77 | + * License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
78 | + */ | ||
79 | + | ||
80 | +#include "qemu/osdep.h" | ||
81 | +#include "tcg/tcg-op.h" | ||
82 | +#include "tcg/tcg-op-gvec.h" | ||
83 | +#include "exec/exec-all.h" | ||
84 | +#include "exec/gen-icount.h" | ||
85 | +#include "translate.h" | ||
86 | +#include "translate-a32.h" | ||
87 | + | ||
88 | +/* Include the generated decoder */ | ||
89 | +#include "decode-mve.c.inc" | ||
90 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/target/arm/translate.c | ||
93 | +++ b/target/arm/translate.c | ||
94 | @@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) | ||
95 | if (disas_t32(s, insn) || | ||
96 | disas_vfp_uncond(s, insn) || | ||
97 | disas_neon_shared(s, insn) || | ||
98 | + disas_mve(s, insn) || | ||
99 | ((insn >> 28) == 0xe && disas_vfp(s, insn))) { | ||
100 | return; | ||
101 | } | ||
102 | diff --git a/target/arm/meson.build b/target/arm/meson.build | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/arm/meson.build | ||
105 | +++ b/target/arm/meson.build | ||
106 | @@ -XXX,XX +XXX,XX @@ gen = [ | ||
107 | decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'), | ||
108 | decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'), | ||
109 | decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'), | ||
110 | + decodetree.process('mve.decode', extra_args: '--decode=disas_mve'), | ||
111 | decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'), | ||
112 | decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'), | ||
113 | decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'), | ||
114 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(files( | ||
115 | 'tlb_helper.c', | ||
116 | 'translate.c', | ||
117 | 'translate-m-nocp.c', | ||
118 | + 'translate-mve.c', | ||
119 | 'translate-neon.c', | ||
120 | 'translate-vfp.c', | ||
121 | 'vec_helper.c', | ||
122 | -- | ||
123 | 2.20.1 | ||
124 | |||
125 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For MVE, we want to re-use the large data table from expand_pred_b(). | ||
2 | Move the data table to vec_helper.c so it is no longer in an SVE | ||
3 | specific source file. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20210614151007.4545-14-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/vec_internal.h | 3 ++ | ||
10 | target/arm/sve_helper.c | 103 ++------------------------------------ | ||
11 | target/arm/vec_helper.c | 102 +++++++++++++++++++++++++++++++++++++ | ||
12 | 3 files changed, 109 insertions(+), 99 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/vec_internal.h | ||
17 | +++ b/target/arm/vec_internal.h | ||
18 | @@ -XXX,XX +XXX,XX @@ | ||
19 | #define H8(x) (x) | ||
20 | #define H1_8(x) (x) | ||
21 | |||
22 | +/* Data for expanding active predicate bits to bytes, for byte elements. */ | ||
23 | +extern const uint64_t expand_pred_b_data[256]; | ||
24 | + | ||
25 | static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) | ||
26 | { | ||
27 | uint64_t *d = vd + opr_sz; | ||
28 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/sve_helper.c | ||
31 | +++ b/target/arm/sve_helper.c | ||
32 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) | ||
33 | return flags; | ||
34 | } | ||
35 | |||
36 | -/* Expand active predicate bits to bytes, for byte elements. | ||
37 | - * for (i = 0; i < 256; ++i) { | ||
38 | - * unsigned long m = 0; | ||
39 | - * for (j = 0; j < 8; j++) { | ||
40 | - * if ((i >> j) & 1) { | ||
41 | - * m |= 0xfful << (j << 3); | ||
42 | - * } | ||
43 | - * } | ||
44 | - * printf("0x%016lx,\n", m); | ||
45 | - * } | ||
46 | +/* | ||
47 | + * Expand active predicate bits to bytes, for byte elements. | ||
48 | + * (The data table itself is in vec_helper.c as MVE also needs it.) | ||
49 | */ | ||
50 | static inline uint64_t expand_pred_b(uint8_t byte) | ||
51 | { | ||
52 | - static const uint64_t word[256] = { | ||
53 | - 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00, | ||
54 | - 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff, | ||
55 | - 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000, | ||
56 | - 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff, | ||
57 | - 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00, | ||
58 | - 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff, | ||
59 | - 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000, | ||
60 | - 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff, | ||
61 | - 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00, | ||
62 | - 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff, | ||
63 | - 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000, | ||
64 | - 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff, | ||
65 | - 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00, | ||
66 | - 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff, | ||
67 | - 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000, | ||
68 | - 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff, | ||
69 | - 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00, | ||
70 | - 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff, | ||
71 | - 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000, | ||
72 | - 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff, | ||
73 | - 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00, | ||
74 | - 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff, | ||
75 | - 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000, | ||
76 | - 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff, | ||
77 | - 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00, | ||
78 | - 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff, | ||
79 | - 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000, | ||
80 | - 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff, | ||
81 | - 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, | ||
82 | - 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff, | ||
83 | - 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000, | ||
84 | - 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff, | ||
85 | - 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00, | ||
86 | - 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff, | ||
87 | - 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000, | ||
88 | - 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff, | ||
89 | - 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00, | ||
90 | - 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff, | ||
91 | - 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000, | ||
92 | - 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff, | ||
93 | - 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00, | ||
94 | - 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff, | ||
95 | - 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000, | ||
96 | - 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff, | ||
97 | - 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00, | ||
98 | - 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff, | ||
99 | - 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000, | ||
100 | - 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff, | ||
101 | - 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00, | ||
102 | - 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff, | ||
103 | - 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000, | ||
104 | - 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff, | ||
105 | - 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00, | ||
106 | - 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff, | ||
107 | - 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000, | ||
108 | - 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff, | ||
109 | - 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, | ||
110 | - 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff, | ||
111 | - 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000, | ||
112 | - 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff, | ||
113 | - 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00, | ||
114 | - 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff, | ||
115 | - 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000, | ||
116 | - 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff, | ||
117 | - 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00, | ||
118 | - 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff, | ||
119 | - 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000, | ||
120 | - 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff, | ||
121 | - 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00, | ||
122 | - 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff, | ||
123 | - 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000, | ||
124 | - 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff, | ||
125 | - 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00, | ||
126 | - 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff, | ||
127 | - 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000, | ||
128 | - 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff, | ||
129 | - 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00, | ||
130 | - 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff, | ||
131 | - 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000, | ||
132 | - 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff, | ||
133 | - 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00, | ||
134 | - 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff, | ||
135 | - 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000, | ||
136 | - 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff, | ||
137 | - 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00, | ||
138 | - 0xffffffffffffffff, | ||
139 | - }; | ||
140 | - return word[byte]; | ||
141 | + return expand_pred_b_data[byte]; | ||
142 | } | ||
143 | |||
144 | /* Similarly for half-word elements. | ||
145 | diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/target/arm/vec_helper.c | ||
148 | +++ b/target/arm/vec_helper.c | ||
149 | @@ -XXX,XX +XXX,XX @@ | ||
150 | #include "qemu/int128.h" | ||
151 | #include "vec_internal.h" | ||
152 | |||
153 | +/* | ||
154 | + * Data for expanding active predicate bits to bytes, for byte elements. | ||
155 | + * | ||
156 | + * for (i = 0; i < 256; ++i) { | ||
157 | + * unsigned long m = 0; | ||
158 | + * for (j = 0; j < 8; j++) { | ||
159 | + * if ((i >> j) & 1) { | ||
160 | + * m |= 0xfful << (j << 3); | ||
161 | + * } | ||
162 | + * } | ||
163 | + * printf("0x%016lx,\n", m); | ||
164 | + * } | ||
165 | + */ | ||
166 | +const uint64_t expand_pred_b_data[256] = { | ||
167 | + 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00, | ||
168 | + 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff, | ||
169 | + 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000, | ||
170 | + 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff, | ||
171 | + 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00, | ||
172 | + 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff, | ||
173 | + 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000, | ||
174 | + 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff, | ||
175 | + 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00, | ||
176 | + 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff, | ||
177 | + 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000, | ||
178 | + 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff, | ||
179 | + 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00, | ||
180 | + 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff, | ||
181 | + 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000, | ||
182 | + 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff, | ||
183 | + 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00, | ||
184 | + 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff, | ||
185 | + 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000, | ||
186 | + 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff, | ||
187 | + 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00, | ||
188 | + 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff, | ||
189 | + 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000, | ||
190 | + 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff, | ||
191 | + 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00, | ||
192 | + 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff, | ||
193 | + 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000, | ||
194 | + 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff, | ||
195 | + 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, | ||
196 | + 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff, | ||
197 | + 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000, | ||
198 | + 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff, | ||
199 | + 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00, | ||
200 | + 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff, | ||
201 | + 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000, | ||
202 | + 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff, | ||
203 | + 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00, | ||
204 | + 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff, | ||
205 | + 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000, | ||
206 | + 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff, | ||
207 | + 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00, | ||
208 | + 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff, | ||
209 | + 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000, | ||
210 | + 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff, | ||
211 | + 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00, | ||
212 | + 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff, | ||
213 | + 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000, | ||
214 | + 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff, | ||
215 | + 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00, | ||
216 | + 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff, | ||
217 | + 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000, | ||
218 | + 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff, | ||
219 | + 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00, | ||
220 | + 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff, | ||
221 | + 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000, | ||
222 | + 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff, | ||
223 | + 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, | ||
224 | + 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff, | ||
225 | + 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000, | ||
226 | + 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff, | ||
227 | + 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00, | ||
228 | + 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff, | ||
229 | + 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000, | ||
230 | + 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff, | ||
231 | + 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00, | ||
232 | + 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff, | ||
233 | + 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000, | ||
234 | + 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff, | ||
235 | + 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00, | ||
236 | + 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff, | ||
237 | + 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000, | ||
238 | + 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff, | ||
239 | + 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00, | ||
240 | + 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff, | ||
241 | + 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000, | ||
242 | + 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff, | ||
243 | + 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00, | ||
244 | + 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff, | ||
245 | + 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000, | ||
246 | + 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff, | ||
247 | + 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00, | ||
248 | + 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff, | ||
249 | + 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000, | ||
250 | + 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff, | ||
251 | + 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00, | ||
252 | + 0xffffffffffffffff, | ||
253 | +}; | ||
254 | + | ||
255 | /* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */ | ||
256 | int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3, | ||
257 | bool neg, bool round) | ||
258 | -- | ||
259 | 2.20.1 | ||
260 | |||
261 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Currently the ARM SVE helper code locally defines some utility | ||
2 | functions for swapping 16-bit halfwords within 32-bit or 64-bit | ||
3 | values and for swapping 32-bit words within 64-bit values, | ||
4 | parallel to the byte-swapping bswap16/32/64 functions. | ||
5 | 1 | ||
6 | We want these also for the ARM MVE code, and they're potentially | ||
7 | generally useful for other targets, so move them to bitops.h. | ||
8 | (We don't put them in bswap.h with the bswap* functions because | ||
9 | they are implemented in terms of the rotate operations also | ||
10 | defined in bitops.h, and including bitops.h from bswap.h seems | ||
11 | better avoided.) | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
16 | Message-id: 20210614151007.4545-17-peter.maydell@linaro.org | ||
17 | --- | ||
18 | include/qemu/bitops.h | 29 +++++++++++++++++++++++++++++ | ||
19 | target/arm/sve_helper.c | 20 -------------------- | ||
20 | 2 files changed, 29 insertions(+), 20 deletions(-) | ||
21 | |||
22 | diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/qemu/bitops.h | ||
25 | +++ b/include/qemu/bitops.h | ||
26 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t ror64(uint64_t word, unsigned int shift) | ||
27 | return (word >> shift) | (word << ((64 - shift) & 63)); | ||
28 | } | ||
29 | |||
30 | +/** | ||
31 | + * hswap32 - swap 16-bit halfwords within a 32-bit value | ||
32 | + * @h: value to swap | ||
33 | + */ | ||
34 | +static inline uint32_t hswap32(uint32_t h) | ||
35 | +{ | ||
36 | + return rol32(h, 16); | ||
37 | +} | ||
38 | + | ||
39 | +/** | ||
40 | + * hswap64 - swap 16-bit halfwords within a 64-bit value | ||
41 | + * @h: value to swap | ||
42 | + */ | ||
43 | +static inline uint64_t hswap64(uint64_t h) | ||
44 | +{ | ||
45 | + uint64_t m = 0x0000ffff0000ffffull; | ||
46 | + h = rol64(h, 32); | ||
47 | + return ((h & m) << 16) | ((h >> 16) & m); | ||
48 | +} | ||
49 | + | ||
50 | +/** | ||
51 | + * wswap64 - swap 32-bit words within a 64-bit value | ||
52 | + * @h: value to swap | ||
53 | + */ | ||
54 | +static inline uint64_t wswap64(uint64_t h) | ||
55 | +{ | ||
56 | + return rol64(h, 32); | ||
57 | +} | ||
58 | + | ||
59 | /** | ||
60 | * extract32: | ||
61 | * @value: the value to extract the bit field from | ||
62 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/sve_helper.c | ||
65 | +++ b/target/arm/sve_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t expand_pred_s(uint8_t byte) | ||
67 | return word[byte & 0x11]; | ||
68 | } | ||
69 | |||
70 | -/* Swap 16-bit words within a 32-bit word. */ | ||
71 | -static inline uint32_t hswap32(uint32_t h) | ||
72 | -{ | ||
73 | - return rol32(h, 16); | ||
74 | -} | ||
75 | - | ||
76 | -/* Swap 16-bit words within a 64-bit word. */ | ||
77 | -static inline uint64_t hswap64(uint64_t h) | ||
78 | -{ | ||
79 | - uint64_t m = 0x0000ffff0000ffffull; | ||
80 | - h = rol64(h, 32); | ||
81 | - return ((h & m) << 16) | ((h >> 16) & m); | ||
82 | -} | ||
83 | - | ||
84 | -/* Swap 32-bit words within a 64-bit word. */ | ||
85 | -static inline uint64_t wswap64(uint64_t h) | ||
86 | -{ | ||
87 | - return rol64(h, 32); | ||
88 | -} | ||
89 | - | ||
90 | #define LOGICAL_PPPP(NAME, FUNC) \ | ||
91 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ | ||
92 | { \ | ||
93 | -- | ||
94 | 2.20.1 | ||
95 | |||
96 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | int128_make64() creates an Int128 from an unsigned 64-bit value; add | ||
2 | a function int128_makes64() that creates an Int128 from a signed 64-bit | ||
3 | value. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Message-id: 20210614151007.4545-34-peter.maydell@linaro.org | ||
9 | --- | ||
10 | include/qemu/int128.h | 10 ++++++++++ | ||
11 | 1 file changed, 10 insertions(+) | ||
12 | |||
13 | diff --git a/include/qemu/int128.h b/include/qemu/int128.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/include/qemu/int128.h | ||
16 | +++ b/include/qemu/int128.h | ||
17 | @@ -XXX,XX +XXX,XX @@ static inline Int128 int128_make64(uint64_t a) | ||
18 | return a; | ||
19 | } | ||
20 | |||
21 | +static inline Int128 int128_makes64(int64_t a) | ||
22 | +{ | ||
23 | + return a; | ||
24 | +} | ||
25 | + | ||
26 | static inline Int128 int128_make128(uint64_t lo, uint64_t hi) | ||
27 | { | ||
28 | return (__uint128_t)hi << 64 | lo; | ||
29 | @@ -XXX,XX +XXX,XX @@ static inline Int128 int128_make64(uint64_t a) | ||
30 | return (Int128) { a, 0 }; | ||
31 | } | ||
32 | |||
33 | +static inline Int128 int128_makes64(int64_t a) | ||
34 | +{ | ||
35 | + return (Int128) { a, a >> 63 }; | ||
36 | +} | ||
37 | + | ||
38 | static inline Int128 int128_make128(uint64_t lo, uint64_t hi) | ||
39 | { | ||
40 | return (Int128) { lo, hi }; | ||
41 | -- | ||
42 | 2.20.1 | ||
43 | |||
44 | diff view generated by jsdifflib |