Hi; most of this is the first half of the A64 simd decodetree
conversion; the rest is a mix of fixes from the last couple of weeks.

v2 uses patches from the v2 decodetree series to avoid a few
regressions in some A32 insns.

(Richard: I'm still planning to review the second half of the
v2 decodetree series; I just wanted to get the respin of this
pullreq out today...)

thanks
-- PMM

The following changes since commit ad10b4badc1dd5b28305f9b9f1168cf0aa3ae946:

  Merge tag 'pull-error-2024-05-27' of https://repo.or.cz/qemu/armbru into staging (2024-05-27 06:40:42 -0700)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240528

for you to fetch changes up to f240df3c31b40e4cf1af1f156a88efc1a1df406c:

  target/arm: Convert disas_simd_3same_logic to decodetree (2024-05-28 14:29:01 +0100)

----------------------------------------------------------------
target-arm queue:
 * xlnx_dpdma: fix descriptor endianness bug
 * hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers
 * hw/arm/npcm7xx: remove setting of mp-affinity
 * hw/char: Correct STM32L4x5 usart register CR2 field ADD_0 size
 * hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n>
 * hw/input/tsc2005: Fix -Wchar-subscripts warning in tsc2005_txrx()
 * hw: arm: Remove use of tabs in some source files
 * docs/system: Remove ADC from raspi documentation
 * target/arm: Start of the conversion of A64 SIMD to decodetree

----------------------------------------------------------------
Alexandra Diupina (1):
      xlnx_dpdma: fix descriptor endianness bug

Andrey Shumilin (1):
      hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n>

Dorjoy Chowdhury (1):
      hw/arm/npcm7xx: remove setting of mp-affinity

Inès Varhol (1):
      hw/char: Correct STM32L4x5 usart register CR2 field ADD_0 size

Philippe Mathieu-Daudé (1):
      hw/input/tsc2005: Fix -Wchar-subscripts warning in tsc2005_txrx()

Rayhan Faizel (1):
      docs/system: Remove ADC from raspi documentation

Richard Henderson (34):
      target/arm: Use PLD, PLDW, PLI not NOP for t32
      target/arm: Zero-extend writeback for fp16 FCVTZS (scalar, integer)
      target/arm: Fix decode of FMOV (hp) vs MOVI
      target/arm: Verify sz=0 for Advanced SIMD scalar pairwise (fp16)
      target/arm: Split out gengvec.c
      target/arm: Split out gengvec64.c
      target/arm: Convert Cryptographic AES to decodetree
      target/arm: Convert Cryptographic 3-register SHA to decodetree
      target/arm: Convert Cryptographic 2-register SHA to decodetree
      target/arm: Convert Cryptographic 3-register SHA512 to decodetree
      target/arm: Convert Cryptographic 2-register SHA512 to decodetree
      target/arm: Convert Cryptographic 4-register to decodetree
      target/arm: Convert Cryptographic 3-register, imm2 to decodetree
      target/arm: Convert XAR to decodetree
      target/arm: Convert Advanced SIMD copy to decodetree
      target/arm: Convert FMULX to decodetree
      target/arm: Convert FADD, FSUB, FDIV, FMUL to decodetree
      target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM to decodetree
      target/arm: Introduce vfp_load_reg16
      target/arm: Expand vfp neg and abs inline
      target/arm: Convert FNMUL to decodetree
      target/arm: Convert FMLA, FMLS to decodetree
      target/arm: Convert FCMEQ, FCMGE, FCMGT, FACGE, FACGT to decodetree
      target/arm: Convert FABD to decodetree
      target/arm: Convert FRECPS, FRSQRTS to decodetree
      target/arm: Convert FADDP to decodetree
      target/arm: Convert FMAXP, FMINP, FMAXNMP, FMINNMP to decodetree
      target/arm: Use gvec for neon faddp, fmaxp, fminp
      target/arm: Convert ADDP to decodetree
      target/arm: Use gvec for neon padd
      target/arm: Convert SMAXP, SMINP, UMAXP, UMINP to decodetree
      target/arm: Use gvec for neon pmax, pmin
      target/arm: Convert FMLAL, FMLSL to decodetree
      target/arm: Convert disas_simd_3same_logic to decodetree

Tanmay Patil (1):
      hw: arm: Remove use of tabs in some source files

Zenghui Yu (1):
      hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers

 docs/system/arm/raspi.rst       |    1 -
 target/arm/helper.h             |   68 +-
 target/arm/tcg/helper-a64.h     |   12 +
 target/arm/tcg/translate-a64.h  |    4 +
 target/arm/tcg/translate.h      |   51 +
 target/arm/tcg/a64.decode       |  315 +++-
 target/arm/tcg/t32.decode       |   25 +-
 hw/arm/boot.c                   |    8 +-
 hw/arm/npcm7xx.c                |    3 -
 hw/char/omap_uart.c             |   49 +-
 hw/char/stm32l4x5_usart.c       |    2 +-
 hw/dma/xlnx_dpdma.c             |   68 +-
 hw/gpio/zaurus.c                |   59 +-
 hw/input/tsc2005.c              |  135 +-
 hw/intc/arm_gic.c               |    4 +-
 target/arm/hvf/hvf.c            |  130 +-
 target/arm/tcg/gengvec.c        | 1672 +++++++++++++++++++++
 target/arm/tcg/gengvec64.c      |  190 +++
 target/arm/tcg/neon_helper.c    |    5 -
 target/arm/tcg/translate-a64.c  | 3137 +++++++++++++--------------------------
 target/arm/tcg/translate-neon.c |  136 +-
 target/arm/tcg/translate-sve.c  |  145 +-
 target/arm/tcg/translate-vfp.c  |   93 +-
 target/arm/tcg/translate.c      | 1592 +-------------------
 target/arm/tcg/vec_helper.c     |  221 ++-
 target/arm/vfp_helper.c         |   30 -
 target/arm/tcg/meson.build      |    2 +
 27 files changed, 3860 insertions(+), 4297 deletions(-)
 create mode 100644 target/arm/tcg/gengvec.c
 create mode 100644 target/arm/tcg/gengvec64.c
From: Alexandra Diupina <adiupina@astralinux.ru>

Add xlnx_dpdma_read_descriptor() and
xlnx_dpdma_write_descriptor() functions.
xlnx_dpdma_read_descriptor() combines reading a
descriptor from desc_addr by calling dma_memory_read()
and swapping the desc fields from guest memory order
to host memory order. xlnx_dpdma_write_descriptor()
performs similar actions when writing a descriptor.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: d3c6369a96 ("introduce xlnx-dpdma")
Signed-off-by: Alexandra Diupina <adiupina@astralinux.ru>
[PMM: tweaked indent, dropped behaviour change for write-failure case]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/dma/xlnx_dpdma.c | 68 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 4 deletions(-)

diff --git a/hw/dma/xlnx_dpdma.c b/hw/dma/xlnx_dpdma.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/dma/xlnx_dpdma.c
+++ b/hw/dma/xlnx_dpdma.c
@@ -XXX,XX +XXX,XX @@ static void xlnx_dpdma_register_types(void)
     type_register_static(&xlnx_dpdma_info);
 }
 
+static MemTxResult xlnx_dpdma_read_descriptor(XlnxDPDMAState *s,
+                                              uint64_t desc_addr,
+                                              DPDMADescriptor *desc)
+{
+    MemTxResult res = dma_memory_read(&address_space_memory, desc_addr,
+                                      desc, sizeof(DPDMADescriptor),
+                                      MEMTXATTRS_UNSPECIFIED);
+    if (res) {
+        return res;
+    }
+
+    /* Convert from LE into host endianness. */
+    desc->control = le32_to_cpu(desc->control);
+    desc->descriptor_id = le32_to_cpu(desc->descriptor_id);
+    desc->xfer_size = le32_to_cpu(desc->xfer_size);
+    desc->line_size_stride = le32_to_cpu(desc->line_size_stride);
+    desc->timestamp_lsb = le32_to_cpu(desc->timestamp_lsb);
+    desc->timestamp_msb = le32_to_cpu(desc->timestamp_msb);
+    desc->address_extension = le32_to_cpu(desc->address_extension);
+    desc->next_descriptor = le32_to_cpu(desc->next_descriptor);
+    desc->source_address = le32_to_cpu(desc->source_address);
+    desc->address_extension_23 = le32_to_cpu(desc->address_extension_23);
+    desc->address_extension_45 = le32_to_cpu(desc->address_extension_45);
+    desc->source_address2 = le32_to_cpu(desc->source_address2);
+    desc->source_address3 = le32_to_cpu(desc->source_address3);
+    desc->source_address4 = le32_to_cpu(desc->source_address4);
+    desc->source_address5 = le32_to_cpu(desc->source_address5);
+    desc->crc = le32_to_cpu(desc->crc);
+
+    return res;
+}
+
+static MemTxResult xlnx_dpdma_write_descriptor(uint64_t desc_addr,
+                                               DPDMADescriptor *desc)
+{
+    DPDMADescriptor tmp_desc = *desc;
+
+    /* Convert from host endianness into LE. */
+    tmp_desc.control = cpu_to_le32(tmp_desc.control);
+    tmp_desc.descriptor_id = cpu_to_le32(tmp_desc.descriptor_id);
+    tmp_desc.xfer_size = cpu_to_le32(tmp_desc.xfer_size);
+    tmp_desc.line_size_stride = cpu_to_le32(tmp_desc.line_size_stride);
+    tmp_desc.timestamp_lsb = cpu_to_le32(tmp_desc.timestamp_lsb);
+    tmp_desc.timestamp_msb = cpu_to_le32(tmp_desc.timestamp_msb);
+    tmp_desc.address_extension = cpu_to_le32(tmp_desc.address_extension);
+    tmp_desc.next_descriptor = cpu_to_le32(tmp_desc.next_descriptor);
+    tmp_desc.source_address = cpu_to_le32(tmp_desc.source_address);
+    tmp_desc.address_extension_23 = cpu_to_le32(tmp_desc.address_extension_23);
+    tmp_desc.address_extension_45 = cpu_to_le32(tmp_desc.address_extension_45);
+    tmp_desc.source_address2 = cpu_to_le32(tmp_desc.source_address2);
+    tmp_desc.source_address3 = cpu_to_le32(tmp_desc.source_address3);
+    tmp_desc.source_address4 = cpu_to_le32(tmp_desc.source_address4);
+    tmp_desc.source_address5 = cpu_to_le32(tmp_desc.source_address5);
+    tmp_desc.crc = cpu_to_le32(tmp_desc.crc);
+
+    return dma_memory_write(&address_space_memory, desc_addr, &tmp_desc,
+                            sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED);
+}
+
 size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
                                   bool one_desc)
 {
@@ -XXX,XX +XXX,XX @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
             desc_addr = xlnx_dpdma_descriptor_next_address(s, channel);
         }
 
-        if (dma_memory_read(&address_space_memory, desc_addr, &desc,
-                            sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED)) {
+        if (xlnx_dpdma_read_descriptor(s, desc_addr, &desc)) {
             s->registers[DPDMA_EISR] |= ((1 << 1) << channel);
             xlnx_dpdma_update_irq(s);
             s->operation_finished[channel] = true;
@@ -XXX,XX +XXX,XX @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
             /* The descriptor need to be updated when it's completed. */
             DPRINTF("update the descriptor with the done flag set.\n");
             xlnx_dpdma_desc_set_done(&desc);
-            dma_memory_write(&address_space_memory, desc_addr, &desc,
-                             sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED);
+            if (xlnx_dpdma_write_descriptor(desc_addr, &desc)) {
+                DPRINTF("Can't write the descriptor.\n");
+                /* TODO: check hardware behaviour for memory write failure */
+            }
         }
 
         if (xlnx_dpdma_desc_completion_interrupt(&desc)) {
--
2.34.1

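The fix above works because descriptors are DMA'd as raw bytes in the
guest's little-endian layout, so each 32-bit field has to be byte-swapped
on big-endian hosts. A minimal sketch of what le32_to_cpu() boils down to
(illustrative only; QEMU's real helpers live in include/qemu/bswap.h):

    #include <stdint.h>

    /* Sketch: little-endian wire value -> host byte order.
     * On a little-endian host this is a no-op; on a big-endian
     * host it swaps, e.g. 0x78563412 -> 0x12345678. */
    static inline uint32_t sketch_le32_to_cpu(uint32_t le)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap32(le);
    #else
        return le;
    #endif
    }
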
From: Zenghui Yu <zenghui.yu@linux.dev>

We wrongly encoded ID_AA64PFR1_EL1 using {3,0,0,4,2} in hvf_sreg_match[] so
we fail to get the expected ARMCPRegInfo from the cp_regs hash table with
the wrong key.

Fix it with the correct encoding {3,0,0,4,1}. With that fixed, the Linux
guest can properly detect FEAT_SSBS2 on my M1 HW.

All DBG{B,W}{V,C}R_EL1 registers are also wrongly encoded with op0 == 14.
It happens to work because HVF_SYSREG(CRn, CRm, 14, op1, op2) equals
HVF_SYSREG(CRn, CRm, 2, op1, op2), by definition. But we shouldn't rely on
it.

Cc: qemu-stable@nongnu.org
Fixes: a1477da3ddeb ("hvf: Add Apple Silicon support")
Signed-off-by: Zenghui Yu <zenghui.yu@linux.dev>
Reviewed-by: Alexander Graf <agraf@csgraf.de>
Message-id: 20240503153453.54389-1-zenghui.yu@linux.dev
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/hvf/hvf.c | 130 +++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -XXX,XX +XXX,XX @@ struct hvf_sreg_match {
 };
 
 static struct hvf_sreg_match hvf_sreg_match[] = {
-    { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) },
 
-    { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) },
-    { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) },
-    { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) },
-    { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) },
+    { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) },
+    { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) },
+    { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) },
+    { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) },
 
 #ifdef SYNC_NO_RAW_REGS
     /*
@@ -XXX,XX +XXX,XX @@ static struct hvf_sreg_match hvf_sreg_match[] = {
     { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) },
     { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) },
 #endif
-    { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) },
+    { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) },
     { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) },
     { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) },
     { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) },
--
2.34.1

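Why the wrong op0 of 14 "happens to work": op0 occupies a two-bit field in
the system-register key, so any value is truncated modulo 4 and 14 collapses
to 2. A hedged sketch of the aliasing (the field layout here is illustrative
only; QEMU's actual packing is in ENCODE_AA64_CP_REG):

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative key packing: op0 gets only two bits, so 14 (0b1110)
     * and 2 (0b10) produce identical keys -- the accident the commit
     * message warns against relying on. */
    static uint32_t sysreg_key(uint32_t op0, uint32_t op1, uint32_t crn,
                               uint32_t crm, uint32_t op2)
    {
        return ((op0 & 3) << 14) | (op1 << 11) | (crn << 7) | (crm << 3) | op2;
    }

    int main(void)
    {
        assert(sysreg_key(14, 0, 0, 0, 4) == sysreg_key(2, 0, 0, 0, 4));
        return 0;
    }
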
From: Dorjoy Chowdhury <dorjoychy111@gmail.com>

The value of the mp-affinity property being set in npcm7xx_realize is
always the same as the default value it would have when arm_cpu_realizefn
is called if the property is not set here. So there is no need to set
the property value in the npcm7xx_realize function.

Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240504141733.14813-1-dorjoychy111@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/npcm7xx.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -XXX,XX +XXX,XX @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 
     /* CPUs */
     for (i = 0; i < nc->num_cpus; i++) {
-        object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity",
-                                arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS),
-                                &error_abort);
         object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
                                 NPCM7XX_GIC_CPU_IF_ADDR, &error_abort);
         object_property_set_bool(OBJECT(&s->cpu[i]), "reset-hivecs", true,
--
2.34.1

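To see why the deleted property write was redundant: the realize-time
default builds the MPIDR from cpu_index with QEMU's default cluster size of
8, while the removed code used NPCM7XX_MAX_NUM_CPUS (2); for the at most
two CPUs an NPCM7xx can have, both give the same Aff1/Aff0 split. A rough
model of the arithmetic (assumed from arm_build_mp_affinity()'s
index/cluster behaviour, not a copy of the QEMU helper):

    #include <assert.h>
    #include <stdint.h>

    /* Model: Aff1 = idx / clustersz (bits 15:8), Aff0 = idx % clustersz. */
    static uint64_t mp_affinity(int idx, int clustersz)
    {
        return ((uint64_t)(idx / clustersz) << 8) | (uint64_t)(idx % clustersz);
    }

    int main(void)
    {
        /* cpu indexes 0 and 1: cluster size 2 and the default 8 agree,
         * so dropping the explicit property write changes nothing. */
        for (int i = 0; i < 2; i++) {
            assert(mp_affinity(i, 2) == mp_affinity(i, 8));
        }
        return 0;
    }
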
From: Inès Varhol <ines.varhol@telecom-paris.fr>

Signed-off-by: Arnaud Minier <arnaud.minier@telecom-paris.fr>
Signed-off-by: Inès Varhol <ines.varhol@telecom-paris.fr>
Message-id: 20240505141613.387508-1-ines.varhol@telecom-paris.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/char/stm32l4x5_usart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/char/stm32l4x5_usart.c
+++ b/hw/char/stm32l4x5_usart.c
@@ -XXX,XX +XXX,XX @@ REG32(CR1, 0x00)
     FIELD(CR1, UE, 0, 1)           /* USART enable */
 REG32(CR2, 0x04)
     FIELD(CR2, ADD_1, 28, 4)       /* ADD[7:4] */
-    FIELD(CR2, ADD_0, 24, 1)       /* ADD[3:0] */
+    FIELD(CR2, ADD_0, 24, 4)       /* ADD[3:0] */
     FIELD(CR2, RTOEN, 23, 1)       /* Receiver timeout enable */
     FIELD(CR2, ABRMOD, 21, 2)      /* Auto baud rate mode */
     FIELD(CR2, ABREN, 20, 1)       /* Auto baud rate enable */
--
2.34.1

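For readers unfamiliar with hw/registerfields.h: FIELD(reg, field, shift,
length) declares a bitfield, so this one-character change widens ADD_0 from
a single bit to the four bits 27:24 that actually hold ADD[3:0]. A
simplified model of what the macro provides (the real macro also generates
_LENGTH constants and typed extract/deposit helpers):

    #include <stdint.h>

    /* Simplified stand-in for FIELD(CR2, ADD_0, 24, 4). */
    #define R_CR2_ADD_0_SHIFT 24
    #define R_CR2_ADD_0_MASK  (((1u << 4) - 1) << R_CR2_ADD_0_SHIFT)

    static uint32_t cr2_add_0(uint32_t cr2)
    {
        /* With the old length of 1 the mask covered only bit 24, so
         * ADD[3:1] of the USART address-match value was silently lost. */
        return (cr2 & R_CR2_ADD_0_MASK) >> R_CR2_ADD_0_SHIFT;
    }
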
From: Andrey Shumilin <shum.sdl@nppct.ru>

In gic_cpu_read() and gic_cpu_write(), we delegate the handling of
reading and writing the Non-Secure view of the GICC_APR<n> registers
to functions gic_apr_ns_view() and gic_apr_write_ns_view().
Unfortunately we got the order of the arguments wrong, swapping the
CPU number and the register number (which the compiler doesn't catch
because they're both integers).

Most guests probably didn't notice this bug because directly
accessing the APR registers is typically something only done by
firmware when it is doing state save for going into a sleep mode.

Correct the mismatched call arguments.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Cc: qemu-stable@nongnu.org
Fixes: 51fd06e0ee ("hw/intc/arm_gic: Fix handling of GICC_APR<n>, GICC_NSAPR<n> registers")
Signed-off-by: Andrey Shumilin <shum.sdl@nppct.ru>
[PMM: Rewrote commit message]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
---
 hw/intc/arm_gic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -XXX,XX +XXX,XX @@ static MemTxResult gic_cpu_read(GICState *s, int cpu, int offset,
             *data = s->h_apr[gic_get_vcpu_real_id(cpu)];
         } else if (gic_cpu_ns_access(s, cpu, attrs)) {
             /* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */
-            *data = gic_apr_ns_view(s, regno, cpu);
+            *data = gic_apr_ns_view(s, cpu, regno);
         } else {
             *data = s->apr[regno][cpu];
         }
@@ -XXX,XX +XXX,XX @@ static MemTxResult gic_cpu_write(GICState *s, int cpu, int offset,
             s->h_apr[gic_get_vcpu_real_id(cpu)] = value;
         } else if (gic_cpu_ns_access(s, cpu, attrs)) {
             /* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */
-            gic_apr_write_ns_view(s, regno, cpu, value);
+            gic_apr_write_ns_view(s, cpu, regno, value);
         } else {
             s->apr[regno][cpu] = value;
         }
--
2.34.1

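The parenthetical about the compiler is worth spelling out: both parameters
are plain ints, so the swapped call type-checks cleanly. A hedged,
self-contained illustration of the bug class (the struct and function here
are a toy model of the patch's calls, not code from arm_gic.c):

    #include <stdio.h>

    typedef struct { unsigned apr[4][8]; } GICStateModel;

    /* Modeled on the fixed prototype: state, CPU number, then regno. */
    static unsigned gic_apr_ns_view_model(GICStateModel *s, int cpu, int regno)
    {
        return s->apr[regno][cpu];
    }

    int main(void)
    {
        GICStateModel s = { 0 };
        s.apr[1][0] = 0xab;
        /* Swapping the two int arguments still compiles without a
         * warning -- exactly the bug class this patch fixes. */
        printf("right: %x wrong: %x\n",
               gic_apr_ns_view_model(&s, 0, 1),   /* cpu 0, regno 1 -> ab */
               gic_apr_ns_view_model(&s, 1, 0));  /* swapped -> 0 */
        return 0;
    }
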
From: Philippe Mathieu-Daudé <philmd@linaro.org>

Check the function index is in range and use an unsigned
variable to avoid the following warning with GCC 13.2.0:

  [666/5358] Compiling C object libcommon.fa.p/hw_input_tsc2005.c.o
  hw/input/tsc2005.c: In function 'tsc2005_timer_tick':
  hw/input/tsc2005.c:416:26: warning: array subscript has type 'char' [-Wchar-subscripts]
    416 |         s->dav |= mode_regs[s->function];
        |                             ~^~~~~~~~~~

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240508143513.44996-1-philmd@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: fixed missing ')']
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/input/tsc2005.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/input/tsc2005.c
+++ b/hw/input/tsc2005.c
@@ -XXX,XX +XXX,XX @@ uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len)
 static void tsc2005_timer_tick(void *opaque)
 {
     TSC2005State *s = opaque;
+    unsigned int function = s->function;
+
+    assert(function < ARRAY_SIZE(mode_regs));
 
     /* Timer ticked -- a set of conversions has been finished. */
 
@@ -XXX,XX +XXX,XX @@ static void tsc2005_timer_tick(void *opaque)
         return;
 
     s->busy = false;
-    s->dav |= mode_regs[s->function];
+    s->dav |= mode_regs[function];
     s->function = -1;
     tsc2005_pin_update(s);
 }
--
2.34.1

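Background on -Wchar-subscripts, as a sketch: plain char may be signed, and
the device model stores -1 in s->function when no conversion is pending
(visible at the end of the hunk), so a char subscript could go negative.
Copying it to an unsigned variable plus the assert makes the valid range
explicit. A standalone illustration (values taken from the mode_regs table
quoted in the next patch; the function is hypothetical):

    #include <assert.h>
    #include <stdint.h>

    static const uint16_t mode_regs[16] = { 0xf000, 0xc000, 0x8000, 0x4000 };

    static uint16_t lookup(char f)            /* 'char' may be signed... */
    {
        unsigned int function = (unsigned int)f;  /* ...-1 would wrap huge */
        assert(function < sizeof(mode_regs) / sizeof(mode_regs[0]));
        return mode_regs[function];           /* assert fires, not UB */
    }

    int main(void)
    {
        return lookup(0) == 0xf000 ? 0 : 1;
    }
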
From: Tanmay Patil <tanmaynpatil105@gmail.com>

Some of the source files for older devices use hardcoded tabs
instead of our current coding standard's required spaces.
Fix these in the following files:
    - hw/arm/boot.c
    - hw/char/omap_uart.c
    - hw/gpio/zaurus.c
    - hw/input/tsc2005.c

This commit is mostly whitespace-only changes; it also
adds curly-braces to some 'if' statements.

This addresses part of https://gitlab.com/qemu-project/qemu/-/issues/373
but some other files remain to be handled.

Signed-off-by: Tanmay Patil <tanmaynpatil105@gmail.com>
Message-id: 20240508081502.88375-1-tanmaynpatil105@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: tweaked commit message]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/boot.c       |   8 +--
 hw/char/omap_uart.c |  49 +++++++++--------
 hw/gpio/zaurus.c    |  59 ++++++++++----------
 hw/input/tsc2005.c  | 130 ++++++++++++++++++++++++--------------------
 4 files changed, 130 insertions(+), 116 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -XXX,XX +XXX,XX @@ static void set_kernel_args_old(const struct arm_boot_info *info,
     WRITE_WORD(p, info->ram_size / 4096);
     /* ramdisk_size */
     WRITE_WORD(p, 0);
-#define FLAG_READONLY    1
-#define FLAG_RDLOAD    4
-#define FLAG_RDPROMPT    8
+#define FLAG_READONLY 1
+#define FLAG_RDLOAD 4
+#define FLAG_RDPROMPT 8
     /* flags */
     WRITE_WORD(p, FLAG_READONLY | FLAG_RDLOAD | FLAG_RDPROMPT);
     /* rootdev */
-    WRITE_WORD(p, (31 << 8) | 0);    /* /dev/mtdblock0 */
+    WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */
     /* video_num_cols */
     WRITE_WORD(p, 0);
     /* video_num_rows */
diff --git a/hw/char/omap_uart.c b/hw/char/omap_uart.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/char/omap_uart.c
+++ b/hw/char/omap_uart.c
@@ -XXX,XX +XXX,XX @@ struct omap_uart_s *omap_uart_init(hwaddr base,
     s->fclk = fclk;
     s->irq = irq;
     s->serial = serial_mm_init(get_system_memory(), base, 2, irq,
-                               omap_clk_getrate(fclk)/16,
+                               omap_clk_getrate(fclk) / 16,
                                chr ?: qemu_chr_new(label, "null", NULL),
                                DEVICE_NATIVE_ENDIAN);
     return s;
@@ -XXX,XX +XXX,XX @@ static uint64_t omap_uart_read(void *opaque, hwaddr addr, unsigned size)
     }
 
     switch (addr) {
-    case 0x20:    /* MDR1 */
+    case 0x20: /* MDR1 */
         return s->mdr[0];
-    case 0x24:    /* MDR2 */
+    case 0x24: /* MDR2 */
         return s->mdr[1];
-    case 0x40:    /* SCR */
+    case 0x40: /* SCR */
         return s->scr;
-    case 0x44:    /* SSR */
+    case 0x44: /* SSR */
         return 0x0;
-    case 0x48:    /* EBLR (OMAP2) */
+    case 0x48: /* EBLR (OMAP2) */
         return s->eblr;
-    case 0x4C:    /* OSC_12M_SEL (OMAP1) */
+    case 0x4C: /* OSC_12M_SEL (OMAP1) */
         return s->clksel;
-    case 0x50:    /* MVR */
+    case 0x50: /* MVR */
         return 0x30;
-    case 0x54:    /* SYSC (OMAP2) */
+    case 0x54: /* SYSC (OMAP2) */
         return s->syscontrol;
-    case 0x58:    /* SYSS (OMAP2) */
+    case 0x58: /* SYSS (OMAP2) */
         return 1;
-    case 0x5c:    /* WER (OMAP2) */
+    case 0x5c: /* WER (OMAP2) */
         return s->wkup;
-    case 0x60:    /* CFPS (OMAP2) */
+    case 0x60: /* CFPS (OMAP2) */
         return s->cfps;
     }
 
@@ -XXX,XX +XXX,XX @@ static void omap_uart_write(void *opaque, hwaddr addr,
     }
 
     switch (addr) {
-    case 0x20:    /* MDR1 */
+    case 0x20: /* MDR1 */
         s->mdr[0] = value & 0x7f;
         break;
-    case 0x24:    /* MDR2 */
+    case 0x24: /* MDR2 */
         s->mdr[1] = value & 0xff;
         break;
-    case 0x40:    /* SCR */
+    case 0x40: /* SCR */
         s->scr = value & 0xff;
         break;
-    case 0x48:    /* EBLR (OMAP2) */
+    case 0x48: /* EBLR (OMAP2) */
         s->eblr = value & 0xff;
         break;
-    case 0x4C:    /* OSC_12M_SEL (OMAP1) */
+    case 0x4C: /* OSC_12M_SEL (OMAP1) */
         s->clksel = value & 1;
         break;
-    case 0x44:    /* SSR */
-    case 0x50:    /* MVR */
-    case 0x58:    /* SYSS (OMAP2) */
+    case 0x44: /* SSR */
+    case 0x50: /* MVR */
+    case 0x58: /* SYSS (OMAP2) */
         OMAP_RO_REG(addr);
         break;
-    case 0x54:    /* SYSC (OMAP2) */
+    case 0x54: /* SYSC (OMAP2) */
         s->syscontrol = value & 0x1d;
-        if (value & 2)
+        if (value & 2) {
             omap_uart_reset(s);
+        }
         break;
-    case 0x5c:    /* WER (OMAP2) */
+    case 0x5c: /* WER (OMAP2) */
         s->wkup = value & 0x7f;
         break;
-    case 0x60:    /* CFPS (OMAP2) */
+    case 0x60: /* CFPS (OMAP2) */
         s->cfps = value & 0xff;
         break;
     default:
diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/gpio/zaurus.c
+++ b/hw/gpio/zaurus.c
@@ -XXX,XX +XXX,XX @@ struct ScoopInfo {
     uint16_t isr;
 };
 
-#define SCOOP_MCR    0x00
-#define SCOOP_CDR    0x04
-#define SCOOP_CSR    0x08
-#define SCOOP_CPR    0x0c
-#define SCOOP_CCR    0x10
-#define SCOOP_IRR_IRM    0x14
-#define SCOOP_IMR    0x18
-#define SCOOP_ISR    0x1c
-#define SCOOP_GPCR    0x20
-#define SCOOP_GPWR    0x24
-#define SCOOP_GPRR    0x28
+#define SCOOP_MCR 0x00
+#define SCOOP_CDR 0x04
+#define SCOOP_CSR 0x08
+#define SCOOP_CPR 0x0c
+#define SCOOP_CCR 0x10
+#define SCOOP_IRR_IRM 0x14
+#define SCOOP_IMR 0x18
+#define SCOOP_ISR 0x1c
+#define SCOOP_GPCR 0x20
+#define SCOOP_GPWR 0x24
+#define SCOOP_GPRR 0x28
 
-static inline void scoop_gpio_handler_update(ScoopInfo *s) {
+static inline void scoop_gpio_handler_update(ScoopInfo *s)
+{
     uint32_t level, diff;
     int bit;
     level = s->gpio_level & s->gpio_dir;
@@ -XXX,XX +XXX,XX @@ static void scoop_write(void *opaque, hwaddr addr,
         break;
     case SCOOP_CPR:
         s->power = value;
-        if (value & 0x80)
+        if (value & 0x80) {
             s->power |= 0x8040;
+        }
         break;
     case SCOOP_CCR:
         s->ccr = value;
@@ -XXX,XX +XXX,XX @@ static void scoop_write(void *opaque, hwaddr addr,
         scoop_gpio_handler_update(s);
         break;
     case SCOOP_GPWR:
-    case SCOOP_GPRR:    /* GPRR is probably R/O in real HW */
+    case SCOOP_GPRR: /* GPRR is probably R/O in real HW */
         s->gpio_level = value & s->gpio_dir;
         scoop_gpio_handler_update(s);
         break;
@@ -XXX,XX +XXX,XX @@ static void scoop_gpio_set(void *opaque, int line, int level)
 {
     ScoopInfo *s = (ScoopInfo *) opaque;
 
-    if (level)
+    if (level) {
         s->gpio_level |= (1 << line);
-    else
+    } else {
         s->gpio_level &= ~(1 << line);
+    }
 }
 
 static void scoop_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static int scoop_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static bool is_version_0 (void *opaque, int version_id)
+static bool is_version_0(void *opaque, int version_id)
 {
     return version_id == 0;
 }
@@ -XXX,XX +XXX,XX @@ type_init(scoop_register_types)
 
 /* Write the bootloader parameters memory area. */
 
-#define MAGIC_CHG(a, b, c, d)    ((d << 24) | (c << 16) | (b << 8) | a)
+#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a)
 
 static struct QEMU_PACKED sl_param_info {
     uint32_t comadj_keyword;
@@ -XXX,XX +XXX,XX @@ static struct QEMU_PACKED sl_param_info {
     uint32_t phad_keyword;
     int32_t phadadj;
 } zaurus_bootparam = {
-    .comadj_keyword    = MAGIC_CHG('C', 'M', 'A', 'D'),
-    .comadj        = 125,
-    .uuid_keyword    = MAGIC_CHG('U', 'U', 'I', 'D'),
-    .uuid        = { -1 },
-    .touch_keyword    = MAGIC_CHG('T', 'U', 'C', 'H'),
-    .touch_xp        = -1,
-    .adadj_keyword    = MAGIC_CHG('B', 'V', 'A', 'D'),
-    .adadj        = -1,
-    .phad_keyword    = MAGIC_CHG('P', 'H', 'A', 'D'),
-    .phadadj        = 0x01,
+    .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'),
+    .comadj = 125,
+    .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'),
+    .uuid = { -1 },
+    .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'),
+    .touch_xp = -1,
+    .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'),
+    .adadj = -1,
+    .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'),
+    .phadadj = 0x01,
 };
 
 void sl_bootparam_write(hwaddr ptr)
diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/input/tsc2005.c
+++ b/hw/input/tsc2005.c
@@ -XXX,XX +XXX,XX @@
 #include "migration/vmstate.h"
 #include "trace.h"
 
-#define TSC_CUT_RESOLUTION(value, p)    ((value) >> (16 - (p ? 12 : 10)))
+#define TSC_CUT_RESOLUTION(value, p) ((value) >> (16 - (p ? 12 : 10)))
 
 typedef struct {
-    qemu_irq pint;    /* Combination of the nPENIRQ and DAV signals */
+    qemu_irq pint; /* Combination of the nPENIRQ and DAV signals */
     QEMUTimer *timer;
     uint16_t model;
 
@@ -XXX,XX +XXX,XX @@ typedef struct {
 } TSC2005State;
 
 enum {
-    TSC_MODE_XYZ_SCAN    = 0x0,
+    TSC_MODE_XYZ_SCAN = 0x0,
     TSC_MODE_XY_SCAN,
     TSC_MODE_X,
     TSC_MODE_Y,
@@ -XXX,XX +XXX,XX @@ enum {
 };
 
 static const uint16_t mode_regs[16] = {
-    0xf000,    /* X, Y, Z scan */
-    0xc000,    /* X, Y scan */
-    0x8000,    /* X */
-    0x4000,    /* Y */
302
- 0x3000,    /* Z */
303
- 0x0800,    /* AUX */
304
- 0x0400,    /* TEMP1 */
305
- 0x0200,    /* TEMP2 */
306
- 0x0800,    /* AUX scan */
307
- 0x0040,    /* X test */
308
- 0x0020,    /* Y test */
309
- 0x0080,    /* Short-circuit test */
310
- 0x0000,    /* Reserved */
311
- 0x0000,    /* X+, X- drivers */
312
- 0x0000,    /* Y+, Y- drivers */
313
- 0x0000,    /* Y+, X- drivers */
314
+ 0xf000, /* X, Y, Z scan */
315
+ 0xc000, /* X, Y scan */
316
+ 0x8000, /* X */
317
+ 0x4000, /* Y */
318
+ 0x3000, /* Z */
319
+ 0x0800, /* AUX */
320
+ 0x0400, /* TEMP1 */
321
+ 0x0200, /* TEMP2 */
322
+ 0x0800, /* AUX scan */
323
+ 0x0040, /* X test */
324
+ 0x0020, /* Y test */
325
+ 0x0080, /* Short-circuit test */
326
+ 0x0000, /* Reserved */
327
+ 0x0000, /* X+, X- drivers */
328
+ 0x0000, /* Y+, Y- drivers */
329
+ 0x0000, /* Y+, X- drivers */
330
};
331
332
-#define X_TRANSFORM(s)            \
333
+#define X_TRANSFORM(s) \
334
((s->y * s->tr[0] - s->x * s->tr[1]) / s->tr[2] + s->tr[3])
335
-#define Y_TRANSFORM(s)            \
336
+#define Y_TRANSFORM(s) \
337
((s->y * s->tr[4] - s->x * s->tr[5]) / s->tr[6] + s->tr[7])
338
-#define Z1_TRANSFORM(s)            \
339
+#define Z1_TRANSFORM(s) \
340
((400 - ((s)->x >> 7) + ((s)->pressure << 10)) << 4)
341
-#define Z2_TRANSFORM(s)            \
342
+#define Z2_TRANSFORM(s) \
343
((4000 + ((s)->y >> 7) - ((s)->pressure << 10)) << 4)
344
345
-#define AUX_VAL                (700 << 4)    /* +/- 3 at 12-bit */
346
-#define TEMP1_VAL            (1264 << 4)    /* +/- 5 at 12-bit */
347
-#define TEMP2_VAL            (1531 << 4)    /* +/- 5 at 12-bit */
348
+#define AUX_VAL (700 << 4) /* +/- 3 at 12-bit */
349
+#define TEMP1_VAL (1264 << 4) /* +/- 5 at 12-bit */
350
+#define TEMP2_VAL (1531 << 4) /* +/- 5 at 12-bit */
351
352
static uint16_t tsc2005_read(TSC2005State *s, int reg)
353
{
354
uint16_t ret;
355
356
switch (reg) {
357
- case 0x0:    /* X */
358
+ case 0x0: /* X */
359
s->dav &= ~mode_regs[TSC_MODE_X];
360
return TSC_CUT_RESOLUTION(X_TRANSFORM(s), s->precision) +
361
(s->noise & 3);
362
- case 0x1:    /* Y */
363
+ case 0x1: /* Y */
364
s->dav &= ~mode_regs[TSC_MODE_Y];
365
- s->noise ++;
366
+ s->noise++;
367
return TSC_CUT_RESOLUTION(Y_TRANSFORM(s), s->precision) ^
368
(s->noise & 3);
369
- case 0x2:    /* Z1 */
370
+ case 0x2: /* Z1 */
371
s->dav &= 0xdfff;
372
return TSC_CUT_RESOLUTION(Z1_TRANSFORM(s), s->precision) -
373
(s->noise & 3);
374
- case 0x3:    /* Z2 */
375
+ case 0x3: /* Z2 */
376
s->dav &= 0xefff;
377
return TSC_CUT_RESOLUTION(Z2_TRANSFORM(s), s->precision) |
378
(s->noise & 3);
379
380
- case 0x4:    /* AUX */
381
+ case 0x4: /* AUX */
382
s->dav &= ~mode_regs[TSC_MODE_AUX];
383
return TSC_CUT_RESOLUTION(AUX_VAL, s->precision);
384
385
- case 0x5:    /* TEMP1 */
386
+ case 0x5: /* TEMP1 */
387
s->dav &= ~mode_regs[TSC_MODE_TEMP1];
388
return TSC_CUT_RESOLUTION(TEMP1_VAL, s->precision) -
389
(s->noise & 5);
390
- case 0x6:    /* TEMP2 */
391
+ case 0x6: /* TEMP2 */
392
s->dav &= 0xdfff;
393
s->dav &= ~mode_regs[TSC_MODE_TEMP2];
394
return TSC_CUT_RESOLUTION(TEMP2_VAL, s->precision) ^
395
(s->noise & 3);
396
397
- case 0x7:    /* Status */
398
+ case 0x7: /* Status */
399
ret = s->dav | (s->reset << 7) | (s->pdst << 2) | 0x0;
400
s->dav &= ~(mode_regs[TSC_MODE_X_TEST] | mode_regs[TSC_MODE_Y_TEST] |
401
mode_regs[TSC_MODE_TS_TEST]);
402
s->reset = true;
403
return ret;
404
405
- case 0x8: /* AUX high threshold */
406
+ case 0x8: /* AUX high threshold */
407
return s->aux_thr[1];
408
- case 0x9: /* AUX low threshold */
409
+ case 0x9: /* AUX low threshold */
410
return s->aux_thr[0];
411
412
- case 0xa: /* TEMP high threshold */
413
+ case 0xa: /* TEMP high threshold */
414
return s->temp_thr[1];
415
- case 0xb: /* TEMP low threshold */
416
+ case 0xb: /* TEMP low threshold */
417
return s->temp_thr[0];
418
419
- case 0xc:    /* CFR0 */
420
+ case 0xc: /* CFR0 */
421
return (s->pressure << 15) | ((!s->busy) << 14) |
422
- (s->nextprecision << 13) | s->timing[0];
423
- case 0xd:    /* CFR1 */
424
+ (s->nextprecision << 13) | s->timing[0];
425
+ case 0xd: /* CFR1 */
426
return s->timing[1];
427
- case 0xe:    /* CFR2 */
428
+ case 0xe: /* CFR2 */
429
return (s->pin_func << 14) | s->filter;
430
431
- case 0xf:    /* Function select status */
432
+ case 0xf: /* Function select status */
433
return s->function >= 0 ? 1 << s->function : 0;
29
}
434
}
30
435
31
- offset = a->imm << a->size;
436
@@ -XXX,XX +XXX,XX @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data)
32
+ offset = a->imm << msize;
437
s->temp_thr[0] = data;
33
if (!a->a) {
438
break;
34
offset = -offset;
439
440
- case 0xc:    /* CFR0 */
441
+ case 0xc: /* CFR0 */
442
s->host_mode = (data >> 15) != 0;
443
if (s->enabled != !(data & 0x4000)) {
444
s->enabled = !(data & 0x4000);
445
trace_tsc2005_sense(s->enabled ? "enabled" : "disabled");
446
- if (s->busy && !s->enabled)
447
+ if (s->busy && !s->enabled) {
448
timer_del(s->timer);
449
+ }
450
s->busy = s->busy && s->enabled;
451
}
452
s->nextprecision = (data >> 13) & 1;
453
@@ -XXX,XX +XXX,XX @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data)
454
"tsc2005_write: illegal conversion clock setting\n");
455
}
456
break;
457
- case 0xd:    /* CFR1 */
458
+ case 0xd: /* CFR1 */
459
s->timing[1] = data & 0xf07;
460
break;
461
- case 0xe:    /* CFR2 */
462
+ case 0xe: /* CFR2 */
463
s->pin_func = (data >> 14) & 3;
464
s->filter = data & 0x3fff;
465
break;
466
@@ -XXX,XX +XXX,XX @@ static void tsc2005_pin_update(TSC2005State *s)
467
switch (s->nextfunction) {
468
case TSC_MODE_XYZ_SCAN:
469
case TSC_MODE_XY_SCAN:
470
- if (!s->host_mode && s->dav)
471
+ if (!s->host_mode && s->dav) {
472
s->enabled = false;
473
- if (!s->pressure)
474
+ }
475
+ if (!s->pressure) {
476
return;
477
+ }
478
/* Fall through */
479
case TSC_MODE_AUX_SCAN:
480
break;
481
@@ -XXX,XX +XXX,XX @@ static void tsc2005_pin_update(TSC2005State *s)
482
case TSC_MODE_X:
483
case TSC_MODE_Y:
484
case TSC_MODE_Z:
485
- if (!s->pressure)
486
+ if (!s->pressure) {
487
return;
488
+ }
489
/* Fall through */
490
case TSC_MODE_AUX:
491
case TSC_MODE_TEMP1:
492
@@ -XXX,XX +XXX,XX @@ static void tsc2005_pin_update(TSC2005State *s)
493
case TSC_MODE_X_TEST:
494
case TSC_MODE_Y_TEST:
495
case TSC_MODE_TS_TEST:
496
- if (s->dav)
497
+ if (s->dav) {
498
s->enabled = false;
499
+ }
500
break;
501
502
case TSC_MODE_RESERVED:
503
@@ -XXX,XX +XXX,XX @@ static void tsc2005_pin_update(TSC2005State *s)
504
return;
35
}
505
}
36
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
506
37
{ gen_helper_mve_vstrw, gen_helper_mve_vldrw },
507
- if (!s->enabled || s->busy)
38
{ NULL, NULL }
508
+ if (!s->enabled || s->busy) {
39
};
509
return;
40
- return do_ldst(s, a, ldstfns[a->size][a->l]);
510
+ }
41
+ return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
511
512
s->busy = true;
513
s->precision = s->nextprecision;
514
s->function = s->nextfunction;
515
- s->pdst = !s->pnd0;    /* Synchronised on internal clock */
516
+ s->pdst = !s->pnd0; /* Synchronised on internal clock */
517
expires = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
518
(NANOSECONDS_PER_SECOND >> 7);
519
timer_mod(s->timer, expires);
520
@@ -XXX,XX +XXX,XX @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
521
TSC2005State *s = opaque;
522
uint32_t ret = 0;
523
524
- switch (s->state ++) {
525
+ switch (s->state++) {
526
case 0:
527
if (value & 0x80) {
528
/* Command */
529
@@ -XXX,XX +XXX,XX @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
530
if (s->enabled != !(value & 1)) {
531
s->enabled = !(value & 1);
532
trace_tsc2005_sense(s->enabled ? "enabled" : "disabled");
533
- if (s->busy && !s->enabled)
534
+ if (s->busy && !s->enabled) {
535
timer_del(s->timer);
536
+ }
537
s->busy = s->busy && s->enabled;
538
}
539
tsc2005_pin_update(s);
540
@@ -XXX,XX +XXX,XX @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
541
break;
542
543
case 1:
544
- if (s->command)
545
+ if (s->command) {
546
ret = (s->data >> 8) & 0xff;
547
- else
548
+ } else {
549
s->data |= value << 8;
550
+ }
551
break;
552
553
case 2:
554
@@ -XXX,XX +XXX,XX @@ static void tsc2005_timer_tick(void *opaque)
555
556
/* Timer ticked -- a set of conversions has been finished. */
557
558
- if (!s->busy)
559
+ if (!s->busy) {
560
return;
561
+ }
562
563
s->busy = false;
564
s->dav |= mode_regs[function];
565
@@ -XXX,XX +XXX,XX @@ static void tsc2005_touchscreen_event(void *opaque,
566
* signaling TS events immediately, but for now we simulate
567
* the first conversion delay for sake of correctness.
568
*/
569
- if (p != s->pressure)
570
+ if (p != s->pressure) {
571
tsc2005_pin_update(s);
572
+ }
42
}
573
}
43
574
44
-#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST) \
575
static int tsc2005_post_load(void *opaque, int version_id)
45
+#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \
46
static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
47
{ \
48
static MVEGenLdStFn * const ldstfns[2][2] = { \
49
{ gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
50
{ NULL, gen_helper_mve_##ULD }, \
51
}; \
52
- return do_ldst(s, a, ldstfns[a->u][a->l]); \
53
+ return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \
54
}
55
56
-DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
57
-DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
58
-DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
59
+DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
60
+DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
61
+DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
62
63
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
64
{
65
--
576
--
66
2.20.1
577
2.34.1
67
68
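[A scalar sketch of the scaling rule the do_ldst() change above encodes;
illustrative C only, not code from the series. For a widening load such
as VLDRB.U16 the immediate offset scales with the one-byte size of the
element in memory, not the two-byte size it occupies in the vector
register:]

    #include <stdint.h>

    /* Scale an MVE contiguous load/store immediate by the size of the
     * element as it exists in memory: MO_8 -> x1, MO_16 -> x2, MO_32 -> x4.
     */
    static uint32_t mve_ldst_offset(uint32_t imm, unsigned msize_log2)
    {
        return imm << msize_log2;
    }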
From: Patrick Venture <venture@google.com>

Add line item reference to quanta-gbs-bmc machine.

Signed-off-by: Patrick Venture <venture@google.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-id: 20210615192848.1065297-3-venture@google.com
[PMM: fixed underline Sphinx warning]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/nuvoton.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -XXX,XX +XXX,XX @@
-Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``)
-=====================================================
+Nuvoton iBMC boards (``*-bmc``, ``npcm750-evb``, ``quanta-gsj``)
+================================================================
 
 The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are
 designed to be used as Baseboard Management Controllers (BMCs) in various
@@ -XXX,XX +XXX,XX @@ segment. The following machines are based on this chip :
 The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and
 Hyperscale applications. The following machines are based on this chip :
 
+- ``quanta-gbs-bmc``    Quanta GBS server BMC
 - ``quanta-gsj``        Quanta GSJ server BMC
 
 There are also two more SoCs, NPCM710 and NPCM705, which are single-core
--
2.20.1

From: Rayhan Faizel <rayhan.faizel@gmail.com>

None of the RPi boards have ADC on-board. In real life, an external ADC chip
is required to operate on analog signals.

Signed-off-by: Rayhan Faizel <rayhan.faizel@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240512085716.222326-1-rayhan.faizel@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/raspi.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/system/arm/raspi.rst b/docs/system/arm/raspi.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/raspi.rst
+++ b/docs/system/arm/raspi.rst
@@ -XXX,XX +XXX,XX @@ Implemented devices
 Missing devices
 ---------------
 
- * Analog to Digital Converter (ADC)
 * Pulse Width Modulation (PWM)
 * PCIE Root Port (raspi4b)
 * GENET Ethernet Controller (raspi4b)
--
2.34.1
From: Richard Henderson <richard.henderson@linaro.org>

This fixes a bug in that neither PLI nor PLDW are present in ARMv6T2,
but are introduced with ARMv7 and ARMv7MP respectively.
For clarity, do not use NOP for PLD.

Note that there is no PLDW (literal). Architecturally in the
T1 encoding of "PLD (literal)" bit 5 is "(0)", which means
that it should be zero and if it is not then the behaviour
is CONSTRAINED UNPREDICTABLE (might UNDEF, NOP, or ignore the
value of the bit).

In our implementation we have patterns for both:

 + PLD 1111 1000 -001 1111 1111 ------------ # (literal)
 + PLD 1111 1000 -011 1111 1111 ------------ # (literal)

and so we effectively ignore the value of bit 5. (This is a
permitted option for this CONSTRAINED UNPREDICTABLE.) This isn't a
behaviour change in this commit, since we previously had NOP lines
for both those patterns.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240524232121.284515-3-richard.henderson@linaro.org
[PMM: adjusted commit message to note that PLD (lit) T1 bit 5
 being 1 is an UNPREDICTABLE case.]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/t32.decode  | 25 ++++++++++++-------------
 target/arm/tcg/translate.c |  4 ++--
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/t32.decode
+++ b/target/arm/tcg/t32.decode
@@ -XXX,XX +XXX,XX @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
 # Note that Load, unsigned (literal) overlaps all other load encodings.
 {
   {
-    NOP      1111 1000 -001 1111 1111 ------------     # PLD
+    PLD      1111 1000 -001 1111 1111 ------------     # (literal)
     LDRB_ri  1111 1000 .001 1111 .... ............     @ldst_ri_lit
   }
   {
-    NOP      1111 1000 1001 ---- 1111 ------------     # PLD
+    PLD      1111 1000 1001 ---- 1111 ------------     # (immediate T1)
     LDRB_ri  1111 1000 1001 .... .... ............     @ldst_ri_pos
   }
   LDRB_ri    1111 1000 0001 .... .... 1..1 ........    @ldst_ri_idx
   {
-    NOP      1111 1000 0001 ---- 1111 1100 --------    # PLD
+    PLD      1111 1000 0001 ---- 1111 1100 --------    # (immediate T2)
     LDRB_ri  1111 1000 0001 .... .... 1100 ........    @ldst_ri_neg
   }
   LDRBT_ri   1111 1000 0001 .... .... 1110 ........    @ldst_ri_unp
   {
-    NOP      1111 1000 0001 ---- 1111 000000 -- ----   # PLD
+    PLD      1111 1000 0001 ---- 1111 000000 -- ----   # (register)
     LDRB_rr  1111 1000 0001 .... .... 000000 .. ....   @ldst_rr
   }
 }
 {
   {
-    NOP      1111 1000 -011 1111 1111 ------------     # PLD
+    PLD      1111 1000 -011 1111 1111 ------------     # (literal)
     LDRH_ri  1111 1000 .011 1111 .... ............     @ldst_ri_lit
   }
   {
-    NOP      1111 1000 1011 ---- 1111 ------------     # PLDW
+    PLDW     1111 1000 1011 ---- 1111 ------------     # (immediate T1)
     LDRH_ri  1111 1000 1011 .... .... ............     @ldst_ri_pos
   }
   LDRH_ri    1111 1000 0011 .... .... 1..1 ........    @ldst_ri_idx
   {
-    NOP      1111 1000 0011 ---- 1111 1100 --------    # PLDW
+    PLDW     1111 1000 0011 ---- 1111 1100 --------    # (immediate T2)
     LDRH_ri  1111 1000 0011 .... .... 1100 ........    @ldst_ri_neg
   }
   LDRHT_ri   1111 1000 0011 .... .... 1110 ........    @ldst_ri_unp
   {
-    NOP      1111 1000 0011 ---- 1111 000000 -- ----   # PLDW
+    PLDW     1111 1000 0011 ---- 1111 000000 -- ----   # (register)
     LDRH_rr  1111 1000 0011 .... .... 000000 .. ....   @ldst_rr
   }
 }
@@ -XXX,XX +XXX,XX @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
   LDRT_ri    1111 1000 0101 .... .... 1110 ........    @ldst_ri_unp
   LDR_rr     1111 1000 0101 .... .... 000000 .. ....   @ldst_rr
 }
-# NOPs here are PLI.
 {
   {
-    NOP      1111 1001 -001 1111 1111 ------------
+    PLI      1111 1001 -001 1111 1111 ------------     # (literal T3)
     LDRSB_ri 1111 1001 .001 1111 .... ............     @ldst_ri_lit
   }
   {
-    NOP      1111 1001 1001 ---- 1111 ------------
+    PLI      1111 1001 1001 ---- 1111 ------------     # (immediate T1)
     LDRSB_ri 1111 1001 1001 .... .... ............     @ldst_ri_pos
   }
   LDRSB_ri   1111 1001 0001 .... .... 1..1 ........    @ldst_ri_idx
   {
-    NOP      1111 1001 0001 ---- 1111 1100 --------
+    PLI      1111 1001 0001 ---- 1111 1100 --------    # (immediate T2)
     LDRSB_ri 1111 1001 0001 .... .... 1100 ........    @ldst_ri_neg
   }
   LDRSBT_ri  1111 1001 0001 .... .... 1110 ........    @ldst_ri_unp
   {
-    NOP      1111 1001 0001 ---- 1111 000000 -- ----
+    PLI      1111 1001 0001 ---- 1111 000000 -- ----   # (register)
     LDRSB_rr 1111 1001 0001 .... .... 000000 .. ....   @ldst_rr
   }
 }
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_PLD(DisasContext *s, arg_PLD *a)
     return ENABLE_ARCH_5TE;
 }
 
-static bool trans_PLDW(DisasContext *s, arg_PLD *a)
+static bool trans_PLDW(DisasContext *s, arg_PLDW *a)
 {
     return arm_dc_feature(s, ARM_FEATURE_V7MP);
 }
 
-static bool trans_PLI(DisasContext *s, arg_PLD *a)
+static bool trans_PLI(DisasContext *s, arg_PLI *a)
 {
     return ENABLE_ARCH_7;
 }
--
2.34.1
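[The CONSTRAINED UNPREDICTABLE note above concerns bit 5 of the first
halfword, i.e. bit 21 of the combined 32-bit insn. A sketch of what
"ignore the value of the bit" means for a matcher, with masks derived
from the two PLD (literal) patterns; illustrative only, not QEMU's
decoder:]

    #include <stdint.h>
    #include <stdbool.h>

    static bool is_pld_literal_t1(uint32_t insn)
    {
        /* Fixed bits of 1111 1000 -0x1 1111 1111 ------------, with
         * the U bit (bit 23) and the should-be-zero bit (bit 21)
         * both masked out, so either pattern above matches.
         */
        return (insn & 0xff5ff000) == 0xf81ff000;
    }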
From: Richard Henderson <richard.henderson@linaro.org>

Fixes RISU mismatch for "fcvtzs h31, h0, #14".

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240524232121.284515-5-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-a64.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
         read_vec_element_i32(s, tcg_op, rn, pass, size);
         fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
         if (is_scalar) {
+            if (size == MO_16 && !is_u) {
+                tcg_gen_ext16u_i32(tcg_op, tcg_op);
+            }
             write_fp_sreg(s, rd, tcg_op);
         } else {
             write_vec_element_i32(s, tcg_op, rd, pass, size);
--
2.34.1
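[The fix is easier to see in scalar form: a signed FP16-to-integer
conversion helper can leave the 16-bit result sign-extended in the
32-bit temporary, and the garbage in the upper half must be squashed
before the value is written out as a scalar register. An illustrative
sketch, not the QEMU code itself:]

    #include <stdint.h>

    /* Equivalent of the added tcg_gen_ext16u_i32(): keep only the low
     * 16 bits so sign extension does not leak into the written value.
     */
    static uint32_t narrow_fcvt_result(int32_t sign_extended)
    {
        return (uint16_t)sign_extended;
    }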
From: Richard Henderson <richard.henderson@linaro.org>

The decode of FMOV (vector, immediate, half-precision) vs
invalid cases of MOVI are incorrect.

Fixes RISU mismatch for invalid insn 0x2f01fd31.

Fixes: 70b4e6a4457 ("arm/translate-a64: add FP16 FMOV to simd_mod_imm")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240524232121.284515-6-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-a64.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
     bool is_q = extract32(insn, 30, 1);
     uint64_t imm = 0;
 
-    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
-        /* Check for FMOV (vector, immediate) - half-precision */
-        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
+    if (o2) {
+        if (cmode != 0xf || is_neg) {
             unallocated_encoding(s);
             return;
         }
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    if (cmode == 15 && o2 && !is_neg) {
         /* FMOV (vector, immediate) - half-precision */
+        if (!dc_isar_feature(aa64_fp16, s)) {
+            unallocated_encoding(s);
+            return;
+        }
         imm = vfp_expand_imm(MO_16, abcdefgh);
         /* now duplicate across the lanes */
         imm = dup_const(MO_16, imm);
     } else {
+        if (cmode == 0xf && is_neg && !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
         /* MOVI or MVNI, with MVNI negation handled above. */
         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
--
2.34.1
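[For reference, the half-precision immediate expansion used here follows
the architectural VFPExpandImm() pattern; a scalar sketch of the MO_16
case, written from the pseudocode rather than taken from QEMU:]

    #include <stdint.h>

    /* Expand imm8 = abcdefgh to an IEEE half: sign = a, exponent =
     * NOT(b):b:b:cd (5 bits), fraction = efgh followed by six zeros.
     */
    static uint16_t vfp_expand_imm16(uint8_t imm8)
    {
        uint16_t sign = (imm8 >> 7) & 1;
        uint16_t b    = (imm8 >> 6) & 1;
        uint16_t exp  = ((b ^ 1) << 4) | (b << 3) | (b << 2) |
                        ((imm8 >> 4) & 3);
        uint16_t frac = (imm8 & 0xf) << 6;
        return (sign << 15) | (exp << 10) | frac;
    }

dup_const(MO_16, imm) then replicates that 16-bit pattern across every
lane of the vector.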
From: Richard Henderson <richard.henderson@linaro.org>

All of these insns have "if sz == '1' then UNDEFINED" in their pseudocode.
Fixes a RISU miscompare for invalid insn 0x5ef0c87a.

Fixes: 5c36d89567c ("arm/translate-a64: add all FP16 ops in simd_scalar_pairwise")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240524232121.284515-7-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
     case 0x2f: /* FMINP */
         /* FP op, size[0] is 32 or 64 bit*/
         if (!u) {
-            if (!dc_isar_feature(aa64_fp16, s)) {
+            if ((size & 1) || !dc_isar_feature(aa64_fp16, s)) {
                 unallocated_encoding(s);
                 return;
             } else {
--
2.34.1
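[The added condition mirrors the pseudocode's "if sz == '1' then
UNDEFINED": for the half-precision (u == 0) forms, bit 0 of the size
field carries the sz bit. A scalar sketch of the validity check,
illustrative only:]

    #include <stdbool.h>

    static bool fp16_pairwise_valid(int size, bool have_fp16)
    {
        /* sz (size bit 0) must be 0 and FEAT_FP16 must be implemented */
        return (size & 1) == 0 && have_fp16;
    }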
1
Implement the MVE shifts by immediate, which perform shifts
1
From: Richard Henderson <richard.henderson@linaro.org>
2
on a single general-purpose register.
3
2
4
These patterns overlap with the long-shift-by-immediates,
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
so we have to rearrange the grouping a little here.
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20240524232121.284515-8-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
---
9
target/arm/tcg/translate.h | 5 +
10
target/arm/tcg/gengvec.c | 1612 ++++++++++++++++++++++++++++++++++++
11
target/arm/tcg/translate.c | 1588 -----------------------------------
12
target/arm/tcg/meson.build | 1 +
13
4 files changed, 1618 insertions(+), 1588 deletions(-)
14
create mode 100644 target/arm/tcg/gengvec.c
6
15
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210628135835.6690-18-peter.maydell@linaro.org
10
---
11
target/arm/helper-mve.h | 3 ++
12
target/arm/translate.h | 1 +
13
target/arm/t32.decode | 31 ++++++++++++++-----
14
target/arm/mve_helper.c | 10 ++++++
15
target/arm/translate.c | 68 +++++++++++++++++++++++++++++++++++++++--
16
5 files changed, 104 insertions(+), 9 deletions(-)
17
18
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
19
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper-mve.h
18
--- a/target/arm/tcg/translate.h
21
+++ b/target/arm/helper-mve.h
19
+++ b/target/arm/tcg/translate.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
20
@@ -XXX,XX +XXX,XX @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
23
DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
21
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
24
DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
22
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
25
DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
23
26
+
24
+void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
27
+DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
25
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
28
+DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
26
+void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
29
diff --git a/target/arm/translate.h b/target/arm/translate.h
27
+void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
30
index XXXXXXX..XXXXXXX 100644
28
+
31
--- a/target/arm/translate.h
29
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
32
+++ b/target/arm/translate.h
30
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
33
@@ -XXX,XX +XXX,XX @@ typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
31
void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
34
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
32
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
35
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
33
new file mode 100644
36
typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
34
index XXXXXXX..XXXXXXX
37
+typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);
35
--- /dev/null
38
36
+++ b/target/arm/tcg/gengvec.c
39
/**
40
* arm_tbflags_from_tb:
41
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/t32.decode
44
+++ b/target/arm/t32.decode
45
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
46
38
+/*
47
&mve_shl_ri rdalo rdahi shim
39
+ * ARM generic vector expansion
48
&mve_shl_rr rdalo rdahi rm
40
+ *
49
+&mve_sh_ri rda shim
41
+ * Copyright (c) 2003 Fabrice Bellard
50
42
+ * Copyright (c) 2005-2007 CodeSourcery
51
# rdahi: bits [3:1] from insn, bit 0 is 1
43
+ * Copyright (c) 2007 OpenedHand, Ltd.
52
# rdalo: bits [3:1] from insn, bit 0 is 0
44
+ *
53
@@ -XXX,XX +XXX,XX @@
45
+ * This library is free software; you can redistribute it and/or
54
&mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
46
+ * modify it under the terms of the GNU Lesser General Public
55
@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
47
+ * License as published by the Free Software Foundation; either
56
&mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
48
+ * version 2.1 of the License, or (at your option) any later version.
57
+@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
49
+ *
58
+ &mve_sh_ri shim=%imm5_12_6
50
+ * This library is distributed in the hope that it will be useful,
59
51
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
60
{
52
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
61
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
53
+ * Lesser General Public License for more details.
62
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
54
+ *
63
# the rest fall through (where ORR_rrri and MOV_rxri will end up
55
+ * You should have received a copy of the GNU Lesser General Public
64
# handling them as r13 and r15 accesses with the same semantics as A32).
56
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
65
[
57
+ */
66
- LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
58
+
67
- LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
59
+#include "qemu/osdep.h"
68
- ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
60
+#include "translate.h"
69
+ {
61
+
70
+ UQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 00 1111 @mve_sh_ri
62
+
71
+ LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
63
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
72
+ UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
64
+ uint32_t opr_sz, uint32_t max_sz,
65
+ gen_helper_gvec_3_ptr *fn)
66
+{
67
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
68
+
69
+ tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
70
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
71
+ opr_sz, max_sz, 0, fn);
72
+}
73
+
74
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
75
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
76
+{
77
+ static gen_helper_gvec_3_ptr * const fns[2] = {
78
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
79
+ };
80
+ tcg_debug_assert(vece >= 1 && vece <= 2);
81
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
82
+}
83
+
84
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
85
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
86
+{
87
+ static gen_helper_gvec_3_ptr * const fns[2] = {
88
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
89
+ };
90
+ tcg_debug_assert(vece >= 1 && vece <= 2);
91
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
92
+}
93
+
94
+#define GEN_CMP0(NAME, COND) \
95
+ void NAME(unsigned vece, uint32_t d, uint32_t m, \
96
+ uint32_t opr_sz, uint32_t max_sz) \
97
+ { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
98
+
99
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
100
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
101
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
102
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
103
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
104
+
105
+#undef GEN_CMP0
106
+
107
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
108
+{
109
+ tcg_gen_vec_sar8i_i64(a, a, shift);
110
+ tcg_gen_vec_add8_i64(d, d, a);
111
+}
112
+
113
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
114
+{
115
+ tcg_gen_vec_sar16i_i64(a, a, shift);
116
+ tcg_gen_vec_add16_i64(d, d, a);
117
+}
118
+
119
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
120
+{
121
+ tcg_gen_sari_i32(a, a, shift);
122
+ tcg_gen_add_i32(d, d, a);
123
+}
124
+
125
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
126
+{
127
+ tcg_gen_sari_i64(a, a, shift);
128
+ tcg_gen_add_i64(d, d, a);
129
+}
130
+
131
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
132
+{
133
+ tcg_gen_sari_vec(vece, a, a, sh);
134
+ tcg_gen_add_vec(vece, d, d, a);
135
+}
136
+
137
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
138
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
139
+{
140
+ static const TCGOpcode vecop_list[] = {
141
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
142
+ };
143
+ static const GVecGen2i ops[4] = {
144
+ { .fni8 = gen_ssra8_i64,
145
+ .fniv = gen_ssra_vec,
146
+ .fno = gen_helper_gvec_ssra_b,
147
+ .load_dest = true,
148
+ .opt_opc = vecop_list,
149
+ .vece = MO_8 },
150
+ { .fni8 = gen_ssra16_i64,
151
+ .fniv = gen_ssra_vec,
152
+ .fno = gen_helper_gvec_ssra_h,
153
+ .load_dest = true,
154
+ .opt_opc = vecop_list,
155
+ .vece = MO_16 },
156
+ { .fni4 = gen_ssra32_i32,
157
+ .fniv = gen_ssra_vec,
158
+ .fno = gen_helper_gvec_ssra_s,
159
+ .load_dest = true,
160
+ .opt_opc = vecop_list,
161
+ .vece = MO_32 },
162
+ { .fni8 = gen_ssra64_i64,
163
+ .fniv = gen_ssra_vec,
164
+ .fno = gen_helper_gvec_ssra_d,
165
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
166
+ .opt_opc = vecop_list,
167
+ .load_dest = true,
168
+ .vece = MO_64 },
169
+ };
170
+
171
+ /* tszimm encoding produces immediates in the range [1..esize]. */
172
+ tcg_debug_assert(shift > 0);
173
+ tcg_debug_assert(shift <= (8 << vece));
174
+
175
+ /*
176
+ * Shifts larger than the element size are architecturally valid.
177
+ * Signed results in all sign bits.
178
+ */
179
+ shift = MIN(shift, (8 << vece) - 1);
180
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
181
+}
182
+
183
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
184
+{
185
+ tcg_gen_vec_shr8i_i64(a, a, shift);
186
+ tcg_gen_vec_add8_i64(d, d, a);
187
+}
188
+
189
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
190
+{
191
+ tcg_gen_vec_shr16i_i64(a, a, shift);
192
+ tcg_gen_vec_add16_i64(d, d, a);
193
+}
194
+
195
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
196
+{
197
+ tcg_gen_shri_i32(a, a, shift);
198
+ tcg_gen_add_i32(d, d, a);
199
+}
200
+
201
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
202
+{
203
+ tcg_gen_shri_i64(a, a, shift);
204
+ tcg_gen_add_i64(d, d, a);
205
+}
206
+
207
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
208
+{
209
+ tcg_gen_shri_vec(vece, a, a, sh);
210
+ tcg_gen_add_vec(vece, d, d, a);
211
+}
212
+
213
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
214
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
215
+{
216
+ static const TCGOpcode vecop_list[] = {
217
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
218
+ };
219
+ static const GVecGen2i ops[4] = {
220
+ { .fni8 = gen_usra8_i64,
221
+ .fniv = gen_usra_vec,
222
+ .fno = gen_helper_gvec_usra_b,
223
+ .load_dest = true,
224
+ .opt_opc = vecop_list,
225
+ .vece = MO_8, },
226
+ { .fni8 = gen_usra16_i64,
227
+ .fniv = gen_usra_vec,
228
+ .fno = gen_helper_gvec_usra_h,
229
+ .load_dest = true,
230
+ .opt_opc = vecop_list,
231
+ .vece = MO_16, },
232
+ { .fni4 = gen_usra32_i32,
233
+ .fniv = gen_usra_vec,
234
+ .fno = gen_helper_gvec_usra_s,
235
+ .load_dest = true,
236
+ .opt_opc = vecop_list,
237
+ .vece = MO_32, },
238
+ { .fni8 = gen_usra64_i64,
239
+ .fniv = gen_usra_vec,
240
+ .fno = gen_helper_gvec_usra_d,
241
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
242
+ .load_dest = true,
243
+ .opt_opc = vecop_list,
244
+ .vece = MO_64, },
245
+ };
246
+
247
+ /* tszimm encoding produces immediates in the range [1..esize]. */
248
+ tcg_debug_assert(shift > 0);
249
+ tcg_debug_assert(shift <= (8 << vece));
250
+
251
+ /*
252
+ * Shifts larger than the element size are architecturally valid.
253
+ * Unsigned results in all zeros as input to accumulate: nop.
254
+ */
255
+ if (shift < (8 << vece)) {
256
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
257
+ } else {
258
+ /* Nop, but we do need to clear the tail. */
259
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
73
+ }
260
+ }
74
261
+}
75
- UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
262
+
76
- URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
263
+/*
77
- SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
264
+ * Shift one less than the requested amount, and the low bit is
78
- SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
265
+ * the rounding bit. For the 8 and 16-bit operations, because we
79
+ {
266
+ * mask the low bit, we can perform a normal integer shift instead
80
+ URSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 01 1111 @mve_sh_ri
267
+ * of a vector shift.
81
+ LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
268
+ */
82
+ URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
269
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
83
+ }
270
+{
84
+
271
+ TCGv_i64 t = tcg_temp_new_i64();
85
+ {
272
+
86
+ SRSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 10 1111 @mve_sh_ri
273
+ tcg_gen_shri_i64(t, a, sh - 1);
87
+ ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
274
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
88
+ SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
275
+ tcg_gen_vec_sar8i_i64(d, a, sh);
89
+ }
276
+ tcg_gen_vec_add8_i64(d, d, t);
90
+
277
+}
91
+ {
278
+
92
+ SQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 11 1111 @mve_sh_ri
279
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
93
+ SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
280
+{
94
+ }
281
+ TCGv_i64 t = tcg_temp_new_i64();
95
282
+
96
LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
283
+ tcg_gen_shri_i64(t, a, sh - 1);
97
ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
284
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
98
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
285
+ tcg_gen_vec_sar16i_i64(d, a, sh);
99
index XXXXXXX..XXXXXXX 100644
286
+ tcg_gen_vec_add16_i64(d, d, t);
100
--- a/target/arm/mve_helper.c
287
+}
101
+++ b/target/arm/mve_helper.c
288
+
102
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift)
289
+void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
103
{
290
+{
104
return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);
105
}
106
+
107
+uint32_t HELPER(mve_uqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
108
+{
109
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
110
+}
111
+
112
+uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
113
+{
114
+ return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
115
+}
116
diff --git a/target/arm/translate.c b/target/arm/translate.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/arm/translate.c
119
+++ b/target/arm/translate.c
120
@@ -XXX,XX +XXX,XX @@ static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
121
122
static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
123
{
124
- TCGv_i32 t = tcg_temp_new_i32();
125
+ TCGv_i32 t;
291
+ TCGv_i32 t;
126
292
+
127
+ /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
293
+ /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
128
+ if (sh == 32) {
294
+ if (sh == 32) {
129
+ tcg_gen_movi_i32(d, 0);
295
+ tcg_gen_movi_i32(d, 0);
130
+ return;
296
+ return;
131
+ }
297
+ }
132
+ t = tcg_temp_new_i32();
298
+ t = tcg_temp_new_i32();
133
tcg_gen_extract_i32(t, a, sh - 1, 1);
299
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
134
tcg_gen_sari_i32(d, a, sh);
300
+ tcg_gen_sari_i32(d, a, sh);
135
tcg_gen_add_i32(d, d, t);
301
+ tcg_gen_add_i32(d, d, t);
136
@@ -XXX,XX +XXX,XX @@ static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
302
+}
137
303
+
138
static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
304
+ void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
139
{
305
+{
140
- TCGv_i32 t = tcg_temp_new_i32();
306
+ TCGv_i64 t = tcg_temp_new_i64();
307
+
308
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
309
+ tcg_gen_sari_i64(d, a, sh);
310
+ tcg_gen_add_i64(d, d, t);
311
+}
312
+
313
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
314
+{
315
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
316
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
317
+
318
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
319
+ tcg_gen_dupi_vec(vece, ones, 1);
320
+ tcg_gen_and_vec(vece, t, t, ones);
321
+ tcg_gen_sari_vec(vece, d, a, sh);
322
+ tcg_gen_add_vec(vece, d, d, t);
323
+}
324
+
325
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
326
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
327
+{
328
+ static const TCGOpcode vecop_list[] = {
329
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
330
+ };
331
+ static const GVecGen2i ops[4] = {
332
+ { .fni8 = gen_srshr8_i64,
333
+ .fniv = gen_srshr_vec,
334
+ .fno = gen_helper_gvec_srshr_b,
335
+ .opt_opc = vecop_list,
336
+ .vece = MO_8 },
337
+ { .fni8 = gen_srshr16_i64,
338
+ .fniv = gen_srshr_vec,
339
+ .fno = gen_helper_gvec_srshr_h,
340
+ .opt_opc = vecop_list,
341
+ .vece = MO_16 },
342
+ { .fni4 = gen_srshr32_i32,
343
+ .fniv = gen_srshr_vec,
344
+ .fno = gen_helper_gvec_srshr_s,
345
+ .opt_opc = vecop_list,
346
+ .vece = MO_32 },
347
+ { .fni8 = gen_srshr64_i64,
348
+ .fniv = gen_srshr_vec,
349
+ .fno = gen_helper_gvec_srshr_d,
350
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
351
+ .opt_opc = vecop_list,
352
+ .vece = MO_64 },
353
+ };
354
+
355
+ /* tszimm encoding produces immediates in the range [1..esize] */
356
+ tcg_debug_assert(shift > 0);
357
+ tcg_debug_assert(shift <= (8 << vece));
358
+
359
+ if (shift == (8 << vece)) {
360
+ /*
361
+ * Shifts larger than the element size are architecturally valid.
362
+ * Signed results in all sign bits. With rounding, this produces
363
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
364
+ * I.e. always zero.
365
+ */
366
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
367
+ } else {
368
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
369
+ }
370
+}
371
+
372
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
373
+{
374
+ TCGv_i64 t = tcg_temp_new_i64();
375
+
376
+ gen_srshr8_i64(t, a, sh);
377
+ tcg_gen_vec_add8_i64(d, d, t);
378
+}
379
+
380
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
381
+{
382
+ TCGv_i64 t = tcg_temp_new_i64();
383
+
384
+ gen_srshr16_i64(t, a, sh);
385
+ tcg_gen_vec_add16_i64(d, d, t);
386
+}
387
+
388
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
389
+{
390
+ TCGv_i32 t = tcg_temp_new_i32();
391
+
392
+ gen_srshr32_i32(t, a, sh);
393
+ tcg_gen_add_i32(d, d, t);
394
+}
395
+
396
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
397
+{
398
+ TCGv_i64 t = tcg_temp_new_i64();
399
+
400
+ gen_srshr64_i64(t, a, sh);
401
+ tcg_gen_add_i64(d, d, t);
402
+}
403
+
404
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
405
+{
406
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
407
+
408
+ gen_srshr_vec(vece, t, a, sh);
409
+ tcg_gen_add_vec(vece, d, d, t);
410
+}
411
+
412
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
413
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
414
+{
415
+ static const TCGOpcode vecop_list[] = {
416
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
417
+ };
418
+ static const GVecGen2i ops[4] = {
419
+ { .fni8 = gen_srsra8_i64,
420
+ .fniv = gen_srsra_vec,
421
+ .fno = gen_helper_gvec_srsra_b,
422
+ .opt_opc = vecop_list,
423
+ .load_dest = true,
424
+ .vece = MO_8 },
425
+ { .fni8 = gen_srsra16_i64,
426
+ .fniv = gen_srsra_vec,
427
+ .fno = gen_helper_gvec_srsra_h,
428
+ .opt_opc = vecop_list,
429
+ .load_dest = true,
430
+ .vece = MO_16 },
431
+ { .fni4 = gen_srsra32_i32,
432
+ .fniv = gen_srsra_vec,
433
+ .fno = gen_helper_gvec_srsra_s,
434
+ .opt_opc = vecop_list,
435
+ .load_dest = true,
436
+ .vece = MO_32 },
437
+ { .fni8 = gen_srsra64_i64,
438
+ .fniv = gen_srsra_vec,
439
+ .fno = gen_helper_gvec_srsra_d,
440
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
441
+ .opt_opc = vecop_list,
442
+ .load_dest = true,
443
+ .vece = MO_64 },
444
+ };
445
+
446
+ /* tszimm encoding produces immediates in the range [1..esize] */
447
+ tcg_debug_assert(shift > 0);
448
+ tcg_debug_assert(shift <= (8 << vece));
449
+
450
+ /*
451
+ * Shifts larger than the element size are architecturally valid.
452
+ * Signed results in all sign bits. With rounding, this produces
453
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
454
+ * I.e. always zero. With accumulation, this leaves D unchanged.
455
+ */
456
+ if (shift == (8 << vece)) {
457
+ /* Nop, but we do need to clear the tail. */
458
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
459
+ } else {
460
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
461
+ }
462
+}
463
+
464
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
465
+{
466
+ TCGv_i64 t = tcg_temp_new_i64();
467
+
468
+ tcg_gen_shri_i64(t, a, sh - 1);
469
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
470
+ tcg_gen_vec_shr8i_i64(d, a, sh);
471
+ tcg_gen_vec_add8_i64(d, d, t);
472
+}
473
+
474
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
475
+{
476
+ TCGv_i64 t = tcg_temp_new_i64();
477
+
478
+ tcg_gen_shri_i64(t, a, sh - 1);
479
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
480
+ tcg_gen_vec_shr16i_i64(d, a, sh);
481
+ tcg_gen_vec_add16_i64(d, d, t);
482
+}
483
+
484
+void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
485
+{
141
+ TCGv_i32 t;
486
+ TCGv_i32 t;
142
487
+
143
+ /* Handle shift by the input size for the benefit of trans_URSHR_ri */
488
+ /* Handle shift by the input size for the benefit of trans_URSHR_ri */
144
+ if (sh == 32) {
489
+ if (sh == 32) {
145
+ tcg_gen_extract_i32(d, a, sh - 1, 1);
490
+ tcg_gen_extract_i32(d, a, sh - 1, 1);
146
+ return;
491
+ return;
147
+ }
492
+ }
148
+ t = tcg_temp_new_i32();
493
+ t = tcg_temp_new_i32();
149
tcg_gen_extract_i32(t, a, sh - 1, 1);
494
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
150
tcg_gen_shri_i32(d, a, sh);
495
+ tcg_gen_shri_i32(d, a, sh);
151
tcg_gen_add_i32(d, d, t);
496
+ tcg_gen_add_i32(d, d, t);
152
@@ -XXX,XX +XXX,XX @@ static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
497
+}
153
return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
498
+
499
+void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
500
+{
501
+ TCGv_i64 t = tcg_temp_new_i64();
502
+
503
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
504
+ tcg_gen_shri_i64(d, a, sh);
505
+ tcg_gen_add_i64(d, d, t);
506
+}
507
+
508
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
509
+{
510
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
511
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
512
+
513
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
514
+ tcg_gen_dupi_vec(vece, ones, 1);
515
+ tcg_gen_and_vec(vece, t, t, ones);
516
+ tcg_gen_shri_vec(vece, d, a, shift);
517
+ tcg_gen_add_vec(vece, d, d, t);
518
+}
519
+
520
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
521
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
522
+{
523
+ static const TCGOpcode vecop_list[] = {
524
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
525
+ };
526
+ static const GVecGen2i ops[4] = {
527
+ { .fni8 = gen_urshr8_i64,
528
+ .fniv = gen_urshr_vec,
529
+ .fno = gen_helper_gvec_urshr_b,
530
+ .opt_opc = vecop_list,
531
+ .vece = MO_8 },
532
+ { .fni8 = gen_urshr16_i64,
533
+ .fniv = gen_urshr_vec,
534
+ .fno = gen_helper_gvec_urshr_h,
535
+ .opt_opc = vecop_list,
536
+ .vece = MO_16 },
537
+ { .fni4 = gen_urshr32_i32,
538
+ .fniv = gen_urshr_vec,
539
+ .fno = gen_helper_gvec_urshr_s,
540
+ .opt_opc = vecop_list,
541
+ .vece = MO_32 },
542
+ { .fni8 = gen_urshr64_i64,
543
+ .fniv = gen_urshr_vec,
544
+ .fno = gen_helper_gvec_urshr_d,
545
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
546
+ .opt_opc = vecop_list,
547
+ .vece = MO_64 },
548
+ };
549
+
550
+ /* tszimm encoding produces immediates in the range [1..esize] */
551
+ tcg_debug_assert(shift > 0);
552
+ tcg_debug_assert(shift <= (8 << vece));
553
+
554
+ if (shift == (8 << vece)) {
555
+ /*
556
+ * Shifts larger than the element size are architecturally valid.
557
+ * Unsigned results in zero. With rounding, this produces a
558
+ * copy of the most significant bit.
559
+ */
560
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
561
+ } else {
562
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
563
+ }
564
+}
565
+
566
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
567
+{
568
+ TCGv_i64 t = tcg_temp_new_i64();
569
+
570
+ if (sh == 8) {
571
+ tcg_gen_vec_shr8i_i64(t, a, 7);
572
+ } else {
573
+ gen_urshr8_i64(t, a, sh);
574
+ }
575
+ tcg_gen_vec_add8_i64(d, d, t);
576
+}
577
+
578
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
579
+{
580
+ TCGv_i64 t = tcg_temp_new_i64();
581
+
582
+ if (sh == 16) {
583
+ tcg_gen_vec_shr16i_i64(t, a, 15);
584
+ } else {
585
+ gen_urshr16_i64(t, a, sh);
586
+ }
587
+ tcg_gen_vec_add16_i64(d, d, t);
588
+}
589
+
590
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
591
+{
592
+ TCGv_i32 t = tcg_temp_new_i32();
593
+
594
+ if (sh == 32) {
595
+ tcg_gen_shri_i32(t, a, 31);
596
+ } else {
597
+ gen_urshr32_i32(t, a, sh);
598
+ }
599
+ tcg_gen_add_i32(d, d, t);
600
+}
601
+
602
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
603
+{
604
+ TCGv_i64 t = tcg_temp_new_i64();
605
+
606
+ if (sh == 64) {
607
+ tcg_gen_shri_i64(t, a, 63);
608
+ } else {
609
+ gen_urshr64_i64(t, a, sh);
610
+ }
611
+ tcg_gen_add_i64(d, d, t);
612
+}
613
+
614
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
615
+{
616
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
617
+
618
+ if (sh == (8 << vece)) {
619
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
620
+ } else {
621
+ gen_urshr_vec(vece, t, a, sh);
622
+ }
623
+ tcg_gen_add_vec(vece, d, d, t);
624
+}
625
+
626
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
627
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
628
+{
629
+ static const TCGOpcode vecop_list[] = {
630
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
631
+ };
632
+ static const GVecGen2i ops[4] = {
633
+ { .fni8 = gen_ursra8_i64,
634
+ .fniv = gen_ursra_vec,
635
+ .fno = gen_helper_gvec_ursra_b,
636
+ .opt_opc = vecop_list,
637
+ .load_dest = true,
638
+ .vece = MO_8 },
639
+ { .fni8 = gen_ursra16_i64,
640
+ .fniv = gen_ursra_vec,
641
+ .fno = gen_helper_gvec_ursra_h,
642
+ .opt_opc = vecop_list,
643
+ .load_dest = true,
644
+ .vece = MO_16 },
645
+ { .fni4 = gen_ursra32_i32,
646
+ .fniv = gen_ursra_vec,
647
+ .fno = gen_helper_gvec_ursra_s,
648
+ .opt_opc = vecop_list,
649
+ .load_dest = true,
650
+ .vece = MO_32 },
651
+ { .fni8 = gen_ursra64_i64,
652
+ .fniv = gen_ursra_vec,
653
+ .fno = gen_helper_gvec_ursra_d,
654
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
655
+ .opt_opc = vecop_list,
656
+ .load_dest = true,
657
+ .vece = MO_64 },
658
+ };
659
+
660
+ /* tszimm encoding produces immediates in the range [1..esize] */
661
+ tcg_debug_assert(shift > 0);
662
+ tcg_debug_assert(shift <= (8 << vece));
663
+
664
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
665
+}
666
+
667
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
668
+{
669
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
670
+ TCGv_i64 t = tcg_temp_new_i64();
671
+
672
+ tcg_gen_shri_i64(t, a, shift);
673
+ tcg_gen_andi_i64(t, t, mask);
674
+ tcg_gen_andi_i64(d, d, ~mask);
675
+ tcg_gen_or_i64(d, d, t);
676
+}
677
+
678
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
679
+{
680
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
681
+ TCGv_i64 t = tcg_temp_new_i64();
682
+
683
+ tcg_gen_shri_i64(t, a, shift);
684
+ tcg_gen_andi_i64(t, t, mask);
685
+ tcg_gen_andi_i64(d, d, ~mask);
686
+ tcg_gen_or_i64(d, d, t);
687
+}
688
+
689
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
690
+{
691
+ tcg_gen_shri_i32(a, a, shift);
692
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
693
+}
694
+
695
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
696
+{
697
+ tcg_gen_shri_i64(a, a, shift);
698
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
699
+}
700
+
701
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
702
+{
703
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
704
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
705
+
706
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
707
+ tcg_gen_shri_vec(vece, t, a, sh);
708
+ tcg_gen_and_vec(vece, d, d, m);
709
+ tcg_gen_or_vec(vece, d, d, t);
710
+}
711
+
712
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
713
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
714
+{
715
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
716
+ const GVecGen2i ops[4] = {
717
+ { .fni8 = gen_shr8_ins_i64,
718
+ .fniv = gen_shr_ins_vec,
719
+ .fno = gen_helper_gvec_sri_b,
720
+ .load_dest = true,
721
+ .opt_opc = vecop_list,
722
+ .vece = MO_8 },
723
+ { .fni8 = gen_shr16_ins_i64,
724
+ .fniv = gen_shr_ins_vec,
725
+ .fno = gen_helper_gvec_sri_h,
726
+ .load_dest = true,
727
+ .opt_opc = vecop_list,
728
+ .vece = MO_16 },
729
+ { .fni4 = gen_shr32_ins_i32,
730
+ .fniv = gen_shr_ins_vec,
731
+ .fno = gen_helper_gvec_sri_s,
732
+ .load_dest = true,
733
+ .opt_opc = vecop_list,
734
+ .vece = MO_32 },
735
+ { .fni8 = gen_shr64_ins_i64,
736
+ .fniv = gen_shr_ins_vec,
737
+ .fno = gen_helper_gvec_sri_d,
738
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
739
+ .load_dest = true,
740
+ .opt_opc = vecop_list,
741
+ .vece = MO_64 },
742
+ };
743
+
744
+ /* tszimm encoding produces immediates in the range [1..esize]. */
745
+ tcg_debug_assert(shift > 0);
746
+ tcg_debug_assert(shift <= (8 << vece));
747
+
748
+ /* Shift of esize leaves destination unchanged. */
749
+ if (shift < (8 << vece)) {
750
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
751
+ } else {
752
+ /* Nop, but we do need to clear the tail. */
753
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
754
+ }
755
+}
756
+
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_8, 0xff << shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shli_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_16, 0xffff << shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shli_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+    TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+    tcg_gen_shli_vec(vece, t, a, sh);
+    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+    tcg_gen_and_vec(vece, d, d, m);
+    tcg_gen_or_vec(vece, d, d, t);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+    const GVecGen2i ops[4] = {
+        { .fni8 = gen_shl8_ins_i64,
+          .fniv = gen_shl_ins_vec,
+          .fno = gen_helper_gvec_sli_b,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fni8 = gen_shl16_ins_i64,
+          .fniv = gen_shl_ins_vec,
+          .fno = gen_helper_gvec_sli_h,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_shl32_ins_i32,
+          .fniv = gen_shl_ins_vec,
+          .fno = gen_helper_gvec_sli_s,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_shl64_ins_i64,
+          .fniv = gen_shl_ins_vec,
+          .fno = gen_helper_gvec_sli_d,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+
+    /* tszimm encoding produces immediates in the range [0..esize-1]. */
+    tcg_debug_assert(shift >= 0);
+    tcg_debug_assert(shift < (8 << vece));
+
+    if (shift == 0) {
+        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
+    } else {
+        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+    }
+}
+
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u8(a, a, b);
+    gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u8(a, a, b);
+    gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u16(a, a, b);
+    gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u16(a, a, b);
+    gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_mul_i32(a, a, b);
+    tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_mul_i32(a, a, b);
+    tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_mul_i64(a, a, b);
+    tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_mul_i64(a, a, b);
+    tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_mul_vec(vece, a, a, b);
+    tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_mul_vec(vece, a, a, b);
+    tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64 which does support them.
+ */
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_mul_vec, INDEX_op_add_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fni4 = gen_mla8_i32,
+          .fniv = gen_mla_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fni4 = gen_mla16_i32,
+          .fniv = gen_mla_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_mla32_i32,
+          .fniv = gen_mla_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_mla64_i64,
+          .fniv = gen_mla_vec,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fni4 = gen_mls8_i32,
+          .fniv = gen_mls_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fni4 = gen_mls16_i32,
+          .fniv = gen_mls_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_mls32_i32,
+          .fniv = gen_mls_vec,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_mls64_i64,
+          .fniv = gen_mls_vec,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .load_dest = true,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_and_i32(d, a, b);
+    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
+}
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_and_i64(d, a, b);
+    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_and_vec(vece, d, a, b);
+    tcg_gen_dupi_vec(vece, a, 0);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+    static const GVecGen3 ops[4] = {
+        { .fni4 = gen_helper_neon_tst_u8,
+          .fniv = gen_cmtst_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fni4 = gen_helper_neon_tst_u16,
+          .fniv = gen_cmtst_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_cmtst_i32,
+          .fniv = gen_cmtst_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_cmtst_i64,
+          .fniv = gen_cmtst_vec,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
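
CMTST has no direct TCG opcode, hence the and + negsetcond sequence
above; per lane it is simply a "test any common bit" compare.  A scalar
sketch (illustrative only, not part of the patch):

    /* Illustrative: CMTST on a single 32-bit lane. */
    static inline uint32_t cmtst32(uint32_t a, uint32_t b)
    {
        return (a & b) != 0 ? ~0u : 0;  /* all-ones if any bit is shared */
    }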
+
+void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+    TCGv_i32 lval = tcg_temp_new_i32();
+    TCGv_i32 rval = tcg_temp_new_i32();
+    TCGv_i32 lsh = tcg_temp_new_i32();
+    TCGv_i32 rsh = tcg_temp_new_i32();
+    TCGv_i32 zero = tcg_constant_i32(0);
+    TCGv_i32 max = tcg_constant_i32(32);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i32(lsh, shift);
+    tcg_gen_neg_i32(rsh, lsh);
+    tcg_gen_shl_i32(lval, src, lsh);
+    tcg_gen_shr_i32(rval, src, rsh);
+    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
+    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+    TCGv_i64 lval = tcg_temp_new_i64();
+    TCGv_i64 rval = tcg_temp_new_i64();
+    TCGv_i64 lsh = tcg_temp_new_i64();
+    TCGv_i64 rsh = tcg_temp_new_i64();
+    TCGv_i64 zero = tcg_constant_i64(0);
+    TCGv_i64 max = tcg_constant_i64(64);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i64(lsh, shift);
+    tcg_gen_neg_i64(rsh, lsh);
+    tcg_gen_shl_i64(lval, src, lsh);
+    tcg_gen_shr_i64(rval, src, rsh);
+    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
+    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
+                         TCGv_vec src, TCGv_vec shift)
+{
+    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+    TCGv_vec msk, max;
+
+    tcg_gen_neg_vec(vece, rsh, shift);
+    if (vece == MO_8) {
+        tcg_gen_mov_vec(lsh, shift);
+    } else {
+        msk = tcg_temp_new_vec_matching(dst);
+        tcg_gen_dupi_vec(vece, msk, 0xff);
+        tcg_gen_and_vec(vece, lsh, shift, msk);
+        tcg_gen_and_vec(vece, rsh, rsh, msk);
+    }
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_shlv_vec(vece, lval, src, lsh);
+    tcg_gen_shrv_vec(vece, rval, src, rsh);
+
+    max = tcg_temp_new_vec_matching(dst);
+    tcg_gen_dupi_vec(vece, max, 8 << vece);
+
+    /*
+     * The choice of LT (signed) and GEU (unsigned) are biased toward
+     * the instructions of the x86_64 host.  For MO_8, the whole byte
+     * is significant so we must use an unsigned compare; otherwise we
+     * have already masked to a byte and so a signed compare works.
+     * Other tcg hosts have a full set of comparisons and do not care.
+     */
+    if (vece == MO_8) {
+        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
+        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
+        tcg_gen_andc_vec(vece, lval, lval, lsh);
+        tcg_gen_andc_vec(vece, rval, rval, rsh);
+    } else {
+        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
+        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
+        tcg_gen_and_vec(vece, lval, lval, lsh);
+        tcg_gen_and_vec(vece, rval, rval, rsh);
+    }
+    tcg_gen_or_vec(vece, dst, lval, rval);
+}
+
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_neg_vec, INDEX_op_shlv_vec,
+        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_ushl_vec,
+          .fno = gen_helper_gvec_ushl_b,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = gen_ushl_vec,
+          .fno = gen_helper_gvec_ushl_h,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_ushl_i32,
+          .fniv = gen_ushl_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_ushl_i64,
+          .fniv = gen_ushl_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
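
Worth noting for review: the shift operand of USHL is a per-lane signed
byte, so a single instruction can shift either left or right depending
on the sign, and any count whose magnitude reaches the element size
must produce zero rather than depending on host shift behaviour.  That
is what the movcond/cmp_vec selection above implements.  Scalar sketch
(illustrative only, not part of the patch):

    /* Illustrative: USHL on a single 32-bit lane. */
    static inline uint32_t ushl32(uint32_t src, int8_t sh)
    {
        if (sh <= -32 || sh >= 32) {
            return 0;                   /* out-of-range shifts give 0 */
        }
        return sh >= 0 ? src << sh : src >> -sh;
    }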
+
+void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+    TCGv_i32 lval = tcg_temp_new_i32();
+    TCGv_i32 rval = tcg_temp_new_i32();
+    TCGv_i32 lsh = tcg_temp_new_i32();
+    TCGv_i32 rsh = tcg_temp_new_i32();
+    TCGv_i32 zero = tcg_constant_i32(0);
+    TCGv_i32 max = tcg_constant_i32(31);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i32(lsh, shift);
+    tcg_gen_neg_i32(rsh, lsh);
+    tcg_gen_shl_i32(lval, src, lsh);
+    tcg_gen_umin_i32(rsh, rsh, max);
+    tcg_gen_sar_i32(rval, src, rsh);
+    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
+    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+    TCGv_i64 lval = tcg_temp_new_i64();
+    TCGv_i64 rval = tcg_temp_new_i64();
+    TCGv_i64 lsh = tcg_temp_new_i64();
+    TCGv_i64 rsh = tcg_temp_new_i64();
+    TCGv_i64 zero = tcg_constant_i64(0);
+    TCGv_i64 max = tcg_constant_i64(63);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i64(lsh, shift);
+    tcg_gen_neg_i64(rsh, lsh);
+    tcg_gen_shl_i64(lval, src, lsh);
+    tcg_gen_umin_i64(rsh, rsh, max);
+    tcg_gen_sar_i64(rval, src, rsh);
+    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
+    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
+                         TCGv_vec src, TCGv_vec shift)
+{
+    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_neg_vec(vece, rsh, shift);
+    if (vece == MO_8) {
+        tcg_gen_mov_vec(lsh, shift);
+    } else {
+        tcg_gen_dupi_vec(vece, tmp, 0xff);
+        tcg_gen_and_vec(vece, lsh, shift, tmp);
+        tcg_gen_and_vec(vece, rsh, rsh, tmp);
+    }
+
+    /* Bound rsh so out of bound right shift gets -1. */
+    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
+    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
+    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+
+    tcg_gen_shlv_vec(vece, lval, src, lsh);
+    tcg_gen_sarv_vec(vece, rval, src, rsh);
+
+    /* Select in-bound left shift. */
+    tcg_gen_andc_vec(vece, lval, lval, tmp);
+
+    /* Select between left and right shift. */
+    if (vece == MO_8) {
+        tcg_gen_dupi_vec(vece, tmp, 0);
+        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+    } else {
+        tcg_gen_dupi_vec(vece, tmp, 0x80);
+        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+    }
+}
+
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_sshl_vec,
+          .fno = gen_helper_gvec_sshl_b,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = gen_sshl_vec,
+          .fno = gen_helper_gvec_sshl_h,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_sshl_i32,
+          .fniv = gen_sshl_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_sshl_i64,
+          .fniv = gen_sshl_vec,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                          TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    tcg_gen_add_vec(vece, x, a, b);
+    tcg_gen_usadd_vec(vece, t, a, b);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+    tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+    };
+    static const GVecGen4 ops[4] = {
+        { .fniv = gen_uqadd_vec,
+          .fno = gen_helper_gvec_uqadd_b,
+          .write_aofs = true,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = gen_uqadd_vec,
+          .fno = gen_helper_gvec_uqadd_h,
+          .write_aofs = true,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fniv = gen_uqadd_vec,
+          .fno = gen_helper_gvec_uqadd_s,
+          .write_aofs = true,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fniv = gen_uqadd_vec,
+          .fno = gen_helper_gvec_uqadd_d,
+          .write_aofs = true,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                          TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    tcg_gen_add_vec(vece, x, a, b);
+    tcg_gen_ssadd_vec(vece, t, a, b);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+    tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+    };
+    static const GVecGen4 ops[4] = {
+        { .fniv = gen_sqadd_vec,
+          .fno = gen_helper_gvec_sqadd_b,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_8 },
+        { .fniv = gen_sqadd_vec,
+          .fno = gen_helper_gvec_sqadd_h,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_16 },
+        { .fniv = gen_sqadd_vec,
+          .fno = gen_helper_gvec_sqadd_s,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_32 },
+        { .fniv = gen_sqadd_vec,
+          .fno = gen_helper_gvec_sqadd_d,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                          TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    tcg_gen_sub_vec(vece, x, a, b);
+    tcg_gen_ussub_vec(vece, t, a, b);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+    tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+    };
+    static const GVecGen4 ops[4] = {
+        { .fniv = gen_uqsub_vec,
+          .fno = gen_helper_gvec_uqsub_b,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_8 },
+        { .fniv = gen_uqsub_vec,
+          .fno = gen_helper_gvec_uqsub_h,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_16 },
+        { .fniv = gen_uqsub_vec,
+          .fno = gen_helper_gvec_uqsub_s,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_32 },
+        { .fniv = gen_uqsub_vec,
+          .fno = gen_helper_gvec_uqsub_d,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+                          TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    tcg_gen_sub_vec(vece, x, a, b);
+    tcg_gen_sssub_vec(vece, t, a, b);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+    tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+    };
+    static const GVecGen4 ops[4] = {
+        { .fniv = gen_sqsub_vec,
+          .fno = gen_helper_gvec_sqsub_b,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_8 },
+        { .fniv = gen_sqsub_vec,
+          .fno = gen_helper_gvec_sqsub_h,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_16 },
+        { .fniv = gen_sqsub_vec,
+          .fno = gen_helper_gvec_sqsub_s,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_32 },
+        { .fniv = gen_sqsub_vec,
+          .fno = gen_helper_gvec_sqsub_d,
+          .opt_opc = vecop_list,
+          .write_aofs = true,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
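
All four saturating expansions above follow the same pattern: compute
the wrapping result and the saturating result, then OR their lanewise
inequality into vfp.qc, so QC ends up non-zero iff some lane saturated.
Scalar sketch of one lane (illustrative only, not part of the patch):

    /* Illustrative: UQADD on one 8-bit lane, accumulating QC. */
    static inline uint8_t uqadd8(uint8_t a, uint8_t b, bool *qc)
    {
        unsigned sum = a + b;
        uint8_t sat = sum > 0xff ? 0xff : sum;
        *qc |= sat != (uint8_t)sum;     /* wrapped != saturated */
        return sat;
    }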
+
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_sub_i32(t, a, b);
+    tcg_gen_sub_i32(d, b, a);
+    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_sub_i64(t, a, b);
+    tcg_gen_sub_i64(d, b, a);
+    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+    tcg_gen_smin_vec(vece, t, a, b);
+    tcg_gen_smax_vec(vece, d, a, b);
+    tcg_gen_sub_vec(vece, d, d, t);
+}
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_sabd_vec,
+          .fno = gen_helper_gvec_sabd_b,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = gen_sabd_vec,
+          .fno = gen_helper_gvec_sabd_h,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_sabd_i32,
+          .fniv = gen_sabd_vec,
+          .fno = gen_helper_gvec_sabd_s,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_sabd_i64,
+          .fniv = gen_sabd_vec,
+          .fno = gen_helper_gvec_sabd_d,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_sub_i32(t, a, b);
+    tcg_gen_sub_i32(d, b, a);
+    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_sub_i64(t, a, b);
+    tcg_gen_sub_i64(d, b, a);
+    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+    tcg_gen_umin_vec(vece, t, a, b);
+    tcg_gen_umax_vec(vece, d, a, b);
+    tcg_gen_sub_vec(vece, d, d, t);
+}
+
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_uabd_vec,
+          .fno = gen_helper_gvec_uabd_b,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = gen_uabd_vec,
+          .fno = gen_helper_gvec_uabd_h,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = gen_uabd_i32,
+          .fniv = gen_uabd_vec,
+          .fno = gen_helper_gvec_uabd_s,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = gen_uabd_i64,
+          .fniv = gen_uabd_vec,
+          .fno = gen_helper_gvec_uabd_d,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .opt_opc = vecop_list,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
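
The vector forms above compute the absolute difference branchlessly as
max(a,b) - min(a,b).  A scalar equivalent (illustrative only, not part
of the patch):

    /* Illustrative: unsigned absolute difference of one lane. */
    static inline uint32_t uabd32(uint32_t a, uint32_t b)
    {
        return a > b ? a - b : b - a;   /* == umax(a,b) - umin(a,b) */
    }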
+
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+    gen_sabd_i32(t, a, b);
+    tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+    gen_sabd_i64(t, a, b);
+    tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+    gen_sabd_vec(vece, t, a, b);
+    tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_sub_vec, INDEX_op_add_vec,
+        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_saba_vec,
+          .fno = gen_helper_gvec_saba_b,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_8 },
+        { .fniv = gen_saba_vec,
+          .fno = gen_helper_gvec_saba_h,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_16 },
+        { .fni4 = gen_saba_i32,
+          .fniv = gen_saba_vec,
+          .fno = gen_helper_gvec_saba_s,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_32 },
+        { .fni8 = gen_saba_i64,
+          .fniv = gen_saba_vec,
+          .fno = gen_helper_gvec_saba_d,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+    gen_uabd_i32(t, a, b);
+    tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+    gen_uabd_i64(t, a, b);
+    tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+    gen_uabd_vec(vece, t, a, b);
+    tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_sub_vec, INDEX_op_add_vec,
+        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+    };
+    static const GVecGen3 ops[4] = {
+        { .fniv = gen_uaba_vec,
+          .fno = gen_helper_gvec_uaba_b,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_8 },
+        { .fniv = gen_uaba_vec,
+          .fno = gen_helper_gvec_uaba_h,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_16 },
+        { .fni4 = gen_uaba_i32,
+          .fniv = gen_uaba_vec,
+          .fno = gen_helper_gvec_uaba_s,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_32 },
+        { .fni8 = gen_uaba_i64,
+          .fniv = gen_uaba_vec,
+          .fno = gen_helper_gvec_uaba_d,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .opt_opc = vecop_list,
+          .load_dest = true,
+          .vece = MO_64 },
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
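
The MVE scalar-shift hunk that follows (apparently from the MVE
shifts-by-immediate series) has one non-obvious decode detail: the
shift-immediate field cannot encode a count of 32, so an encoded shim
of 0 stands for 32, which do_mve_sh_ri() below implements by rewriting
a->shim before calling the shift generator.  A sketch of that rule
(illustrative only, not part of the patch):

    /* Illustrative: decode of the MVE shift-immediate field. */
    static inline int mve_decode_shim(int shim)
    {
        return shim == 0 ? 32 : shim;   /* 0 encodes a shift of 32 */
    }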
 }
 
+static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
+{
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+        return false;
+    }
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+        a->rda == 13 || a->rda == 15) {
+        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
+        unallocated_encoding(s);
+        return true;
+    }
+
+    if (a->shim == 0) {
+        a->shim = 32;
+    }
+    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
+
+    return true;
+}
+
+static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+    return do_mve_sh_ri(s, a, gen_urshr32_i32);
+}
+
+static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+    return do_mve_sh_ri(s, a, gen_srshr32_i32);
+}
+
+static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+    return do_mve_sh_ri(s, a, gen_mve_sqshl);
+}
+
+static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+    return do_mve_sh_ri(s, a, gen_mve_uqshl);
+}
+
 /*
  * Multiply and multiply accumulate
  */
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
     gen_rfe(s, pc, load_cpu_field(spsr));
 }
 
-static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
-                            uint32_t opr_sz, uint32_t max_sz,
-                            gen_helper_gvec_3_ptr *fn)
-{
-    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
-
-    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
-    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
-                       opr_sz, max_sz, 0, fn);
-}
-
-void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static gen_helper_gvec_3_ptr * const fns[2] = {
-        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
-    };
-    tcg_debug_assert(vece >= 1 && vece <= 2);
-    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
-}
-
-void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static gen_helper_gvec_3_ptr * const fns[2] = {
-        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
-    };
-    tcg_debug_assert(vece >= 1 && vece <= 2);
-    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
-}
-
-#define GEN_CMP0(NAME, COND)                                      \
-    void NAME(unsigned vece, uint32_t d, uint32_t m,              \
-              uint32_t opr_sz, uint32_t max_sz)                   \
-    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
-
-GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
-GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
-GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
-GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
-GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
-
-#undef GEN_CMP0
-
-static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_sari_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_sari_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_sari_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sari_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_ssra8_i64,
-          .fniv = gen_ssra_vec,
-          .fno = gen_helper_gvec_ssra_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_ssra16_i64,
-          .fniv = gen_ssra_vec,
-          .fno = gen_helper_gvec_ssra_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_ssra32_i32,
-          .fniv = gen_ssra_vec,
-          .fno = gen_helper_gvec_ssra_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_ssra64_i64,
-          .fniv = gen_ssra_vec,
-          .fno = gen_helper_gvec_ssra_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize]. */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    /*
-     * Shifts larger than the element size are architecturally valid.
-     * Signed results in all sign bits.
-     */
-    shift = MIN(shift, (8 << vece) - 1);
-    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-}
-
-static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_shri_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_usra8_i64,
-          .fniv = gen_usra_vec,
-          .fno = gen_helper_gvec_usra_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8, },
-        { .fni8 = gen_usra16_i64,
-          .fniv = gen_usra_vec,
-          .fno = gen_helper_gvec_usra_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16, },
-        { .fni4 = gen_usra32_i32,
-          .fniv = gen_usra_vec,
-          .fno = gen_helper_gvec_usra_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32, },
-        { .fni8 = gen_usra64_i64,
-          .fniv = gen_usra_vec,
-          .fno = gen_helper_gvec_usra_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64, },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize]. */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    /*
-     * Shifts larger than the element size are architecturally valid.
-     * Unsigned results in all zeros as input to accumulate: nop.
-     */
-    if (shift < (8 << vece)) {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    } else {
-        /* Nop, but we do need to clear the tail. */
-        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
-    }
-}
-
-/*
- * Shift one less than the requested amount, and the low bit is
- * the rounding bit.  For the 8 and 16-bit operations, because we
- * mask the low bit, we can perform a normal integer shift instead
- * of a vector shift.
- */
-static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
-    tcg_gen_vec_sar8i_i64(d, a, sh);
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
-    tcg_gen_vec_sar16i_i64(d, a, sh);
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t;
-
-    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
-    if (sh == 32) {
-        tcg_gen_movi_i32(d, 0);
-        return;
-    }
-    t = tcg_temp_new_i32();
-    tcg_gen_extract_i32(t, a, sh - 1, 1);
-    tcg_gen_sari_i32(d, a, sh);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_extract_i64(t, a, sh - 1, 1);
-    tcg_gen_sari_i64(d, a, sh);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec ones = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_shri_vec(vece, t, a, sh - 1);
-    tcg_gen_dupi_vec(vece, ones, 1);
-    tcg_gen_and_vec(vece, t, t, ones);
-    tcg_gen_sari_vec(vece, d, a, sh);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_srshr8_i64,
-          .fniv = gen_srshr_vec,
-          .fno = gen_helper_gvec_srshr_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_srshr16_i64,
-          .fniv = gen_srshr_vec,
-          .fno = gen_helper_gvec_srshr_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_srshr32_i32,
-          .fniv = gen_srshr_vec,
-          .fno = gen_helper_gvec_srshr_s,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_srshr64_i64,
-          .fniv = gen_srshr_vec,
-          .fno = gen_helper_gvec_srshr_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    if (shift == (8 << vece)) {
-        /*
-         * Shifts larger than the element size are architecturally valid.
-         * Signed results in all sign bits.  With rounding, this produces
-         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
-         * I.e. always zero.
-         */
-        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
-
-static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    gen_srshr8_i64(t, a, sh);
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    gen_srshr16_i64(t, a, sh);
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    gen_srshr32_i32(t, a, sh);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    gen_srshr64_i64(t, a, sh);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    gen_srshr_vec(vece, t, a, sh);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_srsra8_i64,
-          .fniv = gen_srsra_vec,
-          .fno = gen_helper_gvec_srsra_b,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni8 = gen_srsra16_i64,
-          .fniv = gen_srsra_vec,
-          .fno = gen_helper_gvec_srsra_h,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_srsra32_i32,
-          .fniv = gen_srsra_vec,
-          .fno = gen_helper_gvec_srsra_s,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_srsra64_i64,
-          .fniv = gen_srsra_vec,
-          .fno = gen_helper_gvec_srsra_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    /*
-     * Shifts larger than the element size are architecturally valid.
-     * Signed results in all sign bits.  With rounding, this produces
-     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
-     * I.e. always zero.  With accumulation, this leaves D unchanged.
-     */
-    if (shift == (8 << vece)) {
-        /* Nop, but we do need to clear the tail. */
-        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
-
-static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
-    tcg_gen_vec_shr8i_i64(d, a, sh);
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
-    tcg_gen_vec_shr16i_i64(d, a, sh);
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t;
-
-    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
-    if (sh == 32) {
-        tcg_gen_extract_i32(d, a, sh - 1, 1);
-        return;
-    }
-    t = tcg_temp_new_i32();
-    tcg_gen_extract_i32(t, a, sh - 1, 1);
-    tcg_gen_shri_i32(d, a, sh);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_extract_i64(t, a, sh - 1, 1);
-    tcg_gen_shri_i64(d, a, sh);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec ones = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_shri_vec(vece, t, a, shift - 1);
-    tcg_gen_dupi_vec(vece, ones, 1);
-    tcg_gen_and_vec(vece, t, t, ones);
-    tcg_gen_shri_vec(vece, d, a, shift);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_urshr8_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_urshr16_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_urshr32_i32,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_s,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_urshr64_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    if (shift == (8 << vece)) {
-        /*
-         * Shifts larger than the element size are architecturally valid.
-         * Unsigned results in zero.  With rounding, this produces a
-         * copy of the most significant bit.
-         */
-        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
-
-static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 8) {
-        tcg_gen_vec_shr8i_i64(t, a, 7);
-    } else {
-        gen_urshr8_i64(t, a, sh);
-    }
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 16) {
-        tcg_gen_vec_shr16i_i64(t, a, 15);
-    } else {
-        gen_urshr16_i64(t, a, sh);
-    }
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    if (sh == 32) {
-        tcg_gen_shri_i32(t, a, 31);
-    } else {
-        gen_urshr32_i32(t, a, sh);
-    }
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 64) {
-        tcg_gen_shri_i64(t, a, 63);
-    } else {
-        gen_urshr64_i64(t, a, sh);
-    }
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    if (sh == (8 << vece)) {
-        tcg_gen_shri_vec(vece, t, a, sh - 1);
-    } else {
-        gen_urshr_vec(vece, t, a, sh);
-    }
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_ursra8_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_b,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni8 = gen_ursra16_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_h,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_ursra32_i32,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_s,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_ursra64_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
-    tcg_gen_shri_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
-    const GVecGen2i ops[4] = {
-        { .fni8 = gen_shr8_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_shr16_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_shr32_ins_i32,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_shr64_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize]. */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    /* Shift of esize leaves destination unchanged. */
-    if (shift < (8 << vece)) {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    } else {
-        /* Nop, but we do need to clear the tail. */
-        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
-    }
-}
-
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_shli_vec(vece, t, a, sh);
-    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
-    const GVecGen2i ops[4] = {
-        { .fni8 = gen_shl8_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_shl16_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_shl32_ins_i32,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_shl64_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [0..esize-1]. */
-    tcg_debug_assert(shift >= 0);
-    tcg_debug_assert(shift < (8 << vece));
-
-    if (shift == 0) {
-        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
-
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_sub_vec(vece, d, d, a);
-}
-
-/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
- * these tables are shared with AArch64 which does support them.
- */
-void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_mul_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_mla8_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni4 = gen_mla16_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_mla32_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_mla64_i64,
-          .fniv = gen_mla_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_mls8_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni4 = gen_mls16_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_mls32_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_mls64_i64,
-          .fniv = gen_mls_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_and_i32(d, a, b);
-    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
-}
-
-void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_and_i64(d, a, b);
-    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_and_vec(vece, d, a, b);
-    tcg_gen_dupi_vec(vece, a, 0);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
-void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_helper_neon_tst_u8,
-          .fniv = gen_cmtst_vec,
2592
- .opt_opc = vecop_list,
2593
- .vece = MO_8 },
2594
- { .fni4 = gen_helper_neon_tst_u16,
2595
- .fniv = gen_cmtst_vec,
2596
- .opt_opc = vecop_list,
2597
- .vece = MO_16 },
2598
- { .fni4 = gen_cmtst_i32,
2599
- .fniv = gen_cmtst_vec,
2600
- .opt_opc = vecop_list,
2601
- .vece = MO_32 },
2602
- { .fni8 = gen_cmtst_i64,
2603
- .fniv = gen_cmtst_vec,
2604
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
2605
- .opt_opc = vecop_list,
2606
- .vece = MO_64 },
2607
- };
2608
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2609
-}
2610
-
2611
-void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
2612
-{
2613
- TCGv_i32 lval = tcg_temp_new_i32();
2614
- TCGv_i32 rval = tcg_temp_new_i32();
2615
- TCGv_i32 lsh = tcg_temp_new_i32();
2616
- TCGv_i32 rsh = tcg_temp_new_i32();
2617
- TCGv_i32 zero = tcg_constant_i32(0);
2618
- TCGv_i32 max = tcg_constant_i32(32);
2619
-
2620
- /*
2621
- * Rely on the TCG guarantee that out of range shifts produce
2622
- * unspecified results, not undefined behaviour (i.e. no trap).
2623
- * Discard out-of-range results after the fact.
2624
- */
2625
- tcg_gen_ext8s_i32(lsh, shift);
2626
- tcg_gen_neg_i32(rsh, lsh);
2627
- tcg_gen_shl_i32(lval, src, lsh);
2628
- tcg_gen_shr_i32(rval, src, rsh);
2629
- tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
2630
- tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
2631
-}
2632
-
2633
-void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
2634
-{
2635
- TCGv_i64 lval = tcg_temp_new_i64();
2636
- TCGv_i64 rval = tcg_temp_new_i64();
2637
- TCGv_i64 lsh = tcg_temp_new_i64();
2638
- TCGv_i64 rsh = tcg_temp_new_i64();
2639
- TCGv_i64 zero = tcg_constant_i64(0);
2640
- TCGv_i64 max = tcg_constant_i64(64);
2641
-
2642
- /*
2643
- * Rely on the TCG guarantee that out of range shifts produce
2644
- * unspecified results, not undefined behaviour (i.e. no trap).
2645
- * Discard out-of-range results after the fact.
2646
- */
2647
- tcg_gen_ext8s_i64(lsh, shift);
2648
- tcg_gen_neg_i64(rsh, lsh);
2649
- tcg_gen_shl_i64(lval, src, lsh);
2650
- tcg_gen_shr_i64(rval, src, rsh);
2651
- tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
2652
- tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
2653
-}
2654
-
2655
-static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
2656
- TCGv_vec src, TCGv_vec shift)
2657
-{
2658
- TCGv_vec lval = tcg_temp_new_vec_matching(dst);
2659
- TCGv_vec rval = tcg_temp_new_vec_matching(dst);
2660
- TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
2661
- TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
2662
- TCGv_vec msk, max;
2663
-
2664
- tcg_gen_neg_vec(vece, rsh, shift);
2665
- if (vece == MO_8) {
2666
- tcg_gen_mov_vec(lsh, shift);
2667
- } else {
2668
- msk = tcg_temp_new_vec_matching(dst);
2669
- tcg_gen_dupi_vec(vece, msk, 0xff);
2670
- tcg_gen_and_vec(vece, lsh, shift, msk);
2671
- tcg_gen_and_vec(vece, rsh, rsh, msk);
2672
- }
2673
-
2674
- /*
2675
- * Rely on the TCG guarantee that out of range shifts produce
2676
- * unspecified results, not undefined behaviour (i.e. no trap).
2677
- * Discard out-of-range results after the fact.
2678
- */
2679
- tcg_gen_shlv_vec(vece, lval, src, lsh);
2680
- tcg_gen_shrv_vec(vece, rval, src, rsh);
2681
-
2682
- max = tcg_temp_new_vec_matching(dst);
2683
- tcg_gen_dupi_vec(vece, max, 8 << vece);
2684
-
2685
- /*
2686
- * The choice of LT (signed) and GEU (unsigned) are biased toward
2687
- * the instructions of the x86_64 host. For MO_8, the whole byte
2688
- * is significant so we must use an unsigned compare; otherwise we
2689
- * have already masked to a byte and so a signed compare works.
2690
- * Other tcg hosts have a full set of comparisons and do not care.
2691
- */
2692
- if (vece == MO_8) {
2693
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
2694
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
2695
- tcg_gen_andc_vec(vece, lval, lval, lsh);
2696
- tcg_gen_andc_vec(vece, rval, rval, rsh);
2697
- } else {
2698
- tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
2699
- tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
2700
- tcg_gen_and_vec(vece, lval, lval, lsh);
2701
- tcg_gen_and_vec(vece, rval, rval, rsh);
2702
- }
2703
- tcg_gen_or_vec(vece, dst, lval, rval);
2704
-}
2705
-
2706
-void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2707
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2708
-{
2709
- static const TCGOpcode vecop_list[] = {
2710
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
2711
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
2712
- };
2713
- static const GVecGen3 ops[4] = {
2714
- { .fniv = gen_ushl_vec,
2715
- .fno = gen_helper_gvec_ushl_b,
2716
- .opt_opc = vecop_list,
2717
- .vece = MO_8 },
2718
- { .fniv = gen_ushl_vec,
2719
- .fno = gen_helper_gvec_ushl_h,
2720
- .opt_opc = vecop_list,
2721
- .vece = MO_16 },
2722
- { .fni4 = gen_ushl_i32,
2723
- .fniv = gen_ushl_vec,
2724
- .opt_opc = vecop_list,
2725
- .vece = MO_32 },
2726
- { .fni8 = gen_ushl_i64,
2727
- .fniv = gen_ushl_vec,
2728
- .opt_opc = vecop_list,
2729
- .vece = MO_64 },
2730
- };
2731
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2732
-}
2733
-
2734
-void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
2735
-{
2736
- TCGv_i32 lval = tcg_temp_new_i32();
2737
- TCGv_i32 rval = tcg_temp_new_i32();
2738
- TCGv_i32 lsh = tcg_temp_new_i32();
2739
- TCGv_i32 rsh = tcg_temp_new_i32();
2740
- TCGv_i32 zero = tcg_constant_i32(0);
2741
- TCGv_i32 max = tcg_constant_i32(31);
2742
-
2743
- /*
2744
- * Rely on the TCG guarantee that out of range shifts produce
2745
- * unspecified results, not undefined behaviour (i.e. no trap).
2746
- * Discard out-of-range results after the fact.
2747
- */
2748
- tcg_gen_ext8s_i32(lsh, shift);
2749
- tcg_gen_neg_i32(rsh, lsh);
2750
- tcg_gen_shl_i32(lval, src, lsh);
2751
- tcg_gen_umin_i32(rsh, rsh, max);
2752
- tcg_gen_sar_i32(rval, src, rsh);
2753
- tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
2754
- tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
2755
-}
2756
-
2757
-void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
2758
-{
2759
- TCGv_i64 lval = tcg_temp_new_i64();
2760
- TCGv_i64 rval = tcg_temp_new_i64();
2761
- TCGv_i64 lsh = tcg_temp_new_i64();
2762
- TCGv_i64 rsh = tcg_temp_new_i64();
2763
- TCGv_i64 zero = tcg_constant_i64(0);
2764
- TCGv_i64 max = tcg_constant_i64(63);
2765
-
2766
- /*
2767
- * Rely on the TCG guarantee that out of range shifts produce
2768
- * unspecified results, not undefined behaviour (i.e. no trap).
2769
- * Discard out-of-range results after the fact.
2770
- */
2771
- tcg_gen_ext8s_i64(lsh, shift);
2772
- tcg_gen_neg_i64(rsh, lsh);
2773
- tcg_gen_shl_i64(lval, src, lsh);
2774
- tcg_gen_umin_i64(rsh, rsh, max);
2775
- tcg_gen_sar_i64(rval, src, rsh);
2776
- tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
2777
- tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
2778
-}
2779
-
2780
-static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
2781
- TCGv_vec src, TCGv_vec shift)
2782
-{
2783
- TCGv_vec lval = tcg_temp_new_vec_matching(dst);
2784
- TCGv_vec rval = tcg_temp_new_vec_matching(dst);
2785
- TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
2786
- TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
2787
- TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
2788
-
2789
- /*
2790
- * Rely on the TCG guarantee that out of range shifts produce
2791
- * unspecified results, not undefined behaviour (i.e. no trap).
2792
- * Discard out-of-range results after the fact.
2793
- */
2794
- tcg_gen_neg_vec(vece, rsh, shift);
2795
- if (vece == MO_8) {
2796
- tcg_gen_mov_vec(lsh, shift);
2797
- } else {
2798
- tcg_gen_dupi_vec(vece, tmp, 0xff);
2799
- tcg_gen_and_vec(vece, lsh, shift, tmp);
2800
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
2801
- }
2802
-
2803
- /* Bound rsh so out of bound right shift gets -1. */
2804
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
2805
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
2806
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
2807
-
2808
- tcg_gen_shlv_vec(vece, lval, src, lsh);
2809
- tcg_gen_sarv_vec(vece, rval, src, rsh);
2810
-
2811
- /* Select in-bound left shift. */
2812
- tcg_gen_andc_vec(vece, lval, lval, tmp);
2813
-
2814
- /* Select between left and right shift. */
2815
- if (vece == MO_8) {
2816
- tcg_gen_dupi_vec(vece, tmp, 0);
2817
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
2818
- } else {
2819
- tcg_gen_dupi_vec(vece, tmp, 0x80);
2820
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
2821
- }
2822
-}
2823
-
2824
-void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2825
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2826
-{
2827
- static const TCGOpcode vecop_list[] = {
2828
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
2829
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
2830
- };
2831
- static const GVecGen3 ops[4] = {
2832
- { .fniv = gen_sshl_vec,
2833
- .fno = gen_helper_gvec_sshl_b,
2834
- .opt_opc = vecop_list,
2835
- .vece = MO_8 },
2836
- { .fniv = gen_sshl_vec,
2837
- .fno = gen_helper_gvec_sshl_h,
2838
- .opt_opc = vecop_list,
2839
- .vece = MO_16 },
2840
- { .fni4 = gen_sshl_i32,
2841
- .fniv = gen_sshl_vec,
2842
- .opt_opc = vecop_list,
2843
- .vece = MO_32 },
2844
- { .fni8 = gen_sshl_i64,
2845
- .fniv = gen_sshl_vec,
2846
- .opt_opc = vecop_list,
2847
- .vece = MO_64 },
2848
- };
2849
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2850
-}
2851
-
2852
-static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
2853
- TCGv_vec a, TCGv_vec b)
2854
-{
2855
- TCGv_vec x = tcg_temp_new_vec_matching(t);
2856
- tcg_gen_add_vec(vece, x, a, b);
2857
- tcg_gen_usadd_vec(vece, t, a, b);
2858
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
2859
- tcg_gen_or_vec(vece, sat, sat, x);
2860
-}
2861
-
2862
-void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2863
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2864
-{
2865
- static const TCGOpcode vecop_list[] = {
2866
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
2867
- };
2868
- static const GVecGen4 ops[4] = {
2869
- { .fniv = gen_uqadd_vec,
2870
- .fno = gen_helper_gvec_uqadd_b,
2871
- .write_aofs = true,
2872
- .opt_opc = vecop_list,
2873
- .vece = MO_8 },
2874
- { .fniv = gen_uqadd_vec,
2875
- .fno = gen_helper_gvec_uqadd_h,
2876
- .write_aofs = true,
2877
- .opt_opc = vecop_list,
2878
- .vece = MO_16 },
2879
- { .fniv = gen_uqadd_vec,
2880
- .fno = gen_helper_gvec_uqadd_s,
2881
- .write_aofs = true,
2882
- .opt_opc = vecop_list,
2883
- .vece = MO_32 },
2884
- { .fniv = gen_uqadd_vec,
2885
- .fno = gen_helper_gvec_uqadd_d,
2886
- .write_aofs = true,
2887
- .opt_opc = vecop_list,
2888
- .vece = MO_64 },
2889
- };
2890
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
2891
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2892
-}
2893
-
2894
-static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
2895
- TCGv_vec a, TCGv_vec b)
2896
-{
2897
- TCGv_vec x = tcg_temp_new_vec_matching(t);
2898
- tcg_gen_add_vec(vece, x, a, b);
2899
- tcg_gen_ssadd_vec(vece, t, a, b);
2900
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
2901
- tcg_gen_or_vec(vece, sat, sat, x);
2902
-}
2903
-
2904
-void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2905
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2906
-{
2907
- static const TCGOpcode vecop_list[] = {
2908
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
2909
- };
2910
- static const GVecGen4 ops[4] = {
2911
- { .fniv = gen_sqadd_vec,
2912
- .fno = gen_helper_gvec_sqadd_b,
2913
- .opt_opc = vecop_list,
2914
- .write_aofs = true,
2915
- .vece = MO_8 },
2916
- { .fniv = gen_sqadd_vec,
2917
- .fno = gen_helper_gvec_sqadd_h,
2918
- .opt_opc = vecop_list,
2919
- .write_aofs = true,
2920
- .vece = MO_16 },
2921
- { .fniv = gen_sqadd_vec,
2922
- .fno = gen_helper_gvec_sqadd_s,
2923
- .opt_opc = vecop_list,
2924
- .write_aofs = true,
2925
- .vece = MO_32 },
2926
- { .fniv = gen_sqadd_vec,
2927
- .fno = gen_helper_gvec_sqadd_d,
2928
- .opt_opc = vecop_list,
2929
- .write_aofs = true,
2930
- .vece = MO_64 },
2931
- };
2932
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
2933
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2934
-}
2935
-
2936
-static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
2937
- TCGv_vec a, TCGv_vec b)
2938
-{
2939
- TCGv_vec x = tcg_temp_new_vec_matching(t);
2940
- tcg_gen_sub_vec(vece, x, a, b);
2941
- tcg_gen_ussub_vec(vece, t, a, b);
2942
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
2943
- tcg_gen_or_vec(vece, sat, sat, x);
2944
-}
2945
-
2946
-void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2947
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2948
-{
2949
- static const TCGOpcode vecop_list[] = {
2950
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
2951
- };
2952
- static const GVecGen4 ops[4] = {
2953
- { .fniv = gen_uqsub_vec,
2954
- .fno = gen_helper_gvec_uqsub_b,
2955
- .opt_opc = vecop_list,
2956
- .write_aofs = true,
2957
- .vece = MO_8 },
2958
- { .fniv = gen_uqsub_vec,
2959
- .fno = gen_helper_gvec_uqsub_h,
2960
- .opt_opc = vecop_list,
2961
- .write_aofs = true,
2962
- .vece = MO_16 },
2963
- { .fniv = gen_uqsub_vec,
2964
- .fno = gen_helper_gvec_uqsub_s,
2965
- .opt_opc = vecop_list,
2966
- .write_aofs = true,
2967
- .vece = MO_32 },
2968
- { .fniv = gen_uqsub_vec,
2969
- .fno = gen_helper_gvec_uqsub_d,
2970
- .opt_opc = vecop_list,
2971
- .write_aofs = true,
2972
- .vece = MO_64 },
2973
- };
2974
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
2975
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
2976
-}
2977
-
2978
-static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
2979
- TCGv_vec a, TCGv_vec b)
2980
-{
2981
- TCGv_vec x = tcg_temp_new_vec_matching(t);
2982
- tcg_gen_sub_vec(vece, x, a, b);
2983
- tcg_gen_sssub_vec(vece, t, a, b);
2984
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
2985
- tcg_gen_or_vec(vece, sat, sat, x);
2986
-}
2987
-
2988
-void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2989
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2990
-{
2991
- static const TCGOpcode vecop_list[] = {
2992
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
2993
- };
2994
- static const GVecGen4 ops[4] = {
2995
- { .fniv = gen_sqsub_vec,
2996
- .fno = gen_helper_gvec_sqsub_b,
2997
- .opt_opc = vecop_list,
2998
- .write_aofs = true,
2999
- .vece = MO_8 },
3000
- { .fniv = gen_sqsub_vec,
3001
- .fno = gen_helper_gvec_sqsub_h,
3002
- .opt_opc = vecop_list,
3003
- .write_aofs = true,
3004
- .vece = MO_16 },
3005
- { .fniv = gen_sqsub_vec,
3006
- .fno = gen_helper_gvec_sqsub_s,
3007
- .opt_opc = vecop_list,
3008
- .write_aofs = true,
3009
- .vece = MO_32 },
3010
- { .fniv = gen_sqsub_vec,
3011
- .fno = gen_helper_gvec_sqsub_d,
3012
- .opt_opc = vecop_list,
3013
- .write_aofs = true,
3014
- .vece = MO_64 },
3015
- };
3016
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
3017
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3018
-}
3019
-
3020
-static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3021
-{
3022
- TCGv_i32 t = tcg_temp_new_i32();
3023
-
3024
- tcg_gen_sub_i32(t, a, b);
3025
- tcg_gen_sub_i32(d, b, a);
3026
- tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
3027
-}
3028
-
3029
-static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3030
-{
3031
- TCGv_i64 t = tcg_temp_new_i64();
3032
-
3033
- tcg_gen_sub_i64(t, a, b);
3034
- tcg_gen_sub_i64(d, b, a);
3035
- tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
3036
-}
3037
-
3038
-static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3039
-{
3040
- TCGv_vec t = tcg_temp_new_vec_matching(d);
3041
-
3042
- tcg_gen_smin_vec(vece, t, a, b);
3043
- tcg_gen_smax_vec(vece, d, a, b);
3044
- tcg_gen_sub_vec(vece, d, d, t);
3045
-}
3046
-
3047
-void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3048
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3049
-{
3050
- static const TCGOpcode vecop_list[] = {
3051
- INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
3052
- };
3053
- static const GVecGen3 ops[4] = {
3054
- { .fniv = gen_sabd_vec,
3055
- .fno = gen_helper_gvec_sabd_b,
3056
- .opt_opc = vecop_list,
3057
- .vece = MO_8 },
3058
- { .fniv = gen_sabd_vec,
3059
- .fno = gen_helper_gvec_sabd_h,
3060
- .opt_opc = vecop_list,
3061
- .vece = MO_16 },
3062
- { .fni4 = gen_sabd_i32,
3063
- .fniv = gen_sabd_vec,
3064
- .fno = gen_helper_gvec_sabd_s,
3065
- .opt_opc = vecop_list,
3066
- .vece = MO_32 },
3067
- { .fni8 = gen_sabd_i64,
3068
- .fniv = gen_sabd_vec,
3069
- .fno = gen_helper_gvec_sabd_d,
3070
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3071
- .opt_opc = vecop_list,
3072
- .vece = MO_64 },
3073
- };
3074
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3075
-}
3076
-
3077
-static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3078
-{
3079
- TCGv_i32 t = tcg_temp_new_i32();
3080
-
3081
- tcg_gen_sub_i32(t, a, b);
3082
- tcg_gen_sub_i32(d, b, a);
3083
- tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
3084
-}
3085
-
3086
-static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3087
-{
3088
- TCGv_i64 t = tcg_temp_new_i64();
3089
-
3090
- tcg_gen_sub_i64(t, a, b);
3091
- tcg_gen_sub_i64(d, b, a);
3092
- tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
3093
-}
3094
-
3095
-static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3096
-{
3097
- TCGv_vec t = tcg_temp_new_vec_matching(d);
3098
-
3099
- tcg_gen_umin_vec(vece, t, a, b);
3100
- tcg_gen_umax_vec(vece, d, a, b);
3101
- tcg_gen_sub_vec(vece, d, d, t);
3102
-}
3103
-
3104
-void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3105
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3106
-{
3107
- static const TCGOpcode vecop_list[] = {
3108
- INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
3109
- };
3110
- static const GVecGen3 ops[4] = {
3111
- { .fniv = gen_uabd_vec,
3112
- .fno = gen_helper_gvec_uabd_b,
3113
- .opt_opc = vecop_list,
3114
- .vece = MO_8 },
3115
- { .fniv = gen_uabd_vec,
3116
- .fno = gen_helper_gvec_uabd_h,
3117
- .opt_opc = vecop_list,
3118
- .vece = MO_16 },
3119
- { .fni4 = gen_uabd_i32,
3120
- .fniv = gen_uabd_vec,
3121
- .fno = gen_helper_gvec_uabd_s,
3122
- .opt_opc = vecop_list,
3123
- .vece = MO_32 },
3124
- { .fni8 = gen_uabd_i64,
3125
- .fniv = gen_uabd_vec,
3126
- .fno = gen_helper_gvec_uabd_d,
3127
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3128
- .opt_opc = vecop_list,
3129
- .vece = MO_64 },
3130
- };
3131
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3132
-}
3133
-
3134
-static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3135
-{
3136
- TCGv_i32 t = tcg_temp_new_i32();
3137
- gen_sabd_i32(t, a, b);
3138
- tcg_gen_add_i32(d, d, t);
3139
-}
3140
-
3141
-static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3142
-{
3143
- TCGv_i64 t = tcg_temp_new_i64();
3144
- gen_sabd_i64(t, a, b);
3145
- tcg_gen_add_i64(d, d, t);
3146
-}
3147
-
3148
-static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3149
-{
3150
- TCGv_vec t = tcg_temp_new_vec_matching(d);
3151
- gen_sabd_vec(vece, t, a, b);
3152
- tcg_gen_add_vec(vece, d, d, t);
3153
-}
3154
-
3155
-void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3156
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3157
-{
3158
- static const TCGOpcode vecop_list[] = {
3159
- INDEX_op_sub_vec, INDEX_op_add_vec,
3160
- INDEX_op_smin_vec, INDEX_op_smax_vec, 0
3161
- };
3162
- static const GVecGen3 ops[4] = {
3163
- { .fniv = gen_saba_vec,
3164
- .fno = gen_helper_gvec_saba_b,
3165
- .opt_opc = vecop_list,
3166
- .load_dest = true,
3167
- .vece = MO_8 },
3168
- { .fniv = gen_saba_vec,
3169
- .fno = gen_helper_gvec_saba_h,
3170
- .opt_opc = vecop_list,
3171
- .load_dest = true,
3172
- .vece = MO_16 },
3173
- { .fni4 = gen_saba_i32,
3174
- .fniv = gen_saba_vec,
3175
- .fno = gen_helper_gvec_saba_s,
3176
- .opt_opc = vecop_list,
3177
- .load_dest = true,
3178
- .vece = MO_32 },
3179
- { .fni8 = gen_saba_i64,
3180
- .fniv = gen_saba_vec,
3181
- .fno = gen_helper_gvec_saba_d,
3182
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3183
- .opt_opc = vecop_list,
3184
- .load_dest = true,
3185
- .vece = MO_64 },
3186
- };
3187
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3188
-}
3189
-
3190
-static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3191
-{
3192
- TCGv_i32 t = tcg_temp_new_i32();
3193
- gen_uabd_i32(t, a, b);
3194
- tcg_gen_add_i32(d, d, t);
3195
-}
3196
-
3197
-static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3198
-{
3199
- TCGv_i64 t = tcg_temp_new_i64();
3200
- gen_uabd_i64(t, a, b);
3201
- tcg_gen_add_i64(d, d, t);
3202
-}
3203
-
3204
-static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3205
-{
3206
- TCGv_vec t = tcg_temp_new_vec_matching(d);
3207
- gen_uabd_vec(vece, t, a, b);
3208
- tcg_gen_add_vec(vece, d, d, t);
3209
-}
3210
-
3211
-void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3212
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3213
-{
3214
- static const TCGOpcode vecop_list[] = {
3215
- INDEX_op_sub_vec, INDEX_op_add_vec,
3216
- INDEX_op_umin_vec, INDEX_op_umax_vec, 0
3217
- };
3218
- static const GVecGen3 ops[4] = {
3219
- { .fniv = gen_uaba_vec,
3220
- .fno = gen_helper_gvec_uaba_b,
3221
- .opt_opc = vecop_list,
3222
- .load_dest = true,
3223
- .vece = MO_8 },
3224
- { .fniv = gen_uaba_vec,
3225
- .fno = gen_helper_gvec_uaba_h,
3226
- .opt_opc = vecop_list,
3227
- .load_dest = true,
3228
- .vece = MO_16 },
3229
- { .fni4 = gen_uaba_i32,
3230
- .fniv = gen_uaba_vec,
3231
- .fno = gen_helper_gvec_uaba_s,
3232
- .opt_opc = vecop_list,
3233
- .load_dest = true,
3234
- .vece = MO_32 },
3235
- { .fni8 = gen_uaba_i64,
3236
- .fniv = gen_uaba_vec,
3237
- .fno = gen_helper_gvec_uaba_d,
3238
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3239
- .opt_opc = vecop_list,
3240
- .load_dest = true,
3241
- .vece = MO_64 },
3242
- };
3243
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3244
-}
3245
-
3246
static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
3247
{
3248
static const uint16_t mask[3] = {
3249
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
3250
index XXXXXXX..XXXXXXX 100644
3251
--- a/target/arm/tcg/meson.build
3252
+++ b/target/arm/tcg/meson.build
3253
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64)
3254
3255
arm_ss.add(files(
3256
'cpu32.c',
3257
+ 'gengvec.c',
3258
'translate.c',
3259
'translate-m-nocp.c',
3260
'translate-mve.c',
211
--
3261
--
212
2.20.1
3262
2.34.1
213
3263
214
3264
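[Note on the USHL/SSHL expanders moved by the patch above: the guest
semantics that the movcond/cmp_vec sequences implement are easier to
see in scalar form. The sketch below is a plain-C reference model, an
illustration only and not QEMU code; ushl64_ref is a hypothetical name:

    #include <stdint.h>

    /*
     * One 64-bit element of USHL: the low byte of the shift operand is
     * a signed count; negative counts shift right, and any count whose
     * magnitude reaches the element size yields 0.
     */
    static inline uint64_t ushl64_ref(uint64_t src, uint64_t shift)
    {
        int8_t sh = (int8_t)shift;

        if (sh <= -64 || sh >= 64) {
            return 0;
        }
        return sh < 0 ? src >> -sh : src << sh;
    }

SSHL differs only for negative counts: an out-of-range right shift
saturates to an arithmetic shift by esize-1 (a sign fill) instead of
producing 0, which is why gen_sshl_i64() above clamps rsh with umin
rather than discarding the result.]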
From: Nolan Leake <nolan@sigbus.net>

This is just enough to make reboot and poweroff work. Works for
linux, u-boot, and the arm trusted firmware. Not tested, but should
work for plan9, and bare-metal/hobby OSes, since they seem to generally
do what linux does for reset.

The watchdog timer functionality is not yet implemented.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/64
Signed-off-by: Nolan Leake <nolan@sigbus.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20210625210209.1870217-1-nolan@sigbus.net
[PMM: tweaked commit title; fixed region size to 0x200;
 moved header file to include/]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 include/hw/arm/bcm2835_peripherals.h |   3 +-
 include/hw/misc/bcm2835_powermgt.h   |  29 +++++
 hw/arm/bcm2835_peripherals.c         |  13 ++-
 hw/misc/bcm2835_powermgt.c           | 160 +++++++++++++++++++++++++++
 hw/misc/meson.build                  |   1 +
 5 files changed, 204 insertions(+), 2 deletions(-)
 create mode 100644 include/hw/misc/bcm2835_powermgt.h
 create mode 100644 hw/misc/bcm2835_powermgt.c

diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -XXX,XX +XXX,XX @@
 #include "hw/misc/bcm2835_mphi.h"
 #include "hw/misc/bcm2835_thermal.h"
 #include "hw/misc/bcm2835_cprman.h"
+#include "hw/misc/bcm2835_powermgt.h"
 #include "hw/sd/sdhci.h"
 #include "hw/sd/bcm2835_sdhost.h"
 #include "hw/gpio/bcm2835_gpio.h"
@@ -XXX,XX +XXX,XX @@ struct BCM2835PeripheralState {
     BCM2835MphiState mphi;
     UnimplementedDeviceState txp;
     UnimplementedDeviceState armtmr;
-    UnimplementedDeviceState powermgt;
+    BCM2835PowerMgtState powermgt;
     BCM2835CprmanState cprman;
     PL011State uart0;
     BCM2835AuxState aux;
diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/hw/misc/bcm2835_powermgt.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_POWERMGT_H
+#define BCM2835_POWERMGT_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT)
+
+struct BCM2835PowerMgtState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rstc;
+    uint32_t rsts;
+    uint32_t wdog;
+};
+
+#endif
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_init(Object *obj)
 
     object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr));
+
+    /* Power Management */
+    object_initialize_child(obj, "powermgt", &s->powermgt,
+                            TYPE_BCM2835_POWERMGT);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
                 qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                        INTERRUPT_USB));
 
+    /* Power Management */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->powermgt), errp)) {
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, PM_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->powermgt), 0));
+
     create_unimp(s, &s->txp, "bcm2835-txp", TXP_OFFSET, 0x1000);
     create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40);
-    create_unimp(s, &s->powermgt, "bcm2835-powermgt", PM_OFFSET, 0x114);
     create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100);
     create_unimp(s, &s->smi, "bcm2835-smi", SMI_OFFSET, 0x100);
     create_unimp(s, &s->spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20);
diff --git a/hw/misc/bcm2835_powermgt.c b/hw/misc/bcm2835_powermgt.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/misc/bcm2835_powermgt.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/misc/bcm2835_powermgt.h"
+#include "migration/vmstate.h"
+#include "sysemu/runstate.h"
+
+#define PASSWORD 0x5a000000
+#define PASSWORD_MASK 0xff000000
+
+#define R_RSTC 0x1c
+#define V_RSTC_RESET 0x20
+#define R_RSTS 0x20
+#define V_RSTS_POWEROFF 0x555 /* Linux uses partition 63 to indicate halt. */
+#define R_WDOG 0x24
+
+static uint64_t bcm2835_powermgt_read(void *opaque, hwaddr offset,
+                                      unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+    uint32_t res = 0;
+
+    switch (offset) {
+    case R_RSTC:
+        res = s->rstc;
+        break;
+    case R_RSTS:
+        res = s->rsts;
+        break;
+    case R_WDOG:
+        res = s->wdog;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_read: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_powermgt_write(void *opaque, hwaddr offset,
+                                   uint64_t value, unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+
+    if ((value & PASSWORD_MASK) != PASSWORD) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_powermgt_write: Bad password 0x%"PRIx64
+                      " at offset 0x%08"HWADDR_PRIx"\n",
+                      value, offset);
+        return;
+    }
+
+    value = value & ~PASSWORD_MASK;
+
+    switch (offset) {
+    case R_RSTC:
+        s->rstc = value;
+        if (value & V_RSTC_RESET) {
+            if ((s->rsts & 0xfff) == V_RSTS_POWEROFF) {
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+            } else {
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+            }
+        }
+        break;
+    case R_RSTS:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: RSTS\n");
+        s->rsts = value;
+        break;
+    case R_WDOG:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: WDOG\n");
+        s->wdog = value;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_powermgt_ops = {
+    .read = bcm2835_powermgt_read,
+    .write = bcm2835_powermgt_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+};
+
+static const VMStateDescription vmstate_bcm2835_powermgt = {
+    .name = TYPE_BCM2835_POWERMGT,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(rstc, BCM2835PowerMgtState),
+        VMSTATE_UINT32(rsts, BCM2835PowerMgtState),
+        VMSTATE_UINT32(wdog, BCM2835PowerMgtState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_powermgt_init(Object *obj)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(obj);
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_powermgt_ops, s,
+                          TYPE_BCM2835_POWERMGT, 0x200);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void bcm2835_powermgt_reset(DeviceState *dev)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(dev);
+
+    /* https://elinux.org/BCM2835_registers#PM */
+    s->rstc = 0x00000102;
+    s->rsts = 0x00001000;
+    s->wdog = 0x00000000;
+}
+
+static void bcm2835_powermgt_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_powermgt_reset;
+    dc->vmsd = &vmstate_bcm2835_powermgt;
+}
+
+static TypeInfo bcm2835_powermgt_info = {
+    .name = TYPE_BCM2835_POWERMGT,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835PowerMgtState),
+    .class_init = bcm2835_powermgt_class_init,
+    .instance_init = bcm2835_powermgt_init,
+};
+
+static void bcm2835_powermgt_register_types(void)
+{
+    type_register_static(&bcm2835_powermgt_info);
+}
+
+type_init(bcm2835_powermgt_register_types)
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files(
   'bcm2835_rng.c',
   'bcm2835_thermal.c',
   'bcm2835_cprman.c',
+  'bcm2835_powermgt.c',
 ))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c'))
 softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c'))
-- 
2.20.1
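[Note on the power management patch above: a guest drives this device
by writing 32-bit values whose top byte is the 0x5a "password"; writes
without it are logged as a guest error and dropped. The snippet below
is a hypothetical bare-metal illustration, not part of the patch, and
the 0x3f100000 base is an assumption matching the raspi2 peripheral
layout:

    #include <stdint.h>

    #define PM_BASE       0x3f100000u    /* assumed raspi2 PM block base */
    #define PM_RSTC       (PM_BASE + 0x1c)         /* R_RSTC above */
    #define PM_RSTS       (PM_BASE + 0x20)         /* R_RSTS above */
    #define PM_PASSWORD   0x5a000000u
    #define PM_RSTC_RESET 0x20u                    /* V_RSTC_RESET */

    static void mmio_write32(uintptr_t addr, uint32_t val)
    {
        *(volatile uint32_t *)addr = val;
    }

    void guest_reboot(void)
    {
        mmio_write32(PM_RSTC, PM_PASSWORD | PM_RSTC_RESET);
    }

    void guest_poweroff(void)
    {
        /* 0x555 in RSTS ("partition 63") turns the next reset into halt. */
        mmio_write32(PM_RSTS, PM_PASSWORD | 0x555);
        mmio_write32(PM_RSTC, PM_PASSWORD | PM_RSTC_RESET);
    }

This is the same sequence the Linux bcm2835 watchdog driver performs,
and it exercises both the qemu_system_reset_request() and the
qemu_system_shutdown_request() paths in the model.]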
From: Richard Henderson <richard.henderson@linaro.org>

Split some routines out of translate-a64.c and translate-sve.c
that are used by both.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-a64.h |   4 +
 target/arm/tcg/gengvec64.c     | 190 +++++++++++++++++++++++++++++++++
 target/arm/tcg/translate-a64.c |  26 -----
 target/arm/tcg/translate-sve.c | 145 +------------------------
 target/arm/tcg/meson.build     |   1 +
 5 files changed, 197 insertions(+), 169 deletions(-)
 create mode 100644 target/arm/tcg/gengvec64.c

diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -XXX,XX +XXX,XX @@ void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, int64_t shift,
                   uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+                   uint32_t a, uint32_t oprsz, uint32_t maxsz);
+void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+                   uint32_t a, uint32_t oprsz, uint32_t maxsz);
 
 void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
 void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/tcg/gengvec64.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * AArch64 generic vector expansion
+ *
+ * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a64.h"
+
+
+static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+    tcg_gen_rotli_i64(d, m, 1);
+    tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
+{
+    tcg_gen_rotli_vec(vece, d, m, 1);
+    tcg_gen_xor_vec(vece, d, d, n);
+}
+
+void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+    static const GVecGen3 op = {
+        .fni8 = gen_rax1_i64,
+        .fniv = gen_rax1_vec,
+        .opt_opc = vecop_list,
+        .fno = gen_helper_crypto_rax1,
+        .vece = MO_64,
+    };
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
+}
+
+static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+    uint64_t mask = dup_const(MO_8, 0xff >> sh);
+
+    tcg_gen_xor_i64(t, n, m);
+    tcg_gen_shri_i64(d, t, sh);
+    tcg_gen_shli_i64(t, t, 8 - sh);
+    tcg_gen_andi_i64(d, d, mask);
+    tcg_gen_andi_i64(t, t, ~mask);
+    tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+    uint64_t mask = dup_const(MO_16, 0xffff >> sh);
+
+    tcg_gen_xor_i64(t, n, m);
+    tcg_gen_shri_i64(d, t, sh);
+    tcg_gen_shli_i64(t, t, 16 - sh);
+    tcg_gen_andi_i64(d, d, mask);
+    tcg_gen_andi_i64(t, t, ~mask);
+    tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
+{
+    tcg_gen_xor_i32(d, n, m);
+    tcg_gen_rotri_i32(d, d, sh);
+}
+
+static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+    tcg_gen_xor_i64(d, n, m);
+    tcg_gen_rotri_i64(d, d, sh);
+}
+
+static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+                        TCGv_vec m, int64_t sh)
+{
+    tcg_gen_xor_vec(vece, d, n, m);
+    tcg_gen_rotri_vec(vece, d, d, sh);
+}
+
+void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t rm_ofs, int64_t shift,
+                  uint32_t opr_sz, uint32_t max_sz)
+{
+    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
+    static const GVecGen3i ops[4] = {
+        { .fni8 = gen_xar8_i64,
+          .fniv = gen_xar_vec,
+          .fno = gen_helper_sve2_xar_b,
+          .opt_opc = vecop,
+          .vece = MO_8 },
+        { .fni8 = gen_xar16_i64,
+          .fniv = gen_xar_vec,
+          .fno = gen_helper_sve2_xar_h,
+          .opt_opc = vecop,
+          .vece = MO_16 },
+        { .fni4 = gen_xar_i32,
+          .fniv = gen_xar_vec,
+          .fno = gen_helper_sve2_xar_s,
+          .opt_opc = vecop,
+          .vece = MO_32 },
+        { .fni8 = gen_xar_i64,
+          .fniv = gen_xar_vec,
+          .fno = gen_helper_gvec_xar_d,
+          .opt_opc = vecop,
+          .vece = MO_64 }
+    };
+    int esize = 8 << vece;
+
+    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
+    tcg_debug_assert(shift >= 0);
+    tcg_debug_assert(shift <= esize);
+    shift &= esize - 1;
+
+    if (shift == 0) {
+        /* xar with no rotate devolves to xor. */
+        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
+    } else {
+        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
+                        shift, &ops[vece]);
+    }
+}
+
+static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+    tcg_gen_xor_i64(d, n, m);
+    tcg_gen_xor_i64(d, d, k);
+}
+
+static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+                         TCGv_vec m, TCGv_vec k)
+{
+    tcg_gen_xor_vec(vece, d, n, m);
+    tcg_gen_xor_vec(vece, d, d, k);
+}
+
+void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+    static const GVecGen4 op = {
+        .fni8 = gen_eor3_i64,
+        .fniv = gen_eor3_vec,
+        .fno = gen_helper_sve2_eor3,
+        .vece = MO_64,
+        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    };
+    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+    tcg_gen_andc_i64(d, m, k);
+    tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+                         TCGv_vec m, TCGv_vec k)
+{
+    tcg_gen_andc_vec(vece, d, m, k);
+    tcg_gen_xor_vec(vece, d, d, n);
+}
+
+void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+                   uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+    static const GVecGen4 op = {
+        .fni8 = gen_bcax_i64,
+        .fniv = gen_bcax_vec,
+        .fno = gen_helper_sve2_bcax,
+        .vece = MO_64,
+        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    };
+    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
 }
 
-static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
-{
-    tcg_gen_rotli_i64(d, m, 1);
-    tcg_gen_xor_i64(d, d, n);
-}
-
-static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
-{
-    tcg_gen_rotli_vec(vece, d, m, 1);
-    tcg_gen_xor_vec(vece, d, d, n);
-}
-
-void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
-    static const GVecGen3 op = {
-        .fni8 = gen_rax1_i64,
-        .fniv = gen_rax1_vec,
-        .opt_opc = vecop_list,
-        .fno = gen_helper_crypto_rax1,
-        .vece = MO_64,
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
-}
-
 /* Crypto three-reg SHA512
  *  31   21 20  16 15 14 13 12  11 10  9    5 4    0
  * +-----------------------+------+---+---+-----+--------+------+------+
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
 
-static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-    uint64_t mask = dup_const(MO_8, 0xff >> sh);
-
-    tcg_gen_xor_i64(t, n, m);
-    tcg_gen_shri_i64(d, t, sh);
-    tcg_gen_shli_i64(t, t, 8 - sh);
-    tcg_gen_andi_i64(d, d, mask);
-    tcg_gen_andi_i64(t, t, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-    uint64_t mask = dup_const(MO_16, 0xffff >> sh);
-
-    tcg_gen_xor_i64(t, n, m);
-    tcg_gen_shri_i64(d, t, sh);
-    tcg_gen_shli_i64(t, t, 16 - sh);
-    tcg_gen_andi_i64(d, d, mask);
-    tcg_gen_andi_i64(t, t, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
-{
-    tcg_gen_xor_i32(d, n, m);
-    tcg_gen_rotri_i32(d, d, sh);
-}
-
-static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
-    tcg_gen_xor_i64(d, n, m);
-    tcg_gen_rotri_i64(d, d, sh);
-}
-
-static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
-                        TCGv_vec m, int64_t sh)
-{
-    tcg_gen_xor_vec(vece, d, n, m);
-    tcg_gen_rotri_vec(vece, d, d, sh);
-}
-
-void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                  uint32_t rm_ofs, int64_t shift,
-                  uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
-    static const GVecGen3i ops[4] = {
-        { .fni8 = gen_xar8_i64,
-          .fniv = gen_xar_vec,
-          .fno = gen_helper_sve2_xar_b,
-          .opt_opc = vecop,
-          .vece = MO_8 },
-        { .fni8 = gen_xar16_i64,
-          .fniv = gen_xar_vec,
-          .fno = gen_helper_sve2_xar_h,
-          .opt_opc = vecop,
-          .vece = MO_16 },
-        { .fni4 = gen_xar_i32,
-          .fniv = gen_xar_vec,
-          .fno = gen_helper_sve2_xar_s,
-          .opt_opc = vecop,
-          .vece = MO_32 },
-        { .fni8 = gen_xar_i64,
-          .fniv = gen_xar_vec,
-          .fno = gen_helper_gvec_xar_d,
-          .opt_opc = vecop,
-          .vece = MO_64 }
-    };
-    int esize = 8 << vece;
-
-    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
-    tcg_debug_assert(shift >= 0);
-    tcg_debug_assert(shift <= esize);
-    shift &= esize - 1;
-
-    if (shift == 0) {
-        /* xar with no rotate devolves to xor. */
-        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
-                        shift, &ops[vece]);
-    }
-}
-
 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
 {
     if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
@@ -XXX,XX +XXX,XX @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
     return true;
 }
 
-static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
-{
-    tcg_gen_xor_i64(d, n, m);
-    tcg_gen_xor_i64(d, d, k);
-}
-
-static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
-                         TCGv_vec m, TCGv_vec k)
-{
-    tcg_gen_xor_vec(vece, d, n, m);
-    tcg_gen_xor_vec(vece, d, d, k);
-}
-
-static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
-                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
-{
-    static const GVecGen4 op = {
-        .fni8 = gen_eor3_i64,
-        .fniv = gen_eor3_vec,
-        .fno = gen_helper_sve2_eor3,
-        .vece = MO_64,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    };
-    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
-}
-
-TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
-
-static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
-{
-    tcg_gen_andc_i64(d, m, k);
-    tcg_gen_xor_i64(d, d, n);
-}
-
-static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
-                         TCGv_vec m, TCGv_vec k)
-{
-    tcg_gen_andc_vec(vece, d, m, k);
-    tcg_gen_xor_vec(vece, d, d, n);
-}
-
-static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
-                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
-{
-    static const GVecGen4 op = {
-        .fni8 = gen_bcax_i64,
-        .fniv = gen_bcax_vec,
-        .fno = gen_helper_sve2_bcax,
-        .vece = MO_64,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    };
-    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
-}
-
-TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
+TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
+TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)
 
 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -XXX,XX +XXX,XX @@ arm_ss.add(files(
 
 arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
   'cpu64.c',
+  'gengvec64.c',
  'translate-a64.c',
  'translate-sve.c',
  'translate-sme.c',
-- 
2.34.1
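[Note on the three SHA-3 expanders collected into gengvec64.c above:
their per-lane semantics are simple bitwise identities. A hypothetical
plain-C reference for one 64-bit lane (illustration only, not QEMU
code) is:

    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned sh)
    {
        sh &= 63;
        return sh ? (x >> sh) | (x << (64 - sh)) : x;
    }

    /* XAR: XOR the operands, then rotate the result right. */
    static inline uint64_t xar_ref(uint64_t n, uint64_t m, unsigned sh)
    {
        return ror64(n ^ m, sh);
    }

    /* EOR3: three-way XOR. */
    static inline uint64_t eor3_ref(uint64_t n, uint64_t m, uint64_t k)
    {
        return n ^ m ^ k;
    }

    /* BCAX: bit clear and XOR. */
    static inline uint64_t bcax_ref(uint64_t n, uint64_t m, uint64_t k)
    {
        return n ^ (m & ~k);
    }

The MO_8 and MO_16 cases of XAR cannot use a host rotate directly,
which is why gen_xar8_i64() and gen_xar16_i64() above synthesize the
per-lane rotate from two shifts and a replicated mask.]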
Implement the MVE VSHLC insn, which performs a shift left of the
entire vector with carry in bits provided from a general purpose
register and carry out bits written back to that register.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-14-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    |  2 ++
 target/arm/mve.decode      |  2 ++
 target/arm/mve_helper.c    | 38 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VQRSHRUNB        111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
 VQRSHRUNB        111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
 VQRSHRUNT        111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
 VQRSHRUNT        111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
+
+VSHLC            111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB)
 DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
 DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
 DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
+
+uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
+                           uint32_t shift)
+{
+    uint32_t *d = vd;
+    uint16_t mask = mve_element_mask(env);
+    unsigned e;
+    uint32_t r;
+
+    /*
+     * For each 32-bit element, we shift it left, bringing in the
+     * low 'shift' bits of rdm at the bottom. Bits shifted out at
+     * the top become the new rdm, if the predicate mask permits.
+     * The final rdm value is returned to update the register.
+     * shift == 0 here means "shift by 32 bits".
+     */
+    if (shift == 0) {
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+            r = rdm;
+            if (mask & 1) {
+                rdm = d[H4(e)];
+            }
+            mergemask(&d[H4(e)], r, mask);
+        }
+    } else {
+        uint32_t shiftmask = MAKE_64BIT_MASK(0, shift);
+
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+            r = (d[H4(e)] << shift) | (rdm & shiftmask);
+            if (mask & 1) {
+                rdm = d[H4(e)] >> (32 - shift);
+            }
+            mergemask(&d[H4(e)], r, mask);
+        }
+    }
+    mve_advance_vpt(env);
+    return rdm;
+}
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
+
+static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
+{
+    /*
+     * Whole Vector Left Shift with Carry. The carry is taken
+     * from a general purpose register and written back there.
+     * An imm of 0 means "shift by 32".
+     */
+    TCGv_ptr qd;
+    TCGv_i32 rdm;
+
+    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
+        return false;
+    }
+    if (a->rdm == 13 || a->rdm == 15) {
+        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    qd = mve_qreg_ptr(a->qd);
+    rdm = load_reg(s, a->rdm);
+    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
+    store_reg(s, a->rdm, rdm);
+    tcg_temp_free_ptr(qd);
+    mve_update_eci(s);
+    return true;
+}
-- 
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-10-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      | 21 +++++++--
 target/arm/tcg/translate-a64.c | 86 +++++++++++++++-------------------
 2 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 # This file is processed by scripts/decodetree.py
 #
 
-&r               rn
-&ri              rd imm
-&rri_sf          rd rn imm sf
-&i               imm
+%rd              0:5
+
+&r               rn
+&ri              rd imm
+&rri_sf          rd rn imm sf
+&i               imm
+&qrr_e           q rd rn esz
+&qrrr_e          q rd rn rm esz
+
+@rr_q1e0         ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
+@r2r_q1e0        ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
 
 ### Data Processing - Immediate
 
@@ -XXX,XX +XXX,XX @@ CPYFE           00 011 0 01100 ..... .... 01 ..... ..... @cpy
 CPYP            00 011 1 01000 ..... .... 01 ..... ..... @cpy
 CPYM            00 011 1 01010 ..... .... 01 ..... ..... @cpy
 CPYE            00 011 1 01100 ..... .... 01 ..... ..... @cpy
+
+### Cryptographic AES
+
+AESE            01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
+AESD            01001110 00 10100 00101 10 ..... ..... @r2r_q1e0
+AESMC           01001110 00 10100 00110 10 ..... ..... @rr_q1e0
+AESIMC          01001110 00 10100 00111 10 ..... ..... @rr_q1e0
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
     return true;
 }
 
+/*
+ * Expanders for AdvSIMD translation functions.
+ */
+
+static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
+                            gen_helper_gvec_2 *fn)
+{
+    if (!a->q && a->esz == MO_64) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
+    }
+    return true;
+}
+
+static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
+                            gen_helper_gvec_3 *fn)
+{
+    if (!a->q && a->esz == MO_64) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
+    }
+    return true;
+}
+
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
@@ -XXX,XX +XXX,XX @@ static bool trans_EXTR(DisasContext *s, arg_extract *a)
     return true;
 }
 
+/*
+ * Cryptographic AES
+ */
+
+TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
+TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
+TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
+TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto AES
- *  31             24 23 22 21       17 16    12 11 10 9    5 4    0
- * +-----------------+------+-----------+--------+-----+------+------+
- * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
- * +-----------------+------+-----------+--------+-----+------+------+
- */
-static void disas_crypto_aes(DisasContext *s, uint32_t insn)
-{
-    int size = extract32(insn, 22, 2);
-    int opcode = extract32(insn, 12, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    gen_helper_gvec_2 *genfn2 = NULL;
-    gen_helper_gvec_3 *genfn3 = NULL;
-
-    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (opcode) {
-    case 0x4: /* AESE */
-        genfn3 = gen_helper_crypto_aese;
-        break;
-    case 0x6: /* AESMC */
-        genfn2 = gen_helper_crypto_aesmc;
-        break;
-    case 0x5: /* AESD */
-        genfn3 = gen_helper_crypto_aesd;
-        break;
-    case 0x7: /* AESIMC */
-        genfn2 = gen_helper_crypto_aesimc;
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-    if (genfn2) {
-        gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
-    } else {
-        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
-    }
-}
-
 /* Crypto three-reg SHA
  *  31   24 23 22 21 20  16 15 14    12 11 10 9    5 4    0
  * +-----------------+------+---+------+---+--------+-----+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
162
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
163
- { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
164
{ 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
165
{ 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
166
{ 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
119
--
167
--
120
2.20.1
168
2.34.1
121
122
diff view generated by jsdifflib
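As a reading aid for the conversions that follow: each TRANS_FEAT line binds one decodetree pattern to a feature test plus an expander. The real macro lives in the arm translate headers (target/arm/tcg/translate.h); a minimal sketch of the shape it expands to, illustrative rather than the verbatim definition:

    static bool trans_AESE(DisasContext *s, arg_AESE *a)
    {
        /* Feature gate first, then hand off to the shared expander. */
        return dc_isar_feature(aa64_aes, s)
            && do_gvec_op3_ool(s, a, 0, gen_helper_crypto_aese);
    }

If the feature test fails, trans_AESE() returns false and the insn is treated as unallocated, which is why the hand-written dc_isar_feature() checks in the deleted decoders can go away.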
New patch

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      | 11 +++++
 target/arm/tcg/translate-a64.c | 78 +++++-----------------------
 2 files changed, 21 insertions(+), 68 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 
 @rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
 @r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
+@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
 
 ### Data Processing - Immediate
 
@@ -XXX,XX +XXX,XX @@ AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
 AESD 01001110 00 10100 00101 10 ..... ..... @r2r_q1e0
 AESMC 01001110 00 10100 00110 10 ..... ..... @rr_q1e0
 AESIMC 01001110 00 10100 00111 10 ..... ..... @rr_q1e0
+
+### Cryptographic three-register SHA
+
+SHA1C 0101 1110 000 ..... 000000 ..... ..... @rrr_q1e0
+SHA1P 0101 1110 000 ..... 000100 ..... ..... @rrr_q1e0
+SHA1M 0101 1110 000 ..... 001000 ..... ..... @rrr_q1e0
+SHA1SU0 0101 1110 000 ..... 001100 ..... ..... @rrr_q1e0
+SHA256H 0101 1110 000 ..... 010000 ..... ..... @rrr_q1e0
+SHA256H2 0101 1110 000 ..... 010100 ..... ..... @rrr_q1e0
+SHA256SU1 0101 1110 000 ..... 011000 ..... ..... @rrr_q1e0
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool trans_EXTR(DisasContext *s, arg_extract *a)
 }
 
 /*
- * Cryptographic AES
+ * Cryptographic AES, SHA
  */
 
 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
 
+TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
+TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
+TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
+TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
+
+TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
+TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
+TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto three-reg SHA
- *  31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
- * +-----------------+------+---+------+---+--------+-----+------+------+
- * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
- * +-----------------+------+---+------+---+--------+-----+------+------+
- */
-static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
-{
-    int size = extract32(insn, 22, 2);
-    int opcode = extract32(insn, 12, 3);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    gen_helper_gvec_3 *genfn;
-    bool feature;
-
-    if (size != 0) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (opcode) {
-    case 0: /* SHA1C */
-        genfn = gen_helper_crypto_sha1c;
-        feature = dc_isar_feature(aa64_sha1, s);
-        break;
-    case 1: /* SHA1P */
-        genfn = gen_helper_crypto_sha1p;
-        feature = dc_isar_feature(aa64_sha1, s);
-        break;
-    case 2: /* SHA1M */
-        genfn = gen_helper_crypto_sha1m;
-        feature = dc_isar_feature(aa64_sha1, s);
-        break;
-    case 3: /* SHA1SU0 */
-        genfn = gen_helper_crypto_sha1su0;
-        feature = dc_isar_feature(aa64_sha1, s);
-        break;
-    case 4: /* SHA256H */
-        genfn = gen_helper_crypto_sha256h;
-        feature = dc_isar_feature(aa64_sha256, s);
-        break;
-    case 5: /* SHA256H2 */
-        genfn = gen_helper_crypto_sha256h2;
-        feature = dc_isar_feature(aa64_sha256, s);
-        break;
-    case 6: /* SHA256SU1 */
-        genfn = gen_helper_crypto_sha256su1;
-        feature = dc_isar_feature(aa64_sha256, s);
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!feature) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
-}
-
 /* Crypto two-reg SHA
  *  31 24 23 22 21 17 16 12 11 10 9 5 4 0
  * +-----------------+------+-----------+--------+-----+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
--
2.34.1
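A note on the @rrr_q1e0 format line added above: it hard-wires q=1 and esz=0 so the SHA patterns can share the generic &qrrr_e argument set with the rest of the SIMD conversion. What decodetree generates for such a format is roughly the following extract function (generated names are mechanical; this is a sketch only):

    static void extract_rrr_q1e0(DisasContext *ctx, arg_qrrr_e *a,
                                 uint32_t insn)
    {
        a->rm = extract32(insn, 16, 5);
        a->rn = extract32(insn, 5, 5);
        a->rd = extract32(insn, 0, 5);
        a->q = 1;      /* constant fields from the format line */
        a->esz = 0;
    }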
New patch

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-12-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      |  6 ++++
 target/arm/tcg/translate-a64.c | 54 +++-------------------------
 2 files changed, 10 insertions(+), 50 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ SHA1SU0 0101 1110 000 ..... 001100 ..... ..... @rrr_q1e0
 SHA256H 0101 1110 000 ..... 010000 ..... ..... @rrr_q1e0
 SHA256H2 0101 1110 000 ..... 010100 ..... ..... @rrr_q1e0
 SHA256SU1 0101 1110 000 ..... 011000 ..... ..... @rrr_q1e0
+
+### Cryptographic two-register SHA
+
+SHA1H 0101 1110 0010 1000 0000 10 ..... ..... @rr_q1e0
+SHA1SU1 0101 1110 0010 1000 0001 10 ..... ..... @rr_q1e0
+SHA256SU0 0101 1110 0010 1000 0010 10 ..... ..... @rr_q1e0
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256
 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
 
+TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
+TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
+TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto two-reg SHA
- *  31 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----------------+------+-----------+--------+-----+------+------+
- * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----------------+------+-----------+--------+-----+------+------+
- */
-static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
-{
-    int size = extract32(insn, 22, 2);
-    int opcode = extract32(insn, 12, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    gen_helper_gvec_2 *genfn;
-    bool feature;
-
-    if (size != 0) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (opcode) {
-    case 0: /* SHA1H */
-        feature = dc_isar_feature(aa64_sha1, s);
-        genfn = gen_helper_crypto_sha1h;
-        break;
-    case 1: /* SHA1SU1 */
-        feature = dc_isar_feature(aa64_sha1, s);
-        genfn = gen_helper_crypto_sha1su1;
-        break;
-    case 2: /* SHA256SU0 */
-        feature = dc_isar_feature(aa64_sha256, s);
-        genfn = gen_helper_crypto_sha256su0;
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!feature) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
-}
-
 /* Crypto three-reg SHA512
  *  31 21 20 16 15 14 13 12 11 10 9 5 4 0
  * +-----------------------+------+---+---+-----+--------+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
     { 0xce000000, 0xff808000, disas_crypto_four_reg },
--
2.34.1
New patch

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-13-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      | 11 ++++
 target/arm/tcg/translate-a64.c | 97 ++++++++--------------------------
 2 files changed, 32 insertions(+), 76 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 @rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
 @r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
 @rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
+@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
 
 ### Data Processing - Immediate
 
@@ -XXX,XX +XXX,XX @@ SHA256SU1 0101 1110 000 ..... 011000 ..... ..... @rrr_q1e0
 SHA1H 0101 1110 0010 1000 0000 10 ..... ..... @rr_q1e0
 SHA1SU1 0101 1110 0010 1000 0001 10 ..... ..... @rr_q1e0
 SHA256SU0 0101 1110 0010 1000 0010 10 ..... ..... @rr_q1e0
+
+### Cryptographic three-register SHA512
+
+SHA512H 1100 1110 011 ..... 100000 ..... ..... @rrr_q1e0
+SHA512H2 1100 1110 011 ..... 100001 ..... ..... @rrr_q1e0
+SHA512SU1 1100 1110 011 ..... 100010 ..... ..... @rrr_q1e0
+RAX1 1100 1110 011 ..... 100011 ..... ..... @rrr_q1e3
+SM3PARTW1 1100 1110 011 ..... 110000 ..... ..... @rrr_q1e0
+SM3PARTW2 1100 1110 011 ..... 110001 ..... ..... @rrr_q1e0
+SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
     return true;
 }
 
+static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
+{
+    if (!a->q && a->esz == MO_64) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
+    }
+    return true;
+}
+
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
@@ -XXX,XX +XXX,XX @@ static bool trans_EXTR(DisasContext *s, arg_extract *a)
 }
 
 /*
- * Cryptographic AES, SHA
+ * Cryptographic AES, SHA, SHA512
  */
 
 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
 
+TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
+TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
+TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
+TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
+TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
+TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
+TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
+
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto three-reg SHA512
- *  31 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +-----------------------+------+---+---+-----+--------+------+------+
- * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
- * +-----------------------+------+---+---+-----+--------+------+------+
- */
-static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
-{
-    int opcode = extract32(insn, 10, 2);
-    int o = extract32(insn, 14, 1);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    bool feature;
-    gen_helper_gvec_3 *oolfn = NULL;
-    GVecGen3Fn *gvecfn = NULL;
-
-    if (o == 0) {
-        switch (opcode) {
-        case 0: /* SHA512H */
-            feature = dc_isar_feature(aa64_sha512, s);
-            oolfn = gen_helper_crypto_sha512h;
-            break;
-        case 1: /* SHA512H2 */
-            feature = dc_isar_feature(aa64_sha512, s);
-            oolfn = gen_helper_crypto_sha512h2;
-            break;
-        case 2: /* SHA512SU1 */
-            feature = dc_isar_feature(aa64_sha512, s);
-            oolfn = gen_helper_crypto_sha512su1;
-            break;
-        case 3: /* RAX1 */
-            feature = dc_isar_feature(aa64_sha3, s);
-            gvecfn = gen_gvec_rax1;
-            break;
-        default:
-            g_assert_not_reached();
-        }
-    } else {
-        switch (opcode) {
-        case 0: /* SM3PARTW1 */
-            feature = dc_isar_feature(aa64_sm3, s);
-            oolfn = gen_helper_crypto_sm3partw1;
-            break;
-        case 1: /* SM3PARTW2 */
-            feature = dc_isar_feature(aa64_sm3, s);
-            oolfn = gen_helper_crypto_sm3partw2;
-            break;
-        case 2: /* SM4EKEY */
-            feature = dc_isar_feature(aa64_sm4, s);
-            oolfn = gen_helper_crypto_sm4ekey;
-            break;
-        default:
-            unallocated_encoding(s);
-            return;
-        }
-    }
-
-    if (!feature) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    if (oolfn) {
-        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
-    } else {
-        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
-    }
-}
-
 /* Crypto two-reg SHA512
  *  31 12 11 10 9 5 4 0
  * +-----------------------------------------+--------+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
     { 0xce000000, 0xff808000, disas_crypto_four_reg },
     { 0xce800000, 0xffe00000, disas_crypto_xar },
--
2.34.1
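RAX1 is the one insn in this group expanded inline (gen_gvec_rax1 via do_gvec_fn3) rather than through an out-of-line helper. The two function shapes involved, approximately as declared in the QEMU headers (a sketch; consult target/arm/tcg/translate.h and the tcg gvec headers for the authoritative signatures):

    /* Out-of-line: the translator emits a runtime call to a C helper. */
    typedef void gen_helper_gvec_3(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
                                   TCGv_i32 desc);

    /* Inline: the expander emits vector TCG ops directly over
     * register-file offsets, so there is no helper call at runtime. */
    typedef void GVecGen3Fn(unsigned vece, uint32_t dofs, uint32_t aofs,
                            uint32_t bofs, uint32_t oprsz, uint32_t maxsz);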
New patch

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-14-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      |  5 ++++
 target/arm/tcg/translate-a64.c | 50 ++--------------------------
 2 files changed, 8 insertions(+), 47 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ RAX1 1100 1110 011 ..... 100011 ..... ..... @rrr_q1e3
 SM3PARTW1 1100 1110 011 ..... 110000 ..... ..... @rrr_q1e0
 SM3PARTW2 1100 1110 011 ..... 110001 ..... ..... @rrr_q1e0
 SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0
+
+### Cryptographic two-register SHA512
+
+SHA512SU0 1100 1110 110 00000 100000 ..... ..... @rr_q1e0
+SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3part
 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
 
+TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
+TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
+
 
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto two-reg SHA512
- *  31 12 11 10 9 5 4 0
- * +-----------------------------------------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
- * +-----------------------------------------+--------+------+------+
- */
-static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
-{
-    int opcode = extract32(insn, 10, 2);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    bool feature;
-
-    switch (opcode) {
-    case 0: /* SHA512SU0 */
-        feature = dc_isar_feature(aa64_sha512, s);
-        break;
-    case 1: /* SM4E */
-        feature = dc_isar_feature(aa64_sm4, s);
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!feature) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    switch (opcode) {
-    case 0: /* SHA512SU0 */
-        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
-        break;
-    case 1: /* SM4E */
-        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
 /* Crypto four-register
  *  31 23 22 21 20 16 15 14 10 9 5 4 0
  * +-------------------+-----+------+---+------+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
     { 0xce000000, 0xff808000, disas_crypto_four_reg },
     { 0xce800000, 0xffe00000, disas_crypto_xar },
     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
--
2.34.1
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-15-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      |   8 ++
 target/arm/tcg/translate-a64.c | 132 +++++++++++----------------------
 2 files changed, 51 insertions(+), 89 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 &i imm
 &qrr_e q rd rn esz
 &qrrr_e q rd rn rm esz
+&qrrrr_e q rd rn rm ra esz
 
 @rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
 @r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
 @rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
 @rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
+@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
 
 ### Data Processing - Immediate
 
@@ -XXX,XX +XXX,XX @@ SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0
 
 SHA512SU0 1100 1110 110 00000 100000 ..... ..... @rr_q1e0
 SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0
+
+### Cryptographic four-register
+
+EOR3 1100 1110 000 ..... 0 ..... ..... ..... @rrrr_q1e3
+BCAX 1100 1110 001 ..... 0 ..... ..... ..... @rrrr_q1e3
+SM3SS1 1100 1110 010 ..... 0 ..... ..... ..... @rrrr_q1e3
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
     return true;
 }
 
+static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
+{
+    if (!a->q && a->esz == MO_64) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
+    }
+    return true;
+}
+
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
 
+TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
+TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
+
+static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
+{
+    if (!dc_isar_feature(aa64_sm3, s)) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+        TCGv_i32 tcg_op3 = tcg_temp_new_i32();
+        TCGv_i32 tcg_res = tcg_temp_new_i32();
+        unsigned vsz, dofs;
+
+        read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
+        read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
+        read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
+
+        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
+        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
+        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
+        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
+
+        /* Clear the whole register first, then store bits [127:96]. */
+        vsz = vec_full_reg_size(s);
+        dofs = vec_full_reg_offset(s, a->rd);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
+        write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
+    }
+    return true;
+}
 
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto four-register
- *  31 23 22 21 20 16 15 14 10 9 5 4 0
- * +-------------------+-----+------+---+------+------+------+
- * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
- * +-------------------+-----+------+---+------+------+------+
- */
-static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
-{
-    int op0 = extract32(insn, 21, 2);
-    int rm = extract32(insn, 16, 5);
-    int ra = extract32(insn, 10, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-    bool feature;
-
-    switch (op0) {
-    case 0: /* EOR3 */
-    case 1: /* BCAX */
-        feature = dc_isar_feature(aa64_sha3, s);
-        break;
-    case 2: /* SM3SS1 */
-        feature = dc_isar_feature(aa64_sm3, s);
-        break;
-    default:
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!feature) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    if (op0 < 2) {
-        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
-        int pass;
-
-        tcg_op1 = tcg_temp_new_i64();
-        tcg_op2 = tcg_temp_new_i64();
-        tcg_op3 = tcg_temp_new_i64();
-        tcg_res[0] = tcg_temp_new_i64();
-        tcg_res[1] = tcg_temp_new_i64();
-
-        for (pass = 0; pass < 2; pass++) {
-            read_vec_element(s, tcg_op1, rn, pass, MO_64);
-            read_vec_element(s, tcg_op2, rm, pass, MO_64);
-            read_vec_element(s, tcg_op3, ra, pass, MO_64);
-
-            if (op0 == 0) {
-                /* EOR3 */
-                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
-            } else {
-                /* BCAX */
-                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
-            }
-            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
-        }
-        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
-        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-    } else {
-        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
-
-        tcg_op1 = tcg_temp_new_i32();
-        tcg_op2 = tcg_temp_new_i32();
-        tcg_op3 = tcg_temp_new_i32();
-        tcg_res = tcg_temp_new_i32();
-        tcg_zero = tcg_constant_i32(0);
-
-        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
-        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
-        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
-
-        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
-        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
-        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
-        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
-
-        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
-        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
-        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
-        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
-    }
-}
-
 /* Crypto XAR
  *  31 21 20 16 15 10 9 5 4 0
  * +-----------------------+------+--------+------+------+
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0xce000000, 0xff808000, disas_crypto_four_reg },
     { 0xce800000, 0xffe00000, disas_crypto_xar },
     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
--
2.34.1
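For reference, the per-lane semantics of the two sha3 ops that move to do_gvec_fn4 above, written as a scalar model (function names are illustrative, not QEMU API):

    #include <stdint.h>

    /* EOR3: three-way exclusive OR, per 64-bit lane. */
    static inline uint64_t eor3_64(uint64_t n, uint64_t m, uint64_t k)
    {
        return n ^ m ^ k;
    }

    /* BCAX: bit clear and XOR, per 64-bit lane. */
    static inline uint64_t bcax_64(uint64_t n, uint64_t m, uint64_t k)
    {
        return n ^ (m & ~k);
    }

These match what the deleted per-pass loop computed with tcg_gen_xor_i64 and tcg_gen_andc_i64.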
New patch

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-16-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      | 10 ++++++++
 target/arm/tcg/translate-a64.c | 43 ++++++++++------------------------
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0
 EOR3 1100 1110 000 ..... 0 ..... ..... ..... @rrrr_q1e3
 BCAX 1100 1110 001 ..... 0 ..... ..... ..... @rrrr_q1e3
 SM3SS1 1100 1110 010 ..... 0 ..... ..... ..... @rrrr_q1e3
+
+### Cryptographic three-register, imm2
+
+&crypto3i rd rn rm imm
+@crypto3i ........ ... rm:5 .. imm:2 .. rn:5 rd:5 &crypto3i
+
+SM3TT1A 11001110 010 ..... 10 .. 00 ..... ..... @crypto3i
+SM3TT1B 11001110 010 ..... 10 .. 01 ..... ..... @crypto3i
+SM3TT2A 11001110 010 ..... 10 .. 10 ..... ..... @crypto3i
+SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
     return true;
 }
 
+static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
+{
+    if (fp_access_check(s)) {
+        gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
+    }
+    return true;
+}
+TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
+TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
+TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
+TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn)
                  vec_full_reg_size(s));
 }
 
-/* Crypto three-reg imm2
- *  31 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +-----------------------+------+-----+------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
- * +-----------------------+------+-----+------+--------+------+------+
- */
-static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
-        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
-    };
-    int opcode = extract32(insn, 10, 2);
-    int imm2 = extract32(insn, 12, 2);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-
-    if (!dc_isar_feature(aa64_sm3, s)) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
-}
-
 /* C3.6 Data processing - SIMD, inc Crypto
  *
  * As the decode gets a little complex we are using a table based
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
     { 0xce800000, 0xffe00000, disas_crypto_xar },
-    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
--
2.34.1
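A note on the &crypto3i declaration above: each decodetree &-set becomes a plain C argument struct, and the two imm bits ride through to the helper as the 'data' argument of gen_gvec_op3_ool. The generated struct is roughly the following (sketch only; the real layout is produced mechanically by the decodetree script):

    /* What decodetree generates for "&crypto3i rd rn rm imm". */
    typedef struct {
        int rd, rn, rm, imm;
    } arg_crypto3i;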
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-17-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      |  4 ++++
 target/arm/tcg/translate-a64.c | 43 +++++++++++-----------------------
 2 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ SM3TT1A 11001110 010 ..... 10 .. 00 ..... ..... @crypto3i
 SM3TT1B 11001110 010 ..... 10 .. 01 ..... ..... @crypto3i
 SM3TT2A 11001110 010 ..... 10 .. 10 ..... ..... @crypto3i
 SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i
+
+### Cryptographic XAR
+
+XAR 1100 1110 100 rm:5 imm:6 rn:5 rd:5
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
 
+static bool trans_XAR(DisasContext *s, arg_XAR *a)
+{
+    if (!dc_isar_feature(aa64_sha3, s)) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
+                     vec_full_reg_offset(s, a->rn),
+                     vec_full_reg_offset(s, a->rm), a->imm, 16,
+                     vec_full_reg_size(s));
+    }
+    return true;
+}
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Crypto XAR
- *  31 21 20 16 15 10 9 5 4 0
- * +-----------------------+------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
- * +-----------------------+------+--------+------+------+
- */
-static void disas_crypto_xar(DisasContext *s, uint32_t insn)
-{
-    int rm = extract32(insn, 16, 5);
-    int imm6 = extract32(insn, 10, 6);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-
-    if (!dc_isar_feature(aa64_sha3, s)) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
-                 vec_full_reg_offset(s, rn),
-                 vec_full_reg_offset(s, rm), imm6, 16,
-                 vec_full_reg_size(s));
-}
-
 /* C3.6 Data processing - SIMD, inc Crypto
  *
  * As the decode gets a little complex we are using a table based
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
-    { 0xce800000, 0xffe00000, disas_crypto_xar },
     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
--
2.34.1
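The gen_gvec_xar expansion above implements, per 64-bit lane, a rotate-right of the XOR of the two sources. As a scalar model (illustrative only; the imm6 == 0 case is a plain XOR, since rotating by zero is the identity):

    #include <stdint.h>

    static inline uint64_t xar_64(uint64_t n, uint64_t m, unsigned imm6)
    {
        uint64_t t = n ^ m;
        imm6 &= 63;
        /* Rotate right by imm6, avoiding the undefined shift by 64. */
        return imm6 ? (t >> imm6) | (t << (64 - imm6)) : t;
    }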
1
Implement the MVE logical-immediate insns (VMOV, VMVN,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
VORR and VBIC). These have essentially the same encoding
3
as their Neon equivalents, and we implement the decode
4
in the same way.
5
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20240524232121.284515-18-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210628135835.6690-7-peter.maydell@linaro.org
9
---
7
---
10
target/arm/helper-mve.h | 4 +++
8
target/arm/tcg/a64.decode | 13 +
11
target/arm/mve.decode | 17 +++++++++++++
9
target/arm/tcg/translate-a64.c | 426 +++++++++++----------------------
12
target/arm/mve_helper.c | 24 ++++++++++++++++++
10
2 files changed, 152 insertions(+), 287 deletions(-)
13
target/arm/translate-mve.c | 50 ++++++++++++++++++++++++++++++++++++++
14
4 files changed, 95 insertions(+)
15
11
16
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
12
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
17
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-mve.h
14
--- a/target/arm/tcg/a64.decode
19
+++ b/target/arm/helper-mve.h
15
+++ b/target/arm/tcg/a64.decode
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vaddvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
16
@@ -XXX,XX +XXX,XX @@ SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i
21
DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
17
### Cryptographic XAR
22
DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
18
23
DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
19
XAR 1100 1110 100 rm:5 imm:6 rn:5 rd:5
24
+
20
+
25
+DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
21
+### Advanced SIMD scalar copy
26
+DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
22
+
27
+DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
23
+DUP_element_s 0101 1110 000 imm:5 0 0000 1 rn:5 rd:5
28
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
24
+
25
+### Advanced SIMD copy
26
+
27
+DUP_element_v 0 q:1 00 1110 000 imm:5 0 0000 1 rn:5 rd:5
28
+DUP_general 0 q:1 00 1110 000 imm:5 0 0001 1 rn:5 rd:5
29
+INS_general 0 1 00 1110 000 imm:5 0 0011 1 rn:5 rd:5
30
+SMOV 0 q:1 00 1110 000 imm:5 0 0101 1 rn:5 rd:5
31
+UMOV 0 q:1 00 1110 000 imm:5 0 0111 1 rn:5 rd:5
32
+INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5
33
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
29
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/mve.decode
35
--- a/target/arm/tcg/translate-a64.c
31
+++ b/target/arm/mve.decode
36
+++ b/target/arm/tcg/translate-a64.c
32
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@ static bool trans_XAR(DisasContext *s, arg_XAR *a)
33
# VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
34
%size_28 28:1 !function=plus_1
35
36
+# 1imm format immediate
37
+%imm_28_16_0 28:1 16:3 0:4
38
+
39
&vldr_vstr rn qd imm p a w size l u
40
&1op qd qm size
41
&2op qd qm qn size
42
&2scalar qd qn rm size
43
+&1imm qd imm cmode op
44
45
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
46
# Note that both Rn and Qd are 3 bits only (no D bit)
47
@@ -XXX,XX +XXX,XX @@
48
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
49
@2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \
50
size=%size_28
51
+@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0
52
53
# The _rev suffix indicates that Vn and Vm are reversed. This is
54
# the case for shifts. In the Arm ARM these insns are documented
55
@@ -XXX,XX +XXX,XX @@ VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rd
56
# Predicate operations
57
%mask_22_13 22:1 13:3
58
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
59
+
60
+# Logical immediate operations (1 reg and modified-immediate)
61
+
62
+# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
63
+# not in a way we can conveniently represent in decodetree without
64
+# a lot of repetition:
65
+# VORR: op=0, (cmode & 1) && cmode < 12
66
+# VBIC: op=1, (cmode & 1) && cmode < 12
67
+# VMOV: everything else
68
+# So we have a single decode line and check the cmode/op in the
69
+# trans function.
70
+Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm
71
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/arm/mve_helper.c
74
+++ b/target/arm/mve_helper.c
75
@@ -XXX,XX +XXX,XX @@ DO_1OP(vnegw, 4, int32_t, DO_NEG)
76
DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH)
77
DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
78
79
+/*
80
+ * 1 operand immediates: Vda is destination and possibly also one source.
81
+ * All these insns work at 64-bit widths.
82
+ */
83
+#define DO_1OP_IMM(OP, FN) \
84
+ void HELPER(mve_##OP)(CPUARMState *env, void *vda, uint64_t imm) \
85
+ { \
86
+ uint64_t *da = vda; \
87
+ uint16_t mask = mve_element_mask(env); \
88
+ unsigned e; \
89
+ for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
90
+ mergemask(&da[H8(e)], FN(da[H8(e)], imm), mask); \
91
+ } \
92
+ mve_advance_vpt(env); \
93
+ }
94
+
95
+#define DO_MOVI(N, I) (I)
96
+#define DO_ANDI(N, I) ((N) & (I))
97
+#define DO_ORRI(N, I) ((N) | (I))
98
+
99
+DO_1OP_IMM(vmovi, DO_MOVI)
100
+DO_1OP_IMM(vandi, DO_ANDI)
101
+DO_1OP_IMM(vorri, DO_ORRI)
102
+
103
#define DO_2OP(OP, ESIZE, TYPE, FN) \
104
void HELPER(glue(mve_, OP))(CPUARMState *env, \
105
void *vd, void *vn, void *vm) \
106
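The predication in DO_1OP_IMM above is the interesting part: each byte of
a 64-bit lane is written back only if the corresponding bit of the VPT
mask is set. A scalar model of that merge (assumes little-endian byte
order; the real code goes through QEMU's mergemask() machinery):

    static uint64_t merge_bytes(uint64_t old, uint64_t val, uint8_t mask)
    {
        uint64_t out = 0;
        for (int b = 0; b < 8; b++) {
            uint64_t src = (mask & (1u << b)) ? val : old;
            out |= src & (0xffull << (8 * b)); /* keep byte b of src */
        }
        return out;
    }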
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/translate-mve.c
109
+++ b/target/arm/translate-mve.c
110
@@ -XXX,XX +XXX,XX @@ typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
111
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
112
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
113
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
114
+typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
115
116
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
117
static inline long mve_qreg_offset(unsigned reg)
118
@@ -XXX,XX +XXX,XX @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
119
mve_update_eci(s);
120
return true;
38
return true;
121
}
39
}
122
+
40
123
+static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
41
+/*
124
+{
42
+ * Advanced SIMD copy
125
+ TCGv_ptr qd;
43
+ */
126
+ uint64_t imm;
44
+
127
+
45
+static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
128
+ if (!dc_isar_feature(aa32_mve, s) ||
46
+{
129
+ !mve_check_qreg_bank(s, a->qd) ||
47
+ unsigned esz = ctz32(imm);
130
+ !fn) {
48
+ if (esz <= MO_64) {
131
+ return false;
49
+ *pesz = esz;
132
+ }
50
+ *pidx = imm >> (esz + 1);
133
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
134
+ return true;
51
+ return true;
135
+ }
52
+ }
136
+
53
+ return false;
137
+ imm = asimd_imm_const(a->imm, a->cmode, a->op);
54
+}
138
+
55
+
139
+ qd = mve_qreg_ptr(a->qd);
56
+static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
140
+ fn(cpu_env, qd, tcg_constant_i64(imm));
57
+{
141
+ tcg_temp_free_ptr(qd);
58
+ MemOp esz;
142
+ mve_update_eci(s);
59
+ unsigned idx;
143
+ return true;
60
+
144
+}
61
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
145
+
62
+ return false;
146
+static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
63
+ }
147
+{
64
+ if (fp_access_check(s)) {
148
+ /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
65
+ /*
149
+ MVEGenOneOpImmFn *fn;
66
+ * This instruction just extracts the specified element and
150
+
67
+ * zero-extends it into the bottom of the destination register.
151
+ if ((a->cmode & 1) && a->cmode < 12) {
68
+ */
152
+ if (a->op) {
69
+ TCGv_i64 tmp = tcg_temp_new_i64();
153
+ /*
70
+ read_vec_element(s, tmp, a->rn, idx, esz);
154
+ * For op=1, the immediate will be inverted by asimd_imm_const(),
71
+ write_fp_dreg(s, a->rd, tmp);
155
+ * so the VBIC becomes a logical AND operation.
72
+ }
156
+ */
73
+ return true;
157
+ fn = gen_helper_mve_vandi;
74
+}
158
+ } else {
75
+
159
+ fn = gen_helper_mve_vorri;
76
+static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
77
+{
78
+ MemOp esz;
79
+ unsigned idx;
80
+
81
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
82
+ return false;
83
+ }
84
+ if (esz == MO_64 && !a->q) {
85
+ return false;
86
+ }
87
+ if (fp_access_check(s)) {
88
+ tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
89
+ vec_reg_offset(s, a->rn, idx, esz),
90
+ a->q ? 16 : 8, vec_full_reg_size(s));
91
+ }
92
+ return true;
93
+}
94
+
95
+static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
96
+{
97
+ MemOp esz;
98
+ unsigned idx;
99
+
100
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
101
+ return false;
102
+ }
103
+ if (esz == MO_64 && !a->q) {
104
+ return false;
105
+ }
106
+ if (fp_access_check(s)) {
107
+ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
108
+ a->q ? 16 : 8, vec_full_reg_size(s),
109
+ cpu_reg(s, a->rn));
110
+ }
111
+ return true;
112
+}
113
+
114
+static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
115
+{
116
+ MemOp esz;
117
+ unsigned idx;
118
+
119
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
120
+ return false;
121
+ }
122
+ if (is_signed) {
123
+ if (esz == MO_64 || (esz == MO_32 && !a->q)) {
124
+ return false;
160
+ }
125
+ }
161
+ } else {
126
+ } else {
162
+ /* There is one unallocated cmode/op combination in this space */
127
+ if (esz == MO_64 ? !a->q : a->q) {
163
+ if (a->cmode == 15 && a->op == 1) {
164
+ return false;
128
+ return false;
165
+ }
129
+ }
166
+ /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
130
+ }
167
+ fn = gen_helper_mve_vmovi;
131
+ if (fp_access_check(s)) {
168
+ }
132
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
169
+ return do_1imm(s, a, fn);
133
+ read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
170
+}
134
+ if (is_signed && !a->q) {
135
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
136
+ }
137
+ }
138
+ return true;
139
+}
140
+
141
+TRANS(SMOV, do_smov_umov, a, MO_SIGN)
142
+TRANS(UMOV, do_smov_umov, a, 0)
143
+
144
+static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
145
+{
146
+ MemOp esz;
147
+ unsigned idx;
148
+
149
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
150
+ return false;
151
+ }
152
+ if (fp_access_check(s)) {
153
+ write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
154
+ clear_vec_high(s, true, a->rd);
155
+ }
156
+ return true;
157
+}
158
+
159
+static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
160
+{
161
+ MemOp esz;
162
+ unsigned didx, sidx;
163
+
164
+ if (!decode_esz_idx(a->di, &esz, &didx)) {
165
+ return false;
166
+ }
167
+ sidx = a->si >> esz;
168
+ if (fp_access_check(s)) {
169
+ TCGv_i64 tmp = tcg_temp_new_i64();
170
+
171
+ read_vec_element(s, tmp, a->rn, sidx, esz);
172
+ write_vec_element(s, tmp, a->rd, didx, esz);
173
+
174
+ /* INS is considered a 128-bit write for SVE. */
175
+ clear_vec_high(s, true, a->rd);
176
+ }
177
+ return true;
178
+}
179
+
180
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
181
* Note that it is the caller's responsibility to ensure that the
182
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
183
@@ -XXX,XX +XXX,XX @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
184
write_fp_dreg(s, rd, tcg_res);
185
}
186
187
-/* DUP (Element, Vector)
188
- *
189
- * 31 30 29 21 20 16 15 10 9 5 4 0
190
- * +---+---+-------------------+--------+-------------+------+------+
191
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
192
- * +---+---+-------------------+--------+-------------+------+------+
193
- *
194
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
195
- */
196
-static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
197
- int imm5)
198
-{
199
- int size = ctz32(imm5);
200
- int index;
201
-
202
- if (size > 3 || (size == 3 && !is_q)) {
203
- unallocated_encoding(s);
204
- return;
205
- }
206
-
207
- if (!fp_access_check(s)) {
208
- return;
209
- }
210
-
211
- index = imm5 >> (size + 1);
212
- tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
213
- vec_reg_offset(s, rn, index, size),
214
- is_q ? 16 : 8, vec_full_reg_size(s));
215
-}
216
-
217
-/* DUP (element, scalar)
218
- * 31 21 20 16 15 10 9 5 4 0
219
- * +-----------------------+--------+-------------+------+------+
220
- * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
221
- * +-----------------------+--------+-------------+------+------+
222
- */
223
-static void handle_simd_dupes(DisasContext *s, int rd, int rn,
224
- int imm5)
225
-{
226
- int size = ctz32(imm5);
227
- int index;
228
- TCGv_i64 tmp;
229
-
230
- if (size > 3) {
231
- unallocated_encoding(s);
232
- return;
233
- }
234
-
235
- if (!fp_access_check(s)) {
236
- return;
237
- }
238
-
239
- index = imm5 >> (size + 1);
240
-
241
- /* This instruction just extracts the specified element and
242
- * zero-extends it into the bottom of the destination register.
243
- */
244
- tmp = tcg_temp_new_i64();
245
- read_vec_element(s, tmp, rn, index, size);
246
- write_fp_dreg(s, rd, tmp);
247
-}
248
-
249
-/* DUP (General)
250
- *
251
- * 31 30 29 21 20 16 15 10 9 5 4 0
252
- * +---+---+-------------------+--------+-------------+------+------+
253
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
254
- * +---+---+-------------------+--------+-------------+------+------+
255
- *
256
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
257
- */
258
-static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
259
- int imm5)
260
-{
261
- int size = ctz32(imm5);
262
- uint32_t dofs, oprsz, maxsz;
263
-
264
- if (size > 3 || ((size == 3) && !is_q)) {
265
- unallocated_encoding(s);
266
- return;
267
- }
268
-
269
- if (!fp_access_check(s)) {
270
- return;
271
- }
272
-
273
- dofs = vec_full_reg_offset(s, rd);
274
- oprsz = is_q ? 16 : 8;
275
- maxsz = vec_full_reg_size(s);
276
-
277
- tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
278
-}
279
-
280
-/* INS (Element)
281
- *
282
- * 31 21 20 16 15 14 11 10 9 5 4 0
283
- * +-----------------------+--------+------------+---+------+------+
284
- * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
285
- * +-----------------------+--------+------------+---+------+------+
286
- *
287
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
288
- * index: encoded in imm5<4:size+1>
289
- */
290
-static void handle_simd_inse(DisasContext *s, int rd, int rn,
291
- int imm4, int imm5)
292
-{
293
- int size = ctz32(imm5);
294
- int src_index, dst_index;
295
- TCGv_i64 tmp;
296
-
297
- if (size > 3) {
298
- unallocated_encoding(s);
299
- return;
300
- }
301
-
302
- if (!fp_access_check(s)) {
303
- return;
304
- }
305
-
306
- dst_index = extract32(imm5, 1+size, 5);
307
- src_index = extract32(imm4, size, 4);
308
-
309
- tmp = tcg_temp_new_i64();
310
-
311
- read_vec_element(s, tmp, rn, src_index, size);
312
- write_vec_element(s, tmp, rd, dst_index, size);
313
-
314
- /* INS is considered a 128-bit write for SVE. */
315
- clear_vec_high(s, true, rd);
316
-}
317
-
318
-
319
-/* INS (General)
320
- *
321
- * 31 21 20 16 15 10 9 5 4 0
322
- * +-----------------------+--------+-------------+------+------+
323
- * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
324
- * +-----------------------+--------+-------------+------+------+
325
- *
326
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
327
- * index: encoded in imm5<4:size+1>
328
- */
329
-static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
330
-{
331
- int size = ctz32(imm5);
332
- int idx;
333
-
334
- if (size > 3) {
335
- unallocated_encoding(s);
336
- return;
337
- }
338
-
339
- if (!fp_access_check(s)) {
340
- return;
341
- }
342
-
343
- idx = extract32(imm5, 1 + size, 4 - size);
344
- write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
345
-
346
- /* INS is considered a 128-bit write for SVE. */
347
- clear_vec_high(s, true, rd);
348
-}
349
-
350
-/*
351
- * UMOV (General)
352
- * SMOV (General)
353
- *
354
- * 31 30 29 21 20 16 15 12 10 9 5 4 0
355
- * +---+---+-------------------+--------+-------------+------+------+
356
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
357
- * +---+---+-------------------+--------+-------------+------+------+
358
- *
359
- * U: unsigned when set
360
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
361
- */
362
-static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
363
- int rn, int rd, int imm5)
364
-{
365
- int size = ctz32(imm5);
366
- int element;
367
- TCGv_i64 tcg_rd;
368
-
369
- /* Check for UnallocatedEncodings */
370
- if (is_signed) {
371
- if (size > 2 || (size == 2 && !is_q)) {
372
- unallocated_encoding(s);
373
- return;
374
- }
375
- } else {
376
- if (size > 3
377
- || (size < 3 && is_q)
378
- || (size == 3 && !is_q)) {
379
- unallocated_encoding(s);
380
- return;
381
- }
382
- }
383
-
384
- if (!fp_access_check(s)) {
385
- return;
386
- }
387
-
388
- element = extract32(imm5, 1+size, 4);
389
-
390
- tcg_rd = cpu_reg(s, rd);
391
- read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
392
- if (is_signed && !is_q) {
393
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
394
- }
395
-}
396
-
397
-/* AdvSIMD copy
398
- * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
399
- * +---+---+----+-----------------+------+---+------+---+------+------+
400
- * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
401
- * +---+---+----+-----------------+------+---+------+---+------+------+
402
- */
403
-static void disas_simd_copy(DisasContext *s, uint32_t insn)
404
-{
405
- int rd = extract32(insn, 0, 5);
406
- int rn = extract32(insn, 5, 5);
407
- int imm4 = extract32(insn, 11, 4);
408
- int op = extract32(insn, 29, 1);
409
- int is_q = extract32(insn, 30, 1);
410
- int imm5 = extract32(insn, 16, 5);
411
-
412
- if (op) {
413
- if (is_q) {
414
- /* INS (element) */
415
- handle_simd_inse(s, rd, rn, imm4, imm5);
416
- } else {
417
- unallocated_encoding(s);
418
- }
419
- } else {
420
- switch (imm4) {
421
- case 0:
422
- /* DUP (element - vector) */
423
- handle_simd_dupe(s, is_q, rd, rn, imm5);
424
- break;
425
- case 1:
426
- /* DUP (general) */
427
- handle_simd_dupg(s, is_q, rd, rn, imm5);
428
- break;
429
- case 3:
430
- if (is_q) {
431
- /* INS (general) */
432
- handle_simd_insg(s, rd, rn, imm5);
433
- } else {
434
- unallocated_encoding(s);
435
- }
436
- break;
437
- case 5:
438
- case 7:
439
- /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
440
- handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
441
- break;
442
- default:
443
- unallocated_encoding(s);
444
- break;
445
- }
446
- }
447
-}
448
-
449
/* AdvSIMD modified immediate
450
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
451
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
452
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
453
}
454
}
455
456
-/* AdvSIMD scalar copy
457
- * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
458
- * +-----+----+-----------------+------+---+------+---+------+------+
459
- * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
460
- * +-----+----+-----------------+------+---+------+---+------+------+
461
- */
462
-static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
463
-{
464
- int rd = extract32(insn, 0, 5);
465
- int rn = extract32(insn, 5, 5);
466
- int imm4 = extract32(insn, 11, 4);
467
- int imm5 = extract32(insn, 16, 5);
468
- int op = extract32(insn, 29, 1);
469
-
470
- if (op != 0 || imm4 != 0) {
471
- unallocated_encoding(s);
472
- return;
473
- }
474
-
475
- /* DUP (element, scalar) */
476
- handle_simd_dupes(s, rd, rn, imm5);
477
-}
478
-
479
/* AdvSIMD scalar pairwise
480
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
481
* +-----+---+-----------+------+-----------+--------+-----+------+------+
482
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
483
{ 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
484
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
485
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
486
- { 0x0e000400, 0x9fe08400, disas_simd_copy },
487
{ 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
488
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
489
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
490
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
491
{ 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
492
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
493
{ 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
494
- { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
495
{ 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
496
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
497
{ 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
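One pattern recurs throughout this conversion and is worth a sketch: the
imm5 field of the copy insns encodes both element size and index via the
position of the lowest set bit (the Arm ARM's LowestSetBit()). A
standalone model of decode_esz_idx() from the patch (illustrative names):

    /* E.g. imm5=0b01010 -> esz=1 (16-bit element), idx=2. */
    static bool decode_imm5(unsigned imm5, unsigned *esz, unsigned *idx)
    {
        if (imm5 == 0) {
            return false;            /* no set bit: unallocated */
        }
        *esz = __builtin_ctz(imm5);  /* 0=8-bit ... 3=64-bit */
        if (*esz > 3) {
            return false;            /* imm5=0b10000: unallocated */
        }
        *idx = imm5 >> (*esz + 1);
        return true;
    }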
--
2.20.1

--
2.34.1
From: Richard Henderson <richard.henderson@linaro.org>

Convert all forms (scalar, vector, scalar indexed, vector indexed),
which allows us to remove switch table entries elsewhere.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-19-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/helper-a64.h    |   8 ++
 target/arm/tcg/a64.decode      |  45 +++++++
 target/arm/tcg/translate-a64.c | 221 +++++++++++++++++++++++++++------
 target/arm/tcg/vec_helper.c    |  39 +++---
 4 files changed, 259 insertions(+), 54 deletions(-)

diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h

The MVE extension to v8.1M includes some new shift instructions which
sit entirely within the non-coprocessor part of the encoding space
and which operate only on general-purpose registers. They take up
the space which was previously UNPREDICTABLE MOVS and ORRS encodings
with Rm == 13 or 15.

Implement the long shifts by immediate, which perform shifts on a
pair of general-purpose registers treated as a 64-bit quantity, with
an immediate shift count between 1 and 32.

Awkwardly, because the MOVS and ORRS trans functions do not UNDEF for
the Rm==13,15 case, we need to explicitly emit code to UNDEF for the
cases where v8.1M now requires that. (Trying to change MOVS and ORRS
is too difficult, because the functions that generate the code are
shared between a dozen different kinds of arithmetic or logical
instruction for all A32, T16 and T32 encodings, and for some insns
and some encodings Rm==13,15 are valid.)

We make the helper functions we need for UQSHLL and SQSHLL take
a 32-bit value which the helper casts to int8_t, because we'll need
these helpers also for the shift-by-register insns, where the shift
count might be < 0 or > 32.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-16-peter.maydell@linaro.org
---
 target/arm/helper-mve.h |  3 ++
 target/arm/translate.h  |  1 +
 target/arm/t32.decode   | 28 +++++++++++++
 target/arm/mve_helper.c | 10 +++++
 target/arm/translate.c  | 90 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)
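Before the diffs, a plain-C model of the long-shift semantics the trans
functions below implement (illustrative only: the real code emits TCG
ops, and the saturating forms go through the new mve_sqshll and
mve_uqshll helpers):

    /* RdaLo/RdaHi are treated as one 64-bit quantity. */
    static uint64_t lsll_model(uint32_t rdalo, uint32_t rdahi, int shim)
    {
        uint64_t rda = ((uint64_t)rdahi << 32) | rdalo;
        if (shim == 0) {
            shim = 32;      /* the immediate field encodes 32 as 0 */
        }
        return rda << shim; /* LSLL; LSRL/ASRL shift right instead */
    }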
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
36
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/helper-mve.h
19
--- a/target/arm/tcg/helper-a64.h
38
+++ b/target/arm/helper-mve.h
20
+++ b/target/arm/tcg/helper-a64.h
39
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(cpye, void, env, i32, i32, i32)
40
DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
41
23
DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
42
DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
24
DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
43
+
25
+
44
+DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
26
+DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
45
+DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
27
+DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
46
diff --git a/target/arm/translate.h b/target/arm/translate.h
28
+DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
29
+
30
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
33
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
47
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate.h
35
--- a/target/arm/tcg/a64.decode
49
+++ b/target/arm/translate.h
36
+++ b/target/arm/tcg/a64.decode
50
@@ -XXX,XX +XXX,XX @@ typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
37
@@ -XXX,XX +XXX,XX @@
51
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
38
#
52
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
39
53
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
40
%rd 0:5
54
+typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
41
+%esz_sd 22:1 !function=plus_2
55
42
+%hl 11:1 21:1
56
/**
43
+%hlm 11:1 20:2
57
* arm_tbflags_from_tb:
44
58
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
45
&r rn
46
&ri rd imm
47
&rri_sf rd rn imm sf
48
&i imm
49
+&rrr_e rd rn rm esz
50
+&rrx_e rd rn rm idx esz
51
&qrr_e q rd rn esz
52
&qrrr_e q rd rn rm esz
53
+&qrrx_e q rd rn rm idx esz
54
&qrrrr_e q rd rn rm ra esz
55
56
+@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
57
+@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
58
+
59
+@rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm
60
+@rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl
61
+@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
62
+
63
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
64
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
65
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
66
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
67
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
68
69
+@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
70
+@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd
71
+
72
+@qrrx_h . q:1 .. .... .. .. rm:4 .... . . rn:5 rd:5 \
73
+ &qrrx_e esz=1 idx=%hlm
74
+@qrrx_s . q:1 .. .... .. . rm:5 .... . . rn:5 rd:5 \
75
+ &qrrx_e esz=2 idx=%hl
76
+@qrrx_d . q:1 .. .... .. . rm:5 .... idx:1 . rn:5 rd:5 \
77
+ &qrrx_e esz=3
78
+
79
### Data Processing - Immediate
80
81
# PC-rel addressing
82
@@ -XXX,XX +XXX,XX @@ INS_general 0 1 00 1110 000 imm:5 0 0011 1 rn:5 rd:5
83
SMOV 0 q:1 00 1110 000 imm:5 0 0101 1 rn:5 rd:5
84
UMOV 0 q:1 00 1110 000 imm:5 0 0111 1 rn:5 rd:5
85
INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5
86
+
87
+### Advanced SIMD scalar three same
88
+
89
+FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h
90
+FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd
91
+
92
+### Advanced SIMD three same
93
+
94
+FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h
95
+FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd
96
+
97
+### Advanced SIMD scalar x indexed element
98
+
99
+FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
100
+FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
101
+FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
102
+
103
+### Advanced SIMD vector x indexed element
104
+
105
+FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
106
+FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
107
+FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
108
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
109
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/t32.decode
110
--- a/target/arm/tcg/translate-a64.c
61
+++ b/target/arm/t32.decode
111
+++ b/target/arm/tcg/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@
112
@@ -XXX,XX +XXX,XX @@ static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
63
&mcr !extern cp opc1 crn crm opc2 rt
64
&mcrr !extern cp opc1 crm rt rt2
65
66
+&mve_shl_ri rdalo rdahi shim
67
+
68
+# rdahi: bits [3:1] from insn, bit 0 is 1
69
+# rdalo: bits [3:1] from insn, bit 0 is 0
70
+%rdahi_9 9:3 !function=times_2_plus_1
71
+%rdalo_17 17:3 !function=times_2
72
+
73
# Data-processing (register)
74
75
%imm5_12_6 12:3 6:2
76
@@ -XXX,XX +XXX,XX @@
77
@S_xrr_shi ....... .... . rn:4 .... .... .. shty:2 rm:4 \
78
&s_rrr_shi shim=%imm5_12_6 s=1 rd=0
79
80
+@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \
81
+ &mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
82
+
83
{
84
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
85
AND_rrri 1110101 0000 . .... 0 ... .... .... .... @s_rrr_shi
86
}
87
BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
88
{
89
+ # The v8.1M MVE shift insns overlap in encoding with MOVS/ORRS
90
+ # and are distinguished by having Rm==13 or 15. Those are UNPREDICTABLE
91
+ # cases for MOVS/ORRS. We decode the MVE cases first, ensuring that
92
+ # they explicitly call unallocated_encoding() for cases that must UNDEF
93
+ # (eg "using a new shift insn on a v8.1M CPU without MVE"), and letting
94
+ # the rest fall through (where ORR_rrri and MOV_rxri will end up
95
+ # handling them as r13 and r15 accesses with the same semantics as A32).
96
+ [
97
+ LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
98
+ LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
99
+ ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
100
+
101
+ UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
102
+ URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
103
+ SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
104
+ SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
105
+ ]
106
+
107
MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi
108
ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi
109
}
110
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/target/arm/mve_helper.c
113
+++ b/target/arm/mve_helper.c
114
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
115
mve_advance_vpt(env);
116
return rdm;
117
}
118
+
119
+uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
120
+{
121
+ return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
122
+}
123
+
124
+uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
125
+{
126
+ return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
127
+}
128
diff --git a/target/arm/translate.c b/target/arm/translate.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate.c
131
+++ b/target/arm/translate.c
132
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
133
return true;
113
return true;
134
}
114
}
135
115
136
+/*
116
+/*
137
+ * v8.1M MVE wide-shifts
117
+ * Advanced SIMD three same
138
+ */
118
+ */
139
+static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
119
+
140
+ WideShiftImmFn *fn)
120
+typedef struct FPScalar {
121
+ void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
122
+ void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
123
+ void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
124
+} FPScalar;
125
+
126
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
141
+{
127
+{
142
+ TCGv_i64 rda;
128
+ switch (a->esz) {
143
+ TCGv_i32 rdalo, rdahi;
129
+ case MO_64:
144
+
130
+ if (fp_access_check(s)) {
145
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
131
+ TCGv_i64 t0 = read_fp_dreg(s, a->rn);
146
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
132
+ TCGv_i64 t1 = read_fp_dreg(s, a->rm);
133
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
134
+ write_fp_dreg(s, a->rd, t0);
135
+ }
136
+ break;
137
+ case MO_32:
138
+ if (fp_access_check(s)) {
139
+ TCGv_i32 t0 = read_fp_sreg(s, a->rn);
140
+ TCGv_i32 t1 = read_fp_sreg(s, a->rm);
141
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
142
+ write_fp_sreg(s, a->rd, t0);
143
+ }
144
+ break;
145
+ case MO_16:
146
+ if (!dc_isar_feature(aa64_fp16, s)) {
147
+ return false;
148
+ }
149
+ if (fp_access_check(s)) {
150
+ TCGv_i32 t0 = read_fp_hreg(s, a->rn);
151
+ TCGv_i32 t1 = read_fp_hreg(s, a->rm);
152
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
153
+ write_fp_sreg(s, a->rd, t0);
154
+ }
155
+ break;
156
+ default:
147
+ return false;
157
+ return false;
148
+ }
158
+ }
149
+ if (a->rdahi == 15) {
159
+ return true;
150
+ /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
160
+}
161
+
162
+static const FPScalar f_scalar_fmulx = {
163
+ gen_helper_advsimd_mulxh,
164
+ gen_helper_vfp_mulxs,
165
+ gen_helper_vfp_mulxd,
166
+};
167
+TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
168
+
169
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
170
+ gen_helper_gvec_3_ptr * const fns[3])
171
+{
172
+ MemOp esz = a->esz;
173
+
174
+ switch (esz) {
175
+ case MO_64:
176
+ if (!a->q) {
177
+ return false;
178
+ }
179
+ break;
180
+ case MO_32:
181
+ break;
182
+ case MO_16:
183
+ if (!dc_isar_feature(aa64_fp16, s)) {
184
+ return false;
185
+ }
186
+ break;
187
+ default:
151
+ return false;
188
+ return false;
152
+ }
189
+ }
153
+ if (!dc_isar_feature(aa32_mve, s) ||
190
+ if (fp_access_check(s)) {
154
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
191
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
155
+ a->rdahi == 13) {
192
+ esz == MO_16, 0, fns[esz - 1]);
156
+ /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
157
+ unallocated_encoding(s);
158
+ return true;
159
+ }
193
+ }
160
+
161
+ if (a->shim == 0) {
162
+ a->shim = 32;
163
+ }
164
+
165
+ rda = tcg_temp_new_i64();
166
+ rdalo = load_reg(s, a->rdalo);
167
+ rdahi = load_reg(s, a->rdahi);
168
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
169
+
170
+ fn(rda, rda, a->shim);
171
+
172
+ tcg_gen_extrl_i64_i32(rdalo, rda);
173
+ tcg_gen_extrh_i64_i32(rdahi, rda);
174
+ store_reg(s, a->rdalo, rdalo);
175
+ store_reg(s, a->rdahi, rdahi);
176
+ tcg_temp_free_i64(rda);
177
+
178
+ return true;
194
+ return true;
179
+}
195
+}
180
+
196
+
181
+static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
197
+static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
198
+ gen_helper_gvec_fmulx_h,
199
+ gen_helper_gvec_fmulx_s,
200
+ gen_helper_gvec_fmulx_d,
201
+};
202
+TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx)
203
+
204
+/*
205
+ * Advanced SIMD scalar/vector x indexed element
206
+ */
207
+
208
+static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
182
+{
209
+{
183
+ return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
210
+ switch (a->esz) {
211
+ case MO_64:
212
+ if (fp_access_check(s)) {
213
+ TCGv_i64 t0 = read_fp_dreg(s, a->rn);
214
+ TCGv_i64 t1 = tcg_temp_new_i64();
215
+
216
+ read_vec_element(s, t1, a->rm, a->idx, MO_64);
217
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
218
+ write_fp_dreg(s, a->rd, t0);
219
+ }
220
+ break;
221
+ case MO_32:
222
+ if (fp_access_check(s)) {
223
+ TCGv_i32 t0 = read_fp_sreg(s, a->rn);
224
+ TCGv_i32 t1 = tcg_temp_new_i32();
225
+
226
+ read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
227
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
228
+ write_fp_sreg(s, a->rd, t0);
229
+ }
230
+ break;
231
+ case MO_16:
232
+ if (!dc_isar_feature(aa64_fp16, s)) {
233
+ return false;
234
+ }
235
+ if (fp_access_check(s)) {
236
+ TCGv_i32 t0 = read_fp_hreg(s, a->rn);
237
+ TCGv_i32 t1 = tcg_temp_new_i32();
238
+
239
+ read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
240
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
241
+ write_fp_sreg(s, a->rd, t0);
242
+ }
243
+ break;
244
+ default:
245
+ g_assert_not_reached();
246
+ }
247
+ return true;
184
+}
248
+}
185
+
249
+
186
+static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
250
+TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
251
+
252
+static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
253
+ gen_helper_gvec_3_ptr * const fns[3])
187
+{
254
+{
188
+ return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
255
+ MemOp esz = a->esz;
256
+
257
+ switch (esz) {
258
+ case MO_64:
259
+ if (!a->q) {
260
+ return false;
261
+ }
262
+ break;
263
+ case MO_32:
264
+ break;
265
+ case MO_16:
266
+ if (!dc_isar_feature(aa64_fp16, s)) {
267
+ return false;
268
+ }
269
+ break;
270
+ default:
271
+ g_assert_not_reached();
272
+ }
273
+ if (fp_access_check(s)) {
274
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
275
+ esz == MO_16, a->idx, fns[esz - 1]);
276
+ }
277
+ return true;
189
+}
278
+}
190
+
279
+
191
+static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
280
+static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
192
+{
281
+ gen_helper_gvec_fmulx_idx_h,
193
+ return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
282
+ gen_helper_gvec_fmulx_idx_s,
194
+}
283
+ gen_helper_gvec_fmulx_idx_d,
195
+
284
+};
196
+static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
285
+TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
197
+{
286
+
198
+ gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
287
+
199
+}
288
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
200
+
289
* Note that it is the caller's responsibility to ensure that the
201
+static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
290
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
202
+{
291
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
203
+ return do_mve_shl_ri(s, a, gen_mve_sqshll);
292
case 0x1a: /* FADD */
204
+}
293
gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
205
+
294
break;
206
+static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
295
- case 0x1b: /* FMULX */
207
+{
296
- gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
208
+ gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
297
- break;
209
+}
298
case 0x1c: /* FCMEQ */
210
+
299
gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
211
+static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
300
break;
212
+{
301
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
213
+ return do_mve_shl_ri(s, a, gen_mve_uqshll);
302
gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
214
+}
303
break;
215
+
304
default:
216
+static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
305
+ case 0x1b: /* FMULX */
217
+{
306
g_assert_not_reached();
218
+ return do_mve_shl_ri(s, a, gen_srshr64_i64);
307
}
219
+}
308
220
+
309
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
221
+static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
310
case 0x1a: /* FADD */
222
+{
311
gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
223
+ return do_mve_shl_ri(s, a, gen_urshr64_i64);
312
break;
224
+}
313
- case 0x1b: /* FMULX */
225
+
314
- gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
315
- break;
316
case 0x1c: /* FCMEQ */
317
gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
318
break;
319
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
320
gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
321
break;
322
default:
323
+ case 0x1b: /* FMULX */
324
g_assert_not_reached();
325
}
326
327
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
328
/* Floating point: U, size[1] and opcode indicate operation */
329
int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
330
switch (fpopcode) {
331
- case 0x1b: /* FMULX */
332
case 0x1f: /* FRECPS */
333
case 0x3f: /* FRSQRTS */
334
case 0x5d: /* FACGE */
335
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
336
case 0x7a: /* FABD */
337
break;
338
default:
339
+ case 0x1b: /* FMULX */
340
unallocated_encoding(s);
341
return;
342
}
343
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
344
TCGv_i32 tcg_res;
345
346
switch (fpopcode) {
347
- case 0x03: /* FMULX */
348
case 0x04: /* FCMEQ (reg) */
349
case 0x07: /* FRECPS */
350
case 0x0f: /* FRSQRTS */
351
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
352
case 0x1d: /* FACGT */
353
break;
354
default:
355
+ case 0x03: /* FMULX */
356
unallocated_encoding(s);
357
return;
358
}
359
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
360
tcg_res = tcg_temp_new_i32();
361
362
switch (fpopcode) {
363
- case 0x03: /* FMULX */
364
- gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
365
- break;
366
case 0x04: /* FCMEQ (reg) */
367
gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
368
break;
369
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
370
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
371
break;
372
default:
373
+ case 0x03: /* FMULX */
374
g_assert_not_reached();
375
}
376
377
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
378
handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
379
rn, rm, rd);
380
return;
381
- case 0x1b: /* FMULX */
382
case 0x1f: /* FRECPS */
383
case 0x3f: /* FRSQRTS */
384
case 0x5d: /* FACGE */
385
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
386
return;
387
388
default:
389
+ case 0x1b: /* FMULX */
390
unallocated_encoding(s);
391
return;
392
}
393
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
394
case 0x0: /* FMAXNM */
395
case 0x1: /* FMLA */
396
case 0x2: /* FADD */
397
- case 0x3: /* FMULX */
398
case 0x4: /* FCMEQ */
399
case 0x6: /* FMAX */
400
case 0x7: /* FRECPS */
401
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
402
pairwise = true;
403
break;
404
default:
405
+ case 0x3: /* FMULX */
406
unallocated_encoding(s);
407
return;
408
}
409
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
410
case 0x2: /* FADD */
411
gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
412
break;
413
- case 0x3: /* FMULX */
414
- gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
415
- break;
416
case 0x4: /* FCMEQ */
417
gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
418
break;
419
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
420
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
421
break;
422
default:
423
+ case 0x3: /* FMULX */
424
g_assert_not_reached();
425
}
426
427
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
428
case 0x01: /* FMLA */
429
case 0x05: /* FMLS */
430
case 0x09: /* FMUL */
431
- case 0x19: /* FMULX */
432
is_fp = 1;
433
break;
434
case 0x1d: /* SQRDMLAH */
435
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
436
/* is_fp, but we pass tcg_env not fp_status. */
437
break;
438
default:
439
+ case 0x19: /* FMULX */
440
unallocated_encoding(s);
441
return;
442
}
443
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
444
case 0x09: /* FMUL */
445
gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
446
break;
447
- case 0x19: /* FMULX */
448
- gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
449
- break;
450
default:
451
+ case 0x19: /* FMULX */
452
g_assert_not_reached();
453
}
454
455
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
456
g_assert_not_reached();
457
}
458
break;
459
- case 0x19: /* FMULX */
460
- switch (size) {
461
- case 1:
462
- if (is_scalar) {
463
- gen_helper_advsimd_mulxh(tcg_res, tcg_op,
464
- tcg_idx, fpst);
465
- } else {
466
- gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
467
- tcg_idx, fpst);
468
- }
469
- break;
470
- case 2:
471
- gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
472
- break;
473
- default:
474
- g_assert_not_reached();
475
- }
476
- break;
477
case 0x0c: /* SQDMULH */
478
if (size == 1) {
479
gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
480
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
481
}
482
break;
483
default:
484
+ case 0x19: /* FMULX */
485
g_assert_not_reached();
486
}
487
488
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
489
index XXXXXXX..XXXXXXX 100644
490
--- a/target/arm/tcg/vec_helper.c
491
+++ b/target/arm/tcg/vec_helper.c
492
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
493
DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
494
495
#ifdef TARGET_AARCH64
496
+DO_3OP(gvec_fmulx_h, helper_advsimd_mulxh, float16)
497
+DO_3OP(gvec_fmulx_s, helper_vfp_mulxs, float32)
498
+DO_3OP(gvec_fmulx_d, helper_vfp_mulxd, float64)
499
500
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
501
DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
502
@@ -XXX,XX +XXX,XX @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
503
504
#undef DO_MLA_IDX
505
506
-#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
507
+#define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \
508
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
509
{ \
510
intptr_t i, j, oprsz = simd_oprsz(desc); \
511
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
512
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
513
TYPE mm = m[H(i + idx)]; \
514
for (j = 0; j < segment; j++) { \
515
- d[i + j] = TYPE##_##ADD(d[i + j], \
516
- TYPE##_mul(n[i + j], mm, stat), stat); \
517
+ d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \
518
} \
519
} \
520
clear_tail(d, oprsz, simd_maxsz(desc)); \
521
}
522
523
-#define float16_nop(N, M, S) (M)
524
-#define float32_nop(N, M, S) (M)
525
-#define float64_nop(N, M, S) (M)
526
+#define nop(N, M, S) (M)
527
528
-DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
529
-DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
530
-DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8)
531
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2)
532
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4)
533
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8)
534
+
535
+#ifdef TARGET_AARCH64
536
+
537
+DO_FMUL_IDX(gvec_fmulx_idx_h, nop, helper_advsimd_mulxh, float16, H2)
538
+DO_FMUL_IDX(gvec_fmulx_idx_s, nop, helper_vfp_mulxs, float32, H4)
539
+DO_FMUL_IDX(gvec_fmulx_idx_d, nop, helper_vfp_mulxd, float64, H8)
540
+
541
+#endif
542
+
543
+#undef nop
544
226
/*
545
/*
227
* Multiply and multiply accumulate
546
* Non-fused multiply-accumulate operations, for Neon. NB that unlike
547
* the fused ops below they assume accumulate both from and into Vd.
228
*/
548
*/
549
-DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
550
-DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
551
-DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
552
-DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
553
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, float16_add, float16_mul, float16, H2)
554
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, float32_add, float32_mul, float32, H4)
555
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, float16_sub, float16_mul, float16, H2)
556
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
557
558
-#undef float16_nop
559
-#undef float32_nop
560
-#undef float64_nop
561
#undef DO_FMUL_IDX
562
563
#define DO_FMLA_IDX(NAME, TYPE, H) \
--
2.20.1

--
2.34.1
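A note on the FMULX conversion in the patch above: FMULX behaves like
FMUL except that (+/-0 x +/-Inf) returns +/-2.0 instead of the default
NaN, which is what makes it useful in reciprocal and reciprocal-sqrt
step sequences. A simplified scalar model (it ignores NaN propagation
and FPCR modes; the real work happens in the softfloat mulx helpers):

    #include <math.h>

    static double fmulx_model(double a, double b)
    {
        if ((a == 0.0 && isinf(b)) || (isinf(a) && b == 0.0)) {
            return (!!signbit(a) != !!signbit(b)) ? -2.0 : 2.0;
        }
        return a * b;
    }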
New patch
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-20-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/helper-a64.h    |   4 +
 target/arm/tcg/translate.h     |   5 +
 target/arm/tcg/a64.decode      |  27 +++++
 target/arm/tcg/translate-a64.c | 205 +++++++++++++++++----------------
 target/arm/tcg/vec_helper.c    |   4 +
 5 files changed, 143 insertions(+), 102 deletions(-)
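One small trick in this patch is worth spelling out before the diffs:
the new %esz_hsd field uses !function=xor_2 because the two-bit FP
"type" field encodes 00=single, 01=double, 11=half, and XOR with 2 maps
that straight onto the MemOp sizes. A sketch of the mapping (not code
from the patch):

    static int fp_type_to_memop(int type)
    {
        /* 0 -> 2 (MO_32), 1 -> 3 (MO_64), 3 -> 1 (MO_16);
         * 2 -> 0 (MO_8), which the trans functions then reject
         * via their default case. */
        return type ^ 2;
    }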
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/tcg/helper-a64.h
18
+++ b/target/arm/tcg/helper-a64.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
20
DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
21
DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
22
23
+DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
+
27
DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
28
DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/tcg/translate.h
33
+++ b/target/arm/tcg/translate.h
34
@@ -XXX,XX +XXX,XX @@ static inline int shl_12(DisasContext *s, int x)
35
return x << 12;
36
}
37
38
+static inline int xor_2(DisasContext *s, int x)
39
+{
40
+ return x ^ 2;
41
+}
42
+
43
static inline int neon_3same_fp_size(DisasContext *s, int x)
44
{
45
/* Convert 0==fp32, 1==fp16 into a MO_* value */
46
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/a64.decode
49
+++ b/target/arm/tcg/a64.decode
50
@@ -XXX,XX +XXX,XX @@
51
52
%rd 0:5
53
%esz_sd 22:1 !function=plus_2
54
+%esz_hsd 22:2 !function=xor_2
55
%hl 11:1 21:1
56
%hlm 11:1 20:2
57
58
@@ -XXX,XX +XXX,XX @@
59
60
@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
61
@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
62
+@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
63
64
@rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm
65
@rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl
66
@@ -XXX,XX +XXX,XX @@ INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5
67
68
### Advanced SIMD scalar three same
69
70
+FADD_s 0001 1110 ..1 ..... 0010 10 ..... ..... @rrr_hsd
71
+FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd
72
+FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd
73
+FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd
74
+
75
FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h
76
FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd
77
78
### Advanced SIMD three same
79
80
+FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
81
+FADD_v 0.00 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd
82
+
83
+FSUB_v 0.00 1110 110 ..... 00010 1 ..... ..... @qrrr_h
84
+FSUB_v 0.00 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
85
+
86
+FDIV_v 0.10 1110 010 ..... 00111 1 ..... ..... @qrrr_h
87
+FDIV_v 0.10 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
88
+
89
+FMUL_v 0.10 1110 010 ..... 00011 1 ..... ..... @qrrr_h
90
+FMUL_v 0.10 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd
91
+
92
FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h
93
FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd
94
95
### Advanced SIMD scalar x indexed element
96
97
+FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
98
+FMUL_si 0101 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
99
+FMUL_si 0101 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
100
+
101
FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
102
FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
103
FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
104
105
### Advanced SIMD vector x indexed element
106
107
+FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
108
+FMUL_vi 0.00 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
109
+FMUL_vi 0.00 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
110
+
111
FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
112
FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
113
FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
114
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/target/arm/tcg/translate-a64.c
117
+++ b/target/arm/tcg/translate-a64.c
118
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
119
return true;
120
}
121
122
+static const FPScalar f_scalar_fadd = {
123
+ gen_helper_vfp_addh,
124
+ gen_helper_vfp_adds,
125
+ gen_helper_vfp_addd,
126
+};
127
+TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
128
+
129
+static const FPScalar f_scalar_fsub = {
130
+ gen_helper_vfp_subh,
131
+ gen_helper_vfp_subs,
132
+ gen_helper_vfp_subd,
133
+};
134
+TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
135
+
136
+static const FPScalar f_scalar_fdiv = {
137
+ gen_helper_vfp_divh,
138
+ gen_helper_vfp_divs,
139
+ gen_helper_vfp_divd,
140
+};
141
+TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
142
+
143
+static const FPScalar f_scalar_fmul = {
144
+ gen_helper_vfp_mulh,
145
+ gen_helper_vfp_muls,
146
+ gen_helper_vfp_muld,
147
+};
148
+TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
149
+
150
static const FPScalar f_scalar_fmulx = {
151
gen_helper_advsimd_mulxh,
152
gen_helper_vfp_mulxs,
153
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
154
return true;
155
}
156
157
+static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
158
+ gen_helper_gvec_fadd_h,
159
+ gen_helper_gvec_fadd_s,
160
+ gen_helper_gvec_fadd_d,
161
+};
162
+TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd)
163
+
164
+static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
165
+ gen_helper_gvec_fsub_h,
166
+ gen_helper_gvec_fsub_s,
167
+ gen_helper_gvec_fsub_d,
168
+};
169
+TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub)
170
+
171
+static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
172
+ gen_helper_gvec_fdiv_h,
173
+ gen_helper_gvec_fdiv_s,
174
+ gen_helper_gvec_fdiv_d,
175
+};
176
+TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv)
177
+
178
+static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
179
+ gen_helper_gvec_fmul_h,
180
+ gen_helper_gvec_fmul_s,
181
+ gen_helper_gvec_fmul_d,
182
+};
183
+TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul)
184
+
185
static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
186
gen_helper_gvec_fmulx_h,
187
gen_helper_gvec_fmulx_s,
188
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
189
return true;
190
}
191
192
+TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
193
TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
194
195
static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
196
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
197
return true;
198
}
199
+static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
+ gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_s,
+ gen_helper_gvec_fmul_idx_d,
+};
+TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
+
static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
gen_helper_gvec_fmulx_idx_h,
gen_helper_gvec_fmulx_idx_s,
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
tcg_op2 = read_fp_sreg(s, rm);

switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x4: /* FMAX */
gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_negs(tcg_res, tcg_res);
break;
+ default:
+ case 0x0: /* FMUL */
+ case 0x1: /* FDIV */
+ case 0x2: /* FADD */
+ case 0x3: /* FSUB */
+ g_assert_not_reached();
}

write_fp_sreg(s, rd, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
tcg_op2 = read_fp_dreg(s, rm);

switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x4: /* FMAX */
gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_negd(tcg_res, tcg_res);
break;
+ default:
+ case 0x0: /* FMUL */
+ case 0x1: /* FDIV */
+ case 0x2: /* FADD */
+ case 0x3: /* FSUB */
+ g_assert_not_reached();
}

write_fp_dreg(s, rd, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
tcg_op2 = read_fp_hreg(s, rm);

switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x4: /* FMAX */
gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
break;
default:
+ case 0x0: /* FMUL */
+ case 0x1: /* FDIV */
+ case 0x2: /* FADD */
+ case 0x3: /* FSUB */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x18: /* FMAXNM */
gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1a: /* FADD */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x38: /* FMINNM */
gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x3a: /* FSUB */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x3e: /* FMIN */
gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x5b: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x5c: /* FCMGE */
gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5d: /* FACGE */
gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x5f: /* FDIV */
- gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x7a: /* FABD */
gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_absd(tcg_res, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x3a: /* FSUB */
+ case 0x5b: /* FMUL */
+ case 0x5f: /* FDIV */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
tcg_res, fpst);
break;
- case 0x1a: /* FADD */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x38: /* FMINNM */
gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x3a: /* FSUB */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x3e: /* FMIN */
gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x5b: /* FMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x5c: /* FCMGE */
gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5d: /* FACGE */
gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x5f: /* FDIV */
- gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x7a: /* FABD */
gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_abss(tcg_res, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x3a: /* FSUB */
+ case 0x5b: /* FMUL */
+ case 0x5f: /* FDIV */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x19: /* FMLA */
case 0x39: /* FMLS */
case 0x18: /* FMAXNM */
- case 0x1a: /* FADD */
case 0x1c: /* FCMEQ */
case 0x1e: /* FMAX */
case 0x38: /* FMINNM */
- case 0x3a: /* FSUB */
case 0x3e: /* FMIN */
- case 0x5b: /* FMUL */
case 0x5c: /* FCMGE */
- case 0x5f: /* FDIV */
case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
if (!fp_access_check(s)) {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;

default:
+ case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x3a: /* FSUB */
+ case 0x5b: /* FMUL */
+ case 0x5f: /* FDIV */
unallocated_encoding(s);
return;
}
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
switch (fpopcode) {
case 0x0: /* FMAXNM */
case 0x1: /* FMLA */
- case 0x2: /* FADD */
case 0x4: /* FCMEQ */
case 0x6: /* FMAX */
case 0x7: /* FRECPS */
case 0x8: /* FMINNM */
case 0x9: /* FMLS */
- case 0xa: /* FSUB */
case 0xe: /* FMIN */
case 0xf: /* FRSQRTS */
- case 0x13: /* FMUL */
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
- case 0x17: /* FDIV */
case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
pairwise = true;
break;
default:
+ case 0x2: /* FADD */
case 0x3: /* FMULX */
+ case 0xa: /* FSUB */
+ case 0x13: /* FMUL */
+ case 0x17: /* FDIV */
unallocated_encoding(s);
return;
}
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
fpst);
break;
- case 0x2: /* FADD */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x4: /* FCMEQ */
gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
fpst);
break;
- case 0xa: /* FSUB */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0xe: /* FMIN */
gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x13: /* FMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x14: /* FCMGE */
gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x15: /* FACGE */
gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x17: /* FDIV */
- gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1a: /* FABD */
gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x2: /* FADD */
case 0x3: /* FMULX */
+ case 0xa: /* FSUB */
+ case 0x13: /* FMUL */
+ case 0x17: /* FDIV */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
break;
case 0x01: /* FMLA */
case 0x05: /* FMLS */
- case 0x09: /* FMUL */
is_fp = 1;
break;
case 0x1d: /* SQRDMLAH */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
/* is_fp, but we pass tcg_env not fp_status. */
break;
default:
+ case 0x09: /* FMUL */
case 0x19: /* FMULX */
unallocated_encoding(s);
return;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_res, rd, pass, MO_64);
gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
break;
- case 0x09: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
- break;
default:
+ case 0x09: /* FMUL */
case 0x19: /* FMULX */
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
g_assert_not_reached();
}
break;
- case 0x09: /* FMUL */
- switch (size) {
- case 1:
- if (is_scalar) {
- gen_helper_advsimd_mulh(tcg_res, tcg_op,
- tcg_idx, fpst);
- } else {
- gen_helper_advsimd_mul2h(tcg_res, tcg_op,
- tcg_idx, fpst);
- }
- break;
- case 2:
- gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- break;
case 0x0c: /* SQDMULH */
if (size == 1) {
gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
break;
default:
+ case 0x09: /* FMUL */
case 0x19: /* FMULX */
g_assert_not_reached();
}
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)

#ifdef TARGET_AARCH64
+DO_3OP(gvec_fdiv_h, float16_div, float16)
+DO_3OP(gvec_fdiv_s, float32_div, float32)
+DO_3OP(gvec_fdiv_d, float64_div, float64)
+
DO_3OP(gvec_fmulx_h, helper_advsimd_mulxh, float16)
DO_3OP(gvec_fmulx_s, helper_vfp_mulxs, float32)
DO_3OP(gvec_fmulx_d, helper_vfp_mulxd, float64)
--
2.34.1
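
A note on the vec_helper.c idiom used in the patch above: each DO_3OP()
line stamps out a complete gvec helper as a flat element-wise loop over
the vector registers. The following is a rough sketch of what one
expansion looks like; it is not the verbatim macro from
target/arm/tcg/vec_helper.c, and the exact parameter types and
clear_tail() details may differ in the real source:

    /* Approximate expansion of DO_3OP(gvec_fdiv_s, float32_div, float32) */
    void HELPER(gvec_fdiv_s)(void *vd, void *vn, void *vm,
                             void *stat, uint32_t desc)
    {
        intptr_t i, oprsz = simd_oprsz(desc);
        float32 *d = vd, *n = vn, *m = vm;

        /* One scalar softfloat division per active element. */
        for (i = 0; i < oprsz / sizeof(float32); i++) {
            d[i] = float32_div(n[i], m[i], stat);
        }
        /* Zero the destination bytes beyond the operation size. */
        clear_tail(d, oprsz, simd_maxsz(desc));
    }

This is why converting FDIV to decodetree only needs three new DO_3OP()
lines on the helper side: the looping, status-pointer plumbing and tail
clearing are all shared boilerplate.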
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-21-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 4 +
target/arm/tcg/a64.decode | 17 ++++
target/arm/tcg/translate-a64.c | 168 +++++++++++++++++----------------
target/arm/tcg/vec_helper.c | 4 +
4 files changed, 113 insertions(+), 80 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd
FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd
FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd

+FMAX_s 0001 1110 ..1 ..... 0100 10 ..... ..... @rrr_hsd
+FMIN_s 0001 1110 ..1 ..... 0101 10 ..... ..... @rrr_hsd
+FMAXNM_s 0001 1110 ..1 ..... 0110 10 ..... ..... @rrr_hsd
+FMINNM_s 0001 1110 ..1 ..... 0111 10 ..... ..... @rrr_hsd
+
FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h
FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd

@@ -XXX,XX +XXX,XX @@ FDIV_v 0.10 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
FMUL_v 0.10 1110 010 ..... 00011 1 ..... ..... @qrrr_h
FMUL_v 0.10 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd

+FMAX_v 0.00 1110 010 ..... 00110 1 ..... ..... @qrrr_h
+FMAX_v 0.00 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMIN_v 0.00 1110 110 ..... 00110 1 ..... ..... @qrrr_h
+FMIN_v 0.00 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMAXNM_v 0.00 1110 010 ..... 00000 1 ..... ..... @qrrr_h
+FMAXNM_v 0.00 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+FMINNM_v 0.00 1110 110 ..... 00000 1 ..... ..... @qrrr_h
+FMINNM_v 0.00 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
+
FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h
FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmul = {
};
TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)

+static const FPScalar f_scalar_fmax = {
+ gen_helper_advsimd_maxh,
+ gen_helper_vfp_maxs,
+ gen_helper_vfp_maxd,
+};
+TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
+
+static const FPScalar f_scalar_fmin = {
+ gen_helper_advsimd_minh,
+ gen_helper_vfp_mins,
+ gen_helper_vfp_mind,
+};
+TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
+
+static const FPScalar f_scalar_fmaxnm = {
+ gen_helper_advsimd_maxnumh,
+ gen_helper_vfp_maxnums,
+ gen_helper_vfp_maxnumd,
+};
+TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
+
+static const FPScalar f_scalar_fminnm = {
+ gen_helper_advsimd_minnumh,
+ gen_helper_vfp_minnums,
+ gen_helper_vfp_minnumd,
+};
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
+
static const FPScalar f_scalar_fmulx = {
gen_helper_advsimd_mulxh,
gen_helper_vfp_mulxs,
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
};
TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul)

+static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
+ gen_helper_gvec_fmax_h,
+ gen_helper_gvec_fmax_s,
+ gen_helper_gvec_fmax_d,
+};
+TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
+ gen_helper_gvec_fmin_h,
+ gen_helper_gvec_fmin_s,
+ gen_helper_gvec_fmin_d,
+};
+TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
+ gen_helper_gvec_fmaxnum_h,
+ gen_helper_gvec_fmaxnum_s,
+ gen_helper_gvec_fmaxnum_d,
+};
+TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
+ gen_helper_gvec_fminnum_h,
+ gen_helper_gvec_fminnum_s,
+ gen_helper_gvec_fminnum_d,
+};
+TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm)
+
static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
gen_helper_gvec_fmulx_h,
gen_helper_gvec_fmulx_s,
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
tcg_op2 = read_fp_sreg(s, rm);

switch (opcode) {
- case 0x4: /* FMAX */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x8: /* FNMUL */
gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_negs(tcg_res, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
case 0x1: /* FDIV */
case 0x2: /* FADD */
case 0x3: /* FSUB */
+ case 0x4: /* FMAX */
+ case 0x5: /* FMIN */
+ case 0x6: /* FMAXNM */
+ case 0x7: /* FMINNM */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
tcg_op2 = read_fp_dreg(s, rm);

switch (opcode) {
- case 0x4: /* FMAX */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x8: /* FNMUL */
gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_negd(tcg_res, tcg_res);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
case 0x1: /* FDIV */
case 0x2: /* FADD */
case 0x3: /* FSUB */
+ case 0x4: /* FMAX */
+ case 0x5: /* FMIN */
+ case 0x6: /* FMAXNM */
+ case 0x7: /* FMINNM */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
tcg_op2 = read_fp_hreg(s, rm);

switch (opcode) {
- case 0x4: /* FMAX */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x8: /* FNMUL */
gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
case 0x1: /* FDIV */
case 0x2: /* FADD */
case 0x3: /* FSUB */
+ case 0x4: /* FMAX */
+ case 0x5: /* FMIN */
+ case 0x6: /* FMAXNM */
+ case 0x7: /* FMINNM */
g_assert_not_reached();
}

@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
tcg_res, fpst);
break;
- case 0x18: /* FMAXNM */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1e: /* FMAX */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1f: /* FRECPS */
gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x38: /* FMINNM */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3e: /* FMIN */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x18: /* FMAXNM */
case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x1e: /* FMAX */
+ case 0x38: /* FMINNM */
case 0x3a: /* FSUB */
+ case 0x3e: /* FMIN */
case 0x5b: /* FMUL */
case 0x5f: /* FDIV */
g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1e: /* FMAX */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1f: /* FRECPS */
gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x18: /* FMAXNM */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x38: /* FMINNM */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3e: /* FMIN */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x18: /* FMAXNM */
case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x1e: /* FMAX */
+ case 0x38: /* FMINNM */
case 0x3a: /* FSUB */
+ case 0x3e: /* FMIN */
case 0x5b: /* FMUL */
case 0x5f: /* FDIV */
g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x7d: /* FACGT */
case 0x19: /* FMLA */
case 0x39: /* FMLS */
- case 0x18: /* FMAXNM */
case 0x1c: /* FCMEQ */
- case 0x1e: /* FMAX */
- case 0x38: /* FMINNM */
- case 0x3e: /* FMIN */
case 0x5c: /* FCMGE */
case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;

default:
+ case 0x18: /* FMAXNM */
case 0x1a: /* FADD */
case 0x1b: /* FMULX */
+ case 0x1e: /* FMAX */
+ case 0x38: /* FMINNM */
case 0x3a: /* FSUB */
+ case 0x3e: /* FMIN */
case 0x5b: /* FMUL */
case 0x5f: /* FDIV */
unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
int pass;

switch (fpopcode) {
- case 0x0: /* FMAXNM */
case 0x1: /* FMLA */
case 0x4: /* FCMEQ */
- case 0x6: /* FMAX */
case 0x7: /* FRECPS */
- case 0x8: /* FMINNM */
case 0x9: /* FMLS */
- case 0xe: /* FMIN */
case 0xf: /* FRSQRTS */
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
pairwise = true;
break;
default:
+ case 0x0: /* FMAXNM */
case 0x2: /* FADD */
case 0x3: /* FMULX */
+ case 0x6: /* FMAX */
+ case 0x8: /* FMINNM */
case 0xa: /* FSUB */
+ case 0xe: /* FMIN */
case 0x13: /* FMUL */
case 0x17: /* FDIV */
unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);

switch (fpopcode) {
- case 0x0: /* FMAXNM */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x1: /* FMLA */
read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x4: /* FCMEQ */
gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x6: /* FMAX */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x7: /* FRECPS */
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x8: /* FMINNM */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0x9: /* FMLS */
/* As usual for ARM, separate negation for fused multiply-add */
tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
fpst);
break;
- case 0xe: /* FMIN */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
+ case 0x0: /* FMAXNM */
case 0x2: /* FADD */
case 0x3: /* FMULX */
+ case 0x6: /* FMAX */
+ case 0x8: /* FMINNM */
case 0xa: /* FSUB */
+ case 0xe: /* FMIN */
case 0x13: /* FMUL */
case 0x17: /* FDIV */
g_assert_not_reached();
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_facgt_s, float32_acgt, float32)

DO_3OP(gvec_fmax_h, float16_max, float16)
DO_3OP(gvec_fmax_s, float32_max, float32)
+DO_3OP(gvec_fmax_d, float64_max, float64)

DO_3OP(gvec_fmin_h, float16_min, float16)
DO_3OP(gvec_fmin_s, float32_min, float32)
+DO_3OP(gvec_fmin_d, float64_min, float64)

DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16)
DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32)
+DO_3OP(gvec_fmaxnum_d, float64_maxnum, float64)

DO_3OP(gvec_fminnum_h, float16_minnum, float16)
DO_3OP(gvec_fminnum_s, float32_minnum, float32)
+DO_3OP(gvec_fminnum_d, float64_minnum, float64)

DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
--
2.34.1
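
For readers new to the decodetree conversion pattern used throughout
this series: each pattern line added to a64.decode makes the generated
decoder call a trans_<NAME>() function, and the TRANS() macro is just
the glue that defines that function. A minimal sketch of the shape of
the macro (see target/arm/tcg/translate.h for the authoritative
definition):

    /* Sketch of the TRANS() glue macro; not the verbatim source. */
    #define TRANS(NAME, FUNC, ...) \
        static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
        { return FUNC(s, __VA_ARGS__); }

So TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax) defines trans_FMAX_v(),
which forwards to the shared do_fp3_vector() expander together with the
per-insn table of gvec helpers. This is why converting an instruction
mostly consists of one decode line, one helper table, and one TRANS()
line, while the old hand-written switch cases are deleted.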
From: Richard Henderson <richard.henderson@linaro.org>

Load and zero-extend float16 into a TCGv_i32 before
all scalar operations.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240524232121.284515-22-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/translate-vfp.c | 39 +++++++++++++++++++---------------
1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -XXX,XX +XXX,XX @@ static inline void vfp_store_reg32(TCGv_i32 var, int reg)
tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
}

+static inline void vfp_load_reg16(TCGv_i32 var, int reg)
+{
+ tcg_gen_ld16u_i32(var, tcg_env,
+ vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2);
+}
+
/*
* The imm8 encodes the sign bit, enough bits to represent an exponent in
* the range 01....1xx to 10....0xx, and the most significant 4 bits of
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vn);
- tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ vfp_load_reg16(tmp, a->vn);
store_reg(s, a->rt, tmp);
} else {
/* general purpose register to VFP */
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR_F16);

- vfp_load_reg32(f0, vn);
- vfp_load_reg32(f1, vm);
+ vfp_load_reg16(f0, vn);
+ vfp_load_reg16(f1, vm);

if (reads_vd) {
- vfp_load_reg32(fd, vd);
+ vfp_load_reg16(fd, vd);
}
fn(fd, f0, f1, fpst);
vfp_store_reg32(fd, vd);
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
}

f0 = tcg_temp_new_i32();
- vfp_load_reg32(f0, vm);
+ vfp_load_reg16(f0, vm);
fn(f0, f0);
vfp_store_reg32(f0, vd);

@@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();

- vfp_load_reg32(vn, a->vn);
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vn, a->vn);
+ vfp_load_reg16(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negh(vn, vn);
}
- vfp_load_reg32(vd, a->vd);
+ vfp_load_reg16(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negh(vd, vd);
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();

- vfp_load_reg32(vd, a->vd);
+ vfp_load_reg16(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vm, a->vm);
}

if (a->e) {
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
}

tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
}

tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rinth(tmp, tmp, fpst);
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
}

tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)

fpst = fpstatus_ptr(FPST_FPCR_F16);
vm = tcg_temp_new_i32();
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vm, a->vm);

if (a->s) {
if (a->rz) {
@@ -XXX,XX +XXX,XX @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
/* Insert low half of Vm into high half of Vd */
rm = tcg_temp_new_i32();
rd = tcg_temp_new_i32();
- vfp_load_reg32(rm, a->vm);
- vfp_load_reg32(rd, a->vd);
+ vfp_load_reg16(rm, a->vm);
+ vfp_load_reg16(rd, a->vd);
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
vfp_store_reg32(rd, a->vd);
return true;
--
2.34.1
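
The HOST_BIG_ENDIAN * 2 adjustment in vfp_load_reg16() above is worth
spelling out: each half-precision value lives in the low 16 bits of a
32-bit register slot, and the byte offset of those low 16 bits depends
on host endianness. Here is a small self-contained illustration of the
same arithmetic; host_big_endian() is a hypothetical stand-in for
QEMU's HOST_BIG_ENDIAN macro, added only for this sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for QEMU's HOST_BIG_ENDIAN (0 or 1). */
    static int host_big_endian(void)
    {
        union { uint32_t w; uint8_t b[4]; } u = { .w = 1 };
        return u.b[0] == 0;
    }

    int main(void)
    {
        uint32_t slot = 0x1234abcd;   /* one 32-bit register slot */
        uint16_t low;

        /* The low-order half is at byte offset 0 on a little-endian
         * host and at byte offset 2 on a big-endian host, hence the
         * "+ HOST_BIG_ENDIAN * 2" in the patch above. */
        memcpy(&low, (uint8_t *)&slot + host_big_endian() * 2, sizeof(low));
        printf("0x%04x\n", low);      /* prints 0xabcd on either host */
        return 0;
    }

A 16-bit zero-extending load at that offset therefore yields the same
guest value regardless of host byte order, which is what lets the
translator drop the load-32-then-mask sequence.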
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-23-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 6 ----
target/arm/tcg/translate.h | 30 +++++++++++++++++++
target/arm/tcg/translate-a64.c | 44 +++++++++++++--------------
target/arm/tcg/translate-vfp.c | 54 +++++++++++++++++-----------------
target/arm/vfp_helper.c | 30 -------------------
5 files changed, 79 insertions(+), 85 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
-DEF_HELPER_1(vfp_negh, f16, f16)
-DEF_HELPER_1(vfp_negs, f32, f32)
-DEF_HELPER_1(vfp_negd, f64, f64)
-DEF_HELPER_1(vfp_absh, f16, f16)
-DEF_HELPER_1(vfp_abss, f32, f32)
-DEF_HELPER_1(vfp_absd, f64, f64)
DEF_HELPER_2(vfp_sqrth, f16, f16, env)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -XXX,XX +XXX,XX @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8);

+static inline void gen_vfp_absh(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_andi_i32(d, s, INT16_MAX);
+}
+
+static inline void gen_vfp_abss(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_andi_i32(d, s, INT32_MAX);
+}
+
+static inline void gen_vfp_absd(TCGv_i64 d, TCGv_i64 s)
+{
+ tcg_gen_andi_i64(d, s, INT64_MAX);
+}
+
+static inline void gen_vfp_negh(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_xori_i32(d, s, 1u << 15);
+}
+
+static inline void gen_vfp_negs(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_xori_i32(d, s, 1u << 31);
+}
+
+static inline void gen_vfp_negd(TCGv_i64 d, TCGv_i64 s)
+{
+ tcg_gen_xori_i64(d, s, 1ull << 63);
+}
+
/* Vector operations shared between ARM and AArch64. */
void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
tcg_gen_mov_i32(tcg_res, tcg_op);
break;
case 0x1: /* FABS */
- tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
+ gen_vfp_absh(tcg_res, tcg_op);
break;
case 0x2: /* FNEG */
- tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ gen_vfp_negh(tcg_res, tcg_op);
break;
case 0x3: /* FSQRT */
fpst = fpstatus_ptr(FPST_FPCR_F16);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
tcg_gen_mov_i32(tcg_res, tcg_op);
goto done;
case 0x1: /* FABS */
- gen_helper_vfp_abss(tcg_res, tcg_op);
+ gen_vfp_abss(tcg_res, tcg_op);
goto done;
case 0x2: /* FNEG */
- gen_helper_vfp_negs(tcg_res, tcg_op);
+ gen_vfp_negs(tcg_res, tcg_op);
goto done;
case 0x3: /* FSQRT */
gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)

switch (opcode) {
case 0x1: /* FABS */
- gen_helper_vfp_absd(tcg_res, tcg_op);
+ gen_vfp_absd(tcg_res, tcg_op);
goto done;
case 0x2: /* FNEG */
- gen_helper_vfp_negd(tcg_res, tcg_op);
+ gen_vfp_negd(tcg_res, tcg_op);
goto done;
case 0x3: /* FSQRT */
gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
switch (opcode) {
case 0x8: /* FNMUL */
gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_negs(tcg_res, tcg_res);
+ gen_vfp_negs(tcg_res, tcg_res);
break;
default:
case 0x0: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
switch (opcode) {
case 0x8: /* FNMUL */
gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_negd(tcg_res, tcg_res);
+ gen_vfp_negd(tcg_res, tcg_res);
break;
default:
case 0x0: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
switch (opcode) {
case 0x8: /* FNMUL */
gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
+ gen_vfp_negh(tcg_res, tcg_res);
break;
default:
case 0x0: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
 * flipped if it is a negated-input.
 */
if (o1 == true) {
- gen_helper_vfp_negs(tcg_op3, tcg_op3);
+ gen_vfp_negs(tcg_op3, tcg_op3);
}

if (o0 != o1) {
- gen_helper_vfp_negs(tcg_op1, tcg_op1);
+ gen_vfp_negs(tcg_op1, tcg_op1);
}

gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
@@ -XXX,XX +XXX,XX @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
 * flipped if it is a negated-input.
 */
if (o1 == true) {
- gen_helper_vfp_negd(tcg_op3, tcg_op3);
+ gen_vfp_negd(tcg_op3, tcg_op3);
}

if (o0 != o1) {
- gen_helper_vfp_negd(tcg_op1, tcg_op1);
+ gen_vfp_negd(tcg_op1, tcg_op1);
}

gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
switch (fpopcode) {
case 0x39: /* FMLS */
/* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negd(tcg_op1, tcg_op1);
+ gen_vfp_negd(tcg_op1, tcg_op1);
/* fall through */
case 0x19: /* FMLA */
read_vec_element(s, tcg_res, rd, pass, MO_64);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
break;
case 0x7a: /* FABD */
gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_absd(tcg_res, tcg_res);
+ gen_vfp_absd(tcg_res, tcg_res);
break;
case 0x7c: /* FCMGT */
gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
switch (fpopcode) {
case 0x39: /* FMLS */
/* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negs(tcg_op1, tcg_op1);
+ gen_vfp_negs(tcg_op1, tcg_op1);
/* fall through */
case 0x19: /* FMLA */
read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
break;
case 0x7a: /* FABD */
gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_abss(tcg_res, tcg_res);
+ gen_vfp_abss(tcg_res, tcg_res);
break;
case 0x7c: /* FCMGT */
gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
@@ -XXX,XX +XXX,XX @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
}
break;
case 0x2f: /* FABS */
- gen_helper_vfp_absd(tcg_rd, tcg_rn);
+ gen_vfp_absd(tcg_rd, tcg_rn);
break;
case 0x6f: /* FNEG */
- gen_helper_vfp_negd(tcg_rd, tcg_rn);
+ gen_vfp_negd(tcg_rd, tcg_rn);
break;
case 0x7f: /* FSQRT */
gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0x2f: /* FABS */
- gen_helper_vfp_abss(tcg_res, tcg_op);
+ gen_vfp_abss(tcg_res, tcg_op);
break;
case 0x6f: /* FNEG */
- gen_helper_vfp_negs(tcg_res, tcg_op);
+ gen_vfp_negs(tcg_res, tcg_op);
break;
case 0x7f: /* FSQRT */
gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
switch (16 * u + opcode) {
case 0x05: /* FMLS */
/* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negd(tcg_op, tcg_op);
+ gen_vfp_negd(tcg_op, tcg_op);
/* fall through */
case 0x01: /* FMLA */
read_vec_element(s, tcg_res, rd, pass, MO_64);
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -XXX,XX +XXX,XX @@ static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(tmp, tmp);
+ gen_vfp_negh(tmp, tmp);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(tmp, tmp);
+ gen_vfp_negs(tmp, tmp);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();

gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(tmp, tmp);
+ gen_vfp_negd(tmp, tmp);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();

gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(tmp, tmp);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(tmp, tmp);
+ gen_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();

gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(tmp, tmp);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(tmp, tmp);
+ gen_vfp_negs(vd, vd);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();

gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(tmp, tmp);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(tmp, tmp);
+ gen_vfp_negd(vd, vd);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

@@ -XXX,XX +XXX,XX @@ static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_mulh(vd, vn, vm, fpst);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
@@ -XXX,XX +XXX,XX @@ static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_muls(vd, vn, vm, fpst);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
@@ -XXX,XX +XXX,XX @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_muld(vd, vn, vm, fpst);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vfp_load_reg16(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negh(vn, vn);
+ gen_vfp_negh(vn, vn);
}
vfp_load_reg16(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negs(vn, vn);
+ gen_vfp_negs(vn, vn);
}
vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
vfp_load_reg64(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negd(vn, vn);
+ gen_vfp_negd(vn, vn);
}
vfp_load_reg64(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

-DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
-DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
-DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
+DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith)
+DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
+DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2)

-DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
-DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
-DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
+DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith)
+DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2)
+DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP

-dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
-{
- return float16_chs(a);
-}
-
-float32 VFP_HELPER(neg, s)(float32 a)
-{
- return float32_chs(a);
-}
-
-float64 VFP_HELPER(neg, d)(float64 a)
-{
- return float64_chs(a);
-}
-
-dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
-{
- return float16_abs(a);
-}
-
-float32 VFP_HELPER(abs, s)(float32 a)
-{
- return float32_abs(a);
-}
-
-float64 VFP_HELPER(abs, d)(float64 a)
-{
- return float64_abs(a);
-}
-
dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
{
return float16_sqrt(a, &env->vfp.fp_status_f16);
--
2.34.1
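
The reason the patch above can replace helper calls with inline TCG
and/xor operations: IEEE 754 negation and absolute value are defined as
pure sign-bit manipulations, so they raise no floating-point exceptions
and never need the float_status context. A short sketch of the
bit-level equivalence, for illustration only:

    #include <stdint.h>

    /* float32 negation: flip the sign bit (bit 31). */
    static inline uint32_t f32_neg_bits(uint32_t f)
    {
        return f ^ (1u << 31);
    }

    /* float32 absolute value: clear the sign bit. */
    static inline uint32_t f32_abs_bits(uint32_t f)
    {
        return f & 0x7fffffffu;
    }

This mirrors gen_vfp_negs() and gen_vfp_abss() above, which emit the
same xor/and at TCG level; the float16 and float64 variants differ only
in the sign-bit position (bit 15 and bit 63 respectively). Inlining
avoids a call out of generated code for what is a single ALU operation.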
From: Richard Henderson <richard.henderson@linaro.org>

This is the last instruction within disas_fp_2src,
so remove that and its subroutines.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-24-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/a64.decode | 1 +
target/arm/tcg/translate-a64.c | 177 +++++----------------------------
2 files changed, 27 insertions(+), 151 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FADD_s 0001 1110 ..1 ..... 0010 10 ..... ..... @rrr_hsd
FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd
FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd
FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd
+FNMUL_s 0001 1110 ..1 ..... 1000 10 ..... ..... @rrr_hsd

FMAX_s 0001 1110 ..1 ..... 0100 10 ..... ..... @rrr_hsd
FMIN_s 0001 1110 ..1 ..... 0101 10 ..... ..... @rrr_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmulx = {
};
TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)

+static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_mulh(d, n, m, s);
+ gen_vfp_negh(d, d);
+}
+
+static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muls(d, n, m, s);
+ gen_vfp_negs(d, d);
+}
+
+static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muld(d, n, m, s);
+ gen_vfp_negd(d, d);
+}
+
+static const FPScalar f_scalar_fnmul = {
+ gen_fnmul_h,
+ gen_fnmul_s,
+ gen_fnmul_d,
+};
+TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
+
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
}
}

-/* Floating-point data-processing (2 source) - single precision */
-static void handle_fp_2src_single(DisasContext *s, int opcode,
- int rd, int rn, int rm)
-{
- TCGv_i32 tcg_op1;
- TCGv_i32 tcg_op2;
- TCGv_i32 tcg_res;
- TCGv_ptr fpst;
-
- tcg_res = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR);
- tcg_op1 = read_fp_sreg(s, rn);
- tcg_op2 = read_fp_sreg(s, rm);
-
- switch (opcode) {
- case 0x8: /* FNMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_negs(tcg_res, tcg_res);
- break;
- default:
- case 0x0: /* FMUL */
- case 0x1: /* FDIV */
- case 0x2: /* FADD */
- case 0x3: /* FSUB */
121
+ * the input, and LESIZE, LTYPE for the output.
91
- case 0x4: /* FMAX */
122
+ * Unlike the normal shift helpers, we do not handle negative shift counts,
92
- case 0x5: /* FMIN */
123
+ * because the long shift is strictly left-only.
93
- case 0x6: /* FMAXNM */
124
+ */
94
- case 0x7: /* FMINNM */
125
+#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
95
- g_assert_not_reached();
126
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
96
- }
127
+ void *vm, uint32_t shift) \
97
-
128
+ { \
98
- write_fp_sreg(s, rd, tcg_res);
129
+ LTYPE *d = vd; \
99
-}
130
+ TYPE *m = vm; \
100
-
131
+ uint16_t mask = mve_element_mask(env); \
101
-/* Floating-point data-processing (2 source) - double precision */
132
+ unsigned le; \
102
-static void handle_fp_2src_double(DisasContext *s, int opcode,
133
+ assert(shift <= 16); \
103
- int rd, int rn, int rm)
134
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
104
-{
135
+ LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \
105
- TCGv_i64 tcg_op1;
136
+ mergemask(&d[H##LESIZE(le)], r, mask); \
106
- TCGv_i64 tcg_op2;
137
+ } \
107
- TCGv_i64 tcg_res;
138
+ mve_advance_vpt(env); \
108
- TCGv_ptr fpst;
139
+ }
109
-
140
+
110
- tcg_res = tcg_temp_new_i64();
141
+#define DO_VSHLL_ALL(OP, TOP) \
111
- fpst = fpstatus_ptr(FPST_FPCR);
142
+ DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \
112
- tcg_op1 = read_fp_dreg(s, rn);
143
+ DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \
113
- tcg_op2 = read_fp_dreg(s, rm);
144
+ DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \
114
-
145
+ DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \
115
- switch (opcode) {
146
+
116
- case 0x8: /* FNMUL */
147
+DO_VSHLL_ALL(vshllb, false)
117
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
148
+DO_VSHLL_ALL(vshllt, true)
118
- gen_vfp_negd(tcg_res, tcg_res);
149
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
119
- break;
150
index XXXXXXX..XXXXXXX 100644
120
- default:
151
--- a/target/arm/translate-mve.c
121
- case 0x0: /* FMUL */
152
+++ b/target/arm/translate-mve.c
122
- case 0x1: /* FDIV */
153
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT(VSHRI_S, vshli_s, true)
123
- case 0x2: /* FADD */
154
DO_2SHIFT(VSHRI_U, vshli_u, true)
124
- case 0x3: /* FSUB */
155
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
125
- case 0x4: /* FMAX */
156
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
126
- case 0x5: /* FMIN */
157
+
127
- case 0x6: /* FMAXNM */
158
+#define DO_VSHLL(INSN, FN) \
128
- case 0x7: /* FMINNM */
159
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
129
- g_assert_not_reached();
160
+ { \
130
- }
161
+ static MVEGenTwoOpShiftFn * const fns[] = { \
131
-
162
+ gen_helper_mve_##FN##b, \
132
- write_fp_dreg(s, rd, tcg_res);
163
+ gen_helper_mve_##FN##h, \
133
-}
164
+ }; \
134
-
165
+ return do_2shift(s, a, fns[a->size], false); \
135
-/* Floating-point data-processing (2 source) - half precision */
166
+ }
136
-static void handle_fp_2src_half(DisasContext *s, int opcode,
167
+
137
- int rd, int rn, int rm)
168
+DO_VSHLL(VSHLL_BS, vshllbs)
138
-{
169
+DO_VSHLL(VSHLL_BU, vshllbu)
139
- TCGv_i32 tcg_op1;
170
+DO_VSHLL(VSHLL_TS, vshllts)
140
- TCGv_i32 tcg_op2;
171
+DO_VSHLL(VSHLL_TU, vshlltu)
141
- TCGv_i32 tcg_res;
142
- TCGv_ptr fpst;
143
-
144
- tcg_res = tcg_temp_new_i32();
145
- fpst = fpstatus_ptr(FPST_FPCR_F16);
146
- tcg_op1 = read_fp_hreg(s, rn);
147
- tcg_op2 = read_fp_hreg(s, rm);
148
-
149
- switch (opcode) {
150
- case 0x8: /* FNMUL */
151
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
152
- gen_vfp_negh(tcg_res, tcg_res);
153
- break;
154
- default:
155
- case 0x0: /* FMUL */
156
- case 0x1: /* FDIV */
157
- case 0x2: /* FADD */
158
- case 0x3: /* FSUB */
159
- case 0x4: /* FMAX */
160
- case 0x5: /* FMIN */
161
- case 0x6: /* FMAXNM */
162
- case 0x7: /* FMINNM */
163
- g_assert_not_reached();
164
- }
165
-
166
- write_fp_sreg(s, rd, tcg_res);
167
-}
168
-
169
-/* Floating point data-processing (2 source)
170
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
171
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
172
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
173
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
174
- */
175
-static void disas_fp_2src(DisasContext *s, uint32_t insn)
176
-{
177
- int mos = extract32(insn, 29, 3);
178
- int type = extract32(insn, 22, 2);
179
- int rd = extract32(insn, 0, 5);
180
- int rn = extract32(insn, 5, 5);
181
- int rm = extract32(insn, 16, 5);
182
- int opcode = extract32(insn, 12, 4);
183
-
184
- if (opcode > 8 || mos) {
185
- unallocated_encoding(s);
186
- return;
187
- }
188
-
189
- switch (type) {
190
- case 0:
191
- if (!fp_access_check(s)) {
192
- return;
193
- }
194
- handle_fp_2src_single(s, opcode, rd, rn, rm);
195
- break;
196
- case 1:
197
- if (!fp_access_check(s)) {
198
- return;
199
- }
200
- handle_fp_2src_double(s, opcode, rd, rn, rm);
201
- break;
202
- case 3:
203
- if (!dc_isar_feature(aa64_fp16, s)) {
204
- unallocated_encoding(s);
205
- return;
206
- }
207
- if (!fp_access_check(s)) {
208
- return;
209
- }
210
- handle_fp_2src_half(s, opcode, rd, rn, rm);
211
- break;
212
- default:
213
- unallocated_encoding(s);
214
- }
215
-}
216
-
217
/* Floating-point data-processing (3 source) - single precision */
218
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
219
int rd, int rn, int rm, int ra)
220
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
221
break;
222
case 2:
223
/* Floating point data-processing (2 source) */
224
- disas_fp_2src(s, insn);
225
+ unallocated_encoding(s); /* in decodetree */
226
break;
227
case 3:
228
/* Floating point conditional select */
172
--
229
--
173
2.20.1
230
2.34.1
174
175
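As an aside, the DO_VSHLL macro above is easier to read expanded; here is a
freestanding C model of the VSHLLB.S8 case (illustrative sketch only, not
QEMU code: the predication handling via mve_element_mask/mergemask is
ignored and the function name is invented):

    #include <stdint.h>

    /* Widen the eight even-numbered (bottom) byte elements to 16 bits and
     * shift left; a "top" (VSHLLT) form would read elements le * 2 + 1.
     */
    static void model_vshllb_s8(int16_t d[8], const int8_t m[16], unsigned shift)
    {
        for (unsigned le = 0; le < 8; le++) {
            d[le] = (int16_t)(m[le * 2] << shift);
        }
    }
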
Implement the MVE long shifts by register, which perform shifts on a
pair of general-purpose registers treated as a 64-bit quantity, with
the shift count in another general-purpose register, which might be
either positive or negative.

Like the long-shifts-by-immediate, these encodings sit in the space
that was previously the UNPREDICTABLE MOVS/ORRS with Rm==13,15.
Because LSLL_rr and ASRL_rr overlap with both MOV_rxri/ORR_rrri and
also with CSEL (as one of the previously-UNPREDICTABLE Rm==13 cases),
we have to move the CSEL pattern into the same decodetree group.
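To make the semantics concrete, a minimal C model of the LSLL case
(illustrative sketch only, not the QEMU implementation; the function name is
invented, and the saturating/rounding variants are not modelled):

    #include <stdint.h>

    /* Treat {RdaHi:RdaLo} as one 64-bit value and shift it by the low byte
     * of Rm, interpreted as a signed count: positive shifts left, negative
     * shifts right (logically, since LSLL is the unsigned form).
     */
    static uint64_t model_lsll(uint32_t rdalo, uint32_t rdahi, int8_t rm)
    {
        uint64_t rda = ((uint64_t)rdahi << 32) | rdalo;

        if (rm >= 0) {
            return rm < 64 ? rda << rm : 0;
        }
        return -rm < 64 ? rda >> -rm : 0;
    }

The patch itself instead reuses the existing do_sqrshl_d/do_uqrshl_d
helpers, as the mve_helper.c hunk below shows.
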
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-17-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 6 +++
target/arm/translate.h | 1 +
target/arm/t32.decode | 16 +++++--
target/arm/mve_helper.c | 93 +++++++++++++++++++++++++++++++++++++++++
target/arm/translate.c | 69 ++++++++++++++++++++++++++++++
5 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)

+DEF_HELPER_FLAGS_3(mve_sshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_ushll, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
+typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);

/**
* arm_tbflags_from_tb:
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -XXX,XX +XXX,XX @@
&mcrr !extern cp opc1 crm rt rt2

&mve_shl_ri rdalo rdahi shim
+&mve_shl_rr rdalo rdahi rm

# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
@@ -XXX,XX +XXX,XX @@

@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \
 &mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
+@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
+ &mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9

{
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
+
+ LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
+ ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
+ UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
+ SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
+ UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
+ SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
]

MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi
ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi
+
+ # v8.1M CSEL and friends
+ CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
}
{
MVN_rxri 1110101 0011 . 1111 0 ... .... .... .... @s_rxr_shi
@@ -XXX,XX +XXX,XX @@ SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi
}
RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi

-# v8.1M CSEL and friends
-CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
-
# Data-processing (register-shifted register)

MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
return rdm;
}

+uint64_t HELPER(mve_sshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, false, NULL);
+}
+
+uint64_t HELPER(mve_ushll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, false, NULL);
+}
+
uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
{
return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
{
return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
}
+
+uint64_t HELPER(mve_sqrshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, true, &env->QF);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline int64_t do_sqrshl48_d(int64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -48) {
+ /* Rounding the sign bit always produces 0. */
+ if (round) {
+ return 0;
+ }
+ return src >> 63;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < 48) {
+ int64_t val = src << shift;
+ int64_t extval = sextract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return (1ULL << 47) - (src >= 0);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline uint64_t do_uqrshl48_d(uint64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ uint64_t val, extval;
+
+ if (shift <= -(48 + round)) {
+ return 0;
+ } else if (shift < 0) {
+ if (round) {
+ val = src >> (-shift - 1);
+ val = (val >> 1) + (val & 1);
+ } else {
+ val = src >> -shift;
+ }
+ extval = extract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (shift < 48) {
+ uint64_t val = src << shift;
+ uint64_t extval = extract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return MAKE_64BIT_MASK(0, 48);
+}
+
+uint64_t HELPER(mve_sqrshrl48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
return do_mve_shl_ri(s, a, gen_urshr64_i64);
}

+static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
+{
+ TCGv_i64 rda;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (a->rdahi == 15) {
+ /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
+ a->rm == a->rdahi || a->rm == a->rdalo) {
+ /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ rda = tcg_temp_new_i64();
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+
+ /* The helper takes care of the sign-extension of the low 8 bits of Rm */
+ fn(rda, cpu_env, rda, cpu_R[a->rm]);
+
+ tcg_gen_extrl_i64_i32(rdalo, rda);
+ tcg_gen_extrh_i64_i32(rdahi, rda);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+ tcg_temp_free_i64(rda);
+
+ return true;
+}
+
+static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
+}
+
+static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
+}
+
+static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
+}
+
+static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
+}
+
+static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
+}
+
+static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
+}
+
/*
* Multiply and multiply accumulate
*/
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-25-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 2 +
target/arm/tcg/a64.decode | 22 +++
target/arm/tcg/translate-a64.c | 241 +++++++++++++++++----------------
target/arm/tcg/vec_helper.c | 14 ++
4 files changed, 163 insertions(+), 116 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
 void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FMINNM_v 0.00 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h
FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd

+FMLA_v 0.00 1110 010 ..... 00001 1 ..... ..... @qrrr_h
+FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd
+
+FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h
+FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd
+
### Advanced SIMD scalar x indexed element

FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
FMUL_si 0101 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
FMUL_si 0101 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d

+FMLA_si 0101 1111 00 .. .... 0001 . 0 ..... ..... @rrx_h
+FMLA_si 0101 1111 10 .. .... 0001 . 0 ..... ..... @rrx_s
+FMLA_si 0101 1111 11 0. .... 0001 . 0 ..... ..... @rrx_d
+
+FMLS_si 0101 1111 00 .. .... 0101 . 0 ..... ..... @rrx_h
+FMLS_si 0101 1111 10 .. .... 0101 . 0 ..... ..... @rrx_s
+FMLS_si 0101 1111 11 0. .... 0101 . 0 ..... ..... @rrx_d
+
FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
@@ -XXX,XX +XXX,XX @@ FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
FMUL_vi 0.00 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
FMUL_vi 0.00 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d

+FMLA_vi 0.00 1111 00 .. .... 0001 . 0 ..... ..... @qrrx_h
+FMLA_vi 0.00 1111 10 . ..... 0001 . 0 ..... ..... @qrrx_s
+FMLA_vi 0.00 1111 11 0 ..... 0001 . 0 ..... ..... @qrrx_d
+
+FMLS_vi 0.00 1111 00 .. .... 0101 . 0 ..... ..... @qrrx_h
+FMLS_vi 0.00 1111 10 . ..... 0101 . 0 ..... ..... @qrrx_s
+FMLS_vi 0.00 1111 11 0 ..... 0101 . 0 ..... ..... @qrrx_d
+
FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
};
TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx)

+static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
+ gen_helper_gvec_vfma_h,
+ gen_helper_gvec_vfma_s,
+ gen_helper_gvec_vfma_d,
+};
+TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
+ gen_helper_gvec_vfms_h,
+ gen_helper_gvec_vfms_s,
+ gen_helper_gvec_vfms_d,
+};
+TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls)
+
/*
 * Advanced SIMD scalar/vector x indexed element
 */
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)

+static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = read_fp_dreg(s, a->rd);
+ TCGv_i64 t1 = read_fp_dreg(s, a->rn);
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ read_vec_element(s, t2, a->rm, a->idx, MO_64);
+ if (neg) {
+ gen_vfp_negd(t1, t1);
+ }
+ gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_sreg(s, a->rd);
+ TCGv_i32 t1 = read_fp_sreg(s, a->rn);
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
+ if (neg) {
+ gen_vfp_negs(t1, t1);
+ }
+ gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_hreg(s, a->rd);
+ TCGv_i32 t1 = read_fp_hreg(s, a->rn);
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
+ if (neg) {
+ gen_vfp_negh(t1, t1);
+ }
+ gen_helper_advsimd_muladdh(t0, t1, t2, t0,
+ fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
+TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
+
static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
 gen_helper_gvec_3_ptr * const fns[3])
{
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
};
TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)

+static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
+{
+ static gen_helper_gvec_4_ptr * const fns[3] = {
+ gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d,
+ };
+ MemOp esz = a->esz;
+
+ switch (esz) {
+ case MO_64:
+ if (!a->q) {
+ return false;
+ }
+ break;
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ esz == MO_16, (a->idx << 1) | neg,
+ fns[esz - 1]);
+ }
+ return true;
+}
+
+TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
+TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
+

/* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 read_vec_element(s, tcg_op2, rm, pass, MO_64);

 switch (fpopcode) {
- case 0x39: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_vfp_negd(tcg_op1, tcg_op1);
- /* fall through */
- case 0x19: /* FMLA */
- read_vec_element(s, tcg_res, rd, pass, MO_64);
- gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
- tcg_res, fpst);
- break;
 case 0x1c: /* FCMEQ */
 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 break;
 default:
 case 0x18: /* FMAXNM */
+ case 0x19: /* FMLA */
 case 0x1a: /* FADD */
 case 0x1b: /* FMULX */
 case 0x1e: /* FMAX */
 case 0x38: /* FMINNM */
+ case 0x39: /* FMLS */
 case 0x3a: /* FSUB */
 case 0x3e: /* FMIN */
 case 0x5b: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

 switch (fpopcode) {
- case 0x39: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_vfp_negs(tcg_op1, tcg_op1);
- /* fall through */
- case 0x19: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
- tcg_res, fpst);
- break;
 case 0x1c: /* FCMEQ */
 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 break;
 default:
 case 0x18: /* FMAXNM */
+ case 0x19: /* FMLA */
 case 0x1a: /* FADD */
 case 0x1b: /* FMULX */
 case 0x1e: /* FMAX */
 case 0x38: /* FMINNM */
+ case 0x39: /* FMLS */
 case 0x3a: /* FSUB */
 case 0x3e: /* FMIN */
 case 0x5b: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 case 0x3f: /* FRSQRTS */
 case 0x5d: /* FACGE */
 case 0x7d: /* FACGT */
- case 0x19: /* FMLA */
- case 0x39: /* FMLS */
 case 0x1c: /* FCMEQ */
 case 0x5c: /* FCMGE */
 case 0x7a: /* FABD */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)

 default:
 case 0x18: /* FMAXNM */
+ case 0x19: /* FMLA */
 case 0x1a: /* FADD */
 case 0x1b: /* FMULX */
 case 0x1e: /* FMAX */
 case 0x38: /* FMINNM */
+ case 0x39: /* FMLS */
 case 0x3a: /* FSUB */
 case 0x3e: /* FMIN */
 case 0x5b: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 int pass;

 switch (fpopcode) {
- case 0x1: /* FMLA */
 case 0x4: /* FCMEQ */
 case 0x7: /* FRECPS */
- case 0x9: /* FMLS */
 case 0xf: /* FRSQRTS */
 case 0x14: /* FCMGE */
 case 0x15: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 break;
 default:
 case 0x0: /* FMAXNM */
+ case 0x1: /* FMLA */
 case 0x2: /* FADD */
 case 0x3: /* FMULX */
 case 0x6: /* FMAX */
 case 0x8: /* FMINNM */
+ case 0x9: /* FMLS */
 case 0xa: /* FSUB */
 case 0xe: /* FMIN */
 case 0x13: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);

 switch (fpopcode) {
- case 0x1: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
 case 0x4: /* FCMEQ */
 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
 case 0x7: /* FRECPS */
 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0x9: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
 case 0xf: /* FRSQRTS */
 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 break;
 default:
 case 0x0: /* FMAXNM */
+ case 0x1: /* FMLA */
 case 0x2: /* FADD */
 case 0x3: /* FMULX */
 case 0x6: /* FMAX */
 case 0x8: /* FMINNM */
+ case 0x9: /* FMLS */
 case 0xa: /* FSUB */
 case 0xe: /* FMIN */
 case 0x13: /* FMUL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 case 0x0c: /* SQDMULH */
 case 0x0d: /* SQRDMULH */
 break;
- case 0x01: /* FMLA */
- case 0x05: /* FMLS */
- is_fp = 1;
- break;
 case 0x1d: /* SQRDMLAH */
 case 0x1f: /* SQRDMLSH */
 if (!dc_isar_feature(aa64_rdm, s)) {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 /* is_fp, but we pass tcg_env not fp_status. */
 break;
 default:
+ case 0x01: /* FMLA */
+ case 0x05: /* FMLS */
 case 0x09: /* FMUL */
 case 0x19: /* FMULX */
 unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

 switch (is_fp) {
 case 1: /* normal fp */
- /* convert insn encoded size to MemOp size */
- switch (size) {
- case 0: /* half-precision */
- size = MO_16;
- is_fp16 = true;
- break;
- case MO_32: /* single precision */
- case MO_64: /* double precision */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
+ unallocated_encoding(s); /* in decodetree */
+ return;

 case 2: /* complex fp */
 /* Each indexable element is a complex pair. */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 }

 if (size == 3) {
- TCGv_i64 tcg_idx = tcg_temp_new_i64();
- int pass;
-
- assert(is_fp && is_q && !is_long);
-
- read_vec_element(s, tcg_idx, rm, index, MO_64);
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
-
- switch (16 * u + opcode) {
- case 0x05: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_vfp_negd(tcg_op, tcg_op);
- /* fall through */
- case 0x01: /* FMLA */
- read_vec_element(s, tcg_res, rd, pass, MO_64);
- gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
- break;
- default:
- case 0x09: /* FMUL */
- case 0x19: /* FMULX */
- g_assert_not_reached();
- }
-
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- }
-
- clear_vec_high(s, !is_scalar, rd);
+ g_assert_not_reached();
 } else if (!is_long) {
 /* 32 bit floating point, or 16 or 32 bit integer.
 * For the 16 bit scalar case we use the usual Neon helpers and
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 genfn(tcg_res, tcg_op, tcg_res);
 break;
 }
- case 0x05: /* FMLS */
- case 0x01: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass,
- is_scalar ? size : MO_32);
- switch (size) {
- case 1:
- if (opcode == 0x5) {
- /* As usual for ARM, separate negation for fused
- * multiply-add */
- tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
- }
- if (is_scalar) {
- gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- } else {
- gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- }
- break;
- case 2:
- if (opcode == 0x5) {
- /* As usual for ARM, separate negation for
- * fused multiply-add */
- tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
- }
- gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- break;
 case 0x0c: /* SQDMULH */
 if (size == 1) {
 gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 }
 break;
 default:
+ case 0x01: /* FMLA */
+ case 0x05: /* FMLS */
 case 0x09: /* FMUL */
 case 0x19: /* FMULX */
 g_assert_not_reached();
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
 return float32_muladd(op1, op2, dest, 0, stat);
 }

+static float64 float64_muladd_f(float64 dest, float64 op1, float64 op2,
+ float_status *stat)
+{
+ return float64_muladd(op1, op2, dest, 0, stat);
+}
+
static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
 float_status *stat)
{
@@ -XXX,XX +XXX,XX @@ static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
 return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
}

+static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
+ float_status *stat)
+{
+ return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
+}
+
#define DO_MULADD(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)

DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+DO_MULADD(gvec_vfma_d, float64_muladd_f, float64)

DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)

/* For the indexed ops, SVE applies the index per 128-bit vector segment.
 * For AdvSIMD, there is of course only one such vector segment.
 */
--
2.34.1
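The repeated comment "As usual for ARM, separate negation for fused
multiply-add" in the patch above has a one-line justification: a fused
multiply-add rounds only once, so FMLS must negate an input operand rather
than negating a separately rounded product or result. A short model using
standard C fma() (illustrative sketch only, not QEMU code):

    #include <math.h>

    static double model_fmla(double d, double n, double m)
    {
        return fma(n, m, d);     /* d + n * m, with a single rounding */
    }

    static double model_fmls(double d, double n, double m)
    {
        return fma(-n, m, d);    /* d - n * m: negate the input, not the result */
    }
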
Implement the MVE vector shift right by immediate insns VSHRI and
VRSHRI. As with Neon, we implement these by using helper functions
which perform left shifts but allow negative shift counts to indicate
right shifts.
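Two conventions are in play here, sketched below in plain C (illustrative
only; the names are invented). First, a single helper serves both directions
because a negative count means a right shift; second, the encoding stores a
right shift by 'shift' as esize - shift, which the rsub_* decode functions
added below invert.

    #include <stdint.h>

    /* One shift helper for both directions, keyed on the sign of the count */
    static int8_t model_vshl_elem_s8(int8_t v, int shift)
    {
        return shift >= 0 ? v << shift : v >> -shift;
    }

    /* Decode-time inverse of the "esize - shift" right-shift encoding */
    static int model_rsub_8(int encoded)
    {
        return 8 - encoded;
    }
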
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20240524232121.284515-26-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210628135835.6690-9-peter.maydell@linaro.org
9
---
7
---
10
target/arm/helper-mve.h | 12 ++++++++++++
8
target/arm/helper.h | 5 +
11
target/arm/translate.h | 20 ++++++++++++++++++++
9
target/arm/tcg/a64.decode | 30 ++++++
12
target/arm/mve.decode | 28 ++++++++++++++++++++++++++++
10
target/arm/tcg/translate-a64.c | 188 +++++++++++++++++++--------------
13
target/arm/mve_helper.c | 7 +++++++
11
target/arm/tcg/vec_helper.c | 30 ++++++
14
target/arm/translate-mve.c | 5 +++++
12
4 files changed, 174 insertions(+), 79 deletions(-)
15
target/arm/translate-neon.c | 18 ------------------
16
6 files changed, 72 insertions(+), 18 deletions(-)
17
13
18
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
14
diff --git a/target/arm/helper.h b/target/arm/helper.h
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper-mve.h
16
--- a/target/arm/helper.h
21
+++ b/target/arm/helper-mve.h
17
+++ b/target/arm/helper.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
23
DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
19
24
DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
20
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
25
21
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_4(mve_vshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_4(mve_vshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
28
+DEF_HELPER_FLAGS_4(mve_vshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
24
DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
29
+
25
DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_4(mve_vshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_4(mve_vshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
32
DEF_HELPER_FLAGS_4(mve_vshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
33
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
34
DEF_HELPER_FLAGS_4(mve_vqshlui_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
35
DEF_HELPER_FLAGS_4(mve_vqshlui_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
36
DEF_HELPER_FLAGS_4(mve_vqshlui_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
37
+
33
DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(mve_vrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(mve_vrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
40
+DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
41
+
37
DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
42
+DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
43
+DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
44
+DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
41
DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
46
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/translate.h
44
--- a/target/arm/tcg/a64.decode
48
+++ b/target/arm/translate.h
45
+++ b/target/arm/tcg/a64.decode
49
@@ -XXX,XX +XXX,XX @@ static inline int times_2_plus_1(DisasContext *s, int x)
46
@@ -XXX,XX +XXX,XX @@ FMINNM_s 0001 1110 ..1 ..... 0111 10 ..... ..... @rrr_hsd
50
return x * 2 + 1;
47
FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h
48
FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd
49
50
+FCMEQ_s 0101 1110 010 ..... 00100 1 ..... ..... @rrr_h
51
+FCMEQ_s 0101 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd
52
+
53
+FCMGE_s 0111 1110 010 ..... 00100 1 ..... ..... @rrr_h
54
+FCMGE_s 0111 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd
55
+
56
+FCMGT_s 0111 1110 110 ..... 00100 1 ..... ..... @rrr_h
57
+FCMGT_s 0111 1110 1.1 ..... 11100 1 ..... ..... @rrr_sd
58
+
59
+FACGE_s 0111 1110 010 ..... 00101 1 ..... ..... @rrr_h
60
+FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd
61
+
62
+FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h
63
+FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
64
+
65
### Advanced SIMD three same
66
67
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
68
@@ -XXX,XX +XXX,XX @@ FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd
69
FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h
70
FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd
71
72
+FCMEQ_v 0.00 1110 010 ..... 00100 1 ..... ..... @qrrr_h
73
+FCMEQ_v 0.00 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd
74
+
75
+FCMGE_v 0.10 1110 010 ..... 00100 1 ..... ..... @qrrr_h
76
+FCMGE_v 0.10 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd
77
+
78
+FCMGT_v 0.10 1110 110 ..... 00100 1 ..... ..... @qrrr_h
79
+FCMGT_v 0.10 1110 1.1 ..... 11100 1 ..... ..... @qrrr_sd
80
+
81
+FACGE_v 0.10 1110 010 ..... 00101 1 ..... ..... @qrrr_h
82
+FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd
83
+
84
+FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h
85
+FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
86
+
87
### Advanced SIMD scalar x indexed element
88
89
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
90
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/tcg/translate-a64.c
93
+++ b/target/arm/tcg/translate-a64.c
94
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = {
95
};
96
TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
97
98
+static const FPScalar f_scalar_fcmeq = {
99
+ gen_helper_advsimd_ceq_f16,
100
+ gen_helper_neon_ceq_f32,
101
+ gen_helper_neon_ceq_f64,
102
+};
103
+TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
104
+
105
+static const FPScalar f_scalar_fcmge = {
106
+ gen_helper_advsimd_cge_f16,
107
+ gen_helper_neon_cge_f32,
108
+ gen_helper_neon_cge_f64,
109
+};
110
+TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
111
+
112
+static const FPScalar f_scalar_fcmgt = {
113
+ gen_helper_advsimd_cgt_f16,
114
+ gen_helper_neon_cgt_f32,
115
+ gen_helper_neon_cgt_f64,
116
+};
117
+TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
118
+
119
+static const FPScalar f_scalar_facge = {
120
+ gen_helper_advsimd_acge_f16,
121
+ gen_helper_neon_acge_f32,
122
+ gen_helper_neon_acge_f64,
123
+};
124
+TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
125
+
126
+static const FPScalar f_scalar_facgt = {
127
+ gen_helper_advsimd_acgt_f16,
128
+ gen_helper_neon_acgt_f32,
129
+ gen_helper_neon_acgt_f64,
130
+};
131
+TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
132
+
133
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
134
gen_helper_gvec_3_ptr * const fns[3])
135
{
136
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
137
};
138
TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls)
139
140
+static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
141
+ gen_helper_gvec_fceq_h,
142
+ gen_helper_gvec_fceq_s,
143
+ gen_helper_gvec_fceq_d,
144
+};
145
+TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq)
146
+
147
+static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
148
+ gen_helper_gvec_fcge_h,
149
+ gen_helper_gvec_fcge_s,
150
+ gen_helper_gvec_fcge_d,
151
+};
152
+TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge)
153
+
154
+static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
155
+ gen_helper_gvec_fcgt_h,
156
+ gen_helper_gvec_fcgt_s,
157
+ gen_helper_gvec_fcgt_d,
158
+};
159
+TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt)
160
+
161
+static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
162
+ gen_helper_gvec_facge_h,
163
+ gen_helper_gvec_facge_s,
164
+ gen_helper_gvec_facge_d,
165
+};
166
+TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge)
167
+
168
+static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
169
+ gen_helper_gvec_facgt_h,
170
+ gen_helper_gvec_facgt_s,
171
+ gen_helper_gvec_facgt_d,
172
+};
173
+TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)
174
+
175
/*
176
* Advanced SIMD scalar/vector x indexed element
177
*/
178
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
179
read_vec_element(s, tcg_op2, rm, pass, MO_64);
180
181
switch (fpopcode) {
182
- case 0x1c: /* FCMEQ */
183
- gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
184
- break;
185
case 0x1f: /* FRECPS */
186
gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
187
break;
188
case 0x3f: /* FRSQRTS */
189
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
190
break;
191
- case 0x5c: /* FCMGE */
192
- gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
193
- break;
194
- case 0x5d: /* FACGE */
195
- gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
196
- break;
197
case 0x7a: /* FABD */
198
gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
199
gen_vfp_absd(tcg_res, tcg_res);
200
break;
201
- case 0x7c: /* FCMGT */
202
- gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
203
- break;
204
- case 0x7d: /* FACGT */
205
- gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
206
- break;
207
default:
208
case 0x18: /* FMAXNM */
209
case 0x19: /* FMLA */
210
case 0x1a: /* FADD */
211
case 0x1b: /* FMULX */
212
+ case 0x1c: /* FCMEQ */
213
case 0x1e: /* FMAX */
214
case 0x38: /* FMINNM */
215
case 0x39: /* FMLS */
216
case 0x3a: /* FSUB */
217
case 0x3e: /* FMIN */
218
case 0x5b: /* FMUL */
219
+ case 0x5c: /* FCMGE */
220
+ case 0x5d: /* FACGE */
221
case 0x5f: /* FDIV */
222
+ case 0x7c: /* FCMGT */
223
+ case 0x7d: /* FACGT */
224
g_assert_not_reached();
225
}
226
227
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
228
read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
229
230
switch (fpopcode) {
231
- case 0x1c: /* FCMEQ */
232
- gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
233
- break;
234
case 0x1f: /* FRECPS */
235
gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
236
break;
237
case 0x3f: /* FRSQRTS */
238
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
239
break;
240
- case 0x5c: /* FCMGE */
241
- gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
242
- break;
243
- case 0x5d: /* FACGE */
244
- gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
245
- break;
246
case 0x7a: /* FABD */
247
gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
248
gen_vfp_abss(tcg_res, tcg_res);
249
break;
250
- case 0x7c: /* FCMGT */
251
- gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
252
- break;
253
- case 0x7d: /* FACGT */
254
- gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
255
- break;
256
default:
257
case 0x18: /* FMAXNM */
258
case 0x19: /* FMLA */
259
case 0x1a: /* FADD */
260
case 0x1b: /* FMULX */
261
+ case 0x1c: /* FCMEQ */
262
case 0x1e: /* FMAX */
263
case 0x38: /* FMINNM */
264
case 0x39: /* FMLS */
265
case 0x3a: /* FSUB */
266
case 0x3e: /* FMIN */
267
case 0x5b: /* FMUL */
268
+ case 0x5c: /* FCMGE */
269
+ case 0x5d: /* FACGE */
270
case 0x5f: /* FDIV */
271
+ case 0x7c: /* FCMGT */
272
+ case 0x7d: /* FACGT */
273
g_assert_not_reached();
274
}
275
276
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
277
switch (fpopcode) {
278
case 0x1f: /* FRECPS */
279
case 0x3f: /* FRSQRTS */
280
+ case 0x7a: /* FABD */
281
+ break;
282
+ default:
283
+ case 0x1b: /* FMULX */
284
case 0x5d: /* FACGE */
285
case 0x7d: /* FACGT */
286
case 0x1c: /* FCMEQ */
287
case 0x5c: /* FCMGE */
288
case 0x7c: /* FCMGT */
289
- case 0x7a: /* FABD */
290
- break;
291
- default:
292
- case 0x1b: /* FMULX */
293
unallocated_encoding(s);
294
return;
295
}
296
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
297
TCGv_i32 tcg_res;
298
299
switch (fpopcode) {
300
- case 0x04: /* FCMEQ (reg) */
301
case 0x07: /* FRECPS */
302
case 0x0f: /* FRSQRTS */
303
- case 0x14: /* FCMGE (reg) */
304
- case 0x15: /* FACGE */
305
case 0x1a: /* FABD */
306
- case 0x1c: /* FCMGT (reg) */
307
- case 0x1d: /* FACGT */
308
break;
309
default:
310
case 0x03: /* FMULX */
311
+ case 0x04: /* FCMEQ (reg) */
312
+ case 0x14: /* FCMGE (reg) */
313
+ case 0x15: /* FACGE */
314
+ case 0x1c: /* FCMGT (reg) */
315
+ case 0x1d: /* FACGT */
316
unallocated_encoding(s);
317
return;
318
}
319
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
320
tcg_res = tcg_temp_new_i32();
321
322
switch (fpopcode) {
323
- case 0x04: /* FCMEQ (reg) */
324
- gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
325
- break;
326
case 0x07: /* FRECPS */
327
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
328
break;
329
case 0x0f: /* FRSQRTS */
330
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
331
break;
332
- case 0x14: /* FCMGE (reg) */
333
- gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
334
- break;
335
- case 0x15: /* FACGE */
336
- gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
337
- break;
338
case 0x1a: /* FABD */
339
gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
340
tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
341
break;
342
- case 0x1c: /* FCMGT (reg) */
343
- gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
344
- break;
345
- case 0x1d: /* FACGT */
346
- gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
347
- break;
348
default:
349
case 0x03: /* FMULX */
350
+ case 0x04: /* FCMEQ (reg) */
351
+ case 0x14: /* FCMGE (reg) */
352
+ case 0x15: /* FACGE */
353
+ case 0x1c: /* FCMGT (reg) */
354
+ case 0x1d: /* FACGT */
355
g_assert_not_reached();
356
}
357
358
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
359
return;
360
case 0x1f: /* FRECPS */
361
case 0x3f: /* FRSQRTS */
362
- case 0x5d: /* FACGE */
363
- case 0x7d: /* FACGT */
364
- case 0x1c: /* FCMEQ */
365
- case 0x5c: /* FCMGE */
366
case 0x7a: /* FABD */
367
- case 0x7c: /* FCMGT */
368
if (!fp_access_check(s)) {
369
return;
370
}
371
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
372
case 0x19: /* FMLA */
373
case 0x1a: /* FADD */
374
case 0x1b: /* FMULX */
375
+ case 0x1c: /* FCMEQ */
376
case 0x1e: /* FMAX */
377
case 0x38: /* FMINNM */
378
case 0x39: /* FMLS */
379
case 0x3a: /* FSUB */
380
case 0x3e: /* FMIN */
381
case 0x5b: /* FMUL */
382
+ case 0x5c: /* FCMGE */
383
+ case 0x5d: /* FACGE */
384
case 0x5f: /* FDIV */
385
+ case 0x7d: /* FACGT */
386
+ case 0x7c: /* FCMGT */
387
unallocated_encoding(s);
388
return;
389
}
390
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
391
int pass;
392
393
switch (fpopcode) {
394
- case 0x4: /* FCMEQ */
395
case 0x7: /* FRECPS */
396
case 0xf: /* FRSQRTS */
397
- case 0x14: /* FCMGE */
398
- case 0x15: /* FACGE */
399
case 0x1a: /* FABD */
400
- case 0x1c: /* FCMGT */
401
- case 0x1d: /* FACGT */
402
pairwise = false;
403
break;
404
case 0x10: /* FMAXNMP */
405
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
406
case 0x1: /* FMLA */
407
case 0x2: /* FADD */
408
case 0x3: /* FMULX */
409
+ case 0x4: /* FCMEQ */
410
case 0x6: /* FMAX */
411
case 0x8: /* FMINNM */
412
case 0x9: /* FMLS */
413
case 0xa: /* FSUB */
414
case 0xe: /* FMIN */
415
case 0x13: /* FMUL */
416
+ case 0x14: /* FCMGE */
417
+ case 0x15: /* FACGE */
418
case 0x17: /* FDIV */
419
+ case 0x1c: /* FCMGT */
420
+ case 0x1d: /* FACGT */
421
unallocated_encoding(s);
422
return;
423
}
424
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
425
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
426
427
switch (fpopcode) {
428
- case 0x4: /* FCMEQ */
429
- gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
430
- break;
431
case 0x7: /* FRECPS */
432
gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
433
break;
434
case 0xf: /* FRSQRTS */
435
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
436
break;
437
- case 0x14: /* FCMGE */
438
- gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
439
- break;
440
- case 0x15: /* FACGE */
441
- gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
442
- break;
443
case 0x1a: /* FABD */
444
gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
445
tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
446
break;
447
- case 0x1c: /* FCMGT */
448
- gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
449
- break;
450
- case 0x1d: /* FACGT */
451
- gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
452
- break;
453
default:
454
case 0x0: /* FMAXNM */
455
case 0x1: /* FMLA */
456
case 0x2: /* FADD */
457
case 0x3: /* FMULX */
458
+ case 0x4: /* FCMEQ */
459
case 0x6: /* FMAX */
460
case 0x8: /* FMINNM */
461
case 0x9: /* FMLS */
462
case 0xa: /* FSUB */
463
case 0xe: /* FMIN */
464
case 0x13: /* FMUL */
465
+ case 0x14: /* FCMGE */
466
+ case 0x15: /* FACGE */
467
case 0x17: /* FDIV */
468
+ case 0x1c: /* FCMGT */
469
+ case 0x1d: /* FACGT */
470
g_assert_not_reached();
471
}
472
473
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat)
 return -float32_eq_quiet(op1, op2, stat);
 }

+static uint64_t float64_ceq(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_eq_quiet(op1, op2, stat);
+}
+
 static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat)
 {
 return -float16_le(op2, op1, stat);
@@ -XXX,XX +XXX,XX @@ static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat)
 return -float32_le(op2, op1, stat);
 }

+static uint64_t float64_cge(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_le(op2, op1, stat);
+}
+
 static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat)
 {
 return -float16_lt(op2, op1, stat);
@@ -XXX,XX +XXX,XX @@ static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat)
 return -float32_lt(op2, op1, stat);
 }

+static uint64_t float64_cgt(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_lt(op2, op1, stat);
+}
+
 static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat)
 {
 return -float16_le(float16_abs(op2), float16_abs(op1), stat);
@@ -XXX,XX +XXX,XX @@ static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat)
 return -float32_le(float32_abs(op2), float32_abs(op1), stat);
 }

+static uint64_t float64_acge(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_le(float64_abs(op2), float64_abs(op1), stat);
+}
+
 static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat)
 {
 return -float16_lt(float16_abs(op2), float16_abs(op1), stat);
@@ -XXX,XX +XXX,XX @@ static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat)
 return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
 }

+static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_lt(float64_abs(op2), float64_abs(op1), stat);
+}
+
 static int16_t vfp_tosszh(float16 x, void *fpstp)
 {
 float_status *fpst = fpstp;
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_s, float32_abd, float32)

 DO_3OP(gvec_fceq_h, float16_ceq, float16)
 DO_3OP(gvec_fceq_s, float32_ceq, float32)
+DO_3OP(gvec_fceq_d, float64_ceq, float64)

 DO_3OP(gvec_fcge_h, float16_cge, float16)
 DO_3OP(gvec_fcge_s, float32_cge, float32)
+DO_3OP(gvec_fcge_d, float64_cge, float64)

 DO_3OP(gvec_fcgt_h, float16_cgt, float16)
 DO_3OP(gvec_fcgt_s, float32_cgt, float32)
+DO_3OP(gvec_fcgt_d, float64_cgt, float64)

 DO_3OP(gvec_facge_h, float16_acge, float16)
 DO_3OP(gvec_facge_s, float32_acge, float32)
+DO_3OP(gvec_facge_d, float64_acge, float64)

 DO_3OP(gvec_facgt_h, float16_acgt, float16)
 DO_3OP(gvec_facgt_s, float32_acgt, float32)
+DO_3OP(gvec_facgt_d, float64_acgt, float64)

 DO_3OP(gvec_fmax_h, float16_max, float16)
 DO_3OP(gvec_fmax_s, float32_max, float32)
--
2.34.1

 }

+static inline int rsub_64(DisasContext *s, int x)
+{
+ return 64 - x;
+}
+
+static inline int rsub_32(DisasContext *s, int x)
+{
+ return 32 - x;
+}
+
+static inline int rsub_16(DisasContext *s, int x)
+{
+ return 16 - x;
+}
+
+static inline int rsub_8(DisasContext *s, int x)
+{
+ return 8 - x;
+}
+
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
 return (dc->features & (1ULL << feature)) != 0;
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@
 @2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
 @2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2

+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%rshift_i5 16:5 !function=rsub_32
+%rshift_i4 16:4 !function=rsub_16
+%rshift_i3 16:3 !function=rsub_8
+
+@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=0 shift=%rshift_i3
+@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=1 shift=%rshift_i4
+@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=2 shift=%rshift_i5
+
 # Vector loads and stores

 # Widening loads and narrowing stores:
@@ -XXX,XX +XXX,XX @@ VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
 VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b
 VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h
 VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w
+
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
+
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
+
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
+
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvuw, 4, uint32_t)
 DO_2SHIFT(OP##b, 1, uint8_t, FN) \
 DO_2SHIFT(OP##h, 2, uint16_t, FN) \
 DO_2SHIFT(OP##w, 4, uint32_t, FN)
+#define DO_2SHIFT_S(OP, FN) \
+ DO_2SHIFT(OP##b, 1, int8_t, FN) \
+ DO_2SHIFT(OP##h, 2, int16_t, FN) \
+ DO_2SHIFT(OP##w, 4, int32_t, FN)

 #define DO_2SHIFT_SAT_U(OP, FN) \
 DO_2SHIFT_SAT(OP##b, 1, uint8_t, FN) \
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvuw, 4, uint32_t)
 DO_2SHIFT_SAT(OP##w, 4, int32_t, FN)

 DO_2SHIFT_U(vshli_u, DO_VSHLU)
+DO_2SHIFT_S(vshli_s, DO_VSHLS)
 DO_2SHIFT_SAT_U(vqshli_u, DO_UQSHL_OP)
 DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
 DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
+DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
+DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT(VSHLI, vshli_u, false)
 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
+/* These right shifts use a left-shift helper with negated shift count */
+DO_2SHIFT(VSHRI_S, vshli_s, true)
+DO_2SHIFT(VSHRI_U, vshli_u, true)
+DO_2SHIFT(VRSHRI_S, vrshli_s, true)
+DO_2SHIFT(VRSHRI_U, vrshli_u, true)
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -XXX,XX +XXX,XX @@ static inline int plus1(DisasContext *s, int x)
 return x + 1;
 }

-static inline int rsub_64(DisasContext *s, int x)
-{
- return 64 - x;
-}
-
-static inline int rsub_32(DisasContext *s, int x)
-{
- return 32 - x;
-}
-static inline int rsub_16(DisasContext *s, int x)
-{
- return 16 - x;
-}
-static inline int rsub_8(DisasContext *s, int x)
-{
- return 8 - x;
-}
-
 static inline int neon_3same_fp_size(DisasContext *s, int x)
 {
 /* Convert 0==fp32, 1==fp16 into a MO_* value */
--
2.20.1
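
A note on the decode trick used above: the MVE encoding stores a right-shift
count as N - shift in the immediate field, so the %rshift_i* converters
(!function=rsub_*) recover the real count at decode time, and VSHRI/VRSHRI can
then reuse the left-shift helpers with a negated count (the "true" flag in the
DO_2SHIFT uses). A minimal stand-alone sketch of the same idea in plain C;
this is illustrative only, not QEMU code, and the names are made up for the
example:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode-time converter: a 32-bit-element field encodes (32 - shift) */
    static int rsub_32_demo(int encoded)
    {
        return 32 - encoded;
    }

    /* Left-shift helper that treats a negative count as a right shift */
    static uint32_t vshl_demo(uint32_t v, int shift)
    {
        return shift >= 0 ? v << shift : v >> -shift;
    }

    int main(void)
    {
        int shift = rsub_32_demo(31);             /* encoded 31 -> shift 1 */
        printf("%u\n", vshl_demo(0x80u, -shift)); /* right shift: prints 64 */
        return 0;
    }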
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-27-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h | 1 +
 target/arm/tcg/a64.decode | 6 ++++
 target/arm/tcg/translate-a64.c | 60 ++++++++++++++++++++++------------
 target/arm/tcg/vec_helper.c | 6 ++++
 4 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd
 FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h
 FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd

+FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
+FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
+
 ### Advanced SIMD three same

 FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -XXX,XX +XXX,XX @@ FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd
 FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h
 FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd

+FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
+FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_facgt = {
 };
 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)

+static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subh(d, n, m, s);
+ gen_vfp_absh(d, d);
+}
+
+static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subs(d, n, m, s);
+ gen_vfp_abss(d, d);
+}
+
+static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subd(d, n, m, s);
+ gen_vfp_absd(d, d);
+}
+
+static const FPScalar f_scalar_fabd = {
+ gen_fabd_h,
+ gen_fabd_s,
+ gen_fabd_d,
+};
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
+
 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
 gen_helper_gvec_3_ptr * const fns[3])
 {
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
 };
 TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)

+static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
+ gen_helper_gvec_fabd_h,
+ gen_helper_gvec_fabd_s,
+ gen_helper_gvec_fabd_d,
+};
+TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
+
 /*
 * Advanced SIMD scalar/vector x indexed element
 */
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 case 0x3f: /* FRSQRTS */
 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_absd(tcg_res, tcg_res);
- break;
 default:
 case 0x18: /* FMAXNM */
 case 0x19: /* FMLA */
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 case 0x5c: /* FCMGE */
 case 0x5d: /* FACGE */
 case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
 case 0x7c: /* FCMGT */
 case 0x7d: /* FACGT */
 g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 case 0x3f: /* FRSQRTS */
 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_abss(tcg_res, tcg_res);
- break;
 default:
 case 0x18: /* FMAXNM */
 case 0x19: /* FMLA */
@@ -XXX,XX +XXX,XX @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 case 0x5c: /* FCMGE */
 case 0x5d: /* FACGE */
 case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
 case 0x7c: /* FCMGT */
 case 0x7d: /* FACGT */
 g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
 switch (fpopcode) {
 case 0x1f: /* FRECPS */
 case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
 break;
 default:
 case 0x1b: /* FMULX */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
 case 0x7d: /* FACGT */
 case 0x1c: /* FCMEQ */
 case 0x5c: /* FCMGE */
+ case 0x7a: /* FABD */
 case 0x7c: /* FCMGT */
 unallocated_encoding(s);
 return;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
 switch (fpopcode) {
 case 0x07: /* FRECPS */
 case 0x0f: /* FRSQRTS */
- case 0x1a: /* FABD */
 break;
 default:
 case 0x03: /* FMULX */
 case 0x04: /* FCMEQ (reg) */
 case 0x14: /* FCMGE (reg) */
 case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
 case 0x1c: /* FCMGT (reg) */
 case 0x1d: /* FACGT */
 unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
 case 0x0f: /* FRSQRTS */
 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
 default:
 case 0x03: /* FMULX */
 case 0x04: /* FCMEQ (reg) */
 case 0x14: /* FCMGE (reg) */
 case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
 case 0x1c: /* FCMGT (reg) */
 case 0x1d: /* FACGT */
 g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 return;
 case 0x1f: /* FRECPS */
 case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
 if (!fp_access_check(s)) {
 return;
 }
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 case 0x5c: /* FCMGE */
 case 0x5d: /* FACGE */
 case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
 case 0x7d: /* FACGT */
 case 0x7c: /* FCMGT */
 unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 switch (fpopcode) {
 case 0x7: /* FRECPS */
 case 0xf: /* FRSQRTS */
- case 0x1a: /* FABD */
 pairwise = false;
 break;
 case 0x10: /* FMAXNMP */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 case 0x14: /* FCMGE */
 case 0x15: /* FACGE */
 case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
 case 0x1c: /* FCMGT */
 case 0x1d: /* FACGT */
 unallocated_encoding(s);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 case 0xf: /* FRSQRTS */
 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
 default:
 case 0x0: /* FMAXNM */
 case 0x1: /* FMLA */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 case 0x14: /* FCMGE */
 case 0x15: /* FACGE */
 case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
 case 0x1c: /* FCMGT */
 case 0x1d: /* FACGT */
 g_assert_not_reached();
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
 return float32_abs(float32_sub(op1, op2, stat));
 }

+static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
+{
+ return float64_abs(float64_sub(op1, op2, stat));
+}
+
 /*
 * Reciprocal step. These are the AArch32 version which uses a
 * non-fused multiply-and-subtract.
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)

 DO_3OP(gvec_fabd_h, float16_abd, float16)
 DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fabd_d, float64_abd, float64)

 DO_3OP(gvec_fceq_h, float16_ceq, float16)
 DO_3OP(gvec_fceq_s, float32_ceq, float32)
--
2.34.1
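
The shape of the FABD helpers above is worth noting: the absolute difference
is taken on the rounded result of the subtraction (subtract, then clear the
sign), which is not the same as subtracting absolute values. A rough scalar
illustration with ordinary C doubles; the real helpers go through softfloat
and track exception flags via float_status, which this deliberately ignores:

    #include <math.h>
    #include <stdio.h>

    /* Mirrors float64_abd: difference first, then absolute value */
    static double fabd64_demo(double a, double b)
    {
        return fabs(a - b);
    }

    int main(void)
    {
        /* |a - b| is 7 here; |a| - |b| would give -1 */
        printf("%g\n", fabd64_demo(-3.0, 4.0));
        return 0;
    }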
From: Patrick Venture <venture@google.com>

Adds a line-item reference to the supported quanta-q71l-bmc aspeed
entry.

Signed-off-by: Patrick Venture <venture@google.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-id: 20210615192848.1065297-2-venture@google.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/aspeed.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -XXX,XX +XXX,XX @@ etc.
 AST2400 SoC based machines :

 - ``palmetto-bmc`` OpenPOWER Palmetto POWER8 BMC
+- ``quanta-q71l-bmc`` OpenBMC Quanta BMC

 AST2500 SoC based machines :
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>

These are the last instructions within handle_3same_float
and disas_simd_scalar_three_reg_same_fp16 so remove them.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-28-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode | 12 ++
 target/arm/tcg/translate-a64.c | 293 ++++-----------------------------
 2 files changed, 46 insertions(+), 259 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
 FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
 FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd

+FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h
+FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
+
+FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
+FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
+
 ### Advanced SIMD three same

 FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -XXX,XX +XXX,XX @@ FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
 FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
 FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd

+FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h
+FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
+
+FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
+FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = {
 };
 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)

+static const FPScalar f_scalar_frecps = {
+ gen_helper_recpsf_f16,
+ gen_helper_recpsf_f32,
+ gen_helper_recpsf_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
+
+static const FPScalar f_scalar_frsqrts = {
+ gen_helper_rsqrtsf_f16,
+ gen_helper_rsqrtsf_f32,
+ gen_helper_rsqrtsf_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
+
 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
 gen_helper_gvec_3_ptr * const fns[3])
 {
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
 };
 TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)

+static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
+ gen_helper_gvec_recps_h,
+ gen_helper_gvec_recps_s,
+ gen_helper_gvec_recps_d,
+};
+TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
+
+static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
+ gen_helper_gvec_rsqrts_h,
+ gen_helper_gvec_rsqrts_s,
+ gen_helper_gvec_rsqrts_d,
+};
+TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
+
 /*
 * Advanced SIMD scalar/vector x indexed element
 */
@@ -XXX,XX +XXX,XX @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
 }
 }

-/* Handle the 3-same-operands float operations; shared by the scalar
- * and vector encodings. The caller must filter out any encodings
- * not allocated for the encoding it is dealing with.
- */
-static void handle_3same_float(DisasContext *s, int size, int elements,
- int fpopcode, int rd, int rn, int rm)
-{
- int pass;
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
- for (pass = 0; pass < elements; pass++) {
- if (size) {
- /* Double */
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- switch (fpopcode) {
- case 0x1f: /* FRECPS */
- gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3f: /* FRSQRTS */
- gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0x18: /* FMAXNM */
- case 0x19: /* FMLA */
- case 0x1a: /* FADD */
- case 0x1b: /* FMULX */
- case 0x1c: /* FCMEQ */
- case 0x1e: /* FMAX */
- case 0x38: /* FMINNM */
- case 0x39: /* FMLS */
- case 0x3a: /* FSUB */
- case 0x3e: /* FMIN */
- case 0x5b: /* FMUL */
- case 0x5c: /* FCMGE */
- case 0x5d: /* FACGE */
- case 0x5f: /* FDIV */
- case 0x7a: /* FABD */
- case 0x7c: /* FCMGT */
- case 0x7d: /* FACGT */
- g_assert_not_reached();
- }
-
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- } else {
- /* Single */
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
- read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
-
- switch (fpopcode) {
- case 0x1f: /* FRECPS */
- gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3f: /* FRSQRTS */
- gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0x18: /* FMAXNM */
- case 0x19: /* FMLA */
- case 0x1a: /* FADD */
- case 0x1b: /* FMULX */
- case 0x1c: /* FCMEQ */
- case 0x1e: /* FMAX */
- case 0x38: /* FMINNM */
- case 0x39: /* FMLS */
- case 0x3a: /* FSUB */
- case 0x3e: /* FMIN */
- case 0x5b: /* FMUL */
- case 0x5c: /* FCMGE */
- case 0x5d: /* FACGE */
- case 0x5f: /* FDIV */
- case 0x7a: /* FABD */
- case 0x7c: /* FCMGT */
- case 0x7d: /* FACGT */
- g_assert_not_reached();
- }
-
- if (elements == 1) {
- /* scalar single so clear high part */
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
- write_vec_element(s, tcg_tmp, rd, pass, MO_64);
- } else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- }
- }
- }
-
- clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
-}
-
 /* AdvSIMD scalar three same
 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
 bool u = extract32(insn, 29, 1);
 TCGv_i64 tcg_rd;

- if (opcode >= 0x18) {
- /* Floating point: U, size[1] and opcode indicate operation */
- int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
- switch (fpopcode) {
- case 0x1f: /* FRECPS */
- case 0x3f: /* FRSQRTS */
- break;
- default:
- case 0x1b: /* FMULX */
- case 0x5d: /* FACGE */
- case 0x7d: /* FACGT */
- case 0x1c: /* FCMEQ */
- case 0x5c: /* FCMGE */
- case 0x7a: /* FABD */
- case 0x7c: /* FCMGT */
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
- return;
- }
-
 switch (opcode) {
 case 0x1: /* SQADD, UQADD */
 case 0x5: /* SQSUB, UQSUB */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
 write_fp_dreg(s, rd, tcg_rd);
 }

-/* AdvSIMD scalar three same FP16
- * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
- * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
- * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
- * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
- * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
- * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
- */
-static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
- uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 3);
- int rm = extract32(insn, 16, 5);
- bool u = extract32(insn, 29, 1);
- bool a = extract32(insn, 23, 1);
- int fpopcode = opcode | (a << 3) | (u << 4);
- TCGv_ptr fpst;
- TCGv_i32 tcg_op1;
- TCGv_i32 tcg_op2;
- TCGv_i32 tcg_res;
-
- switch (fpopcode) {
- case 0x07: /* FRECPS */
- case 0x0f: /* FRSQRTS */
- break;
- default:
- case 0x03: /* FMULX */
- case 0x04: /* FCMEQ (reg) */
- case 0x14: /* FCMGE (reg) */
- case 0x15: /* FACGE */
- case 0x1a: /* FABD */
- case 0x1c: /* FCMGT (reg) */
- case 0x1d: /* FACGT */
- unallocated_encoding(s);
- return;
- }
-
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = fpstatus_ptr(FPST_FPCR_F16);
-
- tcg_op1 = read_fp_hreg(s, rn);
- tcg_op2 = read_fp_hreg(s, rm);
- tcg_res = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x07: /* FRECPS */
- gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x0f: /* FRSQRTS */
- gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0x03: /* FMULX */
- case 0x04: /* FCMEQ (reg) */
- case 0x14: /* FCMGE (reg) */
- case 0x15: /* FACGE */
- case 0x1a: /* FABD */
- case 0x1c: /* FCMGT (reg) */
- case 0x1d: /* FACGT */
- g_assert_not_reached();
- }
-
- write_fp_sreg(s, rd, tcg_res);
-}
-
 /* AdvSIMD scalar three same extra
 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)

 /* Pairwise op subgroup of C3.6.16.
 *
- * This is called directly or via the handle_3same_float for float pairwise
+ * This is called directly for float pairwise
 * operations where the opcode and size are calculated differently.
 */
 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 int rn = extract32(insn, 5, 5);
 int rd = extract32(insn, 0, 5);

- int datasize = is_q ? 128 : 64;
- int esize = 32 << size;
- int elements = datasize / esize;
-
 if (size == 1 && !is_q) {
 unallocated_encoding(s);
 return;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
 rn, rm, rd);
 return;
- case 0x1f: /* FRECPS */
- case 0x3f: /* FRSQRTS */
- if (!fp_access_check(s)) {
- return;
- }
- handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
- return;

 case 0x1d: /* FMLAL */
 case 0x3d: /* FMLSL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 case 0x1b: /* FMULX */
 case 0x1c: /* FCMEQ */
 case 0x1e: /* FMAX */
+ case 0x1f: /* FRECPS */
 case 0x38: /* FMINNM */
 case 0x39: /* FMLS */
 case 0x3a: /* FSUB */
 case 0x3e: /* FMIN */
+ case 0x3f: /* FRSQRTS */
 case 0x5b: /* FMUL */
 case 0x5c: /* FCMGE */
 case 0x5d: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 * together indicate the operation.
 */
 int fpopcode = opcode | (a << 3) | (u << 4);
- int datasize = is_q ? 128 : 64;
- int elements = datasize / 16;
 bool pairwise;
 TCGv_ptr fpst;
 int pass;

 switch (fpopcode) {
- case 0x7: /* FRECPS */
- case 0xf: /* FRSQRTS */
- pairwise = false;
- break;
 case 0x10: /* FMAXNMP */
 case 0x12: /* FADDP */
 case 0x16: /* FMAXP */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 case 0x3: /* FMULX */
 case 0x4: /* FCMEQ */
 case 0x6: /* FMAX */
+ case 0x7: /* FRECPS */
 case 0x8: /* FMINNM */
 case 0x9: /* FMLS */
 case 0xa: /* FSUB */
 case 0xe: /* FMIN */
+ case 0xf: /* FRSQRTS */
 case 0x13: /* FMUL */
 case 0x14: /* FCMGE */
 case 0x15: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
 }
 } else {
- for (pass = 0; pass < elements; pass++) {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
- read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
-
- switch (fpopcode) {
- case 0x7: /* FRECPS */
- gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FRSQRTS */
- gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0x0: /* FMAXNM */
- case 0x1: /* FMLA */
- case 0x2: /* FADD */
- case 0x3: /* FMULX */
- case 0x4: /* FCMEQ */
- case 0x6: /* FMAX */
- case 0x8: /* FMINNM */
- case 0x9: /* FMLS */
- case 0xa: /* FSUB */
- case 0xe: /* FMIN */
- case 0x13: /* FMUL */
- case 0x14: /* FCMGE */
- case 0x15: /* FACGE */
- case 0x17: /* FDIV */
- case 0x1a: /* FABD */
- case 0x1c: /* FCMGT */
- case 0x1d: /* FACGT */
- g_assert_not_reached();
- }
-
- write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- }
+ g_assert_not_reached();
 }

 clear_vec_high(s, is_q, rd);
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
- { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
 { 0x00000000, 0x00000000, NULL }
 };
--
2.34.1
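
FRECPS and FRSQRTS are the refinement-step instructions that pair with the
FRECPE/FRSQRTE estimates: the reciprocal step computes 2 - a*b, so that
x' = x * (2 - d*x) converges on 1/d. A rough sketch of that Newton-Raphson
use in plain C doubles, ignoring the NaN/infinity special casing and the
fused arithmetic the architectural helpers perform:

    #include <stdio.h>

    /* Reciprocal step, as FRECPS computes it (modulo special cases) */
    static double frecps_demo(double a, double b)
    {
        return 2.0 - a * b;
    }

    int main(void)
    {
        double d = 3.0;
        double x = 0.3;               /* stand-in for a FRECPE estimate */
        for (int i = 0; i < 3; i++) {
            x *= frecps_demo(d, x);   /* x' = x * (2 - d*x) */
        }
        printf("%.12f\n", x);         /* converges towards 1/3 */
        return 0;
    }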
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-29-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h | 4 ++
 target/arm/tcg/a64.decode | 12 +++++
 target/arm/tcg/translate-a64.c | 87 ++++++++++++++++++++++++++--------
 target/arm/tcg/vec_helper.c | 23 +++++++++
 4 files changed, 105 insertions(+), 21 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
 void, ptr, ptr, ptr, ptr, i32)

+DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "tcg/helper-a64.h"
 #include "tcg/helper-sve.h"
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 &ri rd imm
 &rri_sf rd rn imm sf
 &i imm
+&rr_e rd rn esz
 &rrr_e rd rn rm esz
 &rrx_e rd rn rm idx esz
 &qrr_e q rd rn esz
@@ -XXX,XX +XXX,XX @@
 &qrrx_e q rd rn rm idx esz
 &qrrrr_e q rd rn rm ra esz

+@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
+@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
+
 @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
 @rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
 @rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
@@ -XXX,XX +XXX,XX @@ FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
 FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
 FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd

+### Advanced SIMD scalar pairwise
+
+FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h
+FADDP_s 0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd
+
 ### Advanced SIMD three same

 FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -XXX,XX +XXX,XX @@ FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
 FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
 FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd

+FADDP_v 0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h
+FADDP_v 0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
 };
 TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)

+static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
+ gen_helper_gvec_faddp_h,
+ gen_helper_gvec_faddp_s,
+ gen_helper_gvec_faddp_d,
+};
+TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp)
+
 /*
 * Advanced SIMD scalar/vector x indexed element
 */
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)

+/*
+ * Advanced SIMD scalar pairwise
+ */
+
+static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ read_vec_element(s, t0, a->rn, 0, MO_64);
+ read_vec_element(s, t1, a->rn, 1, MO_64);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, MO_32);
+ read_vec_element_i32(s, t1, a->rn, 1, MO_32);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, MO_16);
+ read_vec_element_i32(s, t1, a->rn, 1, MO_16);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)

 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 fpst = NULL;
 break;
 case 0xc: /* FMAXNMP */
- case 0xd: /* FADDP */
 case 0xf: /* FMAXP */
 case 0x2c: /* FMINNMP */
 case 0x2f: /* FMINP */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
 break;
 default:
+ case 0xd: /* FADDP */
 unallocated_encoding(s);
 return;
 }
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 case 0xc: /* FMAXNMP */
 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0xd: /* FADDP */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
 case 0xf: /* FMAXP */
 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0xd: /* FADDP */
 g_assert_not_reached();
 }

@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 case 0xc: /* FMAXNMP */
 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0xd: /* FADDP */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
 case 0xf: /* FMAXP */
 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0xd: /* FADDP */
 g_assert_not_reached();
 }
 } else {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 case 0xc: /* FMAXNMP */
 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
- case 0xd: /* FADDP */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
 case 0xf: /* FMAXP */
 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0xd: /* FADDP */
 g_assert_not_reached();
 }
 }
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 case 0x58: /* FMAXNMP */
 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
- case 0x5a: /* FADDP */
- gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
 case 0x5e: /* FMAXP */
 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0x5a: /* FADDP */
 g_assert_not_reached();
 }
 }
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 case 0x58: /* FMAXNMP */
 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
- case 0x5a: /* FADDP */
- gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
 case 0x5e: /* FMAXP */
 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0x5a: /* FADDP */
 g_assert_not_reached();
 }

@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)

 switch (fpopcode) {
 case 0x58: /* FMAXNMP */
- case 0x5a: /* FADDP */
 case 0x5e: /* FMAXP */
 case 0x78: /* FMINNMP */
 case 0x7e: /* FMINP */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 case 0x3a: /* FSUB */
 case 0x3e: /* FMIN */
 case 0x3f: /* FRSQRTS */
+ case 0x5a: /* FADDP */
 case 0x5b: /* FMUL */
 case 0x5c: /* FCMGE */
 case 0x5d: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)

 switch (fpopcode) {
 case 0x10: /* FMAXNMP */
- case 0x12: /* FADDP */
 case 0x16: /* FMAXP */
 case 0x18: /* FMINNMP */
 case 0x1e: /* FMINP */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 case 0xa: /* FSUB */
 case 0xe: /* FMIN */
 case 0xf: /* FRSQRTS */
+ case 0x12: /* FADDP */
 case 0x13: /* FMUL */
 case 0x14: /* FCMGE */
 case 0x15: /* FACGE */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
 fpst);
 break;
- case 0x12: /* FADDP */
- gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
 case 0x16: /* FMAXP */
 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
 break;
 default:
+ case 0x12: /* FADDP */
 g_assert_not_reached();
 }
 }
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_NEON_PAIRWISE(neon_pmin, min)

 #undef DO_NEON_PAIRWISE

+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ ARMVectorReg scratch; \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t half = oprsz / sizeof(TYPE) / 2; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ if (unlikely(d == m)) { \
+ m = memcpy(&scratch, m, oprsz); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_3OP_PAIR(gvec_faddp_h, float16_add, float16, H2)
+DO_3OP_PAIR(gvec_faddp_s, float32_add, float32, H4)
+DO_3OP_PAIR(gvec_faddp_d, float64_add, float64, )
+
 #define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
 void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
 { \
--
2.34.1
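
The DO_3OP_PAIR macro above fixes the data movement for all the vector
pairwise ops: the low half of the destination is built from adjacent pairs of
Vn, the high half from adjacent pairs of Vm, with a scratch copy guarding the
d == m overlap case. A stripped-down four-lane illustration in plain C (no
softfloat state, fixed element count; illustrative only):

    #include <stdio.h>
    #include <string.h>

    /* Pairwise add: d = { n0+n1, n2+n3, m0+m1, m2+m3 } */
    static void faddp4_demo(float *d, const float *n, const float *m)
    {
        float scratch[4];
        if (d == m) {                 /* same guard as the ARMVectorReg copy */
            memcpy(scratch, m, sizeof(scratch));
            m = scratch;
        }
        for (int i = 0; i < 2; i++) { /* low half from n; reads stay ahead */
            d[i] = n[2 * i] + n[2 * i + 1];
        }
        for (int i = 0; i < 2; i++) { /* high half from m */
            d[i + 2] = m[2 * i] + m[2 * i + 1];
        }
    }

    int main(void)
    {
        float n[4] = {1, 2, 3, 4}, m[4] = {10, 20, 30, 40}, d[4];
        faddp4_demo(d, n, m);
        printf("%g %g %g %g\n", d[0], d[1], d[2], d[3]); /* 3 7 30 70 */
        return 0;
    }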
The A64 AdvSIMD modified-immediate grouping uses almost the same
constant encoding that A32 Neon does; reuse asimd_imm_const() (to
which we add the AArch64-specific case for cmode 15 op 1) instead of
reimplementing it all.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-5-peter.maydell@linaro.org
---
 target/arm/translate.h | 3 +-
 target/arm/translate-a64.c | 86 ++++----------------------------------
 target/arm/translate.c | 17 +++++++-
 3 files changed, 24 insertions(+), 82 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
 * VMVN and VBIC (when cmode < 14 && op == 1).
 *
 * The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
- * callers must catch this.
+ * callers must catch this; we return the 64-bit constant value defined
+ * for AArch64.
 *
 * cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
 * is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
 {
 int rd = extract32(insn, 0, 5);
 int cmode = extract32(insn, 12, 4);
- int cmode_3_1 = extract32(cmode, 1, 3);
- int cmode_0 = extract32(cmode, 0, 1);
 int o2 = extract32(insn, 11, 1);
 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
 bool is_neg = extract32(insn, 29, 1);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
 return;
 }

- /* See AdvSIMDExpandImm() in ARM ARM */
- switch (cmode_3_1) {
- case 0: /* Replicate(Zeros(24):imm8, 2) */
- case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
- case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
- case 3: /* Replicate(imm8:Zeros(24), 2) */
- {
- int shift = cmode_3_1 * 8;
- imm = bitfield_replicate(abcdefgh << shift, 32);
- break;
- }
- case 4: /* Replicate(Zeros(8):imm8, 4) */
- case 5: /* Replicate(imm8:Zeros(8), 4) */
- {
- int shift = (cmode_3_1 & 0x1) * 8;
- imm = bitfield_replicate(abcdefgh << shift, 16);
- break;
- }
- case 6:
- if (cmode_0) {
- /* Replicate(Zeros(8):imm8:Ones(16), 2) */
- imm = (abcdefgh << 16) | 0xffff;
- } else {
- /* Replicate(Zeros(16):imm8:Ones(8), 2) */
- imm = (abcdefgh << 8) | 0xff;
- }
- imm = bitfield_replicate(imm, 32);
- break;
- case 7:
- if (!cmode_0 && !is_neg) {
- imm = bitfield_replicate(abcdefgh, 8);
- } else if (!cmode_0 && is_neg) {
- int i;
- imm = 0;
- for (i = 0; i < 8; i++) {
- if ((abcdefgh) & (1 << i)) {
- imm |= 0xffULL << (i * 8);
- }
- }
- } else if (cmode_0) {
- if (is_neg) {
- imm = (abcdefgh & 0x3f) << 48;
- if (abcdefgh & 0x80) {
- imm |= 0x8000000000000000ULL;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3fc0000000000000ULL;
- } else {
- imm |= 0x4000000000000000ULL;
- }
- } else {
- if (o2) {
- /* FMOV (vector, immediate) - half-precision */
- imm = vfp_expand_imm(MO_16, abcdefgh);
- /* now duplicate across the lanes */
- imm = bitfield_replicate(imm, 16);
- } else {
- imm = (abcdefgh & 0x3f) << 19;
- if (abcdefgh & 0x80) {
- imm |= 0x80000000;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3e000000;
- } else {
- imm |= 0x40000000;
- }
- imm |= (imm << 32);
- }
- }
- }
- }

From: Richard Henderson <richard.henderson@linaro.org>

These are the last instructions within disas_simd_three_reg_same_fp16,
so remove it.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-30-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h | 16 ++
 target/arm/tcg/a64.decode | 24 +++
 target/arm/tcg/translate-a64.c | 296 ++++++---------------------------
 target/arm/tcg/vec_helper.c | 16 ++
 4 files changed, 107 insertions(+), 245 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

+DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "tcg/helper-a64.h"
 #include "tcg/helper-sve.h"
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
 FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h
 FADDP_s 0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd

+FMAXP_s 0101 1110 0011 0000 1111 10 ..... ..... @rr_h
+FMAXP_s 0111 1110 0.11 0000 1111 10 ..... ..... @rr_sd
+
+FMINP_s 0101 1110 1011 0000 1111 10 ..... ..... @rr_h
+FMINP_s 0111 1110 1.11 0000 1111 10 ..... ..... @rr_sd
+
+FMAXNMP_s 0101 1110 0011 0000 1100 10 ..... ..... @rr_h
+FMAXNMP_s 0111 1110 0.11 0000 1100 10 ..... ..... @rr_sd
+
+FMINNMP_s 0101 1110 1011 0000 1100 10 ..... ..... @rr_h
+FMINNMP_s 0111 1110 1.11 0000 1100 10 ..... ..... @rr_sd
+
 ### Advanced SIMD three same

 FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -XXX,XX +XXX,XX @@ FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
 FADDP_v 0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h
 FADDP_v 0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd

+FMAXP_v 0.10 1110 010 ..... 00110 1 ..... ..... @qrrr_h
+FMAXP_v 0.10 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMINP_v 0.10 1110 110 ..... 00110 1 ..... ..... @qrrr_h
+FMINP_v 0.10 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMAXNMP_v 0.10 1110 010 ..... 00000 1 ..... ..... @qrrr_h
+FMAXNMP_v 0.10 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h
+FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
 };
 TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp)

+static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
+ gen_helper_gvec_fmaxp_h,
+ gen_helper_gvec_fmaxp_s,
+ gen_helper_gvec_fmaxp_d,
+};
+TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
+ gen_helper_gvec_fminp_h,
+ gen_helper_gvec_fminp_s,
+ gen_helper_gvec_fminp_d,
+};
+TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
+ gen_helper_gvec_fmaxnump_h,
+ gen_helper_gvec_fmaxnump_s,
+ gen_helper_gvec_fmaxnump_d,
+};
+TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
+ gen_helper_gvec_fminnump_h,
+ gen_helper_gvec_fminnump_s,
+ gen_helper_gvec_fminnump_d,
+};
+TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)
+
 /*
 * Advanced SIMD scalar/vector x indexed element
 */
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
 }

 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
+TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
+TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
+TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
+TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)

 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 int opcode = extract32(insn, 12, 5);
 int rn = extract32(insn, 5, 5);
 int rd = extract32(insn, 0, 5);
- TCGv_ptr fpst;

 /* For some ops (the FP ones), size[1] is part of the encoding.
 * For ADDP strictly it is not but size[1] is always 1 for valid
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 if (!fp_access_check(s)) {
 return;
 }
-
- fpst = NULL;
 break;
+ default:
 case 0xc: /* FMAXNMP */
+ case 0xd: /* FADDP */
 case 0xf: /* FMAXP */
 case 0x2c: /* FMINNMP */
 case 0x2f: /* FMINP */
- /* FP op, size[0] is 32 or 64 bit*/
- if (!u) {
- if ((size & 1) || !dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- } else {
- size = MO_16;
- }
- } else {
- size = extract32(size, 0, 1) ? MO_64 : MO_32;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- break;
- default:
- case 0xd: /* FADDP */
 unallocated_encoding(s);
 return;
 }
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
 case 0x3b: /* ADDP */
 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
 break;
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
 default:
+ case 0xc: /* FMAXNMP */
 case 0xd: /* FADDP */
+ case 0xf: /* FMAXP */
+ case 0x2c: /* FMINNMP */
+ case 0x2f: /* FMINP */
 g_assert_not_reached();
 }

 write_fp_dreg(s, rd, tcg_res);
 } else {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, 0, size);
- read_vec_element_i32(s, tcg_op2, rn, 1, size);
-
- if (size == MO_16) {
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0xd: /* FADDP */
- g_assert_not_reached();
- }
- } else {
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- case 0xd: /* FADDP */
- g_assert_not_reached();
- }
- }
-
- write_fp_sreg(s, rd, tcg_res);
+ g_assert_not_reached();
 }
 }

@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 int size, int rn, int rm, int rd)
 {
- TCGv_ptr fpst;
 int pass;

- /* Floating point operations need fpst */
- if (opcode >= 0x58) {
- fpst = fpstatus_ptr(FPST_FPCR);
- } else {
- fpst = NULL;
- }
-
 if (!fp_access_check(s)) {
 return;
 }
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 case 0x17: /* ADDP */
 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
 break;
- case 0x58: /* FMAXNMP */
- gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5e: /* FMAXP */
- gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x78: /* FMINNMP */
- gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x7e: /* FMINP */
- gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
 default:
+ case 0x58: /* FMAXNMP */
 case 0x5a: /* FADDP */
+ case 0x5e: /* FMAXP */
+ case 0x78: /* FMINNMP */
+ case 0x7e: /* FMINP */
 g_assert_not_reached();
 }
 }
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 genfn = fns[size][u];
 break;
 }
- /* The FP operations are all on single floats (32 bit) */
- case 0x58: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5e: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x78: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x7e: /* FMINP */
- gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
 default:
+ case 0x58: /* FMAXNMP */
318
case 0x5a: /* FADDP */
319
+ case 0x5e: /* FMAXP */
320
+ case 0x78: /* FMINNMP */
321
+ case 0x7e: /* FMINP */
322
g_assert_not_reached();
323
}
324
325
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
326
}
327
328
switch (fpopcode) {
329
- case 0x58: /* FMAXNMP */
330
- case 0x5e: /* FMAXP */
331
- case 0x78: /* FMINNMP */
332
- case 0x7e: /* FMINP */
333
- if (size && !is_q) {
334
- unallocated_encoding(s);
335
- return;
336
- }
337
- handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
338
- rn, rm, rd);
339
- return;
340
-
341
case 0x1d: /* FMLAL */
342
case 0x3d: /* FMLSL */
343
case 0x59: /* FMLAL2 */
344
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
345
case 0x3a: /* FSUB */
346
case 0x3e: /* FMIN */
347
case 0x3f: /* FRSQRTS */
348
+ case 0x58: /* FMAXNMP */
349
case 0x5a: /* FADDP */
350
case 0x5b: /* FMUL */
351
case 0x5c: /* FCMGE */
352
case 0x5d: /* FACGE */
353
+ case 0x5e: /* FMAXP */
354
case 0x5f: /* FDIV */
355
+ case 0x78: /* FMINNMP */
356
case 0x7a: /* FABD */
357
case 0x7d: /* FACGT */
358
case 0x7c: /* FCMGT */
359
+ case 0x7e: /* FMINP */
360
unallocated_encoding(s);
361
return;
362
}
363
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
364
}
365
}
366
367
-/*
368
- * Advanced SIMD three same (ARMv8.2 FP16 variants)
369
- *
370
- * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
371
- * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
372
- * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
373
- * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
374
- *
375
- * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
376
- * (register), FACGE, FABD, FCMGT (register) and FACGT.
377
- *
378
- */
379
-static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
380
-{
381
- int opcode = extract32(insn, 11, 3);
382
- int u = extract32(insn, 29, 1);
383
- int a = extract32(insn, 23, 1);
384
- int is_q = extract32(insn, 30, 1);
385
- int rm = extract32(insn, 16, 5);
386
- int rn = extract32(insn, 5, 5);
387
- int rd = extract32(insn, 0, 5);
388
- /*
389
- * For these floating point ops, the U, a and opcode bits
390
- * together indicate the operation.
391
- */
392
- int fpopcode = opcode | (a << 3) | (u << 4);
393
- bool pairwise;
394
- TCGv_ptr fpst;
395
- int pass;
396
-
397
- switch (fpopcode) {
398
- case 0x10: /* FMAXNMP */
399
- case 0x16: /* FMAXP */
400
- case 0x18: /* FMINNMP */
401
- case 0x1e: /* FMINP */
402
- pairwise = true;
116
- break;
403
- break;
117
- default:
404
- default:
405
- case 0x0: /* FMAXNM */
406
- case 0x1: /* FMLA */
407
- case 0x2: /* FADD */
408
- case 0x3: /* FMULX */
409
- case 0x4: /* FCMEQ */
410
- case 0x6: /* FMAX */
411
- case 0x7: /* FRECPS */
412
- case 0x8: /* FMINNM */
413
- case 0x9: /* FMLS */
414
- case 0xa: /* FSUB */
415
- case 0xe: /* FMIN */
416
- case 0xf: /* FRSQRTS */
417
- case 0x12: /* FADDP */
418
- case 0x13: /* FMUL */
419
- case 0x14: /* FCMGE */
420
- case 0x15: /* FACGE */
421
- case 0x17: /* FDIV */
422
- case 0x1a: /* FABD */
423
- case 0x1c: /* FCMGT */
424
- case 0x1d: /* FACGT */
425
- unallocated_encoding(s);
426
- return;
427
- }
428
-
429
- if (!dc_isar_feature(aa64_fp16, s)) {
430
- unallocated_encoding(s);
431
- return;
432
- }
433
-
434
- if (!fp_access_check(s)) {
435
- return;
436
- }
437
-
438
- fpst = fpstatus_ptr(FPST_FPCR_F16);
439
-
440
- if (pairwise) {
441
- int maxpass = is_q ? 8 : 4;
442
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
443
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
444
- TCGv_i32 tcg_res[8];
445
-
446
- for (pass = 0; pass < maxpass; pass++) {
447
- int passreg = pass < (maxpass / 2) ? rn : rm;
448
- int passelt = (pass << 1) & (maxpass - 1);
449
-
450
- read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
451
- read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
452
- tcg_res[pass] = tcg_temp_new_i32();
453
-
454
- switch (fpopcode) {
455
- case 0x10: /* FMAXNMP */
456
- gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
457
- fpst);
458
- break;
459
- case 0x16: /* FMAXP */
460
- gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
461
- break;
462
- case 0x18: /* FMINNMP */
463
- gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
464
- fpst);
465
- break;
466
- case 0x1e: /* FMINP */
467
- gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
468
- break;
469
- default:
470
- case 0x12: /* FADDP */
471
- g_assert_not_reached();
472
- }
473
- }
474
-
475
- for (pass = 0; pass < maxpass; pass++) {
476
- write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
477
- }
478
- } else {
118
- g_assert_not_reached();
479
- g_assert_not_reached();
119
- }
480
- }
120
-
481
-
121
- if (cmode_3_1 != 7 && is_neg) {
482
- clear_vec_high(s, is_q, rd);
122
- imm = ~imm;
483
-}
123
+ if (cmode == 15 && o2 && !is_neg) {
484
-
124
+ /* FMOV (vector, immediate) - half-precision */
485
/* AdvSIMD three same extra
125
+ imm = vfp_expand_imm(MO_16, abcdefgh);
486
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
126
+ /* now duplicate across the lanes */
487
* +---+---+---+-----------+------+---+------+---+--------+---+----+----+
127
+ imm = bitfield_replicate(imm, 16);
488
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
128
+ } else {
489
{ 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
129
+ imm = asimd_imm_const(abcdefgh, cmode, is_neg);
490
{ 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
130
}
491
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
131
492
- { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
132
if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
493
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
133
diff --git a/target/arm/translate.c b/target/arm/translate.c
494
{ 0x00000000, 0x00000000, NULL }
495
};
496
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
134
index XXXXXXX..XXXXXXX 100644
497
index XXXXXXX..XXXXXXX 100644
135
--- a/target/arm/translate.c
498
--- a/target/arm/tcg/vec_helper.c
136
+++ b/target/arm/translate.c
499
+++ b/target/arm/tcg/vec_helper.c
137
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
500
@@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_faddp_h, float16_add, float16, H2)
138
case 14:
501
DO_3OP_PAIR(gvec_faddp_s, float32_add, float32, H4)
139
if (op) {
502
DO_3OP_PAIR(gvec_faddp_d, float64_add, float64, )
140
/*
503
141
- * This is the only case where the top and bottom 32 bits
504
+DO_3OP_PAIR(gvec_fmaxp_h, float16_max, float16, H2)
142
- * of the encoded constant differ.
505
+DO_3OP_PAIR(gvec_fmaxp_s, float32_max, float32, H4)
143
+ * This and cmode == 15 op == 1 are the only cases where
506
+DO_3OP_PAIR(gvec_fmaxp_d, float64_max, float64, )
144
+ * the top and bottom 32 bits of the encoded constant differ.
507
+
145
*/
508
+DO_3OP_PAIR(gvec_fminp_h, float16_min, float16, H2)
146
uint64_t imm64 = 0;
509
+DO_3OP_PAIR(gvec_fminp_s, float32_min, float32, H4)
147
int n;
510
+DO_3OP_PAIR(gvec_fminp_d, float64_min, float64, )
148
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
511
+
149
imm |= (imm << 8) | (imm << 16) | (imm << 24);
512
+DO_3OP_PAIR(gvec_fmaxnump_h, float16_maxnum, float16, H2)
150
break;
513
+DO_3OP_PAIR(gvec_fmaxnump_s, float32_maxnum, float32, H4)
151
case 15:
514
+DO_3OP_PAIR(gvec_fmaxnump_d, float64_maxnum, float64, )
152
+ if (op) {
515
+
153
+ /* Reserved encoding for AArch32; valid for AArch64 */
516
+DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
154
+ uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
517
+DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
155
+ if (imm & 0x80) {
518
+DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
156
+ imm64 |= 0x8000000000000000ULL;
519
+
157
+ }
520
#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
158
+ if (imm & 0x40) {
521
void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
159
+ imm64 |= 0x3fc0000000000000ULL;
522
{ \
160
+ } else {
161
+ imm64 |= 0x4000000000000000ULL;
162
+ }
163
+ return imm64;
164
+ }
165
imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
166
| ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
167
break;
168
--
523
--
169
2.20.1
524
2.34.1
170
171
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20240524232121.284515-31-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper.h | 7 -----
9
target/arm/tcg/translate-neon.c | 55 ++-------------------------------
10
target/arm/tcg/vec_helper.c | 45 ---------------------------
11
3 files changed, 3 insertions(+), 104 deletions(-)
12
13
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.h
16
+++ b/target/arm/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
18
DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG,
19
void, ptr, ptr, ptr, ptr, ptr, i32)
20
21
-DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
-DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
23
-DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
24
-DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
25
-DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
-DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
-
28
DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/tcg/translate-neon.c
34
+++ b/target/arm/tcg/translate-neon.c
35
@@ -XXX,XX +XXX,XX @@ DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
36
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
37
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
38
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
39
+DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
40
+DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
41
+DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)
42
43
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
44
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
45
@@ -XXX,XX +XXX,XX @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
46
return do_3same(s, a, gen_VMINNM_fp32_3s);
47
}
48
49
-static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
50
- gen_helper_gvec_3_ptr *fn)
51
-{
52
- /* FP pairwise operations */
53
- TCGv_ptr fpstatus;
54
-
55
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
56
- return false;
57
- }
58
-
59
- /* UNDEF accesses to D16-D31 if they don't exist. */
60
- if (!dc_isar_feature(aa32_simd_r32, s) &&
61
- ((a->vd | a->vn | a->vm) & 0x10)) {
62
- return false;
63
- }
64
-
65
- if (!vfp_access_check(s)) {
66
- return true;
67
- }
68
-
69
- assert(a->q == 0); /* enforced by decode patterns */
70
-
71
-
72
- fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
73
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
74
- vfp_reg_offset(1, a->vn),
75
- vfp_reg_offset(1, a->vm),
76
- fpstatus, 8, 8, 0, fn);
77
-
78
- return true;
79
-}
80
-
81
-/*
82
- * For all the functions using this macro, size == 1 means fp16,
83
- * which is an architecture extension we don't implement yet.
84
- */
85
-#define DO_3S_FP_PAIR(INSN,FUNC) \
86
- static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
87
- { \
88
- if (a->size == MO_16) { \
89
- if (!dc_isar_feature(aa32_fp16_arith, s)) { \
90
- return false; \
91
- } \
92
- return do_3same_fp_pair(s, a, FUNC##h); \
93
- } \
94
- return do_3same_fp_pair(s, a, FUNC##s); \
95
- }
96
-
97
-DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
98
-DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
99
-DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
100
-
101
static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
102
{
103
/* Handle a 2-reg-shift insn which can be vectorized. */
104
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/tcg/vec_helper.c
107
+++ b/target/arm/tcg/vec_helper.c
108
@@ -XXX,XX +XXX,XX @@ DO_ABA(gvec_uaba_d, uint64_t)
109
110
#undef DO_ABA
111
112
-#define DO_NEON_PAIRWISE(NAME, OP) \
113
- void HELPER(NAME##s)(void *vd, void *vn, void *vm, \
114
- void *stat, uint32_t oprsz) \
115
- { \
116
- float_status *fpst = stat; \
117
- float32 *d = vd; \
118
- float32 *n = vn; \
119
- float32 *m = vm; \
120
- float32 r0, r1; \
121
- \
122
- /* Read all inputs before writing outputs in case vm == vd */ \
123
- r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \
124
- r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \
125
- \
126
- d[H4(0)] = r0; \
127
- d[H4(1)] = r1; \
128
- } \
129
- \
130
- void HELPER(NAME##h)(void *vd, void *vn, void *vm, \
131
- void *stat, uint32_t oprsz) \
132
- { \
133
- float_status *fpst = stat; \
134
- float16 *d = vd; \
135
- float16 *n = vn; \
136
- float16 *m = vm; \
137
- float16 r0, r1, r2, r3; \
138
- \
139
- /* Read all inputs before writing outputs in case vm == vd */ \
140
- r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \
141
- r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \
142
- r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
143
- r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
144
- \
145
- d[H2(0)] = r0; \
146
- d[H2(1)] = r1; \
147
- d[H2(2)] = r2; \
148
- d[H2(3)] = r3; \
149
- }
150
-
151
-DO_NEON_PAIRWISE(neon_padd, add)
152
-DO_NEON_PAIRWISE(neon_pmax, max)
153
-DO_NEON_PAIRWISE(neon_pmin, min)
154
-
155
-#undef DO_NEON_PAIRWISE
156
-
157
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
158
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
159
{ \
160
--
161
2.34.1
diff view generated by jsdifflib
1
Implement the MVE saturating shift-right-and-narrow insns
1
From: Richard Henderson <richard.henderson@linaro.org>
2
VQSHRN, VQSHRUN, VQRSHRN and VQRSHRUN.
3
2
4
do_srshr() is borrowed from sve_helper.c.
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20240524232121.284515-32-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper.h | 5 ++
9
target/arm/tcg/translate.h | 3 +
10
target/arm/tcg/a64.decode | 6 ++
11
target/arm/tcg/gengvec.c | 12 ++++
12
target/arm/tcg/translate-a64.c | 128 ++++++---------------------------
13
target/arm/tcg/vec_helper.c | 30 ++++++++
14
6 files changed, 77 insertions(+), 107 deletions(-)
5
15
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
diff --git a/target/arm/helper.h b/target/arm/helper.h
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
index XXXXXXX..XXXXXXX 100644
8
Message-id: 20210628135835.6690-13-peter.maydell@linaro.org
18
--- a/target/arm/helper.h
9
---
19
+++ b/target/arm/helper.h
10
target/arm/helper-mve.h | 30 +++++++++++
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i
11
target/arm/mve.decode | 28 ++++++++++
21
DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
12
target/arm/mve_helper.c | 104 +++++++++++++++++++++++++++++++++++++
22
DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
13
target/arm/translate-mve.c | 12 +++++
23
14
4 files changed, 174 insertions(+)
24
+DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
15
25
+DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
16
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
26
+DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
index XXXXXXX..XXXXXXX 100644
27
+DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
--- a/target/arm/helper-mve.h
28
+
19
+++ b/target/arm/helper-mve.h
29
#ifdef TARGET_AARCH64
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
#include "tcg/helper-a64.h"
21
DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
#include "tcg/helper-sve.h"
22
DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
23
DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
index XXXXXXX..XXXXXXX 100644
24
+
34
--- a/target/arm/tcg/translate.h
25
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
+++ b/target/arm/tcg/translate.h
26
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
@@ -XXX,XX +XXX,XX @@ void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
27
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
37
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
28
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
38
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
29
+
39
30
+DEF_HELPER_FLAGS_4(mve_vqshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
+void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
31
+DEF_HELPER_FLAGS_4(mve_vqshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
41
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
32
+DEF_HELPER_FLAGS_4(mve_vqshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
42
+
33
+DEF_HELPER_FLAGS_4(mve_vqshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
43
/*
34
+
44
* Forward to the isar_feature_* tests given a DisasContext pointer.
35
+DEF_HELPER_FLAGS_4(mve_vqshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
45
*/
36
+DEF_HELPER_FLAGS_4(mve_vqshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
46
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
37
+DEF_HELPER_FLAGS_4(mve_vqshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
47
index XXXXXXX..XXXXXXX 100644
38
+DEF_HELPER_FLAGS_4(mve_vqshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
48
--- a/target/arm/tcg/a64.decode
39
+
49
+++ b/target/arm/tcg/a64.decode
40
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
50
@@ -XXX,XX +XXX,XX @@
41
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
51
&qrrrr_e q rd rn rm ra esz
42
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
52
43
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
53
@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
44
+
54
+@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3
45
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
55
@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
46
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
56
47
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
57
@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
48
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
58
@@ -XXX,XX +XXX,XX @@
49
+
59
50
+DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
60
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
51
+DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
61
@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd
52
+DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
62
+@qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e
53
+DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
63
54
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
64
@qrrx_h . q:1 .. .... .. .. rm:4 .... . . rn:5 rd:5 \
55
index XXXXXXX..XXXXXXX 100644
65
&qrrx_e esz=1 idx=%hlm
56
--- a/target/arm/mve.decode
66
@@ -XXX,XX +XXX,XX @@ FMAXNMP_s 0111 1110 0.11 0000 1100 10 ..... ..... @rr_sd
57
+++ b/target/arm/mve.decode
67
FMINNMP_s 0101 1110 1011 0000 1100 10 ..... ..... @rr_h
58
@@ -XXX,XX +XXX,XX @@ VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
68
FMINNMP_s 0111 1110 1.11 0000 1100 10 ..... ..... @rr_sd
59
VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
69
60
VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
70
+ADDP_s 0101 1110 1111 0001 1011 10 ..... ..... @rr_d
61
VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
71
+
62
+
72
### Advanced SIMD three same
63
+VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
73
64
+VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
74
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
65
+VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
75
@@ -XXX,XX +XXX,XX @@ FMAXNMP_v 0.10 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd
66
+VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
76
FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h
67
+VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
77
FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
68
+VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
78
69
+VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
79
+ADDP_v 0.00 1110 ..1 ..... 10111 1 ..... ..... @qrrr_e
70
+VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
80
+
71
+
81
### Advanced SIMD scalar x indexed element
72
+VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
82
73
+VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
83
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
74
+VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
84
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
75
+VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
85
index XXXXXXX..XXXXXXX 100644
76
+
86
--- a/target/arm/tcg/gengvec.c
77
+VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
87
+++ b/target/arm/tcg/gengvec.c
78
+VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
88
@@ -XXX,XX +XXX,XX @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
79
+VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
89
};
80
+VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
90
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
81
+VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
91
}
82
+VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
92
+
83
+VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
93
+void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
84
+VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
94
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
85
+
95
+{
86
+VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
96
+ static gen_helper_gvec_3 * const fns[4] = {
87
+VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
97
+ gen_helper_gvec_addp_b,
88
+VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
98
+ gen_helper_gvec_addp_h,
89
+VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
99
+ gen_helper_gvec_addp_s,
90
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
100
+ gen_helper_gvec_addp_d,
91
index XXXXXXX..XXXXXXX 100644
101
+ };
92
--- a/target/arm/mve_helper.c
102
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
93
+++ b/target/arm/mve_helper.c
103
+}
94
@@ -XXX,XX +XXX,XX @@ static inline uint64_t do_urshr(uint64_t x, unsigned sh)
104
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/tcg/translate-a64.c
107
+++ b/target/arm/tcg/translate-a64.c
108
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
109
};
110
TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)
111
112
+TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
113
+
114
/*
115
* Advanced SIMD scalar/vector x indexed element
116
*/
117
@@ -XXX,XX +XXX,XX @@ TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
118
TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
119
TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
120
121
+static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
122
+{
123
+ if (fp_access_check(s)) {
124
+ TCGv_i64 t0 = tcg_temp_new_i64();
125
+ TCGv_i64 t1 = tcg_temp_new_i64();
126
+
127
+ read_vec_element(s, t0, a->rn, 0, MO_64);
128
+ read_vec_element(s, t1, a->rn, 1, MO_64);
129
+ tcg_gen_add_i64(t0, t0, t1);
130
+ write_fp_dreg(s, a->rd, t0);
131
+ }
132
+ return true;
133
+}
134
+
135
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
136
* Note that it is the caller's responsibility to ensure that the
137
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
138
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
95
}
139
}
96
}
140
}
97
141
98
+static inline int64_t do_srshr(int64_t x, unsigned sh)
142
-/* AdvSIMD scalar pairwise
99
+{
143
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
100
+ if (likely(sh < 64)) {
144
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
101
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
145
- * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
102
+ } else {
146
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
103
+ /* Rounding the sign bit always produces 0. */
147
- */
104
+ return 0;
148
-static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
105
+ }
149
-{
150
- int u = extract32(insn, 29, 1);
151
- int size = extract32(insn, 22, 2);
152
- int opcode = extract32(insn, 12, 5);
153
- int rn = extract32(insn, 5, 5);
154
- int rd = extract32(insn, 0, 5);
155
-
156
- /* For some ops (the FP ones), size[1] is part of the encoding.
157
- * For ADDP strictly it is not but size[1] is always 1 for valid
158
- * encodings.
159
- */
160
- opcode |= (extract32(size, 1, 1) << 5);
161
-
162
- switch (opcode) {
163
- case 0x3b: /* ADDP */
164
- if (u || size != 3) {
165
- unallocated_encoding(s);
166
- return;
167
- }
168
- if (!fp_access_check(s)) {
169
- return;
170
- }
171
- break;
172
- default:
173
- case 0xc: /* FMAXNMP */
174
- case 0xd: /* FADDP */
175
- case 0xf: /* FMAXP */
176
- case 0x2c: /* FMINNMP */
177
- case 0x2f: /* FMINP */
178
- unallocated_encoding(s);
179
- return;
180
- }
181
-
182
- if (size == MO_64) {
183
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
184
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
185
- TCGv_i64 tcg_res = tcg_temp_new_i64();
186
-
187
- read_vec_element(s, tcg_op1, rn, 0, MO_64);
188
- read_vec_element(s, tcg_op2, rn, 1, MO_64);
189
-
190
- switch (opcode) {
191
- case 0x3b: /* ADDP */
192
- tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
193
- break;
194
- default:
195
- case 0xc: /* FMAXNMP */
196
- case 0xd: /* FADDP */
197
- case 0xf: /* FMAXP */
198
- case 0x2c: /* FMINNMP */
199
- case 0x2f: /* FMINP */
200
- g_assert_not_reached();
201
- }
202
-
203
- write_fp_dreg(s, rd, tcg_res);
204
- } else {
205
- g_assert_not_reached();
206
- }
207
-}
208
-
209
/*
210
* Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
211
*
212
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
213
* adjacent elements being operated on to produce an element in the result.
214
*/
215
if (size == 3) {
216
- TCGv_i64 tcg_res[2];
217
-
218
- for (pass = 0; pass < 2; pass++) {
219
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
220
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
221
- int passreg = (pass == 0) ? rn : rm;
222
-
223
- read_vec_element(s, tcg_op1, passreg, 0, MO_64);
224
- read_vec_element(s, tcg_op2, passreg, 1, MO_64);
225
- tcg_res[pass] = tcg_temp_new_i64();
226
-
227
- switch (opcode) {
228
- case 0x17: /* ADDP */
229
- tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
230
- break;
231
- default:
232
- case 0x58: /* FMAXNMP */
233
- case 0x5a: /* FADDP */
234
- case 0x5e: /* FMAXP */
235
- case 0x78: /* FMINNMP */
236
- case 0x7e: /* FMINP */
237
- g_assert_not_reached();
238
- }
239
- }
240
-
241
- for (pass = 0; pass < 2; pass++) {
242
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
243
- }
244
+ g_assert_not_reached();
245
} else {
246
int maxpass = is_q ? 4 : 2;
247
TCGv_i32 tcg_res[4];
248
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
249
tcg_res[pass] = tcg_temp_new_i32();
250
251
switch (opcode) {
252
- case 0x17: /* ADDP */
253
- {
254
- static NeonGenTwoOpFn * const fns[3] = {
255
- gen_helper_neon_padd_u8,
256
- gen_helper_neon_padd_u16,
257
- tcg_gen_add_i32,
258
- };
259
- genfn = fns[size];
260
- break;
261
- }
262
case 0x14: /* SMAXP, UMAXP */
263
{
264
static NeonGenTwoOpFn * const fns[3][2] = {
265
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
266
break;
267
}
268
default:
269
+ case 0x17: /* ADDP */
270
case 0x58: /* FMAXNMP */
271
case 0x5a: /* FADDP */
272
case 0x5e: /* FMAXP */
273
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
274
case 0x3: /* logic ops */
275
disas_simd_3same_logic(s, insn);
276
break;
277
- case 0x17: /* ADDP */
278
case 0x14: /* SMAXP, UMAXP */
279
case 0x15: /* SMINP, UMINP */
280
{
281
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
282
default:
283
disas_simd_3same_int(s, insn);
284
break;
285
+ case 0x17: /* ADDP */
286
+ unallocated_encoding(s);
287
+ break;
288
}
289
}
290
291
@@ -XXX,XX +XXX,XX @@ static const AArch64DecodeTable data_proc_simd[] = {
292
{ 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
293
{ 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
294
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
295
- { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
296
{ 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
297
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
298
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
299
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
300
index XXXXXXX..XXXXXXX 100644
301
--- a/target/arm/tcg/vec_helper.c
302
+++ b/target/arm/tcg/vec_helper.c
303
@@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
304
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
305
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
306
307
+#undef DO_3OP_PAIR
308
+
309
+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
310
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
311
+{ \
312
+ ARMVectorReg scratch; \
313
+ intptr_t oprsz = simd_oprsz(desc); \
314
+ intptr_t half = oprsz / sizeof(TYPE) / 2; \
315
+ TYPE *d = vd, *n = vn, *m = vm; \
316
+ if (unlikely(d == m)) { \
317
+ m = memcpy(&scratch, m, oprsz); \
318
+ } \
319
+ for (intptr_t i = 0; i < half; ++i) { \
320
+ d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \
321
+ } \
322
+ for (intptr_t i = 0; i < half; ++i) { \
323
+ d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \
324
+ } \
325
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
106
+}
326
+}
107
+
327
+
108
DO_VSHRN_ALL(vshrn, DO_SHR)
328
+#define ADD(A, B) (A + B)
109
DO_VSHRN_ALL(vrshrn, do_urshr)
329
+DO_3OP_PAIR(gvec_addp_b, ADD, uint8_t, H1)
110
+
330
+DO_3OP_PAIR(gvec_addp_h, ADD, uint16_t, H2)
111
+static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max,
331
+DO_3OP_PAIR(gvec_addp_s, ADD, uint32_t, H4)
112
+ bool *satp)
332
+DO_3OP_PAIR(gvec_addp_d, ADD, uint64_t, )
113
+{
333
+#undef ADD
114
+ if (val > max) {
334
+
115
+ *satp = true;
335
+#undef DO_3OP_PAIR
116
+ return max;
336
+
117
+ } else if (val < min) {
337
#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
118
+ *satp = true;
338
void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
119
+ return min;
339
{ \
120
+ } else {
121
+ return val;
122
+ }
123
+}
124
+
125
+/* Saturating narrowing right shifts */
126
+#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
127
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
128
+ void *vm, uint32_t shift) \
129
+ { \
130
+ LTYPE *m = vm; \
131
+ TYPE *d = vd; \
132
+ uint16_t mask = mve_element_mask(env); \
133
+ bool qc = false; \
134
+ unsigned le; \
135
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
136
+ bool sat = false; \
137
+ TYPE r = FN(m[H##LESIZE(le)], shift, &sat); \
138
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
139
+ qc |= sat && (mask & 1 << (TOP * ESIZE)); \
140
+ } \
141
+ if (qc) { \
142
+ env->vfp.qc[0] = qc; \
143
+ } \
144
+ mve_advance_vpt(env); \
145
+ }
146
+
147
+#define DO_VSHRN_SAT_UB(BOP, TOP, FN) \
148
+ DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \
149
+ DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
150
+
151
+#define DO_VSHRN_SAT_UH(BOP, TOP, FN) \
152
+ DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \
153
+ DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
154
+
155
+#define DO_VSHRN_SAT_SB(BOP, TOP, FN) \
156
+ DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \
157
+ DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
158
+
159
+#define DO_VSHRN_SAT_SH(BOP, TOP, FN) \
160
+ DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \
161
+ DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
162
+
163
+#define DO_SHRN_SB(N, M, SATP) \
164
+ do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP)
165
+#define DO_SHRN_UB(N, M, SATP) \
166
+ do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP)
167
+#define DO_SHRUN_B(N, M, SATP) \
168
+ do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP)
169
+
170
+#define DO_SHRN_SH(N, M, SATP) \
171
+ do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP)
172
+#define DO_SHRN_UH(N, M, SATP) \
173
+ do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP)
174
+#define DO_SHRUN_H(N, M, SATP) \
175
+ do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP)
176
+
177
+#define DO_RSHRN_SB(N, M, SATP) \
178
+ do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP)
179
+#define DO_RSHRN_UB(N, M, SATP) \
180
+ do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP)
181
+#define DO_RSHRUN_B(N, M, SATP) \
182
+ do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP)
183
+
184
+#define DO_RSHRN_SH(N, M, SATP) \
185
+ do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP)
186
+#define DO_RSHRN_UH(N, M, SATP) \
187
+ do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP)
188
+#define DO_RSHRUN_H(N, M, SATP) \
189
+ do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP)
190
+
191
+DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB)
192
+DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH)
193
+DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB)
194
+DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH)
195
+DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B)
196
+DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H)
197
+
198
+DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB)
199
+DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH)
200
+DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB)
201
+DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
202
+DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
203
+DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
204
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/target/arm/translate-mve.c
207
+++ b/target/arm/translate-mve.c
208
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_N(VSHRNB, vshrnb)
209
DO_2SHIFT_N(VSHRNT, vshrnt)
210
DO_2SHIFT_N(VRSHRNB, vrshrnb)
211
DO_2SHIFT_N(VRSHRNT, vrshrnt)
212
+DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
213
+DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
214
+DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
215
+DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
216
+DO_2SHIFT_N(VQSHRUNB, vqshrunb)
217
+DO_2SHIFT_N(VQSHRUNT, vqshrunt)
218
+DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
219
+DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
220
+DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
221
+DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
222
+DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
223
+DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
224
--
340
--
225
2.20.1
341
2.34.1
226
227
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20240524232121.284515-33-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper.h | 2 --
9
target/arm/tcg/neon_helper.c | 5 -----
10
target/arm/tcg/translate-neon.c | 3 +--
11
3 files changed, 1 insertion(+), 9 deletions(-)
12
13
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.h
16
+++ b/target/arm/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
18
19
DEF_HELPER_2(neon_add_u8, i32, i32, i32)
20
DEF_HELPER_2(neon_add_u16, i32, i32, i32)
21
-DEF_HELPER_2(neon_padd_u8, i32, i32, i32)
22
-DEF_HELPER_2(neon_padd_u16, i32, i32, i32)
23
DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
24
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
25
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
26
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/tcg/neon_helper.c
29
+++ b/target/arm/tcg/neon_helper.c
30
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
31
return (a + b) ^ mask;
32
}
33
34
-#define NEON_FN(dest, src1, src2) dest = src1 + src2
35
-NEON_POP(padd_u8, neon_u8, 4)
36
-NEON_POP(padd_u16, neon_u16, 2)
37
-#undef NEON_FN
38
-
39
#define NEON_FN(dest, src1, src2) dest = src1 - src2
40
NEON_VOP(sub_u8, neon_u8, 4)
41
NEON_VOP(sub_u16, neon_u16, 2)
42
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/translate-neon.c
45
+++ b/target/arm/tcg/translate-neon.c
46
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
47
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
48
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
49
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
50
+DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
51
52
#define DO_3SAME_CMP(INSN, COND) \
53
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
54
@@ -XXX,XX +XXX,XX @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
55
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
56
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
57
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
58
-#define gen_helper_neon_padd_u32 tcg_gen_add_i32
59
60
DO_3SAME_PAIR(VPMAX_S, pmax_s)
61
DO_3SAME_PAIR(VPMIN_S, pmin_s)
62
DO_3SAME_PAIR(VPMAX_U, pmax_u)
63
DO_3SAME_PAIR(VPMIN_U, pmin_u)
64
-DO_3SAME_PAIR(VPADD, padd_u)
65
66
#define DO_3SAME_VQDMULH(INSN, FUNC) \
67
WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
68
--
69
2.34.1
diff view generated by jsdifflib
1
Implement the MVE shifts by register, which perform
1
From: Richard Henderson <richard.henderson@linaro.org>
2
shifts on a single general-purpose register.
3
2
3
These are the last instructions within handle_simd_3same_pair
4
so remove it.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20240524232121.284515-34-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210628135835.6690-19-peter.maydell@linaro.org
7
---
10
---
8
target/arm/helper-mve.h | 2 ++
11
target/arm/helper.h | 16 +++++
9
target/arm/translate.h | 1 +
12
target/arm/tcg/translate.h | 8 +++
10
target/arm/t32.decode | 18 ++++++++++++++----
13
target/arm/tcg/a64.decode | 4 ++
11
target/arm/mve_helper.c | 10 ++++++++++
14
target/arm/tcg/gengvec.c | 48 +++++++++++++
12
target/arm/translate.c | 30 ++++++++++++++++++++++++++++++
15
target/arm/tcg/translate-a64.c | 119 +++++----------------------------
13
5 files changed, 57 insertions(+), 4 deletions(-)
16
target/arm/tcg/vec_helper.c | 16 +++++
17
6 files changed, 109 insertions(+), 102 deletions(-)
14
18
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
19
diff --git a/target/arm/helper.h b/target/arm/helper.h
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper-mve.h
21
--- a/target/arm/helper.h
18
+++ b/target/arm/helper-mve.h
22
+++ b/target/arm/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
20
24
DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
25
DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
26
23
+DEF_HELPER_FLAGS_3(mve_uqrshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
27
+DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_3(mve_sqrshr, TCG_CALL_NO_RWG, i32, env, i32, i32)
28
+DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
diff --git a/target/arm/translate.h b/target/arm/translate.h
29
+DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
index XXXXXXX..XXXXXXX 100644
30
+
27
--- a/target/arm/translate.h
31
+DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
+++ b/target/arm/translate.h
32
+DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
@@ -XXX,XX +XXX,XX @@ typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
33
+DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
34
+
31
typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
35
+DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);
36
+DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+typedef void ShiftFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
37
+DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
38
+
35
/**
39
+DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
* arm_tbflags_from_tb:
40
+DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
41
+DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
index XXXXXXX..XXXXXXX 100644
42
+
39
--- a/target/arm/t32.decode
43
#ifdef TARGET_AARCH64
40
+++ b/target/arm/t32.decode
44
#include "tcg/helper-a64.h"
41
@@ -XXX,XX +XXX,XX @@
45
#include "tcg/helper-sve.h"
42
&mve_shl_ri rdalo rdahi shim
46
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
43
&mve_shl_rr rdalo rdahi rm
47
index XXXXXXX..XXXXXXX 100644
44
&mve_sh_ri rda shim
48
--- a/target/arm/tcg/translate.h
45
+&mve_sh_rr rda rm
49
+++ b/target/arm/tcg/translate.h
46
50
@@ -XXX,XX +XXX,XX @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
47
# rdahi: bits [3:1] from insn, bit 0 is 1
51
48
# rdalo: bits [3:1] from insn, bit 0 is 0
52
void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
49
@@ -XXX,XX +XXX,XX @@
53
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
50
&mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
54
+void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
51
@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
55
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
52
&mve_sh_ri shim=%imm5_12_6
56
+void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
53
+@mve_sh_rr ....... .... . rda:4 rm:4 .... .... .... &mve_sh_rr
57
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
54
58
+void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
55
{
59
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
56
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
60
+void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
57
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
61
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
58
SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
62
59
}
63
/*
60
64
* Forward to the isar_feature_* tests given a DisasContext pointer.
61
- LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
65
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
62
- ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
66
index XXXXXXX..XXXXXXX 100644
63
- UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
67
--- a/target/arm/tcg/a64.decode
64
- SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
68
+++ b/target/arm/tcg/a64.decode
65
+ {
69
@@ -XXX,XX +XXX,XX @@ FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h
66
+ UQRSHL_rr 1110101 0010 1 .... .... 1111 0000 1101 @mve_sh_rr
70
FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
67
+ LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
71
68
+ UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
72
ADDP_v 0.00 1110 ..1 ..... 10111 1 ..... ..... @qrrr_e
69
+ }
73
+SMAXP_v 0.00 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e
70
+
74
+SMINP_v 0.00 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e
71
+ {
75
+UMAXP_v 0.10 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e
72
+ SQRSHR_rr 1110101 0010 1 .... .... 1111 0010 1101 @mve_sh_rr
76
+UMINP_v 0.10 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e
73
+ ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
77
74
+ SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
78
### Advanced SIMD scalar x indexed element
75
+ }
79
76
+
80
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
77
UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
81
index XXXXXXX..XXXXXXX 100644
78
SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
82
--- a/target/arm/tcg/gengvec.c
79
]
83
+++ b/target/arm/tcg/gengvec.c
80
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
84
@@ -XXX,XX +XXX,XX @@ void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
81
index XXXXXXX..XXXXXXX 100644
85
};
82
--- a/target/arm/mve_helper.c
86
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
83
+++ b/target/arm/mve_helper.c
84
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
85
{
86
return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
87
}
87
}
88
+
88
+
89
+uint32_t HELPER(mve_uqrshl)(CPUARMState *env, uint32_t n, uint32_t shift)
89
+void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
90
+{
90
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
91
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF);
91
+{
92
+}
92
+ static gen_helper_gvec_3 * const fns[4] = {
93
+
93
+ gen_helper_gvec_smaxp_b,
94
+uint32_t HELPER(mve_sqrshr)(CPUARMState *env, uint32_t n, uint32_t shift)
94
+ gen_helper_gvec_smaxp_h,
95
+{
95
+ gen_helper_gvec_smaxp_s,
96
+ return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF);
96
+ };
97
+}
97
+ tcg_debug_assert(vece <= MO_32);
98
diff --git a/target/arm/translate.c b/target/arm/translate.c
98
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
99
index XXXXXXX..XXXXXXX 100644
99
+}
100
--- a/target/arm/translate.c
100
+
101
+++ b/target/arm/translate.c
101
+void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
102
@@ -XXX,XX +XXX,XX @@ static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
102
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
103
return do_mve_sh_ri(s, a, gen_mve_uqshl);
103
+{
104
+ static gen_helper_gvec_3 * const fns[4] = {
105
+ gen_helper_gvec_sminp_b,
106
+ gen_helper_gvec_sminp_h,
107
+ gen_helper_gvec_sminp_s,
108
+ };
109
+ tcg_debug_assert(vece <= MO_32);
110
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
111
+}
112
+
113
+void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
114
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
115
+{
116
+ static gen_helper_gvec_3 * const fns[4] = {
117
+ gen_helper_gvec_umaxp_b,
118
+ gen_helper_gvec_umaxp_h,
119
+ gen_helper_gvec_umaxp_s,
120
+ };
121
+ tcg_debug_assert(vece <= MO_32);
122
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
123
+}
124
+
125
+void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
126
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
127
+{
128
+ static gen_helper_gvec_3 * const fns[4] = {
129
+ gen_helper_gvec_uminp_b,
130
+        gen_helper_gvec_uminp_h,
+        gen_helper_gvec_uminp_s,
+    };
+    tcg_debug_assert(vece <= MO_32);
+    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
     return true;
 }

+static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
+{
+    if (a->esz == MO_64) {
+        return false;
+    }
+    if (fp_access_check(s)) {
+        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
+    }
+    return true;
+}
+
 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
 {
     if (!a->q && a->esz == MO_64) {
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
 TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)

 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
+TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
+TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
+TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
+TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)

 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
     }
 }

-/* Pairwise op subgroup of C3.6.16.
- *
- * This is called directly for float pairwise
- * operations where the opcode and size are calculated differently.
- */
-static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
-                                   int size, int rn, int rm, int rd)
-{
-    int pass;
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    /* These operations work on the concatenated rm:rn, with each pair of
-     * adjacent elements being operated on to produce an element in the result.
-     */
-    if (size == 3) {
-        g_assert_not_reached();
-    } else {
-        int maxpass = is_q ? 4 : 2;
-        TCGv_i32 tcg_res[4];
-
-        for (pass = 0; pass < maxpass; pass++) {
-            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
-            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
-            NeonGenTwoOpFn *genfn = NULL;
-            int passreg = pass < (maxpass / 2) ? rn : rm;
-            int passelt = (is_q && (pass & 1)) ? 2 : 0;
-
-            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
-            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
-            tcg_res[pass] = tcg_temp_new_i32();
-
-            switch (opcode) {
-            case 0x14: /* SMAXP, UMAXP */
-            {
-                static NeonGenTwoOpFn * const fns[3][2] = {
-                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
-                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
-                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
-                };
-                genfn = fns[size][u];
-                break;
-            }
-            case 0x15: /* SMINP, UMINP */
-            {
-                static NeonGenTwoOpFn * const fns[3][2] = {
-                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
-                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
-                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
-                };
-                genfn = fns[size][u];
-                break;
-            }
-            default:
-            case 0x17: /* ADDP */
-            case 0x58: /* FMAXNMP */
-            case 0x5a: /* FADDP */
-            case 0x5e: /* FMAXP */
-            case 0x78: /* FMINNMP */
-            case 0x7e: /* FMINP */
-                g_assert_not_reached();
-            }
-
-            /* FP ops called directly, otherwise call now */
-            if (genfn) {
-                genfn(tcg_res[pass], tcg_op1, tcg_op2);
-            }
-        }
-
-        for (pass = 0; pass < maxpass; pass++) {
-            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
-        }
-        clear_vec_high(s, is_q, rd);
-    }
-}
-
 /* Floating point op subgroup of C3.6.16. */
 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
     case 0x3: /* logic ops */
         disas_simd_3same_logic(s, insn);
         break;
-    case 0x14: /* SMAXP, UMAXP */
-    case 0x15: /* SMINP, UMINP */
-    {
-        /* Pairwise operations */
-        int is_q = extract32(insn, 30, 1);
-        int u = extract32(insn, 29, 1);
-        int size = extract32(insn, 22, 2);
-        int rm = extract32(insn, 16, 5);
-        int rn = extract32(insn, 5, 5);
-        int rd = extract32(insn, 0, 5);
-        if (opcode == 0x17) {
-            if (u || (size == 3 && !is_q)) {
-                unallocated_encoding(s);
-                return;
-            }
-        } else {
-            if (size == 3) {
-                unallocated_encoding(s);
-                return;
-            }
-        }
-        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
-        break;
-    }
     case 0x18 ... 0x31:
         /* floating point ops, sz[1] and U are part of opcode */
         disas_simd_3same_float(s, insn);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
     default:
         disas_simd_3same_int(s, insn);
         break;
+    case 0x14: /* SMAXP, UMAXP */
+    case 0x15: /* SMINP, UMINP */
     case 0x17: /* ADDP */
         unallocated_encoding(s);
         break;
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_addp_s, ADD, uint32_t, H4)
 DO_3OP_PAIR(gvec_addp_d, ADD, uint64_t, )
 #undef ADD

+DO_3OP_PAIR(gvec_smaxp_b, MAX, int8_t, H1)
+DO_3OP_PAIR(gvec_smaxp_h, MAX, int16_t, H2)
+DO_3OP_PAIR(gvec_smaxp_s, MAX, int32_t, H4)
+
+DO_3OP_PAIR(gvec_umaxp_b, MAX, uint8_t, H1)
+DO_3OP_PAIR(gvec_umaxp_h, MAX, uint16_t, H2)
+DO_3OP_PAIR(gvec_umaxp_s, MAX, uint32_t, H4)
+
+DO_3OP_PAIR(gvec_sminp_b, MIN, int8_t, H1)
+DO_3OP_PAIR(gvec_sminp_h, MIN, int16_t, H2)
+DO_3OP_PAIR(gvec_sminp_s, MIN, int32_t, H4)
+
+DO_3OP_PAIR(gvec_uminp_b, MIN, uint8_t, H1)
+DO_3OP_PAIR(gvec_uminp_h, MIN, uint16_t, H2)
+DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
+
 #undef DO_3OP_PAIR

 #define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
--
2.34.1
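Background sketch (editorial, not part of the patch): the DO_3OP_PAIR
helpers above implement the AArch64 pairwise semantics, where the low
half of the result is formed from adjacent pairs of Vn elements and
the high half from adjacent pairs of Vm. A minimal C model of that
reduction, with illustrative names only (pairwise_smax_s32 and the
fixed scratch buffer are not QEMU APIs):

    #include <stdint.h>
    #include <string.h>

    /* Pairwise signed max over 32-bit elements; oprsz is in bytes. */
    static void pairwise_smax_s32(int32_t *d, const int32_t *n,
                                  const int32_t *m, int oprsz)
    {
        int elts = oprsz / 4;
        int32_t tmp[16];                       /* scratch so d may alias n or m */
        int i;

        for (i = 0; i < elts / 2; i++) {       /* low half: pairs of Vn */
            int32_t a = n[2 * i], b = n[2 * i + 1];
            tmp[i] = a > b ? a : b;
        }
        for (i = 0; i < elts / 2; i++) {       /* high half: pairs of Vm */
            int32_t a = m[2 * i], b = m[2 * i + 1];
            tmp[elts / 2 + i] = a > b ? a : b;
        }
        memcpy(d, tmp, (size_t)oprsz);
    }
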
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-35-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-neon.c | 78 ++-------------------------------
 1 file changed, 4 insertions(+), 74 deletions(-)

diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
 DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
+DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp)
+DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp)
+DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp)
+DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp)

 #define DO_3SAME_CMP(INSN, COND) \
     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
@@ -XXX,XX +XXX,XX @@ DO_3SAME_32_ENV(VQSHL_U, qshl_u)
 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)

-static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
-{
-    /* Operations handled pairwise 32 bits at a time */
-    TCGv_i32 tmp, tmp2, tmp3;
-
-    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
-        return false;
-    }
-
-    /* UNDEF accesses to D16-D31 if they don't exist. */
-    if (!dc_isar_feature(aa32_simd_r32, s) &&
-        ((a->vd | a->vn | a->vm) & 0x10)) {
-        return false;
-    }
-
-    if (a->size == 3) {
-        return false;
-    }
-
-    if (!vfp_access_check(s)) {
-        return true;
-    }
-
-    assert(a->q == 0); /* enforced by decode patterns */
-
-    /*
-     * Note that we have to be careful not to clobber the source operands
-     * in the "vm == vd" case by storing the result of the first pass too
-     * early. Since Q is 0 there are always just two passes, so instead
-     * of a complicated loop over each pass we just unroll.
-     */
-    tmp = tcg_temp_new_i32();
-    tmp2 = tcg_temp_new_i32();
-    tmp3 = tcg_temp_new_i32();
-
-    read_neon_element32(tmp, a->vn, 0, MO_32);
-    read_neon_element32(tmp2, a->vn, 1, MO_32);
-    fn(tmp, tmp, tmp2);
-
-    read_neon_element32(tmp3, a->vm, 0, MO_32);
-    read_neon_element32(tmp2, a->vm, 1, MO_32);
-    fn(tmp3, tmp3, tmp2);
-
-    write_neon_element32(tmp, a->vd, 0, MO_32);
-    write_neon_element32(tmp3, a->vd, 1, MO_32);
-
-    return true;
-}
-
-#define DO_3SAME_PAIR(INSN, func) \
-    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
-    { \
-        static NeonGenTwoOpFn * const fns[] = { \
-            gen_helper_neon_##func##8, \
-            gen_helper_neon_##func##16, \
-            gen_helper_neon_##func##32, \
-        }; \
-        if (a->size > 2) { \
-            return false; \
-        } \
-        return do_3same_pair(s, a, fns[a->size]); \
-    }
-
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
-
-DO_3SAME_PAIR(VPMAX_S, pmax_s)
-DO_3SAME_PAIR(VPMIN_S, pmin_s)
-DO_3SAME_PAIR(VPMAX_U, pmax_u)
-DO_3SAME_PAIR(VPMIN_U, pmin_u)
-
 #define DO_3SAME_VQDMULH(INSN, FUNC) \
     WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
     WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
--
2.34.1
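For reference: the DO_3SAME_NO_SZ_3 lines added above replace the
unrolled two-pass code with the shared gvec pairwise expanders, whose
out-of-line helpers use a scratch buffer and so are naturally safe for
the vm == vd overlap that do_3same_pair() had to unroll around. A rough
sketch of what such a macro expands to, following the pattern of the
other DO_3SAME_* macros in this file (the exact expansion may differ):

    /* Hypothetical expansion of DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp) */
    static bool trans_VPMAX_S_3s(DisasContext *s, arg_3same *a)
    {
        if (a->size == 3) {
            return false;       /* no 64-bit element form of these ops */
        }
        return do_3same(s, a, gen_gvec_smaxp);
    }
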
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-36-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode | 10 +++
 target/arm/tcg/translate-a64.c | 144 ++++++++++-----------------------
 2 files changed, 51 insertions(+), 103 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@ FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd
 FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h
 FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd

+FMLAL_v 0.00 1110 001 ..... 11101 1 ..... ..... @qrrr_h
+FMLSL_v 0.00 1110 101 ..... 11101 1 ..... ..... @qrrr_h
+FMLAL2_v 0.10 1110 001 ..... 11001 1 ..... ..... @qrrr_h
+FMLSL2_v 0.10 1110 101 ..... 11001 1 ..... ..... @qrrr_h
+
 FCMEQ_v 0.00 1110 010 ..... 00100 1 ..... ..... @qrrr_h
 FCMEQ_v 0.00 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd

@@ -XXX,XX +XXX,XX @@ FMLS_vi 0.00 1111 11 0 ..... 0101 . 0 ..... ..... @qrrx_d
 FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
 FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
 FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
+
+FMLAL_vi 0.00 1111 10 .. .... 0000 . 0 ..... ..... @qrrx_h
+FMLSL_vi 0.00 1111 10 .. .... 0100 . 0 ..... ..... @qrrx_h
+FMLAL2_vi 0.10 1111 10 .. .... 1000 . 0 ..... ..... @qrrx_h
+FMLSL2_vi 0.10 1111 10 .. .... 1100 . 0 ..... ..... @qrrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
 };
 TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)

+static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
+{
+    if (fp_access_check(s)) {
+        int data = (is_2 << 1) | is_s;
+        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm), tcg_env,
+                           a->q ? 16 : 8, vec_full_reg_size(s),
+                           data, gen_helper_gvec_fmlal_a64);
+    }
+    return true;
+}
+
+TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
+TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
+TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
+TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
+
 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)

+static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
+{
+    if (fp_access_check(s)) {
+        int data = (a->idx << 2) | (is_2 << 1) | is_s;
+        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm), tcg_env,
+                           a->q ? 16 : 8, vec_full_reg_size(s),
+                           data, gen_helper_gvec_fmlal_idx_a64);
+    }
+    return true;
+}
+
+TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
+TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
+TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
+TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
+
 /*
  * Advanced SIMD scalar pairwise
  */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
     }
 }

-/* Floating point op subgroup of C3.6.16. */
-static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
-{
-    /* For floating point ops, the U, size[1] and opcode bits
-     * together indicate the operation. size[0] indicates single
-     * or double.
-     */
-    int fpopcode = extract32(insn, 11, 5)
-        | (extract32(insn, 23, 1) << 5)
-        | (extract32(insn, 29, 1) << 6);
-    int is_q = extract32(insn, 30, 1);
-    int size = extract32(insn, 22, 1);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
-
-    if (size == 1 && !is_q) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (fpopcode) {
-    case 0x1d: /* FMLAL */
-    case 0x3d: /* FMLSL */
-    case 0x59: /* FMLAL2 */
-    case 0x79: /* FMLSL2 */
-        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
-            unallocated_encoding(s);
-            return;
-        }
-        if (fp_access_check(s)) {
-            int is_s = extract32(insn, 23, 1);
-            int is_2 = extract32(insn, 29, 1);
-            int data = (is_2 << 1) | is_s;
-            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
-                               vec_full_reg_offset(s, rn),
-                               vec_full_reg_offset(s, rm), tcg_env,
-                               is_q ? 16 : 8, vec_full_reg_size(s),
-                               data, gen_helper_gvec_fmlal_a64);
-        }
-        return;
-
-    default:
-    case 0x18: /* FMAXNM */
-    case 0x19: /* FMLA */
-    case 0x1a: /* FADD */
-    case 0x1b: /* FMULX */
-    case 0x1c: /* FCMEQ */
-    case 0x1e: /* FMAX */
-    case 0x1f: /* FRECPS */
-    case 0x38: /* FMINNM */
-    case 0x39: /* FMLS */
-    case 0x3a: /* FSUB */
-    case 0x3e: /* FMIN */
-    case 0x3f: /* FRSQRTS */
-    case 0x58: /* FMAXNMP */
-    case 0x5a: /* FADDP */
-    case 0x5b: /* FMUL */
-    case 0x5c: /* FCMGE */
-    case 0x5d: /* FACGE */
-    case 0x5e: /* FMAXP */
-    case 0x5f: /* FDIV */
-    case 0x78: /* FMINNMP */
-    case 0x7a: /* FABD */
-    case 0x7d: /* FACGT */
-    case 0x7c: /* FCMGT */
-    case 0x7e: /* FMINP */
-        unallocated_encoding(s);
-        return;
-    }
-}
-
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
     case 0x3: /* logic ops */
         disas_simd_3same_logic(s, insn);
         break;
-    case 0x18 ... 0x31:
-        /* floating point ops, sz[1] and U are part of opcode */
-        disas_simd_3same_float(s, insn);
-        break;
     default:
         disas_simd_3same_int(s, insn);
         break;
     case 0x14: /* SMAXP, UMAXP */
     case 0x15: /* SMINP, UMINP */
     case 0x17: /* ADDP */
+    case 0x18 ... 0x31: /* floating point ops */
         unallocated_encoding(s);
         break;
     }
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         is_fp = 2;
         break;
-    case 0x00: /* FMLAL */
-    case 0x04: /* FMLSL */
-    case 0x18: /* FMLAL2 */
-    case 0x1c: /* FMLSL2 */
-        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
-            unallocated_encoding(s);
-            return;
-        }
-        size = MO_16;
-        /* is_fp, but we pass tcg_env not fp_status. */
-        break;
     default:
+    case 0x00: /* FMLAL */
     case 0x01: /* FMLA */
+    case 0x04: /* FMLSL */
     case 0x05: /* FMLS */
     case 0x09: /* FMUL */
+    case 0x18: /* FMLAL2 */
     case 0x19: /* FMULX */
+    case 0x1c: /* FMLSL2 */
         unallocated_encoding(s);
         return;
     }
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         return;

-    case 0x00: /* FMLAL */
-    case 0x04: /* FMLSL */
-    case 0x18: /* FMLAL2 */
-    case 0x1c: /* FMLSL2 */
-        {
-            int is_s = extract32(opcode, 2, 1);
-            int is_2 = u;
-            int data = (index << 2) | (is_2 << 1) | is_s;
-            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
-                               vec_full_reg_offset(s, rn),
-                               vec_full_reg_offset(s, rm), tcg_env,
-                               is_q ? 16 : 8, vec_full_reg_size(s),
-                               data, gen_helper_gvec_fmlal_idx_a64);
-        }
-        return;
-
     case 0x08: /* MUL */
         if (!is_long && !is_scalar) {
             static gen_helper_gvec_3 * const fns[3] = {
--
2.34.1
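A note on the 'data' immediate used above: do_fmlal and do_fmlal_idx
pack the FMLSL-vs-FMLAL flag, the _2 (high-half) flag and, for the
indexed forms, the element index into the immediate that the gvec
helper receives. A self-contained C sketch of that packing and
unpacking (the function names here are illustrative, not QEMU APIs):

    #include <stdint.h>
    #include <stdbool.h>

    static uint32_t fmlal_pack(int index, bool is_2, bool is_s)
    {
        return ((uint32_t)index << 2) | ((uint32_t)is_2 << 1) | is_s;
    }

    static void fmlal_unpack(uint32_t data, int *index, bool *is_2, bool *is_s)
    {
        *is_s  = data & 1;        /* 1: FMLSL (subtract), 0: FMLAL */
        *is_2  = (data >> 1) & 1; /* _2 forms read the high half of the inputs */
        *index = data >> 2;       /* element index, 0 for the vector forms */
    }
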
From: Richard Henderson <richard.henderson@linaro.org>

This includes AND, ORR, EOR, BIC, ORN, BSL, BIT, BIF.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240524232121.284515-37-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode | 10 +++++
 target/arm/tcg/translate-a64.c | 68 ++++++++++------------------------
 2 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -XXX,XX +XXX,XX @@
 @rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
 @rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3

+@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
 @qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
 @qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd
 @qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e
@@ -XXX,XX +XXX,XX @@ SMINP_v 0.00 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e
 UMAXP_v 0.10 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e
 UMINP_v 0.10 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e

+AND_v 0.00 1110 001 ..... 00011 1 ..... ..... @qrrr_b
+BIC_v 0.00 1110 011 ..... 00011 1 ..... ..... @qrrr_b
+ORR_v 0.00 1110 101 ..... 00011 1 ..... ..... @qrrr_b
+ORN_v 0.00 1110 111 ..... 00011 1 ..... ..... @qrrr_b
+EOR_v 0.10 1110 001 ..... 00011 1 ..... ..... @qrrr_b
+BSL_v 0.10 1110 011 ..... 00011 1 ..... ..... @qrrr_b
+BIT_v 0.10 1110 101 ..... 00011 1 ..... ..... @qrrr_b
+BIF_v 0.10 1110 111 ..... 00011 1 ..... ..... @qrrr_b
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)

+TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
+TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
+TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
+TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
+TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
+
+static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
+{
+    if (fp_access_check(s)) {
+        gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
+    }
+    return true;
+}
+
+TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
+TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
+TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
     }
 }

-/* Logic op (opcode == 3) subgroup of C3.6.16. */
-static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
-{
-    int rd = extract32(insn, 0, 5);
-    int rn = extract32(insn, 5, 5);
-    int rm = extract32(insn, 16, 5);
-    int size = extract32(insn, 22, 2);
-    bool is_u = extract32(insn, 29, 1);
-    bool is_q = extract32(insn, 30, 1);
-
-    if (!fp_access_check(s)) {
-        return;
-    }
-
-    switch (size + 4 * is_u) {
-    case 0: /* AND */
-        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
-        return;
-    case 1: /* BIC */
-        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
-        return;
-    case 2: /* ORR */
-        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
-        return;
-    case 3: /* ORN */
-        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
-        return;
-    case 4: /* EOR */
-        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
-        return;
-
-    case 5: /* BSL bitwise select */
-        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
-        return;
-    case 6: /* BIT, bitwise insert if true */
-        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
-        return;
-    case 7: /* BIF, bitwise insert if false */
-        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
-        return;
-
-    default:
-        g_assert_not_reached();
-    }
-}
-
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
     int opcode = extract32(insn, 11, 5);

     switch (opcode) {
-    case 0x3: /* logic ops */
-        disas_simd_3same_logic(s, insn);
-        break;
     default:
         disas_simd_3same_int(s, insn);
         break;
+    case 0x3: /* logic ops */
     case 0x14: /* SMAXP, UMAXP */
     case 0x15: /* SMINP, UMINP */
     case 0x17: /* ADDP */
--
2.34.1
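The three do_bitsel uses above all funnel into tcg_gen_gvec_bitsel,
which effectively computes d = (t & c) | (f & ~c) for a selector c;
BSL, BIT and BIF differ only in which register supplies the selector
and which supply the two data inputs. A self-contained C model of the
scalar semantics (bitsel here is illustrative, not a QEMU API):

    #include <stdint.h>

    static uint64_t bitsel(uint64_t c, uint64_t t, uint64_t f)
    {
        return (t & c) | (f & ~c);
    }

    /*
     * Per the TRANS lines above:
     *   BSL: Vd = bitsel(Vd, Vn, Vm)  - old Vd selects between Vn and Vm
     *   BIT: Vd = bitsel(Vm, Vn, Vd)  - insert Vn bits where Vm is 1
     *   BIF: Vd = bitsel(Vm, Vd, Vn)  - insert Vn bits where Vm is 0
     */
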