The following changes since commit 5a67d7735d4162630769ef495cf813244fc850df:

  Merge remote-tracking branch 'remotes/berrange-gitlab/tags/tls-deps-pull-request' into staging (2021-07-02 08:22:39 +0100)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210702

for you to fetch changes up to 04ea4d3cfd0a21b248ece8eb7a9436a3d9898dd8:

  target/arm: Implement MVE shifts by register (2021-07-02 11:48:38 +0100)

----------------------------------------------------------------
target-arm queue:
 * more MVE instructions
 * hw/gpio/gpio_pwr: use shutdown function for reboot
 * target/arm: Check NaN mode before silencing NaN
 * tests: Boot and halt a Linux guest on the Raspberry Pi 2 machine
 * hw/arm: Add basic power management to raspi.
 * docs/system/arm: Add quanta-gbs-bmc, quanta-q7l1-bmc

----------------------------------------------------------------
Joe Komlodi (1):
      target/arm: Check NaN mode before silencing NaN

Maxim Uvarov (1):
      hw/gpio/gpio_pwr: use shutdown function for reboot

Nolan Leake (1):
      hw/arm: Add basic power management to raspi.

Patrick Venture (2):
      docs/system/arm: Add quanta-q7l1-bmc reference
      docs/system/arm: Add quanta-gbs-bmc reference

Peter Maydell (18):
      target/arm: Fix MVE widening/narrowing VLDR/VSTR offset calculation
      target/arm: Fix bugs in MVE VRMLALDAVH, VRMLSLDAVH
      target/arm: Make asimd_imm_const() public
      target/arm: Use asimd_imm_const for A64 decode
      target/arm: Use dup_const() instead of bitfield_replicate()
      target/arm: Implement MVE logical immediate insns
      target/arm: Implement MVE vector shift left by immediate insns
      target/arm: Implement MVE vector shift right by immediate insns
      target/arm: Implement MVE VSHLL
      target/arm: Implement MVE VSRI, VSLI
      target/arm: Implement MVE VSHRN, VRSHRN
      target/arm: Implement MVE saturating narrowing shifts
      target/arm: Implement MVE VSHLC
      target/arm: Implement MVE VADDLV
      target/arm: Implement MVE long shifts by immediate
      target/arm: Implement MVE long shifts by register
      target/arm: Implement MVE shifts by immediate
      target/arm: Implement MVE shifts by register

Philippe Mathieu-Daudé (1):
      tests: Boot and halt a Linux guest on the Raspberry Pi 2 machine

 docs/system/arm/aspeed.rst             |   1 +
 docs/system/arm/nuvoton.rst            |   5 +-
 include/hw/arm/bcm2835_peripherals.h   |   3 +-
 include/hw/misc/bcm2835_powermgt.h     |  29 ++
 target/arm/helper-mve.h                | 108 +++++++
 target/arm/translate.h                 |  41 +++
 target/arm/mve.decode                  | 177 ++++++++++-
 target/arm/t32.decode                  |  71 ++++-
 hw/arm/bcm2835_peripherals.c           |  13 +-
 hw/gpio/gpio_pwr.c                     |   2 +-
 hw/misc/bcm2835_powermgt.c             | 160 ++++++++++
 target/arm/helper-a64.c                |  12 +-
 target/arm/mve_helper.c                | 524 +++++++++++++++++++++++++++++++--
 target/arm/translate-a64.c             |  86 +-----
 target/arm/translate-mve.c             | 261 +++++++++++++++-
 target/arm/translate-neon.c            |  81 -----
 target/arm/translate.c                 | 327 +++++++++++++++++++-
 target/arm/vfp_helper.c                |  24 +-
 hw/misc/meson.build                    |   1 +
 tests/acceptance/boot_linux_console.py |  43 +++
 20 files changed, 1760 insertions(+), 209 deletions(-)
 create mode 100644 include/hw/misc/bcm2835_powermgt.h
 create mode 100644 hw/misc/bcm2835_powermgt.c
From: Patrick Venture <venture@google.com>

Adds a line-item reference to the supported quanta-q71l-bmc aspeed
entry.

Signed-off-by: Patrick Venture <venture@google.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-id: 20210615192848.1065297-2-venture@google.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/aspeed.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -XXX,XX +XXX,XX @@ etc.
 AST2400 SoC based machines :
 
 - ``palmetto-bmc``         OpenPOWER Palmetto POWER8 BMC
+- ``quanta-q71l-bmc``      OpenBMC Quanta BMC
 
 AST2500 SoC based machines :
-- 
2.20.1

From: Patrick Venture <venture@google.com>

Add a line-item reference to the quanta-gbs-bmc machine.

Signed-off-by: Patrick Venture <venture@google.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-id: 20210615192848.1065297-3-venture@google.com
[PMM: fixed underline Sphinx warning]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/nuvoton.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -XXX,XX +XXX,XX @@
-Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``)
-=====================================================
+Nuvoton iBMC boards (``*-bmc``, ``npcm750-evb``, ``quanta-gsj``)
+================================================================
 
 The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are
 designed to be used as Baseboard Management Controllers (BMCs) in various
@@ -XXX,XX +XXX,XX @@ segment. The following machines are based on this chip :
 The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and
 Hyperscale applications. The following machines are based on this chip :
 
+- ``quanta-gbs-bmc``    Quanta GBS server BMC
 - ``quanta-gsj``        Quanta GSJ server BMC
 
 There are also two more SoCs, NPCM710 and NPCM705, which are single-core
-- 
2.20.1

From: Nolan Leake <nolan@sigbus.net>

This is just enough to make reboot and poweroff work. Works for
linux, u-boot, and the arm trusted firmware. Not tested, but should
work for plan9, and bare-metal/hobby OSes, since they seem to generally
do what linux does for reset.

The watchdog timer functionality is not yet implemented.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/64
Signed-off-by: Nolan Leake <nolan@sigbus.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20210625210209.1870217-1-nolan@sigbus.net
[PMM: tweaked commit title; fixed region size to 0x200;
 moved header file to include/]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 include/hw/arm/bcm2835_peripherals.h |   3 +-
 include/hw/misc/bcm2835_powermgt.h   |  29 +++
 hw/arm/bcm2835_peripherals.c         |  13 ++-
 hw/misc/bcm2835_powermgt.c           | 160 +++++++++++++++++++++++++++
 hw/misc/meson.build                  |   1 +
 5 files changed, 204 insertions(+), 2 deletions(-)
 create mode 100644 include/hw/misc/bcm2835_powermgt.h
 create mode 100644 hw/misc/bcm2835_powermgt.c

diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -XXX,XX +XXX,XX @@
 #include "hw/misc/bcm2835_mphi.h"
 #include "hw/misc/bcm2835_thermal.h"
 #include "hw/misc/bcm2835_cprman.h"
+#include "hw/misc/bcm2835_powermgt.h"
 #include "hw/sd/sdhci.h"
 #include "hw/sd/bcm2835_sdhost.h"
 #include "hw/gpio/bcm2835_gpio.h"
@@ -XXX,XX +XXX,XX @@ struct BCM2835PeripheralState {
     BCM2835MphiState mphi;
     UnimplementedDeviceState txp;
     UnimplementedDeviceState armtmr;
-    UnimplementedDeviceState powermgt;
+    BCM2835PowerMgtState powermgt;
     BCM2835CprmanState cprman;
     PL011State uart0;
     BCM2835AuxState aux;
diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/hw/misc/bcm2835_powermgt.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_POWERMGT_H
+#define BCM2835_POWERMGT_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT)
+
+struct BCM2835PowerMgtState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rstc;
+    uint32_t rsts;
+    uint32_t wdog;
+};
+
+#endif
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_init(Object *obj)
 
     object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr));
+
+    /* Power Management */
+    object_initialize_child(obj, "powermgt", &s->powermgt,
+                            TYPE_BCM2835_POWERMGT);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
                 qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                        INTERRUPT_USB));
 
+    /* Power Management */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->powermgt), errp)) {
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, PM_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->powermgt), 0));
+
     create_unimp(s, &s->txp, "bcm2835-txp", TXP_OFFSET, 0x1000);
     create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40);
-    create_unimp(s, &s->powermgt, "bcm2835-powermgt", PM_OFFSET, 0x114);
     create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100);
     create_unimp(s, &s->smi, "bcm2835-smi", SMI_OFFSET, 0x100);
     create_unimp(s, &s->spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20);
diff --git a/hw/misc/bcm2835_powermgt.c b/hw/misc/bcm2835_powermgt.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/misc/bcm2835_powermgt.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/misc/bcm2835_powermgt.h"
+#include "migration/vmstate.h"
+#include "sysemu/runstate.h"
+
+#define PASSWORD 0x5a000000
+#define PASSWORD_MASK 0xff000000
+
+#define R_RSTC 0x1c
+#define V_RSTC_RESET 0x20
+#define R_RSTS 0x20
+#define V_RSTS_POWEROFF 0x555 /* Linux uses partition 63 to indicate halt. */
+#define R_WDOG 0x24
+
+static uint64_t bcm2835_powermgt_read(void *opaque, hwaddr offset,
+                                      unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+    uint32_t res = 0;
+
+    switch (offset) {
+    case R_RSTC:
+        res = s->rstc;
+        break;
+    case R_RSTS:
+        res = s->rsts;
+        break;
+    case R_WDOG:
+        res = s->wdog;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_read: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_powermgt_write(void *opaque, hwaddr offset,
+                                   uint64_t value, unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+
+    if ((value & PASSWORD_MASK) != PASSWORD) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_powermgt_write: Bad password 0x%"PRIx64
+                      " at offset 0x%08"HWADDR_PRIx"\n",
+                      value, offset);
+        return;
+    }
+
+    value = value & ~PASSWORD_MASK;
+
+    switch (offset) {
+    case R_RSTC:
+        s->rstc = value;
+        if (value & V_RSTC_RESET) {
+            if ((s->rsts & 0xfff) == V_RSTS_POWEROFF) {
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+            } else {
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+            }
+        }
+        break;
+    case R_RSTS:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: RSTS\n");
+        s->rsts = value;
+        break;
+    case R_WDOG:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: WDOG\n");
+        s->wdog = value;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_powermgt_ops = {
+    .read = bcm2835_powermgt_read,
+    .write = bcm2835_powermgt_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+};
+
+static const VMStateDescription vmstate_bcm2835_powermgt = {
+    .name = TYPE_BCM2835_POWERMGT,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(rstc, BCM2835PowerMgtState),
+        VMSTATE_UINT32(rsts, BCM2835PowerMgtState),
+        VMSTATE_UINT32(wdog, BCM2835PowerMgtState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_powermgt_init(Object *obj)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(obj);
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_powermgt_ops, s,
+                          TYPE_BCM2835_POWERMGT, 0x200);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void bcm2835_powermgt_reset(DeviceState *dev)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(dev);
+
+    /* https://elinux.org/BCM2835_registers#PM */
+    s->rstc = 0x00000102;
+    s->rsts = 0x00001000;
+    s->wdog = 0x00000000;
+}
+
+static void bcm2835_powermgt_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_powermgt_reset;
+    dc->vmsd = &vmstate_bcm2835_powermgt;
+}
+
+static TypeInfo bcm2835_powermgt_info = {
+    .name          = TYPE_BCM2835_POWERMGT,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835PowerMgtState),
+    .class_init    = bcm2835_powermgt_class_init,
+    .instance_init = bcm2835_powermgt_init,
+};
+
+static void bcm2835_powermgt_register_types(void)
+{
+    type_register_static(&bcm2835_powermgt_info);
+}
+
+type_init(bcm2835_powermgt_register_types)
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files(
   'bcm2835_rng.c',
   'bcm2835_thermal.c',
   'bcm2835_cprman.c',
+  'bcm2835_powermgt.c',
 ))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c'))
 softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c'))
-- 
2.20.1

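[Editor's illustration, not part of the series: a minimal bare-metal sketch
of the guest-side protocol the model above implements. The 0x3f100000 base
is the raspi2 mapping visible in the /proc/iomem listing in the next patch's
test log; the offsets and the 0x5a "password" byte come from the constants
in this patch.]

  #include <stdint.h>

  #define PM_BASE        0x3f100000u  /* raspi2 PM/watchdog block */
  #define PM_RSTC        0x1cu        /* R_RSTC */
  #define PM_RSTS        0x20u        /* R_RSTS */
  #define PM_PASSWORD    0x5a000000u  /* PASSWORD */
  #define PM_RSTC_RESET  0x20u        /* V_RSTC_RESET */
  #define PM_RSTS_HALT   0x555u      /* V_RSTS_POWEROFF: "partition 63" */

  static void pm_write(uint32_t offset, uint32_t value)
  {
      /* Writes without the 0x5a password byte are rejected by the model */
      *(volatile uint32_t *)(uintptr_t)(PM_BASE + offset) =
          PM_PASSWORD | value;
  }

  void guest_reboot(void)
  {
      pm_write(PM_RSTC, PM_RSTC_RESET);   /* RSTS != 0x555: plain reset */
  }

  void guest_poweroff(void)
  {
      pm_write(PM_RSTS, PM_RSTS_HALT);    /* mark "halt" first ...      */
      pm_write(PM_RSTC, PM_RSTC_RESET);   /* ... then trigger: shutdown */
  }

This mirrors what Linux's BCM2835 restart/poweroff code does, which is why
"reboot" and "halt" in the acceptance test below behave as expected.
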
From: Philippe Mathieu-Daudé <f4bug@amsat.org>

Add a test which boots and quickly shuts down a raspi2 machine,
to exercise the power management model:

  (1/1) tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_arm_raspi2_initrd:
  console: [    0.000000] Booting Linux on physical CPU 0xf00
  console: [    0.000000] Linux version 4.14.98-v7+ (dom@dom-XPS-13-9370) (gcc version 4.9.3 (crosstool-NG crosstool-ng-1.22.0-88-g8460611)) #1200 SMP Tue Feb 12 20:27:48 GMT 2019
  console: [    0.000000] CPU: ARMv7 Processor [410fc075] revision 5 (ARMv7), cr=10c5387d
  console: [    0.000000] CPU: div instructions available: patching division code
  console: [    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing instruction cache
  console: [    0.000000] OF: fdt: Machine model: Raspberry Pi 2 Model B
  ...
  console: Boot successful.
  console: cat /proc/cpuinfo
  console: / # cat /proc/cpuinfo
  ...
  console: processor       : 3
  console: model name      : ARMv7 Processor rev 5 (v7l)
  console: BogoMIPS        : 125.00
  console: Features        : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm
  console: CPU implementer : 0x41
  console: CPU architecture: 7
  console: CPU variant     : 0x0
  console: CPU part        : 0xc07
  console: CPU revision    : 5
  console: Hardware        : BCM2835
  console: Revision        : 0000
  console: Serial          : 0000000000000000
  console: cat /proc/iomem
  console: / # cat /proc/iomem
  console: 00000000-3bffffff : System RAM
  console: 00008000-00afffff : Kernel code
  console: 00c00000-00d468ef : Kernel data
  console: 3f006000-3f006fff : dwc_otg
  console: 3f007000-3f007eff : /soc/dma@7e007000
  console: 3f00b880-3f00b8bf : /soc/mailbox@7e00b880
  console: 3f100000-3f100027 : /soc/watchdog@7e100000
  console: 3f101000-3f102fff : /soc/cprman@7e101000
  console: 3f200000-3f2000b3 : /soc/gpio@7e200000
  PASS (24.59 s)
  RESULTS    : PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | CANCEL 0
  JOB TIME   : 25.02 s

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
Message-id: 20210531113837.1689775-1-f4bug@amsat.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 tests/acceptance/boot_linux_console.py | 43 ++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py
index XXXXXXX..XXXXXXX 100644
--- a/tests/acceptance/boot_linux_console.py
+++ b/tests/acceptance/boot_linux_console.py
@@ -XXX,XX +XXX,XX @@
 from avocado import skip
 from avocado import skipUnless
 from avocado_qemu import Test
+from avocado_qemu import exec_command
 from avocado_qemu import exec_command_and_wait_for_pattern
 from avocado_qemu import interrupt_interactive_console_until_pattern
 from avocado_qemu import wait_for_console_pattern
@@ -XXX,XX +XXX,XX @@ def test_arm_raspi2_uart0(self):
         """
         self.do_test_arm_raspi2(0)
 
+    def test_arm_raspi2_initrd(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:raspi2
+        """
+        deb_url = ('http://archive.raspberrypi.org/debian/'
+                   'pool/main/r/raspberrypi-firmware/'
+                   'raspberrypi-kernel_1.20190215-1_armhf.deb')
+        deb_hash = 'cd284220b32128c5084037553db3c482426f3972'
+        deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
+        kernel_path = self.extract_from_deb(deb_path, '/boot/kernel7.img')
+        dtb_path = self.extract_from_deb(deb_path, '/boot/bcm2709-rpi-2-b.dtb')
+
+        initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
+                      '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
+                      'arm/rootfs-armv7a.cpio.gz')
+        initrd_hash = '604b2e45cdf35045846b8bbfbf2129b1891bdc9c'
+        initrd_path_gz = self.fetch_asset(initrd_url, asset_hash=initrd_hash)
+        initrd_path = os.path.join(self.workdir, 'rootfs.cpio')
+        archive.gzip_uncompress(initrd_path_gz, initrd_path)
+
+        self.vm.set_console()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'earlycon=pl011,0x3f201000 console=ttyAMA0 '
+                               'panic=-1 noreboot ' +
+                               'dwc_otg.fiq_fsm_enable=0')
+        self.vm.add_args('-kernel', kernel_path,
+                         '-dtb', dtb_path,
+                         '-initrd', initrd_path,
+                         '-append', kernel_command_line,
+                         '-no-reboot')
+        self.vm.launch()
+        self.wait_for_console_pattern('Boot successful.')
+
+        exec_command_and_wait_for_pattern(self, 'cat /proc/cpuinfo',
+                                                'BCM2835')
+        exec_command_and_wait_for_pattern(self, 'cat /proc/iomem',
+                                                '/soc/cprman@7e101000')
+        exec_command(self, 'halt')
+        # Wait for VM to shut down gracefully
+        self.vm.wait()
+
     def test_arm_exynos4210_initrd(self):
         """
         :avocado: tags=arch:arm
-- 
2.20.1

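[Editor's usage note, assuming the Avocado-based acceptance tooling of this
QEMU generation rather than anything stated in the patch itself: the new
test runs as part of 'make check-acceptance', or standalone via
'avocado run tests/acceptance/boot_linux_console.py'.]
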
From: Joe Komlodi <joe.komlodi@xilinx.com>

If the CPU is running in default NaN mode (FPCR.DN == 1) and we execute
FRSQRTE, FRECPE, or FRECPX with a signaling NaN, parts_silence_nan_frac() will
assert due to fpst->default_nan_mode being set.

To avoid this, we check to see what NaN mode we're running in before we call
floatxx_silence_nan().

Signed-off-by: Joe Komlodi <joe.komlodi@xilinx.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 1624662174-175828-2-git-send-email-joe.komlodi@xilinx.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper-a64.c | 12 +++++++++---
 target/arm/vfp_helper.c | 24 ++++++++++++++++++------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
     float16 nan = a;
     if (float16_is_signaling_nan(a, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float16_silence_nan(a, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float16_silence_nan(a, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float16_default_nan(fpst);
@@ -XXX,XX +XXX,XX @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
     float32 nan = a;
     if (float32_is_signaling_nan(a, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float32_silence_nan(a, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float32_silence_nan(a, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float32_default_nan(fpst);
@@ -XXX,XX +XXX,XX @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
     float64 nan = a;
     if (float64_is_signaling_nan(a, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float64_silence_nan(a, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float64_silence_nan(a, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float64_default_nan(fpst);
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
     float16 nan = f16;
     if (float16_is_signaling_nan(f16, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float16_silence_nan(f16, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float16_silence_nan(f16, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float16_default_nan(fpst);
@@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp)
     float32 nan = f32;
     if (float32_is_signaling_nan(f32, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float32_silence_nan(f32, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float32_silence_nan(f32, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float32_default_nan(fpst);
@@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
     float64 nan = f64;
     if (float64_is_signaling_nan(f64, fpst)) {
         float_raise(float_flag_invalid, fpst);
-        nan = float64_silence_nan(f64, fpst);
+        if (!fpst->default_nan_mode) {
+            nan = float64_silence_nan(f64, fpst);
+        }
     }
     if (fpst->default_nan_mode) {
         nan = float64_default_nan(fpst);
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
     float16 nan = f16;
     if (float16_is_signaling_nan(f16, s)) {
         float_raise(float_flag_invalid, s);
-        nan = float16_silence_nan(f16, s);
+        if (!s->default_nan_mode) {
+            nan = float16_silence_nan(f16, fpstp);
+        }
     }
     if (s->default_nan_mode) {
         nan = float16_default_nan(s);
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
     float32 nan = f32;
     if (float32_is_signaling_nan(f32, s)) {
         float_raise(float_flag_invalid, s);
-        nan = float32_silence_nan(f32, s);
+        if (!s->default_nan_mode) {
+            nan = float32_silence_nan(f32, fpstp);
+        }
     }
     if (s->default_nan_mode) {
         nan = float32_default_nan(s);
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
     float64 nan = f64;
     if (float64_is_signaling_nan(f64, s)) {
         float_raise(float_flag_invalid, s);
-        nan = float64_silence_nan(f64, s);
+        if (!s->default_nan_mode) {
+            nan = float64_silence_nan(f64, fpstp);
+        }
     }
     if (s->default_nan_mode) {
         nan = float64_default_nan(s);
-- 
2.20.1

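[Editor's illustration: a standalone toy re-implementation of the fixed NaN
path above. The float_status struct, bit masks, and helper stand-ins here
are simplifications for demonstration, not QEMU's real softfloat API.]

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>

  typedef struct { bool default_nan_mode; } float_status;  /* FPCR.DN */

  #define F32_QNAN_BIT     0x00400000u
  #define F32_DEFAULT_QNAN 0x7fc00000u

  static bool f32_is_signaling_nan(uint32_t f)
  {
      /* exponent all-ones, quiet bit clear, nonzero payload */
      return (f & 0x7fc00000u) == 0x7f800000u && (f & 0x003fffffu) != 0;
  }

  /* The fixed pattern: silence the sNaN in place only when DN == 0 */
  static uint32_t nan_result(uint32_t a, const float_status *fpst)
  {
      uint32_t nan = a;

      if (f32_is_signaling_nan(a)) {
          /* float_raise(float_flag_invalid, fpst) would go here */
          if (!fpst->default_nan_mode) {
              nan = a | F32_QNAN_BIT;       /* float32_silence_nan() */
          }
      }
      if (fpst->default_nan_mode) {
          nan = F32_DEFAULT_QNAN;           /* float32_default_nan() */
      }
      return nan;
  }

  int main(void)
  {
      uint32_t snan = 0x7f800001u;
      float_status dn_on  = { .default_nan_mode = true };
      float_status dn_off = { .default_nan_mode = false };

      /* DN == 1: the default NaN is returned, nothing is silenced */
      assert(nan_result(snan, &dn_on) == F32_DEFAULT_QNAN);
      /* DN == 0: the sNaN is quieted in place, payload preserved */
      assert(nan_result(snan, &dn_off) == 0x7fc00001u);
      return 0;
  }
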
From: Maxim Uvarov <maxim.uvarov@linaro.org>

QEMU has two types of function here: shutdown and reboot. The
shutdown function has to be used for machine shutdown. Otherwise
we cause a reset with a bogus "cause" value, when we intended a
shutdown.

Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20210625111842.3790-3-maxim.uvarov@linaro.org
[PMM: tweaked commit message]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/gpio/gpio_pwr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/gpio/gpio_pwr.c b/hw/gpio/gpio_pwr.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/gpio/gpio_pwr.c
+++ b/hw/gpio/gpio_pwr.c
@@ -XXX,XX +XXX,XX @@ static void gpio_pwr_reset(void *opaque, int n, int level)
 static void gpio_pwr_shutdown(void *opaque, int n, int level)
 {
     if (level) {
-        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     }
 }
 
-- 
2.20.1

In commit 37bfce81b10450071 we accidentally introduced a leak of a TCG
temporary in do_2shift_env_64(); free it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-neon.inc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
         neon_load_reg64(tmp, a->vm + pass);
         fn(tmp, cpu_env, tmp, constimm);
         neon_store_reg64(tmp, a->vd + pass);
+        tcg_temp_free_i64(tmp);
     }
     tcg_temp_free_i64(constimm);
     return true;
--
2.20.1


In do_ldst(), the calculation of the offset needs to be based on the
size of the memory access, not the size of the elements in the
vector. This meant we were getting it wrong for the widening and
narrowing variants of the various VLDR and VSTR insns.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-2-peter.maydell@linaro.org
---
 target/arm/translate-mve.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ static bool mve_skip_first_beat(DisasContext *s)
     }
 }
 
-static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn)
+static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
+                    unsigned msize)
 {
     TCGv_i32 addr;
     uint32_t offset;
@@ -XXX,XX +XXX,XX @@ static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn)
         return true;
     }
 
-    offset = a->imm << a->size;
+    offset = a->imm << msize;
     if (!a->a) {
         offset = -offset;
     }
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
         { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
         { NULL, NULL }
     };
-    return do_ldst(s, a, ldstfns[a->size][a->l]);
+    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
 }
 
-#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST) \
+#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \
     static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
     { \
         static MVEGenLdStFn * const ldstfns[2][2] = { \
             { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
             { NULL, gen_helper_mve_##ULD }, \
         }; \
-        return do_ldst(s, a, ldstfns[a->u][a->l]); \
+        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \
     }
 
-DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
-DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
-DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
+DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
+DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
+DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
 
 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 {
--
2.20.1
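To make the offset fix above concrete, here is a small standalone sketch
(not code from the patch; the constants simply mirror QEMU's MO_8/MO_16
encoding of log2 of the access size) showing why a widening load such as
VLDRB.U16 must scale its immediate by the memory access size rather than
the vector element size:

    #include <stdio.h>

    /* log2 of the access size in bytes, as in QEMU's MemOp values */
    enum { MO_8 = 0, MO_16 = 1, MO_32 = 2 };

    int main(void)
    {
        unsigned imm = 3;          /* immediate field from the insn */
        unsigned esize = MO_16;    /* vector holds halfword elements */
        unsigned msize = MO_8;     /* ...but VLDRB loads byte-sized data */

        /* buggy: scaled by element size; fixed: scaled by access size */
        printf("old offset = %u\n", imm << esize);  /* 6 -- wrong */
        printf("new offset = %u\n", imm << msize);  /* 3 -- right */
        return 0;
    }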
From: Erik Smit <erik.lucas.smit@gmail.com>

The hardware supports configurable descriptor sizes, configured in the DBLAC
register.

Most drivers use the default 4 word descriptor, which is currently hardcoded,
but the Aspeed SDK configures 8 words to store extra data.

Signed-off-by: Erik Smit <erik.lucas.smit@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
[PMM: removed unnecessary parens]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/net/ftgmac100.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -XXX,XX +XXX,XX @@
 #define FTGMAC100_APTC_TXPOLL_CNT(x) (((x) >> 8) & 0xf)
 #define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12)
 
+/*
+ * DMA burst length and arbitration control register
+ */
+#define FTGMAC100_DBLAC_RXBURST_SIZE(x) (((x) >> 8) & 0x3)
+#define FTGMAC100_DBLAC_TXBURST_SIZE(x) (((x) >> 10) & 0x3)
+#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8)
+#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8)
+#define FTGMAC100_DBLAC_IFG_CNT(x) (((x) >> 20) & 0x7)
+#define FTGMAC100_DBLAC_IFG_INC (1 << 23)
+
 /*
  * PHY control register
  */
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
         if (bd.des0 & s->txdes0_edotr) {
             addr = tx_ring;
         } else {
-            addr += sizeof(FTGMAC100Desc);
+            addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac);
         }
     }
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_write(void *opaque, hwaddr addr,
         s->phydata = value & 0xffff;
         break;
     case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
+        if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: transmit descriptor too small : %d bytes\n",
+                          __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
+            break;
+        }
+        if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: receive descriptor too small : %d bytes\n",
+                          __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
+            break;
+        }
         s->dblac = value;
         break;
     case FTGMAC100_REVR: /* Feature Register */
@@ -XXX,XX +XXX,XX @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
         if (bd.des0 & s->rxdes0_edorr) {
             addr = s->rx_ring;
         } else {
-            addr += sizeof(FTGMAC100Desc);
+            addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac);
         }
     }
     s->rx_descriptor = addr;
--
2.20.1


The initial implementation of the MVE VRMLALDAVH and VRMLSLDAVH
insns had some bugs:
 * the 32x32 multiply of elements was being done as 32x32->32,
   not 32x32->64
 * we were incorrectly maintaining the accumulator in its full
   72-bit form across all 4 beats of the insn; in the pseudocode
   it is squashed back into the 64 bits of the RdaHi:RdaLo
   registers after each beat

In particular, fixing the second of these allows us to recast
the implementation to avoid 128-bit arithmetic entirely.

Since the element size here is always 4, we can also drop the
parameterization of ESIZE to make the code a little more readable.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-3-peter.maydell@linaro.org
---
 target/arm/mve_helper.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/int128.h"
 #include "cpu.h"
 #include "internals.h"
 #include "vec_internal.h"
@@ -XXX,XX +XXX,XX @@ DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
 DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
 
 /*
- * Rounding multiply add long dual accumulate high: we must keep
- * a 72-bit internal accumulator value and return the top 64 bits.
+ * Rounding multiply add long dual accumulate high. In the pseudocode
+ * this is implemented with a 72-bit internal accumulator value of which
+ * the top 64 bits are returned. We optimize this to avoid having to
+ * use 128-bit arithmetic -- we can do this because the 74-bit accumulator
+ * is squashed back into 64-bits after each beat.
  */
-#define DO_LDAVH(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC, TO128) \
+#define DO_LDAVH(OP, TYPE, LTYPE, XCHG, SUB) \
     uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
                                     void *vm, uint64_t a) \
     { \
         uint16_t mask = mve_element_mask(env); \
         unsigned e; \
         TYPE *n = vn, *m = vm; \
-        Int128 acc = int128_lshift(TO128(a), 8); \
-        for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
             if (mask & 1) { \
+                LTYPE mul; \
                 if (e & 1) { \
-                    acc = ODDACC(acc, TO128(n[H##ESIZE(e - 1 * XCHG)] * \
-                                            m[H##ESIZE(e)])); \
+                    mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)]; \
+                    if (SUB) { \
+                        mul = -mul; \
+                    } \
                 } else { \
-                    acc = EVENACC(acc, TO128(n[H##ESIZE(e + 1 * XCHG)] * \
-                                             m[H##ESIZE(e)])); \
+                    mul = (LTYPE)n[H4(e + 1 * XCHG)] * m[H4(e)]; \
                 } \
-                acc = int128_add(acc, int128_make64(1 << 7)); \
+                mul = (mul >> 8) + ((mul >> 7) & 1); \
+                a += mul; \
             } \
         } \
         mve_advance_vpt(env); \
-        return int128_getlo(int128_rshift(acc, 8)); \
+        return a; \
     }
 
-DO_LDAVH(vrmlaldavhsw, 4, int32_t, false, int128_add, int128_add, int128_makes64)
-DO_LDAVH(vrmlaldavhxsw, 4, int32_t, true, int128_add, int128_add, int128_makes64)
+DO_LDAVH(vrmlaldavhsw, int32_t, int64_t, false, false)
+DO_LDAVH(vrmlaldavhxsw, int32_t, int64_t, true, false)
 
-DO_LDAVH(vrmlaldavhuw, 4, uint32_t, false, int128_add, int128_add, int128_make64)
+DO_LDAVH(vrmlaldavhuw, uint32_t, uint64_t, false, false)
 
-DO_LDAVH(vrmlsldavhsw, 4, int32_t, false, int128_add, int128_sub, int128_makes64)
-DO_LDAVH(vrmlsldavhxsw, 4, int32_t, true, int128_add, int128_sub, int128_makes64)
+DO_LDAVH(vrmlsldavhsw, int32_t, int64_t, false, true)
+DO_LDAVH(vrmlsldavhxsw, int32_t, int64_t, true, true)
 
 /* Vector add across vector */
 #define DO_VADDV(OP, ESIZE, TYPE) \
--
2.20.1
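An aside on the arithmetic in the VRMLALDAVH patch above: the per-beat
update `mul = (mul >> 8) + ((mul >> 7) & 1)` rounds the 64-bit product to
its top 56 bits, which is the same as adding 1 << 7 into a wider
accumulator and then taking bits [71:8]. A standalone sketch checking one
value against a 128-bit reference (illustration only, not QEMU code; it
assumes a compiler with __int128 and arithmetic right shift of signed
values, the same assumptions QEMU itself makes):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t mul = -123456789;                 /* any 64-bit product */

        /* the patch's 64-bit-only version: shift with round-to-nearest */
        int64_t fast = (mul >> 8) + ((mul >> 7) & 1);

        /* reference: widen, add the rounding constant, then shift */
        __int128 ref = ((__int128)mul + (1 << 7)) >> 8;

        assert(fast == (int64_t)ref);
        return 0;
    }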
Convert the VQDMULH and VQRDMULH insns in the 2-reg-scalar group
to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  3 +++
 target/arm/translate-neon.inc.c | 29 +++++++++++++++++++++++
 target/arm/translate.c          | 42 ++-------------------------------
 3 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
 
   VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
   VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
+
+  VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
+  VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
 
     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
 }
+
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
+
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        gen_VQDMULH_16,
+        gen_VQDMULH_32,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        gen_VQRDMULH_16,
+        gen_VQRDMULH_32,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], NULL);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
 
 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
 
-static TCGv_i32 neon_load_scratch(int scratch)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
-    return tmp;
-}
-
-static void neon_store_scratch(int scratch, TCGv_i32 var)
-{
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
-    tcg_temp_free_i32(var);
-}
-
 static int gen_neon_unzip(int rd, int rm, int size, int q)
 {
     TCGv_ptr pd, pm;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case 1: /* Float VMLA scalar */
             case 5: /* Floating point VMLS scalar */
             case 9: /* Floating point VMUL scalar */
-                return 1; /* handled by decodetree */
-
             case 12: /* VQDMULH scalar */
             case 13: /* VQRDMULH scalar */
-                if (u && ((rd | rn) & 1)) {
-                    return 1;
-                }
-                tmp = neon_get_scalar(size, rm);
-                neon_store_scratch(0, tmp);
-                for (pass = 0; pass < (u ? 4 : 2); pass++) {
-                    tmp = neon_load_scratch(0);
-                    tmp2 = neon_load_reg(rn, pass);
-                    if (op == 12) {
-                        if (size == 1) {
-                            gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                        } else {
-                            gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
-                        }
-                    } else {
-                        if (size == 1) {
-                            gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                        } else {
-                            gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
-                        }
-                    }
-                    tcg_temp_free_i32(tmp2);
-                    neon_store_reg(rd, pass, tmp);
-                }
-                break;
+                return 1; /* handled by decodetree */
+
             case 3: /* VQDMLAL scalar */
             case 7: /* VQDMLSL scalar */
             case 11: /* VQDMULL scalar */
--
2.20.1


The function asimd_imm_const() in translate-neon.c is an
implementation of the pseudocode AdvSIMDExpandImm(), which we will
also want for MVE. Move the implementation to translate.c, with a
prototype in translate.h.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-4-peter.maydell@linaro.org
---
 target/arm/translate.h      | 16 ++++++++++
 target/arm/translate-neon.c | 63 -------------------------------------
 target/arm/translate.c      | 57 +++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 63 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
     return opc | s->be_data;
 }
 
+/**
+ * asimd_imm_const: Expand an encoded SIMD constant value
+ *
+ * Expand a SIMD constant value. This is essentially the pseudocode
+ * AdvSIMDExpandImm, except that we also perform the boolean NOT needed for
+ * VMVN and VBIC (when cmode < 14 && op == 1).
+ *
+ * The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
+ * callers must catch this.
+ *
+ * cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
+ * is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
+ * we produce an immediate constant value of 0 in these cases.
+ */
+uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -XXX,XX +XXX,XX @@ DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
 
-static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
-{
-    /*
-     * Expand the encoded constant.
-     * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
-     * We choose to not special-case this and will behave as if a
-     * valid constant encoding of 0 had been given.
-     * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
-     */
-    switch (cmode) {
-    case 0: case 1:
-        /* no-op */
-        break;
-    case 2: case 3:
-        imm <<= 8;
-        break;
-    case 4: case 5:
-        imm <<= 16;
-        break;
-    case 6: case 7:
-        imm <<= 24;
-        break;
-    case 8: case 9:
-        imm |= imm << 16;
-        break;
-    case 10: case 11:
-        imm = (imm << 8) | (imm << 24);
-        break;
-    case 12:
-        imm = (imm << 8) | 0xff;
-        break;
-    case 13:
-        imm = (imm << 16) | 0xffff;
-        break;
-    case 14:
-        if (op) {
-            /*
-             * This is the only case where the top and bottom 32 bits
-             * of the encoded constant differ.
-             */
-            uint64_t imm64 = 0;
-            int n;
-
-            for (n = 0; n < 8; n++) {
-                if (imm & (1 << n)) {
-                    imm64 |= (0xffULL << (n * 8));
-                }
-            }
-            return imm64;
-        }
-        imm |= (imm << 8) | (imm << 16) | (imm << 24);
-        break;
-    case 15:
-        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
-            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
-        break;
-    }
-    if (op) {
-        imm = ~imm;
-    }
-    return dup_const(MO_32, imm);
-}
-
 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                         GVecGen2iFn *fn)
 {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ void arm_translate_init(void)
     a64_translate_init();
 }
 
+uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
+{
+    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
+    switch (cmode) {
+    case 0: case 1:
+        /* no-op */
+        break;
+    case 2: case 3:
+        imm <<= 8;
+        break;
+    case 4: case 5:
+        imm <<= 16;
+        break;
+    case 6: case 7:
+        imm <<= 24;
+        break;
+    case 8: case 9:
+        imm |= imm << 16;
+        break;
+    case 10: case 11:
+        imm = (imm << 8) | (imm << 24);
+        break;
+    case 12:
+        imm = (imm << 8) | 0xff;
+        break;
+    case 13:
+        imm = (imm << 16) | 0xffff;
+        break;
+    case 14:
+        if (op) {
+            /*
+             * This is the only case where the top and bottom 32 bits
+             * of the encoded constant differ.
+             */
+            uint64_t imm64 = 0;
+            int n;
+
+            for (n = 0; n < 8; n++) {
+                if (imm & (1 << n)) {
+                    imm64 |= (0xffULL << (n * 8));
+                }
+            }
+            return imm64;
+        }
+        imm |= (imm << 8) | (imm << 16) | (imm << 24);
+        break;
+    case 15:
+        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
+            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
+        break;
+    }
+    if (op) {
+        imm = ~imm;
+    }
+    return dup_const(MO_32, imm);
+}
+
 /* Generate a label used for skipping this instruction */
 void arm_gen_condlabel(DisasContext *s)
 {
--
2.20.1
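To see what asimd_imm_const() computes, here is a standalone worked
example for one encoding (a sketch for illustration only, mirroring the
cmode 12 arm of the function above, with dup_const(MO_32, ...) written
out by hand):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t imm = 0xab;    /* the 8-bit abcdefgh immediate field */

        /* cmode 12: imm8:Ones(8), i.e. (imm << 8) | 0xff */
        uint32_t lane = (imm << 8) | 0xff;          /* 0x0000abff */

        /* dup_const(MO_32, lane): replicate the 32-bit lane to 64 bits */
        uint64_t expanded = ((uint64_t)lane << 32) | lane;

        assert(expanded == 0x0000abff0000abffULL);
        return 0;
    }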
Convert the Neon 3-reg-diff insns VABAL and VABDL to decodetree.
Like almost all the remaining insns in this group, these are
a combination of a two-input operation which returns a double width
result and then a possible accumulation of that double width
result into the destination.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate.h          |   1 +
 target/arm/neon-dp.decode       |   6 ++
 target/arm/translate-neon.inc.c | 132 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  31 +-------
 4 files changed, 142 insertions(+), 28 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
   VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
   VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
 
+  VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+  VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+
   VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
   VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+
+  VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+  VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
+
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
+                       NeonGenTwoOpWidenFn *opfn,
+                       NeonGenTwo64OpFn *accfn)
+{
+    /*
+     * 3-regs different lengths, long operations.
+     * These perform an operation on two inputs that returns a double-width
+     * result, and then possibly perform an accumulation operation of
+     * that result into the double-width destination.
+     */
+    TCGv_i64 rd0, rd1, tmp;
+    TCGv_i32 rn, rm;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!opfn) {
+        /* size == 3 case, which is an entirely different insn group */
+        return false;
+    }
+
+    if (a->vd & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    rd0 = tcg_temp_new_i64();
+    rd1 = tcg_temp_new_i64();
+
+    rn = neon_load_reg(a->vn, 0);
+    rm = neon_load_reg(a->vm, 0);
+    opfn(rd0, rn, rm);
+    tcg_temp_free_i32(rn);
+    tcg_temp_free_i32(rm);
+
+    rn = neon_load_reg(a->vn, 1);
+    rm = neon_load_reg(a->vm, 1);
+    opfn(rd1, rn, rm);
+    tcg_temp_free_i32(rn);
+    tcg_temp_free_i32(rm);
+
+    /* Don't store results until after all loads: they might overlap */
+    if (accfn) {
+        tmp = tcg_temp_new_i64();
+        neon_load_reg64(tmp, a->vd);
+        accfn(tmp, tmp, rd0);
+        neon_store_reg64(tmp, a->vd);
+        neon_load_reg64(tmp, a->vd + 1);
+        accfn(tmp, tmp, rd1);
+        neon_store_reg64(tmp, a->vd + 1);
+        tcg_temp_free_i64(tmp);
+    } else {
+        neon_store_reg64(rd0, a->vd);
+        neon_store_reg64(rd1, a->vd + 1);
+    }
+
+    tcg_temp_free_i64(rd0);
+    tcg_temp_free_i64(rd1);
+
+    return true;
+}
+
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_abdl_s16,
+        gen_helper_neon_abdl_s32,
+        gen_helper_neon_abdl_s64,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_abdl_u16,
+        gen_helper_neon_abdl_u32,
+        gen_helper_neon_abdl_u64,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_abdl_s16,
+        gen_helper_neon_abdl_s32,
+        gen_helper_neon_abdl_s64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const addfn[] = {
+        gen_helper_neon_addl_u16,
+        gen_helper_neon_addl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_abdl_u16,
+        gen_helper_neon_abdl_u32,
+        gen_helper_neon_abdl_u64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const addfn[] = {
+        gen_helper_neon_addl_u16,
+        gen_helper_neon_addl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
             {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
             {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
-            {0, 0, 0, 0}, /* VABAL */
+            {0, 0, 0, 7}, /* VABAL */
             {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
-            {0, 0, 0, 0}, /* VABDL */
+            {0, 0, 0, 7}, /* VABDL */
             {0, 0, 0, 0}, /* VMLAL */
             {0, 0, 0, 9}, /* VQDMLAL */
             {0, 0, 0, 0}, /* VMLSL */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 tmp2 = neon_load_reg(rm, pass);
             }
             switch (op) {
-            case 5: case 7: /* VABAL, VABDL */
-                switch ((size << 1) | u) {
-                case 0:
-                    gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
-                    break;
-                case 1:
-                    gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
-                    break;
-                case 2:
-                    gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
-                    break;
-                case 3:
-                    gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
-                    break;
-                case 4:
-                    gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
-                    break;
-                case 5:
-                    gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
-                    break;
-                default: abort();
-                }
-                tcg_temp_free_i32(tmp2);
-                tcg_temp_free_i32(tmp);
-                break;
             case 8: case 9: case 10: case 11: case 12: case 13:
                 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case 10: /* VMLSL */
                 gen_neon_negl(cpu_V0, size);
                 /* Fall through */
-            case 5: case 8: /* VABAL, VMLAL */
+            case 8: /* VABAL, VMLAL */
                 gen_neon_addl(size);
                 break;
             case 9: case 11: /* VQDMLAL, VQDMLSL */
--
2.20.1


The A64 AdvSIMD modified-immediate grouping uses almost the same
constant encoding that A32 Neon does; reuse asimd_imm_const() (to
which we add the AArch64-specific case for cmode 15 op 1) instead of
reimplementing it all.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-5-peter.maydell@linaro.org
---
 target/arm/translate.h    |  3 +-
 target/arm/translate-a64.c | 86 ++++----------------------------------
 target/arm/translate.c    | 17 +++++++-
 3 files changed, 24 insertions(+), 82 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
  * VMVN and VBIC (when cmode < 14 && op == 1).
  *
  * The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
- * callers must catch this.
+ * callers must catch this; we return the 64-bit constant value defined
+ * for AArch64.
  *
  * cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
  * is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
 {
     int rd = extract32(insn, 0, 5);
     int cmode = extract32(insn, 12, 4);
-    int cmode_3_1 = extract32(cmode, 1, 3);
-    int cmode_0 = extract32(cmode, 0, 1);
     int o2 = extract32(insn, 11, 1);
     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
     bool is_neg = extract32(insn, 29, 1);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
         return;
     }
 
-    /* See AdvSIMDExpandImm() in ARM ARM */
-    switch (cmode_3_1) {
-    case 0: /* Replicate(Zeros(24):imm8, 2) */
-    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
-    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
-    case 3: /* Replicate(imm8:Zeros(24), 2) */
-    {
-        int shift = cmode_3_1 * 8;
-        imm = bitfield_replicate(abcdefgh << shift, 32);
-        break;
-    }
-    case 4: /* Replicate(Zeros(8):imm8, 4) */
-    case 5: /* Replicate(imm8:Zeros(8), 4) */
-    {
-        int shift = (cmode_3_1 & 0x1) * 8;
-        imm = bitfield_replicate(abcdefgh << shift, 16);
-        break;
-    }
-    case 6:
-        if (cmode_0) {
-            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
-            imm = (abcdefgh << 16) | 0xffff;
-        } else {
-            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
-            imm = (abcdefgh << 8) | 0xff;
-        }
-        imm = bitfield_replicate(imm, 32);
-        break;
-    case 7:
-        if (!cmode_0 && !is_neg) {
-            imm = bitfield_replicate(abcdefgh, 8);
-        } else if (!cmode_0 && is_neg) {
-            int i;
-            imm = 0;
-            for (i = 0; i < 8; i++) {
-                if ((abcdefgh) & (1 << i)) {
-                    imm |= 0xffULL << (i * 8);
-                }
-            }
-        } else if (cmode_0) {
-            if (is_neg) {
-                imm = (abcdefgh & 0x3f) << 48;
-                if (abcdefgh & 0x80) {
-                    imm |= 0x8000000000000000ULL;
-                }
-                if (abcdefgh & 0x40) {
-                    imm |= 0x3fc0000000000000ULL;
-                } else {
-                    imm |= 0x4000000000000000ULL;
-                }
-            } else {
-                if (o2) {
-                    /* FMOV (vector, immediate) - half-precision */
-                    imm = vfp_expand_imm(MO_16, abcdefgh);
-                    /* now duplicate across the lanes */
-                    imm = bitfield_replicate(imm, 16);
-                } else {
-                    imm = (abcdefgh & 0x3f) << 19;
-                    if (abcdefgh & 0x80) {
-                        imm |= 0x80000000;
-                    }
-                    if (abcdefgh & 0x40) {
-                        imm |= 0x3e000000;
-                    } else {
-                        imm |= 0x40000000;
-                    }
-                    imm |= (imm << 32);
-                }
-            }
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (cmode_3_1 != 7 && is_neg) {
-        imm = ~imm;
+    if (cmode == 15 && o2 && !is_neg) {
+        /* FMOV (vector, immediate) - half-precision */
+        imm = vfp_expand_imm(MO_16, abcdefgh);
+        /* now duplicate across the lanes */
+        imm = bitfield_replicate(imm, 16);
+    } else {
+        imm = asimd_imm_const(abcdefgh, cmode, is_neg);
     }
 
     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
     case 14:
         if (op) {
             /*
-             * This is the only case where the top and bottom 32 bits
-             * of the encoded constant differ.
+             * This and cmode == 15 op == 1 are the only cases where
+             * the top and bottom 32 bits of the encoded constant differ.
              */
             uint64_t imm64 = 0;
             int n;
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
         imm |= (imm << 8) | (imm << 16) | (imm << 24);
         break;
     case 15:
+        if (op) {
+            /* Reserved encoding for AArch32; valid for AArch64 */
+            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
+            if (imm & 0x80) {
+                imm64 |= 0x8000000000000000ULL;
+            }
+            if (imm & 0x40) {
+                imm64 |= 0x3fc0000000000000ULL;
+            } else {
+                imm64 |= 0x4000000000000000ULL;
+            }
+            return imm64;
+        }
         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
         break;
--
2.20.1
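For readers new to the 3-reg-different-length group converted above, a
scalar C sketch of what VABAL does per element (illustration only, not
QEMU code): absolute difference, widened, then accumulated into a
double-width destination.

    #include <assert.h>
    #include <stdint.h>

    /* VABAL.S16: Qd (32-bit lanes) += |Dn - Dm| on 16-bit input lanes */
    static void vabal_s16(int32_t *qd, const int16_t *dn, const int16_t *dm)
    {
        for (int e = 0; e < 4; e++) {
            int32_t diff = (int32_t)dn[e] - dm[e];   /* widen before subtract */
            qd[e] += diff < 0 ? -diff : diff;        /* accumulate |diff| */
        }
    }

    int main(void)
    {
        int16_t n[4] = { 100, -200, 3, 4 };
        int16_t m[4] = { -50, 25, 3, 10 };
        int32_t d[4] = { 1, 1, 1, 1 };

        vabal_s16(d, n, m);
        assert(d[0] == 151 && d[1] == 226 && d[2] == 1 && d[3] == 7);
        return 0;
    }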
Mark the arrays of function pointers in trans_VSHLL_S_2sh() and
trans_VSHLL_U_2sh() as both 'static' and 'const'.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-neon.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
 
 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
 {
-    NeonGenWidenFn *widenfn[] = {
+    static NeonGenWidenFn * const widenfn[] = {
         gen_helper_neon_widen_s8,
         gen_helper_neon_widen_s16,
         tcg_gen_ext_i32_i64,
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
 
 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
 {
-    NeonGenWidenFn *widenfn[] = {
+    static NeonGenWidenFn * const widenfn[] = {
         gen_helper_neon_widen_u8,
         gen_helper_neon_widen_u16,
         tcg_gen_extu_i32_i64,
--
2.20.1


Use dup_const() instead of bitfield_replicate() in
disas_simd_mod_imm().

(We can't replace the other use of bitfield_replicate() in this file,
in logic_imm_decode_wmask(), because that location needs to handle 2
and 4 bit elements, which dup_const() cannot.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-6-peter.maydell@linaro.org
---
 target/arm/translate-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
         /* FMOV (vector, immediate) - half-precision */
         imm = vfp_expand_imm(MO_16, abcdefgh);
         /* now duplicate across the lanes */
-        imm = bitfield_replicate(imm, 16);
+        imm = dup_const(MO_16, imm);
     } else {
         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
     }
--
2.20.1
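A quick illustration of the replacement above: for 16-bit elements,
dup_const(MO_16, x) replicates the value across a 64-bit constant, which a
multiply by a repeating-ones pattern computes in one step. This is a
hand-rolled standalone sketch, not QEMU's implementation:

    #include <assert.h>
    #include <stdint.h>

    /* equivalent of dup_const(MO_16, x): replicate x into all four
     * 16-bit lanes of a 64-bit value */
    static uint64_t dup16(uint16_t x)
    {
        return (uint64_t)x * 0x0001000100010001ULL;   /* x:x:x:x */
    }

    int main(void)
    {
        assert(dup16(0x1234) == 0x1234123412341234ULL);
        return 0;
    }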
Convert the Neon VDUP (scalar) insn to decodetree. (Note that we
can't call this just "VDUP" as we used that already in vfp.decode for
the "VDUP (general purpose register)" insn.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  7 +++++++
 target/arm/translate-neon.inc.c | 26 ++++++++++++++++++++++++++
 target/arm/translate.c          | 25 +------------------------
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
 
   VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
        vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+  VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
+              vm=%vm_dp vd=%vd_dp size=0
+  VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
+              vm=%vm_dp vd=%vd_dp size=1
+  VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
+              vm=%vm_dp vd=%vd_dp size=2
 ]
 
 # Subgroup for size != 0b11
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
     tcg_temp_free_i32(tmp);
     return true;
 }
+
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
+{
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
+                         neon_element_offset(a->vm, a->index, a->size),
+                         a->q ? 16 : 8, a->q ? 16 : 8);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
                 break;
             }
-        } else if ((insn & (1 << 10)) == 0) {
-            /* VTBL, VTBX: handled by decodetree */
-            return 1;
-        } else if ((insn & 0x380) == 0) {
-            /* VDUP */
-            int element;
-            MemOp size;
-
-            if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
-                return 1;
-            }
-            if (insn & (1 << 16)) {
-                size = MO_8;
-                element = (insn >> 17) & 7;
-            } else if (insn & (1 << 17)) {
-                size = MO_16;
-                element = (insn >> 18) & 3;
-            } else {
-                size = MO_32;
-                element = (insn >> 19) & 1;
-            }
-            tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
-                                 neon_element_offset(rm, element, size),
-                                 q ? 16 : 8, q ? 16 : 8);
         } else {
+            /* VTBL, VTBX, VDUP: handled by decodetree */
             return 1;
         }
     }
--
2.20.1


Implement the MVE logical-immediate insns (VMOV, VMVN,
VORR and VBIC). These have essentially the same encoding
as their Neon equivalents, and we implement the decode
in the same way.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-7-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    |  4 +++
 target/arm/mve.decode      | 17 +++++++++++++
 target/arm/mve_helper.c    | 24 ++++++++++++++++++
 target/arm/translate-mve.c | 50 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vaddvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@
 # VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
 %size_28 28:1 !function=plus_1
 
+# 1imm format immediate
+%imm_28_16_0 28:1 16:3 0:4
+
 &vldr_vstr rn qd imm p a w size l u
 &1op qd qm size
 &2op qd qm qn size
 &2scalar qd qn rm size
+&1imm qd imm cmode op
 
 @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
@@ -XXX,XX +XXX,XX @@
 @2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
 @2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \
      size=%size_28
+@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0
 
 # The _rev suffix indicates that Vn and Vm are reversed. This is
 # the case for shifts. In the Arm ARM these insns are documented
@@ -XXX,XX +XXX,XX @@ VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rd
 # Predicate operations
 %mask_22_13 22:1 13:3
 VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
+
+# Logical immediate operations (1 reg and modified-immediate)
+
+# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
+# not in a way we can conveniently represent in decodetree without
+# a lot of repetition:
+# VORR: op=0, (cmode & 1) && cmode < 12
+# VBIC: op=1, (cmode & 1) && cmode < 12
+# VMOV: everything else
+# So we have a single decode line and check the cmode/op in the
+# trans function.
+Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_1OP(vnegw, 4, int32_t, DO_NEG)
 DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH)
 DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
 
+/*
+ * 1 operand immediates: Vda is destination and possibly also one source.
+ * All these insns work at 64-bit widths.
+ */
+#define DO_1OP_IMM(OP, FN) \
+    void HELPER(mve_##OP)(CPUARMState *env, void *vda, uint64_t imm) \
+    { \
+        uint64_t *da = vda; \
+        uint16_t mask = mve_element_mask(env); \
+        unsigned e; \
+        for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
+            mergemask(&da[H8(e)], FN(da[H8(e)], imm), mask); \
+        } \
+        mve_advance_vpt(env); \
+    }
+
+#define DO_MOVI(N, I) (I)
+#define DO_ANDI(N, I) ((N) & (I))
+#define DO_ORRI(N, I) ((N) | (I))
+
+DO_1OP_IMM(vmovi, DO_MOVI)
+DO_1OP_IMM(vandi, DO_ANDI)
+DO_1OP_IMM(vorri, DO_ORRI)
+
 #define DO_2OP(OP, ESIZE, TYPE, FN) \
     void HELPER(glue(mve_, OP))(CPUARMState *env, \
                                 void *vd, void *vn, void *vm) \
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
 
 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
 static inline long mve_qreg_offset(unsigned reg)
@@ -XXX,XX +XXX,XX @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
     mve_update_eci(s);
     return true;
 }
+
+static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
+{
+    TCGv_ptr qd;
+    uint64_t imm;
+
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !mve_check_qreg_bank(s, a->qd) ||
+        !fn) {
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    imm = asimd_imm_const(a->imm, a->cmode, a->op);
+
+    qd = mve_qreg_ptr(a->qd);
+    fn(cpu_env, qd, tcg_constant_i64(imm));
+    tcg_temp_free_ptr(qd);
+    mve_update_eci(s);
+    return true;
+}
+
+static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
+{
+    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
+    MVEGenOneOpImmFn *fn;
+
+    if ((a->cmode & 1) && a->cmode < 12) {
+        if (a->op) {
+            /*
+             * For op=1, the immediate will be inverted by asimd_imm_const(),
+             * so the VBIC becomes a logical AND operation.
+             */
+            fn = gen_helper_mve_vandi;
+        } else {
+            fn = gen_helper_mve_vorri;
+        }
+    } else {
+        /* There is one unallocated cmode/op combination in this space */
+        if (a->cmode == 15 && a->op == 1) {
+            return false;
+        }
+        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
+        fn = gen_helper_mve_vmovi;
+    }
+    return do_1imm(s, a, fn);
+}
--
2.20.1
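The VORR/VBIC/VMOV split in the MVE logical-immediate patch above is easy
to mis-read from the prose, so here is a tiny sketch of the decode rule
(illustrative only, mirroring the condition in trans_Vimm_1r() above):

    #include <assert.h>
    #include <stdbool.h>

    /* VORR/VBIC occupy the odd cmode values below 12; everything else
     * is VMOV/VMVN, with cmode == 15, op == 1 unallocated.  op then
     * selects VORR (0) versus VBIC (1). */
    static bool is_vorr_vbic(int cmode, int op)
    {
        (void)op;
        return (cmode & 1) && cmode < 12;
    }

    int main(void)
    {
        assert(is_vorr_vbic(1, 0));     /* VORR */
        assert(is_vorr_vbic(11, 1));    /* VBIC */
        assert(!is_vorr_vbic(12, 0));   /* VMOV */
        assert(!is_vorr_vbic(14, 1));   /* VMVN */
        return 0;
    }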
Implement the MVE shift-vector-left-by-immediate insns VSHL, VQSHL
and VQSHLU.

The size-and-immediate encoding here is the same as Neon, and we
handle it the same way neon-dp.decode does.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-8-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    | 16 ++++++++++
 target/arm/mve.decode      | 23 +++++++++++++++
 target/arm/mve_helper.c    | 57 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c | 51 ++++++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
 DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
 DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshlui_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@
 &2op qd qm qn size
 &2scalar qd qn rm size
 &1imm qd imm cmode op
+&2shift qd qm shift size
 
 @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
@@ -XXX,XX +XXX,XX @@
 @2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
 @2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
 
+@2_shl_b .... .... .. 001 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
+@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
+@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
+
 # Vector loads and stores
 
 # Widening loads and narrowing stores:
@@ -XXX,XX +XXX,XX @@ VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
 # So we have a single decode line and check the cmode/op in the
 # trans function.
 Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm
+
+# Shifts by immediate
+
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W)
     WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, true, satp)
 #define DO_UQRSHL_OP(N, M, satp) \
     WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, true, satp)
+#define DO_SUQSHL_OP(N, M, satp) \
+    WRAP_QRSHL_HELPER(do_suqrshl_bhs, N, M, false, satp)
 
 DO_2OP_SAT_S(vqshls, DO_SQSHL_OP)
 DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP)
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvsw, 4, uint32_t)
 DO_VADDV(vaddvub, 1, uint8_t)
 DO_VADDV(vaddvuh, 2, uint16_t)
 DO_VADDV(vaddvuw, 4, uint32_t)
+
+/* Shifts by immediate */
+#define DO_2SHIFT(OP, ESIZE, TYPE, FN) \
+    void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+                                void *vm, uint32_t shift) \
+    { \
+        TYPE *d = vd, *m = vm; \
+        uint16_t mask = mve_element_mask(env); \
+        unsigned e; \
+        for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+            mergemask(&d[H##ESIZE(e)], \
+                      FN(m[H##ESIZE(e)], shift), mask); \
+        } \
+        mve_advance_vpt(env); \
+    }
+
+#define DO_2SHIFT_SAT(OP, ESIZE, TYPE, FN) \
+    void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+                                void *vm, uint32_t shift) \
+    { \
+        TYPE *d = vd, *m = vm; \
+        uint16_t mask = mve_element_mask(env); \
+        unsigned e; \
+        bool qc = false; \
+        for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+            bool sat = false; \
+            mergemask(&d[H##ESIZE(e)], \
+                      FN(m[H##ESIZE(e)], shift, &sat), mask); \
+            qc |= sat & mask & 1; \
+        } \
+        if (qc) { \
+            env->vfp.qc[0] = qc; \
+        } \
+        mve_advance_vpt(env); \
+    }
+
+/* provide unsigned 2-op shift helpers for all sizes */
+#define DO_2SHIFT_U(OP, FN) \
+    DO_2SHIFT(OP##b, 1, uint8_t, FN) \
+    DO_2SHIFT(OP##h, 2, uint16_t, FN) \
+    DO_2SHIFT(OP##w, 4, uint32_t, FN)
+
+#define DO_2SHIFT_SAT_U(OP, FN) \
+    DO_2SHIFT_SAT(OP##b, 1, uint8_t, FN) \
+    DO_2SHIFT_SAT(OP##h, 2, uint16_t, FN) \
+    DO_2SHIFT_SAT(OP##w, 4, uint32_t, FN)
+#define DO_2SHIFT_SAT_S(OP, FN) \
+    DO_2SHIFT_SAT(OP##b, 1, int8_t, FN) \
+    DO_2SHIFT_SAT(OP##h, 2, int16_t, FN) \
+    DO_2SHIFT_SAT(OP##w, 4, int32_t, FN)
+
+DO_2SHIFT_U(vshli_u, DO_VSHLU)
+DO_2SHIFT_SAT_U(vqshli_u, DO_UQSHL_OP)
+DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
+DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
     }
     return do_1imm(s, a, fn);
 }
+
+static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
+                      bool negateshift)
+{
+    TCGv_ptr qd, qm;
+    int shift = a->shift;
+
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !mve_check_qreg_bank(s, a->qd | a->qm) ||
+        !fn) {
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * When we handle a right shift insn using a left-shift helper
+     * which permits a negative shift count to indicate a right-shift,
+     * we must negate the shift count.
+     */
+    if (negateshift) {
+        shift = -shift;
+    }
+
+    qd = mve_qreg_ptr(a->qd);
+    qm = mve_qreg_ptr(a->qm);
+    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
+    tcg_temp_free_ptr(qd);
+    tcg_temp_free_ptr(qm);
+    mve_update_eci(s);
+    return true;
+}
+
+#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
+    static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
+    { \
+        static MVEGenTwoOpShiftFn * const fns[] = { \
+            gen_helper_mve_##FN##b, \
+            gen_helper_mve_##FN##h, \
+            gen_helper_mve_##FN##w, \
+            NULL, \
+        }; \
+        return do_2shift(s, a, fns[a->size], NEGATESHIFT); \
+    }
+
+DO_2SHIFT(VSHLI, vshli_u, false)
+DO_2SHIFT(VQSHLI_S, vqshli_s, false)
+DO_2SHIFT(VQSHLI_U, vqshli_u, false)
+DO_2SHIFT(VQSHLUI, vqshlui_s, false)
--
2.20.1


Convert the "pre-widening" insns VADDL, VSUBL, VADDW and VSUBW
in the Neon 3-registers-different-lengths group to decodetree.
These insns work by widening one or both inputs to double their
size, performing an add or subtract at the doubled size and
then storing the double-size result.

As usual, rather than copying the loop of the original decoder
(which needs awkward code to avoid problems when source and
destination registers overlap) we just unroll the two passes.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  43 +++++++++++++
 target/arm/translate-neon.inc.c | 104 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  16 ++---
 3 files changed, 151 insertions(+), 12 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
 # So we have a single decode line and check the cmode/op in the
 # trans function.
 Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
+
+######################################################################
+# Within the "two registers, or three registers of different lengths"
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
+# or they are a size field for the three-reg-different-lengths and
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
+# is slightly awkward for decodetree: we handle it with this
+# non-exclusive group which contains within it two exclusive groups:
+# one for the size=0b11 patterns, and one for the size-not-0b11
+# patterns. This allows us to check that none of the insns within
+# each subgroup accidentally overlap each other. Note that all the
+# trans functions for the size-not-0b11 patterns must check and
+# return false for size==3.
+######################################################################
+{
+  # 0b11 subgroup will go here
+
+  # Subgroup for size != 0b11
+  [
+    ##################################################################
+    # 3-reg-different-length grouping:
+    # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
+    ##################################################################
+
+    &3diff vm vn vd size
+
+    @3diff .... ... . . . size:2 .... .... .... . . . . .... \
+           &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+    VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+    VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+
+    VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+    VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+
+    VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+    VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+
+    VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+    VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+  ]
+}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
     }
     return do_1reg_imm(s, a, fn);
 }
+
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
+                           NeonGenWidenFn *widenfn,
+                           NeonGenTwo64OpFn *opfn,
+                           bool src1_wide)
+{
+    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
+    TCGv_i64 rn0_64, rn1_64, rm_64;
+    TCGv_i32 rm;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!widenfn || !opfn) {
+        /* size == 3 case, which is an entirely different insn group */
+        return false;
+    }
+
+    if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    rn0_64 = tcg_temp_new_i64();
+    rn1_64 = tcg_temp_new_i64();
+    rm_64 = tcg_temp_new_i64();
+
+    if (src1_wide) {
+        neon_load_reg64(rn0_64, a->vn);
+    } else {
+        TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+        widenfn(rn0_64, tmp);
+        tcg_temp_free_i32(tmp);
+    }
+    rm = neon_load_reg(a->vm, 0);
+
+    widenfn(rm_64, rm);
+    tcg_temp_free_i32(rm);
+    opfn(rn0_64, rn0_64, rm_64);
+
+    /*
+     * Load second pass inputs before storing the first pass result, to
+     * avoid incorrect results if a narrow input overlaps with the result.
+     */
+    if (src1_wide) {
+        neon_load_reg64(rn1_64, a->vn + 1);
+    } else {
+        TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+        widenfn(rn1_64, tmp);
+        tcg_temp_free_i32(tmp);
+    }
+    rm = neon_load_reg(a->vm, 1);
+
+    neon_store_reg64(rn0_64, a->vd);
+
+    widenfn(rm_64, rm);
+    tcg_temp_free_i32(rm);
+    opfn(rn1_64, rn1_64, rm_64);
+    neon_store_reg64(rn1_64, a->vd + 1);
+
+    tcg_temp_free_i64(rn0_64);
+    tcg_temp_free_i64(rn1_64);
+    tcg_temp_free_i64(rm_64);
+
+    return true;
+}
+
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+    { \
+        static NeonGenWidenFn * const widenfn[] = { \
+            gen_helper_neon_widen_##S##8, \
+            gen_helper_neon_widen_##S##16, \
+            tcg_gen_##EXT##_i32_i64, \
+            NULL, \
+        }; \
+        static NeonGenTwo64OpFn * const addfn[] = { \
+            gen_helper_neon_##OP##l_u16, \
+            gen_helper_neon_##OP##l_u32, \
+            tcg_gen_##OP##_i64, \
+            NULL, \
+        }; \
+        return do_prewiden_3d(s, a, widenfn[a->size], \
+                              addfn[a->size], SRC1WIDE); \
+    }
+
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
82
+ bool src1_wide)
83
+{
84
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
85
+ TCGv_i64 rn0_64, rn1_64, rm_64;
86
+ TCGv_i32 rm;
87
+
88
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
89
+ return false;
184
+ return false;
90
+ }
185
+ }
91
+
186
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
92
+ /* UNDEF accesses to D16-D31 if they don't exist. */
93
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
94
+ ((a->vd | a->vn | a->vm) & 0x10)) {
95
+ return false;
96
+ }
97
+
98
+ if (!widenfn || !opfn) {
99
+ /* size == 3 case, which is an entirely different insn group */
100
+ return false;
101
+ }
102
+
103
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
104
+ return false;
105
+ }
106
+
107
+ if (!vfp_access_check(s)) {
108
+ return true;
187
+ return true;
109
+ }
188
+ }
110
+
189
+
111
+ rn0_64 = tcg_temp_new_i64();
112
+ rn1_64 = tcg_temp_new_i64();
113
+ rm_64 = tcg_temp_new_i64();
114
+
115
+ if (src1_wide) {
116
+ neon_load_reg64(rn0_64, a->vn);
117
+ } else {
118
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
119
+ widenfn(rn0_64, tmp);
120
+ tcg_temp_free_i32(tmp);
121
+ }
122
+ rm = neon_load_reg(a->vm, 0);
123
+
124
+ widenfn(rm_64, rm);
125
+ tcg_temp_free_i32(rm);
126
+ opfn(rn0_64, rn0_64, rm_64);
127
+
128
+ /*
190
+ /*
129
+ * Load second pass inputs before storing the first pass result, to
191
+ * When we handle a right shift insn using a left-shift helper
130
+ * avoid incorrect results if a narrow input overlaps with the result.
192
+ * which permits a negative shift count to indicate a right-shift,
193
+ * we must negate the shift count.
131
+ */
194
+ */
132
+ if (src1_wide) {
195
+ if (negateshift) {
133
+ neon_load_reg64(rn1_64, a->vn + 1);
196
+ shift = -shift;
134
+ } else {
197
+ }
135
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
198
+
136
+ widenfn(rn1_64, tmp);
199
+ qd = mve_qreg_ptr(a->qd);
137
+ tcg_temp_free_i32(tmp);
200
+ qm = mve_qreg_ptr(a->qm);
138
+ }
201
+ fn(cpu_env, qd, qm, tcg_constant_i32(shift));
139
+ rm = neon_load_reg(a->vm, 1);
202
+ tcg_temp_free_ptr(qd);
140
+
203
+ tcg_temp_free_ptr(qm);
141
+ neon_store_reg64(rn0_64, a->vd);
204
+ mve_update_eci(s);
142
+
143
+ widenfn(rm_64, rm);
144
+ tcg_temp_free_i32(rm);
145
+ opfn(rn1_64, rn1_64, rm_64);
146
+ neon_store_reg64(rn1_64, a->vd + 1);
147
+
148
+ tcg_temp_free_i64(rn0_64);
149
+ tcg_temp_free_i64(rn1_64);
150
+ tcg_temp_free_i64(rm_64);
151
+
152
+ return true;
205
+ return true;
153
+}
206
+}
154
+
207
+
155
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
208
+#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
156
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
209
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
157
+ { \
210
+ { \
158
+ static NeonGenWidenFn * const widenfn[] = { \
211
+ static MVEGenTwoOpShiftFn * const fns[] = { \
159
+ gen_helper_neon_widen_##S##8, \
212
+ gen_helper_mve_##FN##b, \
160
+ gen_helper_neon_widen_##S##16, \
213
+ gen_helper_mve_##FN##h, \
161
+ tcg_gen_##EXT##_i32_i64, \
214
+ gen_helper_mve_##FN##w, \
162
+ NULL, \
215
+ NULL, \
163
+ }; \
216
+ }; \
164
+ static NeonGenTwo64OpFn * const addfn[] = { \
217
+ return do_2shift(s, a, fns[a->size], NEGATESHIFT); \
165
+ gen_helper_neon_##OP##l_u16, \
218
+ }
166
+ gen_helper_neon_##OP##l_u32, \
219
+
167
+ tcg_gen_##OP##_i64, \
220
+DO_2SHIFT(VSHLI, vshli_u, false)
168
+ NULL, \
221
+DO_2SHIFT(VQSHLI_S, vqshli_s, false)
169
+ }; \
222
+DO_2SHIFT(VQSHLI_U, vqshli_u, false)
170
+ return do_prewiden_3d(s, a, widenfn[a->size], \
223
+DO_2SHIFT(VQSHLUI, vqshlui_s, false)
171
+ addfn[a->size], SRC1WIDE); \
172
+ }
173
+
174
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
175
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
176
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
177
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
178
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
179
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
180
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
181
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
182
diff --git a/target/arm/translate.c b/target/arm/translate.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/target/arm/translate.c
185
+++ b/target/arm/translate.c
186
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
187
/* Three registers of different lengths. */
188
int src1_wide;
189
int src2_wide;
190
- int prewiden;
191
/* undefreq: bit 0 : UNDEF if size == 0
192
* bit 1 : UNDEF if size == 1
193
* bit 2 : UNDEF if size == 2
194
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
195
int undefreq;
196
/* prewiden, src1_wide, src2_wide, undefreq */
197
static const int neon_3reg_wide[16][4] = {
198
- {1, 0, 0, 0}, /* VADDL */
199
- {1, 1, 0, 0}, /* VADDW */
200
- {1, 0, 0, 0}, /* VSUBL */
201
- {1, 1, 0, 0}, /* VSUBW */
202
+ {0, 0, 0, 7}, /* VADDL: handled by decodetree */
203
+ {0, 0, 0, 7}, /* VADDW: handled by decodetree */
204
+ {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
205
+ {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
206
{0, 1, 1, 0}, /* VADDHN */
207
{0, 0, 0, 0}, /* VABAL */
208
{0, 1, 1, 0}, /* VSUBHN */
209
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
210
{0, 0, 0, 7}, /* Reserved: always UNDEF */
211
};
212
213
- prewiden = neon_3reg_wide[op][0];
214
src1_wide = neon_3reg_wide[op][1];
215
src2_wide = neon_3reg_wide[op][2];
216
undefreq = neon_3reg_wide[op][3];
217
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
218
} else {
219
tmp = neon_load_reg(rn, pass);
220
}
221
- if (prewiden) {
222
- gen_neon_widen(cpu_V0, tmp, size, u);
223
- }
224
}
225
if (src2_wide) {
226
neon_load_reg64(cpu_V1, rm + pass);
227
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
228
} else {
229
tmp2 = neon_load_reg(rm, pass);
230
}
231
- if (prewiden) {
232
- gen_neon_widen(cpu_V1, tmp2, size, u);
233
- }
234
}
235
switch (op) {
236
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
237
--
224
--
238
2.20.1
225
2.20.1
239
226
240
227
diff view generated by jsdifflib
1
Convert the Neon 2-reg-scalar long multiplies to decodetree.
1
Implement the MVE vector shift right by immediate insns VSHRI and
2
These are the last instructions in the group.
2
VRSHRI. As with Neon, we implement these by using helper functions
3
which perform left shifts but allow negative shift counts to indicate
4
right shifts.
3
5
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210628135835.6690-9-peter.maydell@linaro.org
6
---
9
---
7
target/arm/neon-dp.decode | 18 ++++
10
target/arm/helper-mve.h | 12 ++++++++++++
8
target/arm/translate-neon.inc.c | 163 ++++++++++++++++++++++++++++
11
target/arm/translate.h | 20 ++++++++++++++++++++
9
target/arm/translate.c | 182 ++------------------------------
12
target/arm/mve.decode | 28 ++++++++++++++++++++++++++++
10
3 files changed, 187 insertions(+), 176 deletions(-)
13
target/arm/mve_helper.c | 7 +++++++
14
target/arm/translate-mve.c | 5 +++++
15
target/arm/translate-neon.c | 18 ------------------
16
6 files changed, 72 insertions(+), 18 deletions(-)
11
17
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
18
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
13
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/neon-dp.decode
20
--- a/target/arm/helper-mve.h
15
+++ b/target/arm/neon-dp.decode
21
+++ b/target/arm/helper-mve.h
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
17
23
DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
18
@2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
24
DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
19
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
25
20
+ # For the 'long' ops the Q bit is part of insn decode
26
+DEF_HELPER_FLAGS_4(mve_vshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
27
+DEF_HELPER_FLAGS_4(mve_vshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
28
+DEF_HELPER_FLAGS_4(mve_vshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
25
VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
26
27
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
28
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
29
+
29
+
30
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
30
DEF_HELPER_FLAGS_4(mve_vshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
DEF_HELPER_FLAGS_4(mve_vshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
DEF_HELPER_FLAGS_4(mve_vshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
DEF_HELPER_FLAGS_4(mve_vqshlui_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
DEF_HELPER_FLAGS_4(mve_vqshlui_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
36
DEF_HELPER_FLAGS_4(mve_vqshlui_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
+
37
+
32
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
38
+DEF_HELPER_FLAGS_4(mve_vrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
39
+DEF_HELPER_FLAGS_4(mve_vrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
40
+DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
35
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
36
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
37
+
41
+
38
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
42
+DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
39
+
43
+DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
40
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
44
+DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
41
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
45
diff --git a/target/arm/translate.h b/target/arm/translate.h
42
43
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
44
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
45
+
46
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
47
+
48
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
49
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
50
51
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
52
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/translate-neon.inc.c
47
--- a/target/arm/translate.h
54
+++ b/target/arm/translate-neon.inc.c
48
+++ b/target/arm/translate.h
55
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
49
@@ -XXX,XX +XXX,XX @@ static inline int times_2_plus_1(DisasContext *s, int x)
56
};
50
return x * 2 + 1;
57
return do_vqrdmlah_2sc(s, a, opfn[a->size]);
58
}
51
}
59
+
52
60
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
53
+static inline int rsub_64(DisasContext *s, int x)
61
+ NeonGenTwoOpWidenFn *opfn,
62
+ NeonGenTwo64OpFn *accfn)
63
+{
54
+{
64
+ /*
55
+ return 64 - x;
65
+ * Two registers and a scalar, long operations: perform an
66
+ * operation on the input elements and the scalar which produces
67
+ * a double-width result, and then possibly perform an accumulation
68
+ * operation of that result into the destination.
69
+ */
70
+ TCGv_i32 scalar, rn;
71
+ TCGv_i64 rn0_64, rn1_64;
72
+
73
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
74
+ return false;
75
+ }
76
+
77
+ /* UNDEF accesses to D16-D31 if they don't exist. */
78
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
79
+ ((a->vd | a->vn | a->vm) & 0x10)) {
80
+ return false;
81
+ }
82
+
83
+ if (!opfn) {
84
+ /* Bad size (including size == 3, which is a different insn group) */
85
+ return false;
86
+ }
87
+
88
+ if (a->vd & 1) {
89
+ return false;
90
+ }
91
+
92
+ if (!vfp_access_check(s)) {
93
+ return true;
94
+ }
95
+
96
+ scalar = neon_get_scalar(a->size, a->vm);
97
+
98
+ /* Load all inputs before writing any outputs, in case of overlap */
99
+ rn = neon_load_reg(a->vn, 0);
100
+ rn0_64 = tcg_temp_new_i64();
101
+ opfn(rn0_64, rn, scalar);
102
+ tcg_temp_free_i32(rn);
103
+
104
+ rn = neon_load_reg(a->vn, 1);
105
+ rn1_64 = tcg_temp_new_i64();
106
+ opfn(rn1_64, rn, scalar);
107
+ tcg_temp_free_i32(rn);
108
+ tcg_temp_free_i32(scalar);
109
+
110
+ if (accfn) {
111
+ TCGv_i64 t64 = tcg_temp_new_i64();
112
+ neon_load_reg64(t64, a->vd);
113
+ accfn(t64, t64, rn0_64);
114
+ neon_store_reg64(t64, a->vd);
115
+ neon_load_reg64(t64, a->vd + 1);
116
+ accfn(t64, t64, rn1_64);
117
+ neon_store_reg64(t64, a->vd + 1);
118
+ tcg_temp_free_i64(t64);
119
+ } else {
120
+ neon_store_reg64(rn0_64, a->vd);
121
+ neon_store_reg64(rn1_64, a->vd + 1);
122
+ }
123
+ tcg_temp_free_i64(rn0_64);
124
+ tcg_temp_free_i64(rn1_64);
125
+ return true;
126
+}
56
+}
127
+
57
+
128
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
58
+static inline int rsub_32(DisasContext *s, int x)
129
+{
59
+{
130
+ static NeonGenTwoOpWidenFn * const opfn[] = {
60
+ return 32 - x;
131
+ NULL,
132
+ gen_helper_neon_mull_s16,
133
+ gen_mull_s32,
134
+ NULL,
135
+ };
136
+
137
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
138
+}
61
+}
139
+
62
+
140
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
63
+static inline int rsub_16(DisasContext *s, int x)
141
+{
64
+{
142
+ static NeonGenTwoOpWidenFn * const opfn[] = {
65
+ return 16 - x;
143
+ NULL,
144
+ gen_helper_neon_mull_u16,
145
+ gen_mull_u32,
146
+ NULL,
147
+ };
148
+
149
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
150
+}
66
+}
151
+
67
+
152
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
68
+static inline int rsub_8(DisasContext *s, int x)
153
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
154
+ { \
155
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
156
+ NULL, \
157
+ gen_helper_neon_##MULL##16, \
158
+ gen_##MULL##32, \
159
+ NULL, \
160
+ }; \
161
+ static NeonGenTwo64OpFn * const accfn[] = { \
162
+ NULL, \
163
+ gen_helper_neon_##ACC##l_u32, \
164
+ tcg_gen_##ACC##_i64, \
165
+ NULL, \
166
+ }; \
167
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
168
+ }
169
+
170
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
171
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
172
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
173
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
174
+
175
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
176
+{
69
+{
177
+ static NeonGenTwoOpWidenFn * const opfn[] = {
70
+ return 8 - x;
178
+ NULL,
179
+ gen_VQDMULL_16,
180
+ gen_VQDMULL_32,
181
+ NULL,
182
+ };
183
+
184
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
185
+}
71
+}
186
+
72
+
187
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
73
static inline int arm_dc_feature(DisasContext *dc, int feature)
188
+{
74
{
189
+ static NeonGenTwoOpWidenFn * const opfn[] = {
75
return (dc->features & (1ULL << feature)) != 0;
190
+ NULL,
76
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
191
+ gen_VQDMULL_16,
77
index XXXXXXX..XXXXXXX 100644
192
+ gen_VQDMULL_32,
78
--- a/target/arm/mve.decode
193
+ NULL,
79
+++ b/target/arm/mve.decode
194
+ };
80
@@ -XXX,XX +XXX,XX @@
195
+ static NeonGenTwo64OpFn * const accfn[] = {
81
@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
196
+ NULL,
82
@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
197
+ gen_VQDMLAL_acc_16,
83
198
+ gen_VQDMLAL_acc_32,
84
+# Right shifts are encoded as N - shift, where N is the element size in bits.
199
+ NULL,
85
+%rshift_i5 16:5 !function=rsub_32
200
+ };
86
+%rshift_i4 16:4 !function=rsub_16
87
+%rshift_i3 16:3 !function=rsub_8
201
+
88
+
202
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
89
+@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \
203
+}
90
+ size=0 shift=%rshift_i3
91
+@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \
92
+ size=1 shift=%rshift_i4
93
+@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
94
+ size=2 shift=%rshift_i5
204
+
95
+
205
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
96
# Vector loads and stores
206
+{
97
207
+ static NeonGenTwoOpWidenFn * const opfn[] = {
98
# Widening loads and narrowing stores:
208
+ NULL,
99
@@ -XXX,XX +XXX,XX @@ VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
209
+ gen_VQDMULL_16,
100
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b
210
+ gen_VQDMULL_32,
101
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h
211
+ NULL,
102
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w
212
+ };
213
+ static NeonGenTwo64OpFn * const accfn[] = {
214
+ NULL,
215
+ gen_VQDMLSL_acc_16,
216
+ gen_VQDMLSL_acc_32,
217
+ NULL,
218
+ };
219
+
103
+
220
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
104
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
221
+}
105
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
222
diff --git a/target/arm/translate.c b/target/arm/translate.c
106
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
107
+
108
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
109
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
110
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
111
+
112
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
113
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
114
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
115
+
116
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
117
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
118
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
119
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
223
index XXXXXXX..XXXXXXX 100644
120
index XXXXXXX..XXXXXXX 100644
224
--- a/target/arm/translate.c
121
--- a/target/arm/mve_helper.c
225
+++ b/target/arm/translate.c
122
+++ b/target/arm/mve_helper.c
226
@@ -XXX,XX +XXX,XX @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
123
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvuw, 4, uint32_t)
227
tcg_gen_ext16s_i32(dest, var);
124
DO_2SHIFT(OP##b, 1, uint8_t, FN) \
125
DO_2SHIFT(OP##h, 2, uint16_t, FN) \
126
DO_2SHIFT(OP##w, 4, uint32_t, FN)
127
+#define DO_2SHIFT_S(OP, FN) \
128
+ DO_2SHIFT(OP##b, 1, int8_t, FN) \
129
+ DO_2SHIFT(OP##h, 2, int16_t, FN) \
130
+ DO_2SHIFT(OP##w, 4, int32_t, FN)
131
132
#define DO_2SHIFT_SAT_U(OP, FN) \
133
DO_2SHIFT_SAT(OP##b, 1, uint8_t, FN) \
134
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvuw, 4, uint32_t)
135
DO_2SHIFT_SAT(OP##w, 4, int32_t, FN)
136
137
DO_2SHIFT_U(vshli_u, DO_VSHLU)
138
+DO_2SHIFT_S(vshli_s, DO_VSHLS)
139
DO_2SHIFT_SAT_U(vqshli_u, DO_UQSHL_OP)
140
DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
141
DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
142
+DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
143
+DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
144
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-mve.c
147
+++ b/target/arm/translate-mve.c
148
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT(VSHLI, vshli_u, false)
149
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
150
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
151
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
152
+/* These right shifts use a left-shift helper with negated shift count */
153
+DO_2SHIFT(VSHRI_S, vshli_s, true)
154
+DO_2SHIFT(VSHRI_U, vshli_u, true)
155
+DO_2SHIFT(VRSHRI_S, vrshli_s, true)
156
+DO_2SHIFT(VRSHRI_U, vrshli_u, true)
157
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/target/arm/translate-neon.c
160
+++ b/target/arm/translate-neon.c
161
@@ -XXX,XX +XXX,XX @@ static inline int plus1(DisasContext *s, int x)
162
return x + 1;
228
}
163
}
229
164
230
-/* 32x32->64 multiply. Marks inputs as dead. */
165
-static inline int rsub_64(DisasContext *s, int x)
231
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
232
-{
166
-{
233
- TCGv_i32 lo = tcg_temp_new_i32();
167
- return 64 - x;
234
- TCGv_i32 hi = tcg_temp_new_i32();
235
- TCGv_i64 ret;
236
-
237
- tcg_gen_mulu2_i32(lo, hi, a, b);
238
- tcg_temp_free_i32(a);
239
- tcg_temp_free_i32(b);
240
-
241
- ret = tcg_temp_new_i64();
242
- tcg_gen_concat_i32_i64(ret, lo, hi);
243
- tcg_temp_free_i32(lo);
244
- tcg_temp_free_i32(hi);
245
-
246
- return ret;
247
-}
168
-}
248
-
169
-
249
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
170
-static inline int rsub_32(DisasContext *s, int x)
250
-{
171
-{
251
- TCGv_i32 lo = tcg_temp_new_i32();
172
- return 32 - x;
252
- TCGv_i32 hi = tcg_temp_new_i32();
173
-}
253
- TCGv_i64 ret;
174
-static inline int rsub_16(DisasContext *s, int x)
254
-
175
-{
255
- tcg_gen_muls2_i32(lo, hi, a, b);
176
- return 16 - x;
256
- tcg_temp_free_i32(a);
177
-}
257
- tcg_temp_free_i32(b);
178
-static inline int rsub_8(DisasContext *s, int x)
258
-
179
-{
259
- ret = tcg_temp_new_i64();
180
- return 8 - x;
260
- tcg_gen_concat_i32_i64(ret, lo, hi);
261
- tcg_temp_free_i32(lo);
262
- tcg_temp_free_i32(hi);
263
-
264
- return ret;
265
-}
181
-}
266
-
182
-
267
/* Swap low and high halfwords. */
183
static inline int neon_3same_fp_size(DisasContext *s, int x)
268
static void gen_swap_half(TCGv_i32 var)
269
{
184
{
270
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
185
/* Convert 0==fp32, 1==fp16 into a MO_* value */
271
}
272
}
273
274
-static inline void gen_neon_negl(TCGv_i64 var, int size)
275
-{
276
- switch (size) {
277
- case 0: gen_helper_neon_negl_u16(var, var); break;
278
- case 1: gen_helper_neon_negl_u32(var, var); break;
279
- case 2:
280
- tcg_gen_neg_i64(var, var);
281
- break;
282
- default: abort();
283
- }
284
-}
285
-
286
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
287
-{
288
- switch (size) {
289
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
290
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
291
- default: abort();
292
- }
293
-}
294
-
295
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
296
- int size, int u)
297
-{
298
- TCGv_i64 tmp;
299
-
300
- switch ((size << 1) | u) {
301
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
302
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
303
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
304
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
305
- case 4:
306
- tmp = gen_muls_i64_i32(a, b);
307
- tcg_gen_mov_i64(dest, tmp);
308
- tcg_temp_free_i64(tmp);
309
- break;
310
- case 5:
311
- tmp = gen_mulu_i64_i32(a, b);
312
- tcg_gen_mov_i64(dest, tmp);
313
- tcg_temp_free_i64(tmp);
314
- break;
315
- default: abort();
316
- }
317
-
318
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
319
- Don't forget to clean them now. */
320
- if (size < 2) {
321
- tcg_temp_free_i32(a);
322
- tcg_temp_free_i32(b);
323
- }
324
-}
325
-
326
static void gen_neon_narrow_op(int op, int u, int size,
327
TCGv_i32 dest, TCGv_i64 src)
328
{
329
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
330
int u;
331
int vec_size;
332
uint32_t imm;
333
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
334
+ TCGv_i32 tmp, tmp2, tmp3, tmp5;
335
TCGv_ptr ptr1;
336
TCGv_i64 tmp64;
337
338
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
339
return 1;
340
} else { /* (insn & 0x00800010 == 0x00800000) */
341
if (size != 3) {
342
- op = (insn >> 8) & 0xf;
343
- if ((insn & (1 << 6)) == 0) {
344
- /* Three registers of different lengths: handled by decodetree */
345
- return 1;
346
- } else {
347
- /* Two registers and a scalar. NB that for ops of this form
348
- * the ARM ARM labels bit 24 as Q, but it is in our variable
349
- * 'u', not 'q'.
350
- */
351
- if (size == 0) {
352
- return 1;
353
- }
354
- switch (op) {
355
- case 0: /* Integer VMLA scalar */
356
- case 4: /* Integer VMLS scalar */
357
- case 8: /* Integer VMUL scalar */
358
- case 1: /* Float VMLA scalar */
359
- case 5: /* Floating point VMLS scalar */
360
- case 9: /* Floating point VMUL scalar */
361
- case 12: /* VQDMULH scalar */
362
- case 13: /* VQRDMULH scalar */
363
- case 14: /* VQRDMLAH scalar */
364
- case 15: /* VQRDMLSH scalar */
365
- return 1; /* handled by decodetree */
366
-
367
- case 3: /* VQDMLAL scalar */
368
- case 7: /* VQDMLSL scalar */
369
- case 11: /* VQDMULL scalar */
370
- if (u == 1) {
371
- return 1;
372
- }
373
- /* fall through */
374
- case 2: /* VMLAL sclar */
375
- case 6: /* VMLSL scalar */
376
- case 10: /* VMULL scalar */
377
- if (rd & 1) {
378
- return 1;
379
- }
380
- tmp2 = neon_get_scalar(size, rm);
381
- /* We need a copy of tmp2 because gen_neon_mull
382
- * deletes it during pass 0. */
383
- tmp4 = tcg_temp_new_i32();
384
- tcg_gen_mov_i32(tmp4, tmp2);
385
- tmp3 = neon_load_reg(rn, 1);
386
-
387
- for (pass = 0; pass < 2; pass++) {
388
- if (pass == 0) {
389
- tmp = neon_load_reg(rn, 0);
390
- } else {
391
- tmp = tmp3;
392
- tmp2 = tmp4;
393
- }
394
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
395
- if (op != 11) {
396
- neon_load_reg64(cpu_V1, rd + pass);
397
- }
398
- switch (op) {
399
- case 6:
400
- gen_neon_negl(cpu_V0, size);
401
- /* Fall through */
402
- case 2:
403
- gen_neon_addl(size);
404
- break;
405
- case 3: case 7:
406
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
407
- if (op == 7) {
408
- gen_neon_negl(cpu_V0, size);
409
- }
410
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
411
- break;
412
- case 10:
413
- /* no-op */
414
- break;
415
- case 11:
416
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
417
- break;
418
- default:
419
- abort();
420
- }
421
- neon_store_reg64(cpu_V0, rd + pass);
422
- }
423
- break;
424
- default:
425
- g_assert_not_reached();
426
- }
427
- }
428
+ /*
429
+ * Three registers of different lengths, or two registers and
430
+ * a scalar: handled by decodetree
431
+ */
432
+ return 1;
433
} else { /* size == 3 */
434
if (!u) {
435
/* Extract. */
436
--
186
--
437
2.20.1
187
2.20.1
438
188
439
189
diff view generated by jsdifflib
1
Convert the Neon 3-reg-diff insns VQDMULL, VQDMLAL and VQDMLSL:
1
Implement the MVE VHLL (vector shift left long) insn. This has two
2
these are all saturating doubling long multiplies with a possible
2
encodings: the T1 encoding is the usual shift-by-immediate format,
3
accumulate step.
3
and the T2 encoding is a special case where the shift count is always
4
4
equal to the element size.
5
These are the last insns in the group which use the pass-over-each
6
elements loop, so we can delete that code.
7
5
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210628135835.6690-10-peter.maydell@linaro.org
10
---
9
---
11
target/arm/neon-dp.decode | 6 +++
10
target/arm/helper-mve.h | 9 +++++++
12
target/arm/translate-neon.inc.c | 82 +++++++++++++++++++++++++++++++++
11
target/arm/mve.decode | 53 +++++++++++++++++++++++++++++++++++---
13
target/arm/translate.c | 59 ++----------------------
12
target/arm/mve_helper.c | 32 +++++++++++++++++++++++
14
3 files changed, 92 insertions(+), 55 deletions(-)
13
target/arm/translate-mve.c | 15 +++++++++++
14
4 files changed, 105 insertions(+), 4 deletions(-)
15
15
16
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/neon-dp.decode
18
--- a/target/arm/helper-mve.h
19
+++ b/target/arm/neon-dp.decode
19
+++ b/target/arm/helper-mve.h
20
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
21
DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
22
DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
23
DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
24
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
25
+
24
+
26
VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
25
+DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
26
+DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
27
+DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
28
+DEF_HELPER_FLAGS_4(mve_vshllbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/mve.decode
36
+++ b/target/arm/mve.decode
37
@@ -XXX,XX +XXX,XX @@
38
@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
39
@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
40
41
+@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
42
+@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
43
+# VSHLL encoding T2 where shift == esize
44
+@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \
45
+ qd=%qd qm=%qm size=0 shift=8
46
+@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \
47
+ qd=%qd qm=%qm size=1 shift=16
30
+
48
+
31
VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
49
# Right shifts are encoded as N - shift, where N is the element size in bits.
32
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
50
%rshift_i5 16:5 !function=rsub_32
33
+
51
%rshift_i4 16:4 !function=rsub_16
34
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
52
@@ -XXX,XX +XXX,XX @@ VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
35
]
53
VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
36
}
54
VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
37
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
55
38
index XXXXXXX..XXXXXXX 100644
56
-VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
39
--- a/target/arm/translate-neon.inc.c
57
-VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
40
+++ b/target/arm/translate-neon.inc.c
58
+# The VSHLL T2 encoding is not a @2op pattern, but is here because it
41
@@ -XXX,XX +XXX,XX @@ DO_VMLAL(VMLAL_S,mull_s,add)
59
+# overlaps what would be size=0b11 VMULH/VRMULH
42
DO_VMLAL(VMLAL_U,mull_u,add)
43
DO_VMLAL(VMLSL_S,mull_s,sub)
44
DO_VMLAL(VMLSL_U,mull_u,sub)
45
+
46
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
47
+{
60
+{
48
+ gen_helper_neon_mull_s16(rd, rn, rm);
61
+ VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
49
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
62
+ VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
63
64
-VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
65
-VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
66
+ VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
50
+}
67
+}
51
+
68
+
52
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
53
+{
69
+{
54
+ gen_mull_s32(rd, rn, rm);
70
+ VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
55
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
71
+ VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
72
+
73
+ VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
56
+}
74
+}
57
+
75
+
58
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
59
+{
76
+{
60
+ static NeonGenTwoOpWidenFn * const opfn[] = {
77
+ VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
61
+ NULL,
78
+ VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
62
+ gen_VQDMULL_16,
63
+ gen_VQDMULL_32,
64
+ NULL,
65
+ };
66
+
79
+
67
+ return do_long_3d(s, a, opfn[a->size], NULL);
80
+ VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
68
+}
81
+}
69
+
82
+
70
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
71
+{
83
+{
72
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
84
+ VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
85
+ VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
86
+
87
+ VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
73
+}
88
+}
89
90
VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
91
VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
92
@@ -XXX,XX +XXX,XX @@ VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
93
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
94
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
95
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
74
+
96
+
75
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
97
+# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file
76
+{
98
+VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
77
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
99
+VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
78
+}
79
+
100
+
80
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
101
+VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
81
+{
102
+VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
82
+ static NeonGenTwoOpWidenFn * const opfn[] = {
83
+ NULL,
84
+ gen_VQDMULL_16,
85
+ gen_VQDMULL_32,
86
+ NULL,
87
+ };
88
+ static NeonGenTwo64OpFn * const accfn[] = {
89
+ NULL,
90
+ gen_VQDMLAL_acc_16,
91
+ gen_VQDMLAL_acc_32,
92
+ NULL,
93
+ };
94
+
103
+
95
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
104
+VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
96
+}
105
+VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
97
+
106
+
98
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
107
+VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
99
+{
108
+VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
100
+ gen_helper_neon_negl_u32(rm, rm);
109
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
101
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
110
index XXXXXXX..XXXXXXX 100644
102
+}
111
--- a/target/arm/mve_helper.c
112
+++ b/target/arm/mve_helper.c
113
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
114
DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
115
DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
116
DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
103
+
117
+
104
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
118
+/*
105
+{
119
+ * Long shifts taking half-sized inputs from top or bottom of the input
106
+ tcg_gen_neg_i64(rm, rm);
120
+ * vector and producing a double-width result. ESIZE, TYPE are for
107
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
121
+ * the input, and LESIZE, LTYPE for the output.
108
+}
122
+ * Unlike the normal shift helpers, we do not handle negative shift counts,
123
+ * because the long shift is strictly left-only.
124
+ */
125
+#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
126
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
127
+ void *vm, uint32_t shift) \
128
+ { \
129
+ LTYPE *d = vd; \
130
+ TYPE *m = vm; \
131
+ uint16_t mask = mve_element_mask(env); \
132
+ unsigned le; \
133
+ assert(shift <= 16); \
134
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
135
+ LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \
136
+ mergemask(&d[H##LESIZE(le)], r, mask); \
137
+ } \
138
+ mve_advance_vpt(env); \
139
+ }
109
+
140
+
110
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
141
+#define DO_VSHLL_ALL(OP, TOP) \
111
+{
142
+ DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \
112
+ static NeonGenTwoOpWidenFn * const opfn[] = {
143
+ DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \
113
+ NULL,
144
+ DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \
114
+ gen_VQDMULL_16,
145
+ DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \
115
+ gen_VQDMULL_32,
116
+ NULL,
117
+ };
118
+ static NeonGenTwo64OpFn * const accfn[] = {
119
+ NULL,
120
+ gen_VQDMLSL_acc_16,
121
+ gen_VQDMLSL_acc_32,
122
+ NULL,
123
+ };
124
+
146
+
125
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
147
+DO_VSHLL_ALL(vshllb, false)
126
+}
148
+DO_VSHLL_ALL(vshllt, true)
127
diff --git a/target/arm/translate.c b/target/arm/translate.c
149
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
128
index XXXXXXX..XXXXXXX 100644
150
index XXXXXXX..XXXXXXX 100644
129
--- a/target/arm/translate.c
151
--- a/target/arm/translate-mve.c
130
+++ b/target/arm/translate.c
152
+++ b/target/arm/translate-mve.c
131
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
153
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT(VSHRI_S, vshli_s, true)
132
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
154
DO_2SHIFT(VSHRI_U, vshli_u, true)
133
{0, 0, 0, 7}, /* VABDL */
155
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
134
{0, 0, 0, 7}, /* VMLAL */
156
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
135
- {0, 0, 0, 9}, /* VQDMLAL */
157
+
136
+ {0, 0, 0, 7}, /* VQDMLAL */
158
+#define DO_VSHLL(INSN, FN) \
137
{0, 0, 0, 7}, /* VMLSL */
159
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
138
- {0, 0, 0, 9}, /* VQDMLSL */
160
+ { \
139
+ {0, 0, 0, 7}, /* VQDMLSL */
161
+ static MVEGenTwoOpShiftFn * const fns[] = { \
140
{0, 0, 0, 7}, /* Integer VMULL */
162
+ gen_helper_mve_##FN##b, \
141
- {0, 0, 0, 9}, /* VQDMULL */
163
+ gen_helper_mve_##FN##h, \
142
+ {0, 0, 0, 7}, /* VQDMULL */
164
+ }; \
143
{0, 0, 0, 0xa}, /* Polynomial VMULL */
165
+ return do_2shift(s, a, fns[a->size], false); \
144
{0, 0, 0, 7}, /* Reserved: always UNDEF */
166
+ }
145
};
167
+
146
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
168
+DO_VSHLL(VSHLL_BS, vshllbs)
147
}
169
+DO_VSHLL(VSHLL_BU, vshllbu)
148
return 0;
170
+DO_VSHLL(VSHLL_TS, vshllts)
149
}
171
+DO_VSHLL(VSHLL_TU, vshlltu)
150
-
151
- /* Avoid overlapping operands. Wide source operands are
152
- always aligned so will never overlap with wide
153
- destinations in problematic ways. */
154
- if (rd == rm) {
155
- tmp = neon_load_reg(rm, 1);
156
- neon_store_scratch(2, tmp);
157
- } else if (rd == rn) {
158
- tmp = neon_load_reg(rn, 1);
159
- neon_store_scratch(2, tmp);
160
- }
161
- tmp3 = NULL;
162
- for (pass = 0; pass < 2; pass++) {
163
- if (pass == 1 && rd == rn) {
164
- tmp = neon_load_scratch(2);
165
- } else {
166
- tmp = neon_load_reg(rn, pass);
167
- }
168
- if (pass == 1 && rd == rm) {
169
- tmp2 = neon_load_scratch(2);
170
- } else {
171
- tmp2 = neon_load_reg(rm, pass);
172
- }
173
- switch (op) {
174
- case 9: case 11: case 13:
175
- /* VQDMLAL, VQDMLSL, VQDMULL */
176
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
177
- break;
178
- default: /* 15 is RESERVED: caught earlier */
179
- abort();
180
- }
181
- if (op == 13) {
182
- /* VQDMULL */
183
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
184
- neon_store_reg64(cpu_V0, rd + pass);
185
- } else {
186
- /* Accumulate. */
187
- neon_load_reg64(cpu_V1, rd + pass);
188
- switch (op) {
189
- case 9: case 11: /* VQDMLAL, VQDMLSL */
190
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
191
- if (op == 11) {
192
- gen_neon_negl(cpu_V0, size);
193
- }
194
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
195
- break;
196
- default:
197
- abort();
198
- }
199
- neon_store_reg64(cpu_V0, rd + pass);
200
- }
201
- }
202
+ abort(); /* all others handled by decodetree */
203
} else {
204
/* Two registers and a scalar. NB that for ops of this form
205
* the ARM ARM labels bit 24 as Q, but it is in our variable
206
--
172
--
207
2.20.1
173
2.20.1
208
174
209
175
diff view generated by jsdifflib
1
The widenfn() in do_vshll_2sh() does not free the input 32-bit
1
Implement the MVE VSRI and VSLI insns, which perform a
2
TCGv, so we need to do this in the calling code.
2
shift-and-insert operation.
3
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Message-id: 20210628135835.6690-11-peter.maydell@linaro.org
7
---
7
---
8
target/arm/translate-neon.inc.c | 2 ++
8
target/arm/helper-mve.h | 8 ++++++++
9
1 file changed, 2 insertions(+)
9
target/arm/mve.decode | 9 ++++++++
10
target/arm/mve_helper.c | 42 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-mve.c | 3 +++
12
4 files changed, 62 insertions(+)
10
13
11
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
14
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-neon.inc.c
16
--- a/target/arm/helper-mve.h
14
+++ b/target/arm/translate-neon.inc.c
17
+++ b/target/arm/helper-mve.h
15
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
16
tmp = tcg_temp_new_i64();
19
DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
17
20
DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
18
widenfn(tmp, rm0);
21
DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
19
+ tcg_temp_free_i32(rm0);
22
+
20
if (a->shift != 0) {
23
+DEF_HELPER_FLAGS_4(mve_vsrib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
21
tcg_gen_shli_i64(tmp, tmp, a->shift);
24
+DEF_HELPER_FLAGS_4(mve_vsrih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
25
+DEF_HELPER_FLAGS_4(mve_vsriw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
26
+
24
neon_store_reg64(tmp, a->vd);
27
+DEF_HELPER_FLAGS_4(mve_vslib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
25
28
+DEF_HELPER_FLAGS_4(mve_vslih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
26
widenfn(tmp, rm1);
29
+DEF_HELPER_FLAGS_4(mve_vsliw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
+ tcg_temp_free_i32(rm1);
30
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
28
if (a->shift != 0) {
31
index XXXXXXX..XXXXXXX 100644
29
tcg_gen_shli_i64(tmp, tmp, a->shift);
32
--- a/target/arm/mve.decode
30
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
33
+++ b/target/arm/mve.decode
34
@@ -XXX,XX +XXX,XX @@ VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
35
36
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
37
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
38
+
39
+# Shift-and-insert
40
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_b
41
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_h
42
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w
43
+
44
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
45
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
46
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
47
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/mve_helper.c
50
+++ b/target/arm/mve_helper.c
51
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
52
DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
53
DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
54
55
+/* Shift-and-insert; we always work with 64 bits at a time */
56
+#define DO_2SHIFT_INSERT(OP, ESIZE, SHIFTFN, MASKFN) \
57
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
58
+ void *vm, uint32_t shift) \
59
+ { \
60
+ uint64_t *d = vd, *m = vm; \
61
+ uint16_t mask; \
62
+ uint64_t shiftmask; \
63
+ unsigned e; \
64
+ if (shift == 0 || shift == ESIZE * 8) { \
65
+ /* \
66
+ * Only VSLI can shift by 0; only VSRI can shift by <dt>. \
67
+ * The generic logic would give the right answer for 0 but \
68
+ * fails for <dt>. \
69
+ */ \
70
+ goto done; \
71
+ } \
72
+ assert(shift < ESIZE * 8); \
73
+ mask = mve_element_mask(env); \
74
+ /* ESIZE / 2 gives the MO_* value if ESIZE is in [1,2,4] */ \
75
+ shiftmask = dup_const(ESIZE / 2, MASKFN(ESIZE * 8, shift)); \
76
+ for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
77
+ uint64_t r = (SHIFTFN(m[H8(e)], shift) & shiftmask) | \
78
+ (d[H8(e)] & ~shiftmask); \
79
+ mergemask(&d[H8(e)], r, mask); \
80
+ } \
81
+done: \
82
+ mve_advance_vpt(env); \
83
+ }
84
+
85
+#define DO_SHL(N, SHIFT) ((N) << (SHIFT))
86
+#define DO_SHR(N, SHIFT) ((N) >> (SHIFT))
87
+#define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT))
88
+#define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT))
89
+
90
+DO_2SHIFT_INSERT(vsrib, 1, DO_SHR, SHR_MASK)
91
+DO_2SHIFT_INSERT(vsrih, 2, DO_SHR, SHR_MASK)
92
+DO_2SHIFT_INSERT(vsriw, 4, DO_SHR, SHR_MASK)
93
+DO_2SHIFT_INSERT(vslib, 1, DO_SHL, SHL_MASK)
94
+DO_2SHIFT_INSERT(vslih, 2, DO_SHL, SHL_MASK)
95
+DO_2SHIFT_INSERT(vsliw, 4, DO_SHL, SHL_MASK)
96
+
97
/*
98
* Long shifts taking half-sized inputs from top or bottom of the input
99
* vector and producing a double-width result. ESIZE, TYPE are for
100
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/target/arm/translate-mve.c
103
+++ b/target/arm/translate-mve.c
104
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT(VSHRI_U, vshli_u, true)
105
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
106
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
107
108
+DO_2SHIFT(VSRI, vsri, false)
109
+DO_2SHIFT(VSLI, vsli, false)
110
+
111
#define DO_VSHLL(INSN, FN) \
112
static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
113
{ \
31
--
114
--
32
2.20.1
115
2.20.1
33
116
34
117
diff view generated by jsdifflib
1
Convert the float versions of VMLA, VMLS and VMUL in the Neon
1
Implement the MVE shift-right-and-narrow insn VSHRN and VRSHRN.
2
2-reg-scalar group to decodetree.
2
3
do_urshr() is borrowed from sve_helper.c.
3
4
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210628135835.6690-12-peter.maydell@linaro.org
5
---
8
---
6
As noted in the comment on the WRAP_FP_FN macro, we could have
9
target/arm/helper-mve.h | 10 ++++++++++
7
had a do_2scalar_fp() function, but for 3 insns it seemed
10
target/arm/mve.decode | 11 +++++++++++
8
simpler to just do the wrapping to get hold of the fpstatus ptr.
11
target/arm/mve_helper.c | 40 ++++++++++++++++++++++++++++++++++++++
9
(These are the only fp insns in the group.)
12
target/arm/translate-mve.c | 15 ++++++++++++++
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
4 files changed, 76 insertions(+)
11
---
12
target/arm/neon-dp.decode | 3 ++
13
target/arm/translate-neon.inc.c | 65 +++++++++++++++++++++++++++++++++
14
target/arm/translate.c | 37 ++-----------------
15
3 files changed, 71 insertions(+), 34 deletions(-)
16
14
17
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/neon-dp.decode
17
--- a/target/arm/helper-mve.h
20
+++ b/target/arm/neon-dp.decode
18
+++ b/target/arm/helper-mve.h
21
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vsriw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
22
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
DEF_HELPER_FLAGS_4(mve_vslib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
23
21
DEF_HELPER_FLAGS_4(mve_vslih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
22
DEF_HELPER_FLAGS_4(mve_vsliw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
25
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
23
+
26
24
+DEF_HELPER_FLAGS_4(mve_vshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
27
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
25
+DEF_HELPER_FLAGS_4(mve_vshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
28
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
26
+DEF_HELPER_FLAGS_4(mve_vshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
29
27
+DEF_HELPER_FLAGS_4(mve_vshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
30
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
28
+
31
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
29
+DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
32
]
30
+DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
}
31
+DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
32
+DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
33
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
35
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-neon.inc.c
35
--- a/target/arm/mve.decode
37
+++ b/target/arm/translate-neon.inc.c
36
+++ b/target/arm/mve.decode
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
37
@@ -XXX,XX +XXX,XX @@ VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w
39
38
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
40
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
39
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
41
}
40
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
41
+
42
+# Narrowing shifts (which only support b and h sizes)
43
+VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
44
+VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
45
+VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
46
+VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
47
+
48
+VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
49
+VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
50
+VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
51
+VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
52
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/mve_helper.c
55
+++ b/target/arm/mve_helper.c
56
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_INSERT(vsliw, 4, DO_SHL, SHL_MASK)
57
58
DO_VSHLL_ALL(vshllb, false)
59
DO_VSHLL_ALL(vshllt, true)
42
+
60
+
43
+/*
61
+/*
44
+ * Rather than have a float-specific version of do_2scalar just for
62
+ * Narrowing right shifts, taking a double sized input, shifting it
45
+ * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
63
+ * and putting the result in either the top or bottom half of the output.
46
+ * a NeonGenTwoOpFn.
64
+ * ESIZE, TYPE are the output, and LESIZE, LTYPE the input.
47
+ */
65
+ */
48
+#define WRAP_FP_FN(WRAPNAME, FUNC) \
66
+#define DO_VSHRN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
49
+ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
67
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
68
+ void *vm, uint32_t shift) \
50
+ { \
69
+ { \
51
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
70
+ LTYPE *m = vm; \
52
+ FUNC(rd, rn, rm, fpstatus); \
71
+ TYPE *d = vd; \
53
+ tcg_temp_free_ptr(fpstatus); \
72
+ uint16_t mask = mve_element_mask(env); \
73
+ unsigned le; \
74
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
75
+ TYPE r = FN(m[H##LESIZE(le)], shift); \
76
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
77
+ } \
78
+ mve_advance_vpt(env); \
54
+ }
79
+ }
55
+
80
+
56
+WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
81
+#define DO_VSHRN_ALL(OP, FN) \
57
+WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
82
+ DO_VSHRN(OP##bb, false, 1, uint8_t, 2, uint16_t, FN) \
58
+WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
83
+ DO_VSHRN(OP##bh, false, 2, uint16_t, 4, uint32_t, FN) \
84
+ DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \
85
+ DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN)
59
+
86
+
60
+static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        NULL, /* TODO: fp16 support */
+        gen_VMUL_F_mul,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        NULL, /* TODO: fp16 support */
+        gen_VMUL_F_mul,
+        NULL,
+    };
+    static NeonGenTwoOpFn * const accfn[] = {
+        NULL,
+        NULL, /* TODO: fp16 support */
+        gen_VMUL_F_add,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        NULL, /* TODO: fp16 support */
+        gen_VMUL_F_mul,
+        NULL,
+    };
+    static NeonGenTwoOpFn * const accfn[] = {
+        NULL,
+        NULL, /* TODO: fp16 support */
+        gen_VMUL_F_sub,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            case 0: /* Integer VMLA scalar */
            case 4: /* Integer VMLS scalar */
            case 8: /* Integer VMUL scalar */
-                return 1; /* handled by decodetree */
-
            case 1: /* Float VMLA scalar */
            case 5: /* Floating point VMLS scalar */
            case 9: /* Floating point VMUL scalar */
-                if (size == 1) {
-                    return 1;
-                }
-                /* fall through */
+                return 1; /* handled by decodetree */
+
            case 12: /* VQDMULH scalar */
            case 13: /* VQRDMULH scalar */
                if (u && ((rd | rn) & 1)) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    } else {
                        gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
                    }
-                } else if (op == 13) {
+                } else {
                    if (size == 1) {
                        gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
                    } else {
                        gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
                    }
-                } else {
-                    TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                    gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
-                    tcg_temp_free_ptr(fpstatus);
                }
                tcg_temp_free_i32(tmp2);
-                if (op < 8) {
-                    /* Accumulate.  */
-                    tmp2 = neon_load_reg(rd, pass);
-                    switch (op) {
-                    case 1:
-                    {
-                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                        gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
-                        tcg_temp_free_ptr(fpstatus);
-                        break;
-                    }
-                    case 5:
-                    {
-                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                        gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
-                        tcg_temp_free_ptr(fpstatus);
-                        break;
-                    }
-                    default:
-                        abort();
-                    }
-                    tcg_temp_free_i32(tmp2);
-                }
                neon_store_reg(rd, pass, tmp);
            }
            break;
--
2.20.1

+static inline uint64_t do_urshr(uint64_t x, unsigned sh)
+{
+    if (likely(sh < 64)) {
+        return (x >> sh) + ((x >> (sh - 1)) & 1);
+    } else if (sh == 64) {
+        return x >> 63;
+    } else {
+        return 0;
+    }
+}
+
+DO_VSHRN_ALL(vshrn, DO_SHR)
+DO_VSHRN_ALL(vrshrn, do_urshr)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_VSHLL(VSHLL_BS, vshllbs)
 DO_VSHLL(VSHLL_BU, vshllbu)
 DO_VSHLL(VSHLL_TS, vshllts)
 DO_VSHLL(VSHLL_TU, vshlltu)
+
+#define DO_2SHIFT_N(INSN, FN)                                   \
+    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
+    {                                                           \
+        static MVEGenTwoOpShiftFn * const fns[] = {             \
+            gen_helper_mve_##FN##b,                             \
+            gen_helper_mve_##FN##h,                             \
+        };                                                      \
+        return do_2shift(s, a, fns[a->size], false);            \
+    }
+
+DO_2SHIFT_N(VSHRNB, vshrnb)
+DO_2SHIFT_N(VSHRNT, vshrnt)
+DO_2SHIFT_N(VRSHRNB, vrshrnb)
+DO_2SHIFT_N(VRSHRNT, vrshrnt)
--
2.20.1
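[Editor's note: an illustrative, standalone C model of the rounding right
shift that do_urshr() above implements may help when reviewing. It is not
part of the series; the name model_urshr is invented, and it assumes
sh >= 1, which the shift-count encodings guarantee.]

    #include <assert.h>
    #include <stdint.h>

    /* Shift right by sh, then add back the last bit shifted out:
     * round-to-nearest, ties away from zero for unsigned values.
     */
    static uint64_t model_urshr(uint64_t x, unsigned sh)
    {
        if (sh < 64) {
            return (x >> sh) + ((x >> (sh - 1)) & 1);
        } else if (sh == 64) {
            return x >> 63; /* only the rounding bit survives */
        }
        return 0;
    }

    int main(void)
    {
        assert(model_urshr(7, 1) == 4);  /* 3.5 rounds up to 4 */
        assert(model_urshr(6, 1) == 3);  /* exact, no rounding */
        assert(model_urshr(UINT64_MAX, 64) == 1);
        return 0;
    }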
Convert the Neon 3-reg-diff insns VMULL, VMLAL and VMLSL; these perform
a 32x32->64 multiply with possible accumulate.

Note that for VMLSL we do the accumulate directly with a subtraction
rather than doing a negate-then-add as the old code did.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  9 +++++
 target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 21 +++-------
 3 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm

   VABDL_S_3d   1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
   VABDL_U_3d   1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+
+  VMLAL_S_3d   1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+  VMLAL_U_3d   1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+
+  VMLSL_S_3d   1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+  VMLSL_U_3d   1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+
+  VMULL_S_3d   1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+  VMULL_U_3d   1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)

     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
 }
+
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+    TCGv_i32 lo = tcg_temp_new_i32();
+    TCGv_i32 hi = tcg_temp_new_i32();
+
+    tcg_gen_muls2_i32(lo, hi, rn, rm);
+    tcg_gen_concat_i32_i64(rd, lo, hi);
+
+    tcg_temp_free_i32(lo);
+    tcg_temp_free_i32(hi);
+}
+
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+    TCGv_i32 lo = tcg_temp_new_i32();
+    TCGv_i32 hi = tcg_temp_new_i32();
+
+    tcg_gen_mulu2_i32(lo, hi, rn, rm);
+    tcg_gen_concat_i32_i64(rd, lo, hi);
+
+    tcg_temp_free_i32(lo);
+    tcg_temp_free_i32(hi);
+}
+
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_mull_s8,
+        gen_helper_neon_mull_s16,
+        gen_mull_s32,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
+{
+    static NeonGenTwoOpWidenFn * const opfn[] = {
+        gen_helper_neon_mull_u8,
+        gen_helper_neon_mull_u16,
+        gen_mull_u32,
+        NULL,
+    };
+
+    return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL(INSN,MULL,ACC)                                         \
+    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
+    {                                                                   \
+        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
+            gen_helper_neon_##MULL##8,                                  \
+            gen_helper_neon_##MULL##16,                                 \
+            gen_##MULL##32,                                             \
+            NULL,                                                       \
+        };                                                              \
+        static NeonGenTwo64OpFn * const accfn[] = {                     \
+            gen_helper_neon_##ACC##l_u16,                               \
+            gen_helper_neon_##ACC##l_u32,                               \
+            tcg_gen_##ACC##_i64,                                        \
+            NULL,                                                       \
+        };                                                              \
+        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
+    }
+
+DO_VMLAL(VMLAL_S,mull_s,add)
+DO_VMLAL(VMLAL_U,mull_u,add)
+DO_VMLAL(VMLSL_S,mull_s,sub)
+DO_VMLAL(VMLSL_U,mull_u,sub)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    {0, 0, 0, 7}, /* VABAL */
                    {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
                    {0, 0, 0, 7}, /* VABDL */
-                    {0, 0, 0, 0}, /* VMLAL */
+                    {0, 0, 0, 7}, /* VMLAL */
                    {0, 0, 0, 9}, /* VQDMLAL */
-                    {0, 0, 0, 0}, /* VMLSL */
+                    {0, 0, 0, 7}, /* VMLSL */
                    {0, 0, 0, 9}, /* VQDMLSL */
-                    {0, 0, 0, 0}, /* Integer VMULL */
+                    {0, 0, 0, 7}, /* Integer VMULL */
                    {0, 0, 0, 9}, /* VQDMULL */
                    {0, 0, 0, 0xa}, /* Polynomial VMULL */
                    {0, 0, 0, 7}, /* Reserved: always UNDEF */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    tmp2 = neon_load_reg(rm, pass);
                }
                switch (op) {
-                case 8: case 9: case 10: case 11: case 12: case 13:
-                    /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
+                case 9: case 11: case 13:
+                    /* VQDMLAL, VQDMLSL, VQDMULL */
                    gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
                    break;
                default: /* 15 is RESERVED: caught earlier */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    /* VQDMULL */
                    gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                    neon_store_reg64(cpu_V0, rd + pass);
-                } else if (op == 5 || (op >= 8 && op <= 11)) {
+                } else {
                    /* Accumulate.  */
                    neon_load_reg64(cpu_V1, rd + pass);
                    switch (op) {
-                    case 10: /* VMLSL */
-                        gen_neon_negl(cpu_V0, size);
-                        /* Fall through */
-                    case 8: /* VABAL, VMLAL */
-                        gen_neon_addl(size);
-                        break;
                    case 9: case 11: /* VQDMLAL, VQDMLSL */
                        gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                        if (op == 11) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        abort();
                    }
                    neon_store_reg64(cpu_V0, rd + pass);
-                } else {
-                    /* Write back the result.  */
-                    neon_store_reg64(cpu_V0, rd + pass);
                }
            }
        } else {
--
2.20.1

Implement the MVE saturating shift-right-and-narrow insns
VQSHRN, VQSHRUN, VQRSHRN and VQRSHRUN.

do_srshr() is borrowed from sve_helper.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-13-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    |  30 +++++++++++
 target/arm/mve.decode      |  28 ++++++++++
 target/arm/mve_helper.c    | 104 +++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c |  12 +++++
 4 files changed, 174 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
 VRSHRNB      111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
 VRSHRNT      111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
 VRSHRNT      111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
+
+VQSHRNB_S    111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNB_S    111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNT_S    111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNT_S    111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNB_U    111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNB_U    111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNT_U    111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNT_U    111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
+
+VQSHRUNB     111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
+VQSHRUNB     111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
+VQSHRUNT     111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
+VQSHRUNT     111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
+
+VQRSHRNB_S   111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNB_S   111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNT_S   111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNT_S   111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNB_U   111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNB_U   111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNT_U   111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNT_U   111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
+
+VQRSHRUNB    111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
+VQRSHRUNB    111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
+VQRSHRUNT    111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
+VQRSHRUNT    111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ static inline uint64_t do_urshr(uint64_t x, unsigned sh)
     }
 }

+static inline int64_t do_srshr(int64_t x, unsigned sh)
+{
+    if (likely(sh < 64)) {
+        return (x >> sh) + ((x >> (sh - 1)) & 1);
+    } else {
+        /* Rounding the sign bit always produces 0. */
+        return 0;
+    }
+}
+
 DO_VSHRN_ALL(vshrn, DO_SHR)
 DO_VSHRN_ALL(vrshrn, do_urshr)
+
+static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max,
+                                 bool *satp)
+{
+    if (val > max) {
+        *satp = true;
+        return max;
+    } else if (val < min) {
+        *satp = true;
+        return min;
+    } else {
+        return val;
+    }
+}
+
+/* Saturating narrowing right shifts */
+#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN)           \
+    void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd,             \
+                                void *vm, uint32_t shift)               \
+    {                                                                   \
+        LTYPE *m = vm;                                                  \
+        TYPE *d = vd;                                                   \
+        uint16_t mask = mve_element_mask(env);                          \
+        bool qc = false;                                                \
+        unsigned le;                                                    \
+        for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) {         \
+            bool sat = false;                                           \
+            TYPE r = FN(m[H##LESIZE(le)], shift, &sat);                 \
+            mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask);             \
+            qc |= sat && (mask & 1 << (TOP * ESIZE));                   \
+        }                                                               \
+        if (qc) {                                                       \
+            env->vfp.qc[0] = qc;                                        \
+        }                                                               \
+        mve_advance_vpt(env);                                           \
+    }
+
+#define DO_VSHRN_SAT_UB(BOP, TOP, FN)                           \
+    DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN)       \
+    DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
+
+#define DO_VSHRN_SAT_UH(BOP, TOP, FN)                           \
+    DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN)      \
+    DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
+
+#define DO_VSHRN_SAT_SB(BOP, TOP, FN)                           \
+    DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN)         \
+    DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
+
+#define DO_VSHRN_SAT_SH(BOP, TOP, FN)                           \
+    DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN)        \
+    DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
+
+#define DO_SHRN_SB(N, M, SATP)                                  \
+    do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP)
+#define DO_SHRN_UB(N, M, SATP)                                  \
+    do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP)
+#define DO_SHRUN_B(N, M, SATP)                                  \
+    do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP)
+
+#define DO_SHRN_SH(N, M, SATP)                                  \
+    do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP)
+#define DO_SHRN_UH(N, M, SATP)                                  \
+    do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP)
+#define DO_SHRUN_H(N, M, SATP)                                  \
+    do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP)
+
+#define DO_RSHRN_SB(N, M, SATP)                                 \
+    do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP)
+#define DO_RSHRN_UB(N, M, SATP)                                 \
+    do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP)
+#define DO_RSHRUN_B(N, M, SATP)                                 \
+    do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP)
+
+#define DO_RSHRN_SH(N, M, SATP)                                 \
+    do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP)
+#define DO_RSHRN_UH(N, M, SATP)                                 \
+    do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP)
+#define DO_RSHRUN_H(N, M, SATP)                                 \
+    do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP)
+
+DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB)
+DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH)
+DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB)
+DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH)
+DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B)
+DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H)
+
+DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB)
+DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH)
+DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB)
+DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
+DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
+DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_N(VSHRNB, vshrnb)
 DO_2SHIFT_N(VSHRNT, vshrnt)
 DO_2SHIFT_N(VRSHRNB, vrshrnb)
 DO_2SHIFT_N(VRSHRNT, vrshrnt)
+DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
+DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
+DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
+DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
+DO_2SHIFT_N(VQSHRUNB, vqshrunb)
+DO_2SHIFT_N(VQSHRUNT, vqshrunt)
+DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
+DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
+DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
+DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
+DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
+DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
--
2.20.1
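[Editor's note: a minimal scalar model of one saturating rounded-narrow
lane, combining the patch's do_srshr() and do_sat_bhs() logic, may be
useful when reviewing. It is illustrative only and not part of the
series; model_srshr and model_sat are invented names, and sh >= 1 is
assumed, as the decode guarantees.]

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Rounding signed right shift, as in do_srshr(). */
    static int64_t model_srshr(int64_t x, unsigned sh)
    {
        if (sh < 64) {
            return (x >> sh) + ((x >> (sh - 1)) & 1);
        }
        return 0; /* rounding the sign bit always produces 0 */
    }

    /* Clamp to [min, max] and record saturation, as in do_sat_bhs(). */
    static int64_t model_sat(int64_t val, int64_t min, int64_t max, bool *satp)
    {
        if (val > max) { *satp = true; return max; }
        if (val < min) { *satp = true; return min; }
        return val;
    }

    int main(void)
    {
        bool sat = false;
        assert(model_srshr(5, 1) == 3);   /* 2.5 rounds to 3 */
        assert(model_srshr(-5, 1) == -2); /* -2.5 rounds to -2 (ties go up) */
        /* VQRSHRN-style lane: 600 >> 2 = 150, which saturates to int8_t. */
        assert(model_sat(model_srshr(600, 2), INT8_MIN, INT8_MAX, &sat) == 127);
        assert(sat); /* the Q flag would be set for this beat */
        return 0;
    }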
Convert the VQRDMLAH and VQRDMLSH insns in the 2-reg-scalar
group to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  3 ++
 target/arm/translate-neon.inc.c | 74 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 38 +----------------
 3 files changed, 79 insertions(+), 36 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm

   VQDMULH_2sc  1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
   VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
+
+  VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
+  VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)

     return do_2scalar(s, a, opfn[a->size], NULL);
 }
+
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
+                            NeonGenThreeOpEnvFn *opfn)
+{
+    /*
+     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
+     * performs a kind of fused op-then-accumulate using a helper
+     * function that takes all of rd, rn and the scalar at once.
+     */
+    TCGv_i32 scalar;
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    if (!dc_isar_feature(aa32_rdm, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!opfn) {
+        /* Bad size (including size == 3, which is a different insn group) */
+        return false;
+    }
+
+    if (a->q && ((a->vd | a->vn) & 1)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    scalar = neon_get_scalar(a->size, a->vm);
+
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 rn = neon_load_reg(a->vn, pass);
+        TCGv_i32 rd = neon_load_reg(a->vd, pass);
+        opfn(rd, cpu_env, rn, scalar, rd);
+        tcg_temp_free_i32(rn);
+        neon_store_reg(a->vd, pass, rd);
+    }
+    tcg_temp_free_i32(scalar);
+
+    return true;
+}
+
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenThreeOpEnvFn *opfn[] = {
+        NULL,
+        gen_helper_neon_qrdmlah_s16,
+        gen_helper_neon_qrdmlah_s32,
+        NULL,
+    };
+    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenThreeOpEnvFn *opfn[] = {
+        NULL,
+        gen_helper_neon_qrdmlsh_s16,
+        gen_helper_neon_qrdmlsh_s32,
+        NULL,
+    };
+    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            case 9: /* Floating point VMUL scalar */
            case 12: /* VQDMULH scalar */
            case 13: /* VQRDMULH scalar */
+            case 14: /* VQRDMLAH scalar */
+            case 15: /* VQRDMLSH scalar */
                return 1; /* handled by decodetree */

            case 3: /* VQDMLAL scalar */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    neon_store_reg64(cpu_V0, rd + pass);
                }
                break;
-            case 14: /* VQRDMLAH scalar */
-            case 15: /* VQRDMLSH scalar */
-                {
-                    NeonGenThreeOpEnvFn *fn;
-
-                    if (!dc_isar_feature(aa32_rdm, s)) {
-                        return 1;
-                    }
-                    if (u && ((rd | rn) & 1)) {
-                        return 1;
-                    }
-                    if (op == 14) {
-                        if (size == 1) {
-                            fn = gen_helper_neon_qrdmlah_s16;
-                        } else {
-                            fn = gen_helper_neon_qrdmlah_s32;
-                        }
-                    } else {
-                        if (size == 1) {
-                            fn = gen_helper_neon_qrdmlsh_s16;
-                        } else {
-                            fn = gen_helper_neon_qrdmlsh_s32;
-                        }
-                    }
-
-                    tmp2 = neon_get_scalar(size, rm);
-                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
-                        tmp = neon_load_reg(rn, pass);
-                        tmp3 = neon_load_reg(rd, pass);
-                        fn(tmp, cpu_env, tmp, tmp2, tmp3);
-                        tcg_temp_free_i32(tmp3);
-                        neon_store_reg(rd, pass, tmp);
-                    }
-                    tcg_temp_free_i32(tmp2);
-                }
-                break;
            default:
                g_assert_not_reached();
            }
--
2.20.1

Implement the MVE VSHLC insn, which performs a shift left of the
entire vector with carry in bits provided from a general purpose
register and carry out bits written back to that register.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-14-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    |  2 ++
 target/arm/mve.decode      |  2 ++
 target/arm/mve_helper.c    | 38 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
 VQRSHRUNB    111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
 VQRSHRUNT    111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
 VQRSHRUNT    111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
+
+VSHLC        111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB)
 DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
 DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
 DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
+
+uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
+                           uint32_t shift)
+{
+    uint32_t *d = vd;
+    uint16_t mask = mve_element_mask(env);
+    unsigned e;
+    uint32_t r;
+
+    /*
+     * For each 32-bit element, we shift it left, bringing in the
+     * low 'shift' bits of rdm at the bottom. Bits shifted out at
+     * the top become the new rdm, if the predicate mask permits.
+     * The final rdm value is returned to update the register.
+     * shift == 0 here means "shift by 32 bits".
+     */
+    if (shift == 0) {
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+            r = rdm;
+            if (mask & 1) {
+                rdm = d[H4(e)];
+            }
+            mergemask(&d[H4(e)], r, mask);
+        }
+    } else {
+        uint32_t shiftmask = MAKE_64BIT_MASK(0, shift);
+
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+            r = (d[H4(e)] << shift) | (rdm & shiftmask);
+            if (mask & 1) {
+                rdm = d[H4(e)] >> (32 - shift);
+            }
+            mergemask(&d[H4(e)], r, mask);
+        }
+    }
+    mve_advance_vpt(env);
+    return rdm;
+}
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
+
+static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
+{
+    /*
+     * Whole Vector Left Shift with Carry. The carry is taken
+     * from a general purpose register and written back there.
+     * An imm of 0 means "shift by 32".
+     */
+    TCGv_ptr qd;
+    TCGv_i32 rdm;
+
+    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
+        return false;
+    }
+    if (a->rdm == 13 || a->rdm == 15) {
+        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    qd = mve_qreg_ptr(a->qd);
+    rdm = load_reg(s, a->rdm);
+    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
+    store_reg(s, a->rdm, rdm);
+    tcg_temp_free_ptr(qd);
+    mve_update_eci(s);
+    return true;
+}
--
2.20.1
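[Editor's note: the VSHLC carry chain above is easiest to see in a scalar
model. The sketch below is illustrative only and not part of the series;
model_vshlc is an invented name, predication is ignored, and 'shift' is
taken as the effective count 1..32 (an encoded imm of 0 means 32).]

    #include <assert.h>
    #include <stdint.h>

    /* Shift the 128-bit vector (four 32-bit lanes, lane 0 least
     * significant) left by 'shift', feeding rdm in at the bottom and
     * returning the bits shifted out at the top.
     */
    static uint32_t model_vshlc(uint32_t q[4], uint32_t rdm, unsigned shift)
    {
        for (unsigned e = 0; e < 4; e++) {
            uint32_t carry_out, r;
            if (shift == 32) {
                carry_out = q[e];
                r = rdm;
            } else {
                carry_out = q[e] >> (32 - shift);
                r = (q[e] << shift) | (rdm & ((1u << shift) - 1));
            }
            q[e] = r;
            rdm = carry_out;
        }
        return rdm; /* carry-out, written back to the GPR */
    }

    int main(void)
    {
        uint32_t q[4] = { 0x80000000u, 0, 0, 0 };
        /* Shift left by 1 with a carry-in of 1 at the bottom. */
        uint32_t out = model_vshlc(q, 1, 1);
        assert(q[0] == 1 && q[1] == 1 && out == 0); /* bit 31 -> lane 1 bit 0 */
        return 0;
    }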
Convert the VMLA, VMLS and VMUL insns in the Neon "2 registers and a
scalar" group to decodetree. These are 32x32->32 operations where
one of the inputs is the scalar, followed by a possible accumulate
operation of the 32-bit result.

The refactoring removes some of the oddities of the old decoder:
 * operands to the operation and accumulation were often
   reversed (taking advantage of the fact that most of these ops
   are commutative); the new code follows the pseudocode order
 * the Q bit in the insn was in a local variable 'u'; in the
   new code it is decoded into a->q

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  15 ++++
 target/arm/translate-neon.inc.c | 133 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  77 ++----------------
 3 files changed, 154 insertions(+), 71 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
   VQDMULL_3d   1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff

   VMULL_P_3d   1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
+
+  ##################################################################
+  # 2-regs-plus-scalar grouping:
+  # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
+  ##################################################################
+  &2scalar vm vn vd size q
+
+  @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
+           &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+  VMLA_2sc     1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
+
+  VMLS_2sc     1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
+
+  VMUL_2sc     1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
                        16, 16, 0, fn_gvec);
     return true;
 }
+
+static void gen_neon_dup_low16(TCGv_i32 var)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    tcg_gen_ext16u_i32(var, var);
+    tcg_gen_shli_i32(tmp, var, 16);
+    tcg_gen_or_i32(var, var, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_neon_dup_high16(TCGv_i32 var)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    tcg_gen_andi_i32(var, var, 0xffff0000);
+    tcg_gen_shri_i32(tmp, var, 16);
+    tcg_gen_or_i32(var, var, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
+{
+    TCGv_i32 tmp;
+    if (size == 1) {
+        tmp = neon_load_reg(reg & 7, reg >> 4);
+        if (reg & 8) {
+            gen_neon_dup_high16(tmp);
+        } else {
+            gen_neon_dup_low16(tmp);
+        }
+    } else {
+        tmp = neon_load_reg(reg & 15, reg >> 4);
+    }
+    return tmp;
+}
+
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
+                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
+{
+    /*
+     * Two registers and a scalar: perform an operation between
+     * the input elements and the scalar, and then possibly
+     * perform an accumulation operation of that result into the
+     * destination.
+     */
+    TCGv_i32 scalar;
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!opfn) {
+        /* Bad size (including size == 3, which is a different insn group) */
+        return false;
+    }
+
+    if (a->q && ((a->vd | a->vn) & 1)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    scalar = neon_get_scalar(a->size, a->vm);
+
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(a->vn, pass);
+        opfn(tmp, tmp, scalar);
+        if (accfn) {
+            TCGv_i32 rd = neon_load_reg(a->vd, pass);
+            accfn(tmp, rd, tmp);
+            tcg_temp_free_i32(rd);
+        }
+        neon_store_reg(a->vd, pass, tmp);
+    }
+    tcg_temp_free_i32(scalar);
+    return true;
+}
+
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        gen_helper_neon_mul_u16,
+        tcg_gen_mul_i32,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        gen_helper_neon_mul_u16,
+        tcg_gen_mul_i32,
+        NULL,
+    };
+    static NeonGenTwoOpFn * const accfn[] = {
+        NULL,
+        gen_helper_neon_add_u16,
+        tcg_gen_add_i32,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
+{
+    static NeonGenTwoOpFn * const opfn[] = {
+        NULL,
+        gen_helper_neon_mul_u16,
+        tcg_gen_mul_i32,
+        NULL,
+    };
+    static NeonGenTwoOpFn * const accfn[] = {
+        NULL,
+        gen_helper_neon_sub_u16,
+        tcg_gen_sub_i32,
+        NULL,
+    };
+
+    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0, 5)

-static void gen_neon_dup_low16(TCGv_i32 var)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_ext16u_i32(var, var);
-    tcg_gen_shli_i32(tmp, var, 16);
-    tcg_gen_or_i32(var, var, tmp);
-    tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_high16(TCGv_i32 var)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_andi_i32(var, var, 0xffff0000);
-    tcg_gen_shri_i32(tmp, var, 16);
-    tcg_gen_or_i32(var, var, tmp);
-    tcg_temp_free_i32(tmp);
-}
-
 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
 {
 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)

 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1

-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
-    switch (size) {
-    case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
-    case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
-    case 2: tcg_gen_add_i32(t0, t0, t1); break;
-    default: abort();
-    }
-}
-
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
-    switch (size) {
-    case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
-    case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
-    case 2: tcg_gen_sub_i32(t0, t1, t0); break;
-    default: return;
-    }
-}
-
 static TCGv_i32 neon_load_scratch(int scratch)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
@@ -XXX,XX +XXX,XX @@ static void neon_store_scratch(int scratch, TCGv_i32 var)
     tcg_temp_free_i32(var);
 }

-static inline TCGv_i32 neon_get_scalar(int size, int reg)
-{
-    TCGv_i32 tmp;
-    if (size == 1) {
-        tmp = neon_load_reg(reg & 7, reg >> 4);
-        if (reg & 8) {
-            gen_neon_dup_high16(tmp);
-        } else {
-            gen_neon_dup_low16(tmp);
-        }
-    } else {
-        tmp = neon_load_reg(reg & 15, reg >> 4);
-    }
-    return tmp;
-}
-
 static int gen_neon_unzip(int rd, int rm, int size, int q)
 {
     TCGv_ptr pd, pm;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                return 1;
            }
            switch (op) {
+            case 0: /* Integer VMLA scalar */
+            case 4: /* Integer VMLS scalar */
+            case 8: /* Integer VMUL scalar */
+                return 1; /* handled by decodetree */
+
            case 1: /* Float VMLA scalar */
            case 5: /* Floating point VMLS scalar */
            case 9: /* Floating point VMUL scalar */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    return 1;
                }
                /* fall through */
-            case 0: /* Integer VMLA scalar */
-            case 4: /* Integer VMLS scalar */
-            case 8: /* Integer VMUL scalar */
            case 12: /* VQDMULH scalar */
            case 13: /* VQRDMULH scalar */
                if (u && ((rd | rn) & 1)) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    } else {
                        gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
                    }
-                } else if (op & 1) {
+                } else {
                    TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                    gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
                    tcg_temp_free_ptr(fpstatus);
-                } else {
-                    switch (size) {
-                    case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-                    case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-                    case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                    default: abort();
-                    }
                }
                tcg_temp_free_i32(tmp2);
                if (op < 8) {
                    /* Accumulate.  */
                    tmp2 = neon_load_reg(rd, pass);
                    switch (op) {
-                    case 0:
-                        gen_neon_add(size, tmp, tmp2);
-                        break;
                    case 1:
                    {
                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        tcg_temp_free_ptr(fpstatus);
                        break;
                    }
-                    case 4:
-                        gen_neon_rsb(size, tmp, tmp2);
-                        break;
                    case 5:
                    {
                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
--
2.20.1

Implement the MVE VADDLV insn; this is similar to VADDV, except
that it accumulates 32-bit elements into a 64-bit accumulator
stored in a pair of general-purpose registers.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-15-peter.maydell@linaro.org
---
 target/arm/helper-mve.h    |  3 ++
 target/arm/mve.decode      |  6 +++-
 target/arm/mve_helper.c    | 19 ++++++++++++
 target/arm/translate-mve.c | 63 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)

+DEF_HELPER_FLAGS_3(mve_vaddlv_s, TCG_CALL_NO_WG, i64, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vaddlv_u, TCG_CALL_NO_WG, i64, env, ptr, i64)
+
 DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
 DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
 DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -XXX,XX +XXX,XX @@ VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
 VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar

 # Vector add across vector
-VADDV          111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo
+{
+  VADDV        111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo
+  VADDLV       111 u:1 1110 1 ... 1001 ... 0 1111 00 a:1 0 qm:3 0 \
+               rdahi=%rdahi rdalo=%rdalo
+}

 # Predicate operations
 %mask_22_13 22:1 13:3
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ DO_VADDV(vaddvub, 1, uint8_t)
 DO_VADDV(vaddvuh, 2, uint16_t)
 DO_VADDV(vaddvuw, 4, uint32_t)

+#define DO_VADDLV(OP, TYPE, LTYPE)                              \
+    uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+                                    uint64_t ra)                \
+    {                                                           \
+        uint16_t mask = mve_element_mask(env);                  \
+        unsigned e;                                             \
+        TYPE *m = vm;                                           \
+        for (e = 0; e < 16 / 4; e++, mask >>= 4) {              \
+            if (mask & 1) {                                     \
+                ra += (LTYPE)m[H4(e)];                          \
+            }                                                   \
+        }                                                       \
+        mve_advance_vpt(env);                                   \
+        return ra;                                              \
+    }                                                           \
+
+DO_VADDLV(vaddlv_s, int32_t, int64_t)
+DO_VADDLV(vaddlv_u, uint32_t, uint64_t)
+
 /* Shifts by immediate */
 #define DO_2SHIFT(OP, ESIZE, TYPE, FN) \
     void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
     return true;
 }

+static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
+{
+    /*
+     * Vector Add Long Across Vector: accumulate the 32-bit
+     * elements of the vector into a 64-bit result stored in
+     * a pair of general-purpose registers.
+     * No need to check Qm's bank: it is only 3 bits in decode.
+     */
+    TCGv_ptr qm;
+    TCGv_i64 rda;
+    TCGv_i32 rdalo, rdahi;
+
+    if (!dc_isar_feature(aa32_mve, s)) {
+        return false;
+    }
+    /*
+     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
+     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
+     */
+    if (a->rdahi == 13 || a->rdahi == 15) {
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This insn is subject to beat-wise execution. Partial execution
+     * of an A=0 (no-accumulate) insn which does not execute the first
+     * beat must start with the current value of RdaHi:RdaLo, not zero.
+     */
+    if (a->a || mve_skip_first_beat(s)) {
+        /* Accumulate input from RdaHi:RdaLo */
+        rda = tcg_temp_new_i64();
+        rdalo = load_reg(s, a->rdalo);
+        rdahi = load_reg(s, a->rdahi);
+        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+        tcg_temp_free_i32(rdalo);
+        tcg_temp_free_i32(rdahi);
+    } else {
+        /* Accumulate starting at zero */
+        rda = tcg_const_i64(0);
+    }
+
+    qm = mve_qreg_ptr(a->qm);
+    if (a->u) {
+        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
+    } else {
+        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
+    }
+    tcg_temp_free_ptr(qm);
+
+    rdalo = tcg_temp_new_i32();
+    rdahi = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(rdalo, rda);
+    tcg_gen_extrh_i64_i32(rdahi, rda);
+    store_reg(s, a->rdalo, rdalo);
+    store_reg(s, a->rdahi, rdahi);
+    tcg_temp_free_i64(rda);
+    mve_update_eci(s);
+    return true;
+}
+
 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
 {
     TCGv_ptr qd;
--
2.20.1
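[Editor's note: a scalar model of the VADDLV accumulation above, with the
64-bit accumulator carried as an RdaLo/RdaHi pair of 32-bit halves. This
sketch is illustrative only and not part of the series; model_vaddlv_s is
an invented name and predication is ignored.]

    #include <assert.h>
    #include <stdint.h>

    /* Sum the four signed 32-bit lanes of q into the 64-bit value held
     * as (*hi << 32) | *lo, then split the result back into the pair.
     */
    static void model_vaddlv_s(const int32_t q[4], uint32_t *lo, uint32_t *hi)
    {
        int64_t ra = (int64_t)(((uint64_t)*hi << 32) | *lo);
        for (int e = 0; e < 4; e++) {
            ra += q[e];
        }
        *lo = (uint32_t)ra;
        *hi = (uint32_t)((uint64_t)ra >> 32);
    }

    int main(void)
    {
        int32_t q[4] = { INT32_MAX, INT32_MAX, 2, 0 };
        uint32_t lo = 0, hi = 0;
        model_vaddlv_s(q, &lo, &hi);
        /* 2 * INT32_MAX + 2 == 2^32, which needs the high half. */
        assert(lo == 0 && hi == 1);
        return 0;
    }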
Convert the Neon 3-reg-diff insn polynomial VMULL. This is the last
insn in this group to be converted.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  2 ++
 target/arm/translate-neon.inc.c | 43 +++++++++++++++++
 target/arm/translate.c          | 60 ++-------------------------
 3 files changed, 48 insertions(+), 57 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
   VMULL_U_3d   1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff

   VQDMULL_3d   1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
+
+  VMULL_P_3d   1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
 ]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)

     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
 }
+
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
+{
+    gen_helper_gvec_3 *fn_gvec;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & 1) {
+        return false;
+    }
+
+    switch (a->size) {
+    case 0:
+        fn_gvec = gen_helper_neon_pmull_h;
+        break;
+    case 2:
+        if (!dc_isar_feature(aa32_pmull, s)) {
+            return false;
+        }
+        fn_gvec = gen_helper_gvec_pmull_q;
+        break;
+    default:
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
+                       neon_reg_offset(a->vn, 0),
+                       neon_reg_offset(a->vm, 0),
+                       16, 16, 0, fn_gvec);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
 {
     int op;
     int q;
-    int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
+    int rd, rn, rm, rd_ofs, rm_ofs;
     int size;
     int pass;
     int u;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
     size = (insn >> 20) & 3;
     vec_size = q ? 16 : 8;
     rd_ofs = neon_reg_offset(rd, 0);
-    rn_ofs = neon_reg_offset(rn, 0);
     rm_ofs = neon_reg_offset(rm, 0);

     if ((insn & (1 << 23)) == 0) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
        if (size != 3) {
            op = (insn >> 8) & 0xf;
            if ((insn & (1 << 6)) == 0) {
-                /* Three registers of different lengths.  */
-                /* undefreq: bit 0 : UNDEF if size == 0
-                 *           bit 1 : UNDEF if size == 1
-                 *           bit 2 : UNDEF if size == 2
-                 *           bit 3 : UNDEF if U == 1
-                 * Note that [2:0] set implies 'always UNDEF'
-                 */
-                int undefreq;
-                /* prewiden, src1_wide, src2_wide, undefreq */
-                static const int neon_3reg_wide[16][4] = {
-                    {0, 0, 0, 7}, /* VADDL: handled by decodetree */
-                    {0, 0, 0, 7}, /* VADDW: handled by decodetree */
-                    {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
-                    {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
-                    {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
-                    {0, 0, 0, 7}, /* VABAL */
-                    {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
-                    {0, 0, 0, 7}, /* VABDL */
-                    {0, 0, 0, 7}, /* VMLAL */
-                    {0, 0, 0, 7}, /* VQDMLAL */
-                    {0, 0, 0, 7}, /* VMLSL */
-                    {0, 0, 0, 7}, /* VQDMLSL */
-                    {0, 0, 0, 7}, /* Integer VMULL */
-                    {0, 0, 0, 7}, /* VQDMULL */
-                    {0, 0, 0, 0xa}, /* Polynomial VMULL */
-                    {0, 0, 0, 7}, /* Reserved: always UNDEF */
-                };
-
-                undefreq = neon_3reg_wide[op][3];
-
-                if ((undefreq & (1 << size)) ||
-                    ((undefreq & 8) && u)) {
-                    return 1;
-                }
-                if (rd & 1) {
-                    return 1;
-                }
-
-                /* Handle polynomial VMULL in a single pass.  */
-                if (op == 14) {
-                    if (size == 0) {
-                        /* VMULL.P8 */
-                        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
-                                           0, gen_helper_neon_pmull_h);
-                    } else {
-                        /* VMULL.P64 */
-                        if (!dc_isar_feature(aa32_pmull, s)) {
-                            return 1;
-                        }
-                        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
-                                           0, gen_helper_gvec_pmull_q);
-                    }
-                    return 0;
-                }
-                abort(); /* all others handled by decodetree */
+                /* Three registers of different lengths: handled by decodetree */
+                return 1;
            } else {
                /* Two registers and a scalar. NB that for ops of this form
                 * the ARM ARM labels bit 24 as Q, but it is in our variable
--
2.20.1

The MVE extension to v8.1M includes some new shift instructions which
sit entirely within the non-coprocessor part of the encoding space
and which operate only on general-purpose registers.  They take up
the space which was previously UNPREDICTABLE MOVS and ORRS encodings
with Rm == 13 or 15.

Implement the long shifts by immediate, which perform shifts on a
pair of general-purpose registers treated as a 64-bit quantity, with
an immediate shift count between 1 and 32.

Awkwardly, because the MOVS and ORRS trans functions do not UNDEF for
the Rm==13,15 case, we need to explicitly emit code to UNDEF for the
cases where v8.1M now requires that.  (Trying to change MOVS and ORRS
is too difficult, because the functions that generate the code are
shared between a dozen different kinds of arithmetic or logical
instruction for all A32, T16 and T32 encodings, and for some insns
and some encodings Rm==13,15 are valid.)

We make the helper functions we need for UQSHLL and SQSHLL take
a 32-bit value which the helper casts to int8_t because we'll need
these helpers also for the shift-by-register insns, where the shift
count might be < 0 or > 32.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-16-peter.maydell@linaro.org
---
 target/arm/helper-mve.h |  3 ++
 target/arm/translate.h  |  1 +
 target/arm/t32.decode   | 28 +++++++++++++
 target/arm/mve_helper.c | 10 +++++
 target/arm/translate.c  | 90 +++++++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

 DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
+typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);

 /**
  * arm_tbflags_from_tb:
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -XXX,XX +XXX,XX @@
 &mcr             !extern cp opc1 crn crm opc2 rt
 &mcrr            !extern cp opc1 crm rt rt2

+&mve_shl_ri      rdalo rdahi shim
+
+# rdahi: bits [3:1] from insn, bit 0 is 1
+# rdalo: bits [3:1] from insn, bit 0 is 0
+%rdahi_9 9:3 !function=times_2_plus_1
+%rdalo_17 17:3 !function=times_2
+
 # Data-processing (register)

 %imm5_12_6       12:3 6:2
@@ -XXX,XX +XXX,XX @@
 @S_xrr_shi       ....... .... . rn:4 .... .... .. shty:2 rm:4 \
                  &s_rrr_shi shim=%imm5_12_6 s=1 rd=0

+@mve_shl_ri      ....... .... . ... . . ... ... . .. .. .... \
+                 &mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
+
 {
   TST_xrri       1110101 0000 1 .... 0 ... 1111 .... ....     @S_xrr_shi
   AND_rrri       1110101 0000 . .... 0 ... .... .... ....     @s_rrr_shi
 }
 BIC_rrri         1110101 0001 . .... 0 ... .... .... ....     @s_rrr_shi
 {
+  # The v8.1M MVE shift insns overlap in encoding with MOVS/ORRS
+  # and are distinguished by having Rm==13 or 15. Those are UNPREDICTABLE
+  # cases for MOVS/ORRS. We decode the MVE cases first, ensuring that
+  # they explicitly call unallocated_encoding() for cases that must UNDEF
+  # (eg "using a new shift insn on a v8.1M CPU without MVE"), and letting
+  # the rest fall through (where ORR_rrri and MOV_rxri will end up
+  # handling them as r13 and r15 accesses with the same semantics as A32).
+  [
+    LSLL_ri      1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111  @mve_shl_ri
+    LSRL_ri      1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111  @mve_shl_ri
+    ASRL_ri      1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111  @mve_shl_ri
+
+    UQSHLL_ri    1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111  @mve_shl_ri
+    URSHRL_ri    1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111  @mve_shl_ri
+    SRSHRL_ri    1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111  @mve_shl_ri
+    SQSHLL_ri    1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111  @mve_shl_ri
+  ]
+
   MOV_rxri       1110101 0010 . 1111 0 ... .... .... ....     @s_rxr_shi
   ORR_rrri       1110101 0010 . .... 0 ... .... .... ....     @s_rrr_shi
 }
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
     mve_advance_vpt(env);
     return rdm;
 }
+
+uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+    return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
+}
+
+uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+    return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
     return true;
 }

+/*
+ * v8.1M MVE wide-shifts
+ */
+static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
+                          WideShiftImmFn *fn)
+{
+    TCGv_i64 rda;
+    TCGv_i32 rdalo, rdahi;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+        return false;
+    }
+    if (a->rdahi == 15) {
+        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
+        return false;
+    }
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+        a->rdahi == 13) {
+        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
+        unallocated_encoding(s);
+        return true;
+    }
+
+    if (a->shim == 0) {
+        a->shim = 32;
+    }
+
+    rda = tcg_temp_new_i64();
+    rdalo = load_reg(s, a->rdalo);
+    rdahi = load_reg(s, a->rdahi);
+    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+
+    fn(rda, rda, a->shim);
+
+    tcg_gen_extrl_i64_i32(rdalo, rda);
+    tcg_gen_extrh_i64_i32(rdahi, rda);
+    store_reg(s, a->rdalo, rdalo);
+    store_reg(s, a->rdahi, rdahi);
+    tcg_temp_free_i64(rda);
+
+    return true;
+}
+
+static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
+}
+
+static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
+}
+
+static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
+}
+
+static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
+{
+    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, gen_mve_sqshll);
+}
+
+static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
+{
+    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, gen_mve_uqshll);
+}
+
+static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, gen_srshr64_i64);
+}
+
+static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+    return do_mve_shl_ri(s, a, gen_urshr64_i64);
+}
+
 /*
  * Multiply and multiply accumulate
  */
--
2.20.1
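[Editor's note: the "pair of GPRs treated as a 64-bit quantity" pattern
used by do_mve_shl_ri() above is shown below as a scalar C model of ASRL
by immediate. Illustrative only, not part of the series; model_asrl_ri is
an invented name, and it relies on >> of a negative int64_t being an
arithmetic shift, as it is on all mainstream compilers.]

    #include <assert.h>
    #include <stdint.h>

    /* Concatenate RdaHi:RdaLo, shift the 64-bit value arithmetically
     * right, and split it back. An encoded shift of 0 means 32.
     */
    static void model_asrl_ri(uint32_t *rdalo, uint32_t *rdahi, unsigned shim)
    {
        if (shim == 0) {
            shim = 32;
        }
        int64_t rda = (int64_t)(((uint64_t)*rdahi << 32) | *rdalo);
        rda >>= shim;
        *rdalo = (uint32_t)rda;
        *rdahi = (uint32_t)((uint64_t)rda >> 32);
    }

    int main(void)
    {
        uint32_t lo = 0, hi = 0x80000000u; /* rda == INT64_MIN */
        model_asrl_ri(&lo, &hi, 4);
        assert(hi == 0xf8000000u && lo == 0); /* sign bits shifted in */
        return 0;
    }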
1
Convert the Neon VTBL, VTBX instructions to decodetree. The actual
1
Implement the MVE long shifts by register, which perform shifts on a
2
implementation of the insn is copied across to the new trans function
2
pair of general-purpose registers treated as a 64-bit quantity, with
3
unchanged except for renaming 'tmp5' to 'tmp4'.
3
the shift count in another general-purpose register, which might be
4
either positive or negative.
5
6
Like the long-shifts-by-immediate, these encodings sit in the space
7
that was previously the UNPREDICTABLE MOVS/ORRS with Rm==13,15.
8
Because LSLL_rr and ASRL_rr overlap with both MOV_rxri/ORR_rrri and
9
also with CSEL (as one of the previously-UNPREDICTABLE Rm==13 cases),
10
we have to move the CSEL pattern into the same decodetree group.
4
11
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20210628135835.6690-17-peter.maydell@linaro.org
7
---
15
---
8
target/arm/neon-dp.decode | 3 ++
16
target/arm/helper-mve.h | 6 +++
9
target/arm/translate-neon.inc.c | 56 +++++++++++++++++++++++++++++++++
17
target/arm/translate.h | 1 +
10
target/arm/translate.c | 41 +++---------------------
18
target/arm/t32.decode | 16 +++++--
11
3 files changed, 63 insertions(+), 37 deletions(-)
19
target/arm/mve_helper.c | 93 +++++++++++++++++++++++++++++++++++++++++
12
20
target/arm/translate.c | 69 ++++++++++++++++++++++++++++++
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
21
5 files changed, 182 insertions(+), 3 deletions(-)
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)

+DEF_HELPER_FLAGS_3(mve_sshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_ushll, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
+typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);

/**
* arm_tbflags_from_tb:
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -XXX,XX +XXX,XX @@
&mcrr !extern cp opc1 crm rt rt2

&mve_shl_ri rdalo rdahi shim
+&mve_shl_rr rdalo rdahi rm

# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
@@ -XXX,XX +XXX,XX @@

@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \
&mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
+@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
+ &mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9

{
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
+
+ LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
+ ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
+ UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
+ SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
+ UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
+ SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
]

MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi
ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi
+
+ # v8.1M CSEL and friends
+ CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
}
{
MVN_rxri 1110101 0011 . 1111 0 ... .... .... .... @s_rxr_shi
@@ -XXX,XX +XXX,XX @@ SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi
}
RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi

-# v8.1M CSEL and friends
-CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
-
# Data-processing (register-shifted register)

MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
return rdm;
}

+uint64_t HELPER(mve_sshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, false, NULL);
+}
+
+uint64_t HELPER(mve_ushll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, false, NULL);
+}
+
uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
{
return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
{
return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
}
+
+uint64_t HELPER(mve_sqrshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, true, &env->QF);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline int64_t do_sqrshl48_d(int64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -48) {
+ /* Rounding the sign bit always produces 0. */
+ if (round) {
+ return 0;
+ }
+ return src >> 63;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < 48) {
+ int64_t val = src << shift;
+ int64_t extval = sextract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return (1ULL << 47) - (src >= 0);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline uint64_t do_uqrshl48_d(uint64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ uint64_t val, extval;
+
+ if (shift <= -(48 + round)) {
+ return 0;
+ } else if (shift < 0) {
+ if (round) {
+ val = src >> (-shift - 1);
+ val = (val >> 1) + (val & 1);
+ } else {
+ val = src >> -shift;
+ }
+ extval = extract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (shift < 48) {
+ uint64_t val = src << shift;
+ uint64_t extval = extract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return MAKE_64BIT_MASK(0, 48);
+}
+
+uint64_t HELPER(mve_sqrshrl48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
return do_mve_shl_ri(s, a, gen_urshr64_i64);
}

+static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
+{
+ TCGv_i64 rda;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (a->rdahi == 15) {
+ /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
+ a->rm == a->rdahi || a->rm == a->rdalo) {
+ /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ rda = tcg_temp_new_i64();
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+
+ /* The helper takes care of the sign-extension of the low 8 bits of Rm */
+ fn(rda, cpu_env, rda, cpu_R[a->rm]);
+
+ tcg_gen_extrl_i64_i32(rdalo, rda);
+ tcg_gen_extrh_i64_i32(rdahi, rda);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+ tcg_temp_free_i64(rda);
+
+ return true;
+}
+
+static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
+}
+
+static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
+}
+
+static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
+}
+
+static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
+}
+
+static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
+}
+
+static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
+}
+
/*
* Multiply and multiply accumulate
*/
--
2.20.1

diff view generated by jsdifflib
Convert the Neon VEXT insn to decodetree. Rather than keeping the
old implementation which used fixed temporaries cpu_V0 and cpu_V1
and did the extraction with by-hand shift and logic ops, we use
the TCG extract2 insn.

We don't need to special case 0 or 8 immediates any more as the
optimizer is smart enough to throw away the dead code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/neon-dp.decode | 8 +++-
target/arm/translate-neon.inc.c | 76 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 58 +------------------------
3 files changed, 85 insertions(+), 57 deletions(-)
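
The extract2 operation that replaces the hand-rolled shift-and-or
sequence can be described in C along these lines (an illustrative
model with an invented name, not a TCG API reference):

#include <stdint.h>

/*
 * Model of extract2(al, ah, ofs): take 64 bits starting at bit 'ofs'
 * of the 128-bit value ah:al, for 0 <= ofs <= 64. Guarding the two
 * boundary cases avoids undefined shifts in C; in TCG the optimizer
 * folds the ofs == 0 and ofs == 64 cases away, which is why the old
 * special-casing of imm == 0 and imm == 8 is no longer needed.
 */
static uint64_t model_extract2(uint64_t al, uint64_t ah, unsigned ofs)
{
    if (ofs == 0) {
        return al;
    }
    if (ofs == 64) {
        return ah;
    }
    return (al >> ofs) | (ah << (64 - ofs));
}

So model_extract2(vn, vm, imm * 8) corresponds to the !a->q case in
the new trans_VEXT function below.
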
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
# return false for size==3.
######################################################################
{
- # 0b11 subgroup will go here
+ [
+ ##################################################################
+ # Miscellaneous size=0b11 insns
+ ##################################################################
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+ ]

# Subgroup for size != 0b11
[
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)

return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}
+
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (a->imm > 7 && !a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!a->q) {
+ /* Extract 64 bits from <Vm:Vn> */
+ TCGv_i64 left, right, dest;
+
+ left = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
+ neon_store_reg64(dest, a->vd);
+
+ tcg_temp_free_i64(left);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(dest);
+ } else {
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
+ TCGv_i64 left, middle, right, destleft, destright;
+
+ left = tcg_temp_new_i64();
+ middle = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ destleft = tcg_temp_new_i64();
+ destright = tcg_temp_new_i64();
+
+ if (a->imm < 8) {
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(middle, a->vn + 1);
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
+ } else {
+ neon_load_reg64(right, a->vn + 1);
+ neon_load_reg64(middle, a->vm);
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
+ neon_load_reg64(left, a->vm + 1);
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
+ }
+
+ neon_store_reg64(destright, a->vd);
+ neon_store_reg64(destleft, a->vd + 1);
+
+ tcg_temp_free_i64(destright);
+ tcg_temp_free_i64(destleft);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(middle);
+ tcg_temp_free_i64(left);
+ }
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int pass;
int u;
int vec_size;
- uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp5;
TCGv_ptr ptr1;
- TCGv_i64 tmp64;

if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
} else { /* size == 3 */
if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
-
- if (imm > 7 && !q)
- return 1;
-
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
-
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
+ /* Extract: handled by decodetree */
+ return 1;
} else if ((insn & (1 << 11)) == 0) {
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
--
2.20.1

Implement the MVE shifts by immediate, which perform shifts
on a single general-purpose register.

These patterns overlap with the long-shift-by-immediates,
so we have to rearrange the grouping a little here.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-18-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 3 ++
target/arm/translate.h | 1 +
target/arm/t32.decode | 31 ++++++++++++++-----
target/arm/mve_helper.c | 10 ++++++
target/arm/translate.c | 68 +++++++++++++++++++++++++++++++++++++++--
5 files changed, 104 insertions(+), 9 deletions(-)
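
The rounding-shift semantics these insns need can be made concrete
with a small C model (illustrative only; model_urshr is an invented
name for this sketch):

#include <stdint.h>

/*
 * Model of URSHR #n for 1 <= n <= 32: unsigned rounding right shift,
 * i.e. add half of the least significant surviving bit before
 * shifting. Doing the arithmetic at 64 bits sidesteps the n == 32
 * case, which gen_urshr32_i32 below has to special-case because it
 * works at 32 bits.
 */
static uint32_t model_urshr(uint32_t x, int n)
{
    uint64_t wide = (uint64_t)x + (1ULL << (n - 1));
    return (uint32_t)(wide >> n);
}

Note that a shift-immediate field of 0 encodes a shift of 32, which
is why do_mve_sh_ri in the patch rewrites a->shim from 0 to 32.
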
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+
+DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
+typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);

/**
* arm_tbflags_from_tb:
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -XXX,XX +XXX,XX @@

&mve_shl_ri rdalo rdahi shim
&mve_shl_rr rdalo rdahi rm
+&mve_sh_ri rda shim

# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
@@ -XXX,XX +XXX,XX @@
&mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
&mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
+@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
+ &mve_sh_ri shim=%imm5_12_6

{
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
# the rest fall through (where ORR_rrri and MOV_rxri will end up
# handling them as r13 and r15 accesses with the same semantics as A32).
[
- LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
- LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
- ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
+ {
+ UQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 00 1111 @mve_sh_ri
+ LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
+ UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
+ }

- UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
- URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
- SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
- SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
+ {
+ URSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 01 1111 @mve_sh_ri
+ LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
+ URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
+ }
+
+ {
+ SRSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 10 1111 @mve_sh_ri
+ ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
+ SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
+ }
+
+ {
+ SQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 11 1111 @mve_sh_ri
+ SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
+ }

LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift)
{
return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);
}
+
+uint32_t HELPER(mve_uqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
+}
+
+uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)

static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- TCGv_i32 t = tcg_temp_new_i32();
+ TCGv_i32 t;

+ /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
+ if (sh == 32) {
+ tcg_gen_movi_i32(d, 0);
+ return;
+ }
+ t = tcg_temp_new_i32();
tcg_gen_extract_i32(t, a, sh - 1, 1);
tcg_gen_sari_i32(d, a, sh);
tcg_gen_add_i32(d, d, t);
@@ -XXX,XX +XXX,XX @@ static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)

static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- TCGv_i32 t = tcg_temp_new_i32();
+ TCGv_i32 t;

+ /* Handle shift by the input size for the benefit of trans_URSHR_ri */
+ if (sh == 32) {
+ tcg_gen_extract_i32(d, a, sh - 1, 1);
+ return;
+ }
+ t = tcg_temp_new_i32();
tcg_gen_extract_i32(t, a, sh - 1, 1);
tcg_gen_shri_i32(d, a, sh);
tcg_gen_add_i32(d, d, t);
@@ -XXX,XX +XXX,XX @@ static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
}

+static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rda == 13 || a->rda == 15) {
+ /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ if (a->shim == 0) {
+ a->shim = 32;
+ }
+ fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
+
+ return true;
+}
+
+static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_urshr32_i32);
+}
+
+static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_srshr32_i32);
+}
+
+static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+ gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_mve_sqshl);
+}
+
+static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+ gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_mve_uqshl);
+}
+
/*
* Multiply and multiply accumulate
*/
--
2.20.1

diff view generated by jsdifflib
Convert the narrow-to-high-half insns VADDHN, VSUBHN, VRADDHN,
VRSUBHN in the Neon 3-registers-different-lengths group to
decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/neon-dp.decode | 6 +++
target/arm/translate-neon.inc.c | 87 +++++++++++++++++++++++++++++++
target/arm/translate.c | 91 ++++-----------------------------
3 files changed, 104 insertions(+), 80 deletions(-)
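
The narrow-to-high-half operation itself is simple enough to state in
a few lines of C (a sketch with an invented name, shown here just to
make the rounding variant concrete):

#include <stdint.h>

/*
 * Model of VADDHN/VRADDHN for the size == 2 (32-bit result element)
 * case: add the two wide elements and keep the high half, with the
 * rounded variant adding half an output LSB first. This is what
 * gen_narrow_round_high_u32 in the patch does with tcg_gen_addi_i64.
 */
static uint32_t model_vaddhn_u64(uint64_t n, uint64_t m, int round)
{
    uint64_t sum = n + m;

    if (round) {
        sum += 1u << 31;
    }
    return (uint32_t)(sum >> 32);
}
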
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm

VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
]
}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ DO_PREWIDEN(VADDW_S, s, ext, add, true)
DO_PREWIDEN(VADDW_U, u, extu, add, true)
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
+
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
+{
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
+ TCGv_i64 rn_64, rm_64;
+ TCGv_i32 rd0, rd1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn || !narrowfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vn | a->vm) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+ rd0 = tcg_temp_new_i32();
+ rd1 = tcg_temp_new_i32();
+
+ neon_load_reg64(rn_64, a->vn);
+ neon_load_reg64(rm_64, a->vm);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd0, rn_64);
+
+ neon_load_reg64(rn_64, a->vn + 1);
+ neon_load_reg64(rm_64, a->vm + 1);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd1, rn_64);
+
+ neon_store_reg(a->vd, 0, rd0);
+ neon_store_reg(a->vd, 1, rd1);
+
+ tcg_temp_free_i64(rn_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ static NeonGenNarrowFn * const narrowfn[] = { \
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
+ EXTOP, \
+ NULL, \
+ }; \
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
+ }
+
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
+{
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
+ tcg_gen_extrh_i64_i32(rd, rn);
+}
+
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
}
}

-static inline void gen_neon_subl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
- case 2: tcg_gen_sub_i64(CPU_V001); break;
- default: abort();
- }
-}
-
static inline void gen_neon_negl(TCGv_i64 var, int size)
{
switch (size) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
op = (insn >> 8) & 0xf;
if ((insn & (1 << 6)) == 0) {
/* Three registers of different lengths. */
- int src1_wide;
- int src2_wide;
/* undefreq: bit 0 : UNDEF if size == 0
* bit 1 : UNDEF if size == 1
* bit 2 : UNDEF if size == 2
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{0, 0, 0, 7}, /* VADDW: handled by decodetree */
{0, 0, 0, 7}, /* VSUBL: handled by decodetree */
{0, 0, 0, 7}, /* VSUBW: handled by decodetree */
- {0, 1, 1, 0}, /* VADDHN */
+ {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
{0, 0, 0, 0}, /* VABAL */
- {0, 1, 1, 0}, /* VSUBHN */
+ {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
{0, 0, 0, 0}, /* VABDL */
{0, 0, 0, 0}, /* VMLAL */
{0, 0, 0, 9}, /* VQDMLAL */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};

- src1_wide = neon_3reg_wide[op][1];
- src2_wide = neon_3reg_wide[op][2];
undefreq = neon_3reg_wide[op][3];

if ((undefreq & (1 << size)) ||
((undefreq & 8) && u)) {
return 1;
}
- if ((src1_wide && (rn & 1)) ||
- (src2_wide && (rm & 1)) ||
- (!src2_wide && (rd & 1))) {
+ if (rd & 1) {
return 1;
}

@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
- if (rd == rm && !src2_wide) {
+ if (rd == rm) {
tmp = neon_load_reg(rm, 1);
neon_store_scratch(2, tmp);
- } else if (rd == rn && !src1_wide) {
+ } else if (rd == rn) {
tmp = neon_load_reg(rn, 1);
neon_store_scratch(2, tmp);
}
tmp3 = NULL;
for (pass = 0; pass < 2; pass++) {
- if (src1_wide) {
- neon_load_reg64(cpu_V0, rn + pass);
- tmp = NULL;
+ if (pass == 1 && rd == rn) {
+ tmp = neon_load_scratch(2);
} else {
- if (pass == 1 && rd == rn) {
- tmp = neon_load_scratch(2);
- } else {
- tmp = neon_load_reg(rn, pass);
- }
+ tmp = neon_load_reg(rn, pass);
}
- if (src2_wide) {
- neon_load_reg64(cpu_V1, rm + pass);
- tmp2 = NULL;
+ if (pass == 1 && rd == rm) {
+ tmp2 = neon_load_scratch(2);
} else {
- if (pass == 1 && rd == rm) {
- tmp2 = neon_load_scratch(2);
- } else {
- tmp2 = neon_load_reg(rm, pass);
- }
+ tmp2 = neon_load_reg(rm, pass);
}
switch (op) {
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
- gen_neon_addl(size);
- break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
- gen_neon_subl(size);
- break;
case 5: case 7: /* VABAL, VABDL */
switch ((size << 1) | u) {
case 0:
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
abort();
}
neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 4 || op == 6) {
- /* Narrowing operation. */
- tmp = tcg_temp_new_i32();
- if (!u) {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- }
- if (pass == 0) {
- tmp3 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp3);
- neon_store_reg(rd, 1, tmp);
- }
} else {
/* Write back the result. */
neon_store_reg64(cpu_V0, rd + pass);
--
2.20.1

Implement the MVE shifts by register, which perform
shifts on a single general-purpose register.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-19-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 2 ++
target/arm/translate.h | 1 +
target/arm/t32.decode | 18 ++++++++++++++----
target/arm/mve_helper.c | 10 ++++++++++
target/arm/translate.c | 30 ++++++++++++++++++++++++++++++
5 files changed, 57 insertions(+), 4 deletions(-)
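
A rough C model of the UQRSHL behaviour these insns provide (a sketch
only; model_uqrshl is an invented name, and the edge cases for large
shift counts follow the helper conventions rather than anything
normative, the real code being do_uqrshl_bhs in the QEMU tree):

#include <stdint.h>

/*
 * Sketch of UQRSHL: an unsigned 32-bit value shifted by a signed
 * count (positive = left, negative = right), with rounding on right
 * shifts and saturation, plus a QC-style sticky flag, on left-shift
 * overflow.
 */
static uint32_t model_uqrshl(uint32_t n, int8_t sh, int *satp)
{
    if (sh < 0) {
        int rsh = -sh;
        if (rsh > 32) {
            return 0;                     /* everything rounds away */
        }
        return (uint32_t)(((uint64_t)n + (1ULL << (rsh - 1))) >> rsh);
    }
    if (sh == 0) {
        return n;
    }
    if (sh > 31 ? n != 0 : (n >> (32 - sh)) != 0) {
        *satp = 1;                        /* saturated: set QC */
        return UINT32_MAX;
    }
    return sh > 31 ? 0 : n << sh;
}
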
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)

DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshr, TCG_CALL_NO_RWG, i32, env, i32, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);
+typedef void ShiftFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);

/**
* arm_tbflags_from_tb:
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -XXX,XX +XXX,XX @@
&mve_shl_ri rdalo rdahi shim
&mve_shl_rr rdalo rdahi rm
&mve_sh_ri rda shim
+&mve_sh_rr rda rm

# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
@@ -XXX,XX +XXX,XX @@
&mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
&mve_sh_ri shim=%imm5_12_6
+@mve_sh_rr ....... .... . rda:4 rm:4 .... .... .... &mve_sh_rr

{
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
@@ -XXX,XX +XXX,XX @@ BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
}

- LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
- ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
- UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
- SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
+ {
+ UQRSHL_rr 1110101 0010 1 .... .... 1111 0000 1101 @mve_sh_rr
+ LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
+ UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
+ }
+
+ {
+ SQRSHR_rr 1110101 0010 1 .... .... 1111 0010 1101 @mve_sh_rr
+ ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
+ SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
+ }
+
UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
]
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
{
return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
}
+
+uint32_t HELPER(mve_uqrshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF);
+}
+
+uint32_t HELPER(mve_sqrshr)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
return do_mve_sh_ri(s, a, gen_mve_uqshl);
}

+static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
+ a->rm == a->rda) {
+ /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ /* The helper takes care of the sign-extension of the low 8 bits of Rm */
+ fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
+ return true;
+}
+
+static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
+{
+ return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
+}
+
+static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
+{
+ return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
+}
+
/*
* Multiply and multiply accumulate
*/
--
2.20.1

diff view generated by jsdifflib