1
I don't have anything else queued up at the moment, so this is just
1
Hi; here's another arm pullreq; by volume most of this is
2
Richard's SME patches.
2
refactoring from me, but there are also some bugfixes and
3
other bits and pieces here.
3
4
5
thanks
4
-- PMM
6
-- PMM
5
7
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
8
The following changes since commit ed734377ab3f3f3cc15d7aa301a87ab6370f2eed:
7
9
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
10
Merge tag 'linux-user-fix-gupnp-pull-request' of https://github.com/hdeller/qemu-hppa into staging (2025-01-24 14:43:07 -0500)
9
11
10
are available in the Git repository at:
12
are available in the Git repository at:
11
13
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
14
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250128-1
13
15
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
16
for you to fetch changes up to 664280abddcb3cacc9c6204706bb739fcc1316f7:
15
17
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
18
hw/usb/canokey: Fix buffer overflow for OUT packet (2025-01-28 18:40:19 +0000)
17
19
18
----------------------------------------------------------------
20
----------------------------------------------------------------
19
target-arm:
21
target-arm queue:
20
* Implement SME emulation, for both system and linux-user
22
* hw/arm: Remove various uses of first_cpu global
23
* hw/char/imx_serial: Fix reset value of UFCR register
24
* hw/char/imx_serial: Update all state before restarting ageing timer
25
* hw/pci-host/designware: Expose MSI IRQ
26
* hw/arm/stellaris: refactoring, cleanup
27
* hw/arm/stellaris: map both I2C controllers
28
* tests/functional: Add a test for the arm microbit machine
29
* target/arm: arm_reset_sve_state() should set FPSR, not FPCR
30
* target/arm: refactorings preparatory to FEAT_AFP implementation
31
* fpu: Rename float_flag_input_denormal to float_flag_input_denormal_flushed
32
* fpu: Rename float_flag_output_denormal to float_flag_output_denormal_flushed
33
* hw/usb/canokey: Fix buffer overflow for OUT packet
21
34
22
----------------------------------------------------------------
35
----------------------------------------------------------------
23
Richard Henderson (45):
36
Bernhard Beschow (3):
24
target/arm: Handle SME in aarch64_cpu_dump_state
37
hw/char/imx_serial: Fix reset value of UFCR register
25
target/arm: Add infrastructure for disas_sme
38
hw/char/imx_serial: Update all state before restarting ageing timer
26
target/arm: Trap non-streaming usage when Streaming SVE is active
39
hw/pci-host/designware: Expose MSI IRQ
27
target/arm: Mark ADR as non-streaming
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
69
40
70
docs/system/arm/emulation.rst | 4 +
41
Hongren Zheng (1):
71
linux-user/aarch64/target_cpu.h | 5 +-
42
hw/usb/canokey: Fix buffer overflow for OUT packet
72
linux-user/aarch64/target_prctl.h | 62 +-
43
73
target/arm/cpu.h | 7 +
44
Peter Maydell (22):
74
target/arm/helper-sme.h | 126 ++++
45
target/arm: arm_reset_sve_state() should set FPSR, not FPCR
75
target/arm/helper-sve.h | 4 +
46
target/arm: Use FPSR_ constants in vfp_exceptbits_from_host()
76
target/arm/helper.h | 18 +
47
target/arm: Use uint32_t in vfp_exceptbits_from_host()
77
target/arm/translate-a64.h | 45 ++
48
target/arm: Define new fp_status_a32 and fp_status_a64
78
target/arm/translate.h | 16 +
49
target/arm: Use vfp.fp_status_a64 in A64-only helper functions
79
target/arm/sme-fa64.decode | 60 ++
50
target/arm: Use fp_status_a64 or fp_status_a32 in is_ebf()
80
target/arm/sme.decode | 88 +++
51
target/arm: Use fp_status_a32 in vjvct helper
81
target/arm/sve.decode | 41 +-
52
target/arm: Use fp_status_a32 in vfp_cmp helpers
82
linux-user/aarch64/cpu_loop.c | 9 +
53
target/arm: Use FPST_A32 in A32 decoder
83
linux-user/aarch64/signal.c | 243 ++++++--
54
target/arm: Use FPST_A64 in A64 decoder
84
linux-user/elfload.c | 20 +
55
target/arm: Remove now-unused vfp.fp_status and FPST_FPCR
85
linux-user/syscall.c | 28 +-
56
target/arm: Define new fp_status_f16_a32 and fp_status_f16_a64
86
target/arm/cpu.c | 35 +-
57
target/arm: Use fp_status_f16_a32 in AArch32-only helpers
87
target/arm/cpu64.c | 11 +
58
target/arm: Use fp_status_f16_a64 in AArch64-only helpers
88
target/arm/helper.c | 56 +-
59
target/arm: Use FPST_A32_F16 in A32 decoder
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
60
target/arm: Use FPST_A64_F16 in A64 decoder
90
target/arm/sve_helper.c | 28 +
61
target/arm: Remove now-unused vfp.fp_status_f16 and FPST_FPCR_F16
91
target/arm/translate-a64.c | 103 +++-
62
fpu: Rename float_flag_input_denormal to float_flag_input_denormal_flushed
92
target/arm/translate-sme.c | 373 ++++++++++++
63
fpu: Rename float_flag_output_denormal to float_flag_output_denormal_flushed
93
target/arm/translate-sve.c | 393 ++++++++++---
64
fpu: Fix a comment in softfloat-types.h
94
target/arm/translate-vfp.c | 12 +
65
target/arm: Remove redundant advsimd float16 helpers
95
target/arm/translate.c | 2 +
66
target/arm: Use FPST_A64_F16 for halfprec-to-other conversions
96
target/arm/vec_helper.c | 24 +
67
97
target/arm/meson.build | 3 +
68
Philippe Mathieu-Daudé (9):
98
28 files changed, 2821 insertions(+), 135 deletions(-)
69
hw/arm/nrf51: Rename ARMv7MState 'cpu' -> 'armv7m'
99
create mode 100644 target/arm/sme-fa64.decode
70
hw/arm/stellaris: Add 'armv7m' local variable
100
create mode 100644 target/arm/sme.decode
71
hw/arm/v7m: Remove use of &first_cpu in machine_init()
101
create mode 100644 target/arm/translate-sme.c
72
hw/arm/stellaris: Link each board schematic
73
hw/arm/stellaris: Constify read-only arrays
74
hw/arm/stellaris: Remove incorrect unimplemented i2c-0 at 0x40002000
75
hw/arm/stellaris: Replace magic numbers by definitions
76
hw/arm/stellaris: Use DEVCAP macro to access DeviceCapability registers
77
hw/arm/stellaris: Map both I2C controllers
78
79
Thomas Huth (1):
80
tests/functional: Add a test for the arm microbit machine
81
82
MAINTAINERS | 1 +
83
hw/usb/canokey.h | 4 --
84
include/fpu/softfloat-types.h | 10 +--
85
include/hw/arm/fsl-imx6.h | 4 +-
86
include/hw/arm/fsl-imx7.h | 4 +-
87
include/hw/arm/nrf51_soc.h | 2 +-
88
include/hw/char/imx_serial.h | 2 +-
89
include/hw/pci-host/designware.h | 1 +
90
target/arm/cpu.h | 12 ++--
91
target/arm/tcg/helper-a64.h | 8 ---
92
target/arm/tcg/translate.h | 32 ++++++---
93
fpu/softfloat.c | 6 +-
94
hw/arm/b-l475e-iot01a.c | 2 +-
95
hw/arm/fsl-imx6.c | 13 +++-
96
hw/arm/fsl-imx7.c | 13 +++-
97
hw/arm/microbit.c | 2 +-
98
hw/arm/mps2-tz.c | 2 +-
99
hw/arm/mps2.c | 2 +-
100
hw/arm/msf2-som.c | 2 +-
101
hw/arm/musca.c | 2 +-
102
hw/arm/netduino2.c | 2 +-
103
hw/arm/netduinoplus2.c | 2 +-
104
hw/arm/nrf51_soc.c | 18 ++---
105
hw/arm/olimex-stm32-h405.c | 2 +-
106
hw/arm/stellaris.c | 118 +++++++++++++++++++-----------
107
hw/arm/stm32vldiscovery.c | 2 +-
108
hw/char/imx_serial.c | 7 +-
109
hw/pci-host/designware.c | 7 +-
110
hw/usb/canokey.c | 6 +-
111
target/arm/cpu.c | 6 +-
112
target/arm/helper.c | 2 +-
113
target/arm/tcg/helper-a64.c | 9 ---
114
target/arm/tcg/sme_helper.c | 6 +-
115
target/arm/tcg/sve_helper.c | 6 +-
116
target/arm/tcg/translate-a64.c | 103 ++++++++++++++-------------
117
target/arm/tcg/translate-sme.c | 4 +-
118
target/arm/tcg/translate-sve.c | 130 +++++++++++++++++-----------------
119
target/arm/tcg/translate-vfp.c | 78 ++++++++++----------
120
target/arm/tcg/vec_helper.c | 22 +++---
121
target/arm/vfp_helper.c | 73 +++++++++++--------
122
target/i386/tcg/fpu_helper.c | 8 +--
123
target/m68k/fpu_helper.c | 2 +-
124
target/mips/tcg/msa_helper.c | 4 +-
125
target/rx/op_helper.c | 4 +-
126
target/tricore/fpu_helper.c | 6 +-
127
fpu/softfloat-parts.c.inc | 4 +-
128
hw/arm/Kconfig | 2 +
129
tests/functional/meson.build | 1 +
130
tests/functional/test_arm_microbit.py | 31 ++++++++
131
49 files changed, 452 insertions(+), 337 deletions(-)
132
create mode 100755 tests/functional/test_arm_microbit.py
133
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add "sve" to the sve prctl functions, to distinguish
3
The ARMv7MState object is not simply a CPU, it also
4
them from the coming "sme" prctls with similar names.
4
contains the NVIC, SysTick timer, and various MemoryRegions.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Rename the field as 'armv7m', like other Cortex-M boards.
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
10
Message-id: 20250112225614.33723-2-philmd@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
12
---
11
linux-user/aarch64/target_prctl.h | 8 ++++----
13
include/hw/arm/nrf51_soc.h | 2 +-
12
linux-user/syscall.c | 12 ++++++------
14
hw/arm/nrf51_soc.c | 18 +++++++++---------
13
2 files changed, 10 insertions(+), 10 deletions(-)
15
2 files changed, 10 insertions(+), 10 deletions(-)
14
16
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
17
diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
19
--- a/include/hw/arm/nrf51_soc.h
18
+++ b/linux-user/aarch64/target_prctl.h
20
+++ b/include/hw/arm/nrf51_soc.h
19
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ struct NRF51State {
20
#ifndef AARCH64_TARGET_PRCTL_H
22
SysBusDevice parent_obj;
21
#define AARCH64_TARGET_PRCTL_H
23
22
24
/*< public >*/
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
25
- ARMv7MState cpu;
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
26
+ ARMv7MState armv7m;
25
{
27
26
ARMCPU *cpu = env_archcpu(env);
28
NRF51UARTState uart;
27
if (cpu_isar_feature(aa64_sve, cpu)) {
29
NRF51RNGState rng;
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
30
diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/arm/nrf51_soc.c
33
+++ b/hw/arm/nrf51_soc.c
34
@@ -XXX,XX +XXX,XX @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
29
}
35
}
30
return -TARGET_EINVAL;
36
/* This clock doesn't need migration because it is fixed-frequency */
31
}
37
clock_set_hz(s->sysclk, HCLK_FRQ);
32
-#define do_prctl_get_vl do_prctl_get_vl
38
- qdev_connect_clock_in(DEVICE(&s->cpu), "cpuclk", s->sysclk);
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
39
+ qdev_connect_clock_in(DEVICE(&s->armv7m), "cpuclk", s->sysclk);
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
40
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
41
* This SoC has no systick device, so don't connect refclk.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
42
* TODO: model the lack of systick (currently the armv7m object
43
* will always provide one).
44
*/
45
46
- object_property_set_link(OBJECT(&s->cpu), "memory", OBJECT(&s->container),
47
+ object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(&s->container),
48
&error_abort);
49
- if (!sysbus_realize(SYS_BUS_DEVICE(&s->cpu), errp)) {
50
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) {
51
return;
41
}
52
}
42
return -TARGET_EINVAL;
53
43
}
54
@@ -XXX,XX +XXX,XX @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
44
-#define do_prctl_set_vl do_prctl_set_vl
55
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->uart), 0);
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
56
memory_region_add_subregion_overlap(&s->container, NRF51_UART_BASE, mr, 0);
46
57
sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 0,
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
58
- qdev_get_gpio_in(DEVICE(&s->cpu),
48
{
59
+ qdev_get_gpio_in(DEVICE(&s->armv7m),
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
60
BASE_TO_IRQ(NRF51_UART_BASE)));
50
index XXXXXXX..XXXXXXX 100644
61
51
--- a/linux-user/syscall.c
62
/* RNG */
52
+++ b/linux-user/syscall.c
63
@@ -XXX,XX +XXX,XX @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
64
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0);
54
#ifndef do_prctl_set_fp_mode
65
memory_region_add_subregion_overlap(&s->container, NRF51_RNG_BASE, mr, 0);
55
#define do_prctl_set_fp_mode do_prctl_inval1
66
sysbus_connect_irq(SYS_BUS_DEVICE(&s->rng), 0,
56
#endif
67
- qdev_get_gpio_in(DEVICE(&s->cpu),
57
-#ifndef do_prctl_get_vl
68
+ qdev_get_gpio_in(DEVICE(&s->armv7m),
58
-#define do_prctl_get_vl do_prctl_inval0
69
BASE_TO_IRQ(NRF51_RNG_BASE)));
59
+#ifndef do_prctl_sve_get_vl
70
60
+#define do_prctl_sve_get_vl do_prctl_inval0
71
/* UICR, FICR, NVMC, FLASH */
61
#endif
72
@@ -XXX,XX +XXX,XX @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
62
-#ifndef do_prctl_set_vl
73
63
-#define do_prctl_set_vl do_prctl_inval1
74
sysbus_mmio_map(SYS_BUS_DEVICE(&s->timer[i]), 0, base_addr);
64
+#ifndef do_prctl_sve_set_vl
75
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer[i]), 0,
65
+#define do_prctl_sve_set_vl do_prctl_inval1
76
- qdev_get_gpio_in(DEVICE(&s->cpu),
66
#endif
77
+ qdev_get_gpio_in(DEVICE(&s->armv7m),
67
#ifndef do_prctl_reset_keys
78
BASE_TO_IRQ(base_addr)));
68
#define do_prctl_reset_keys do_prctl_inval1
79
}
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
80
70
case PR_SET_FP_MODE:
81
@@ -XXX,XX +XXX,XX @@ static void nrf51_soc_init(Object *obj)
71
return do_prctl_set_fp_mode(env, arg2);
82
72
case PR_SVE_GET_VL:
83
memory_region_init(&s->container, obj, "nrf51-container", UINT64_MAX);
73
- return do_prctl_get_vl(env);
84
74
+ return do_prctl_sve_get_vl(env);
85
- object_initialize_child(OBJECT(s), "armv6m", &s->cpu, TYPE_ARMV7M);
75
case PR_SVE_SET_VL:
86
- qdev_prop_set_string(DEVICE(&s->cpu), "cpu-type",
76
- return do_prctl_set_vl(env, arg2);
87
+ object_initialize_child(OBJECT(s), "armv6m", &s->armv7m, TYPE_ARMV7M);
77
+ return do_prctl_sve_set_vl(env, arg2);
88
+ qdev_prop_set_string(DEVICE(&s->armv7m), "cpu-type",
78
case PR_PAC_RESET_KEYS:
89
ARM_CPU_TYPE_NAME("cortex-m0"));
79
if (arg3 || arg4 || arg5) {
90
- qdev_prop_set_uint32(DEVICE(&s->cpu), "num-irq", 32);
80
return -TARGET_EINVAL;
91
+ qdev_prop_set_uint32(DEVICE(&s->armv7m), "num-irq", 32);
92
93
object_initialize_child(obj, "uart", &s->uart, TYPE_NRF51_UART);
94
object_property_add_alias(obj, "serial0", OBJECT(&s->uart), "chardev");
81
--
95
--
82
2.25.1
96
2.34.1
97
98
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
These prctl set the Streaming SVE vector length, which may
3
While the TYPE_ARMV7M object forwards its NVIC interrupt lines,
4
be completely different from the Normal SVE vector length.
4
it is somewhat misleading to name it 'nvic'. Add the 'armv7m'
5
local variable for clarity, but also keep the 'nvic' variable
6
behaving like before when used for wiring IRQ lines.
5
7
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
10
Message-id: 20250112225614.33723-3-philmd@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
12
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
13
hw/arm/stellaris.c | 21 +++++++++++----------
12
linux-user/syscall.c | 16 +++++++++
14
1 file changed, 11 insertions(+), 10 deletions(-)
13
2 files changed, 70 insertions(+)
14
15
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
--- a/hw/arm/stellaris.c
18
+++ b/linux-user/aarch64/target_prctl.h
19
+++ b/hw/arm/stellaris.c
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
20
{
21
*/
21
ARMCPU *cpu = env_archcpu(env);
22
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
Object *soc_container;
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
- DeviceState *gpio_dev[7], *nvic;
24
return sve_vq(env) * 16;
25
+ DeviceState *gpio_dev[7], *armv7m, *nvic;
25
}
26
qemu_irq gpio_in[7][8];
26
return -TARGET_EINVAL;
27
qemu_irq gpio_out[7][8];
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
qemu_irq adc;
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
29
uint32_t vq, old_vq;
30
qdev_prop_set_uint32(ssys_dev, "dc4", board->dc4);
30
31
sysbus_realize_and_unref(SYS_BUS_DEVICE(ssys_dev), &error_fatal);
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
32
old_vq = sve_vq(env);
33
- nvic = qdev_new(TYPE_ARMV7M);
33
34
- object_property_add_child(soc_container, "v7m", OBJECT(nvic));
34
/*
35
- qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES);
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
- qdev_prop_set_uint8(nvic, "num-prio-bits", NUM_PRIO_BITS);
36
}
37
- qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type);
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
- qdev_prop_set_bit(nvic, "enable-bitband", true);
38
39
- qdev_connect_clock_in(nvic, "cpuclk",
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+ armv7m = qdev_new(TYPE_ARMV7M);
40
+{
41
+ object_property_add_child(soc_container, "v7m", OBJECT(armv7m));
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ qdev_prop_set_uint32(armv7m, "num-irq", NUM_IRQ_LINES);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ qdev_prop_set_uint8(armv7m, "num-prio-bits", NUM_PRIO_BITS);
43
+ return sme_vq(env) * 16;
44
+ qdev_prop_set_string(armv7m, "cpu-type", ms->cpu_type);
44
+ }
45
+ qdev_prop_set_bit(armv7m, "enable-bitband", true);
45
+ return -TARGET_EINVAL;
46
+ qdev_connect_clock_in(armv7m, "cpuclk",
46
+}
47
qdev_get_clock_out(ssys_dev, "SYSCLK"));
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
/* This SoC does not connect the systick reference clock */
48
+
49
- object_property_set_link(OBJECT(nvic), "memory",
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+ object_property_set_link(OBJECT(armv7m), "memory",
50
+{
51
OBJECT(get_system_memory()), &error_abort);
51
+ /*
52
/* This will exit with an error if the user passed us a bad cpu_type */
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
- sysbus_realize_and_unref(SYS_BUS_DEVICE(nvic), &error_fatal);
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(armv7m), &error_fatal);
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ nvic = armv7m;
55
+ */
56
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
/* Now we can wire up the IRQ and MMIO of the system registers */
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
sysbus_mmio_map(SYS_BUS_DEVICE(ssys_dev), 0, 0x400fe000);
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
107
+#endif
108
109
#include "target_prctl.h"
110
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
112
#ifndef do_prctl_set_unalign
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
135
--
59
--
136
2.25.1
60
2.34.1
61
62
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
These SME instructions are nominally within the SVE decode space,
3
When instantiating the machine model, the machine_init()
4
so we add them to sve.decode and translate-sve.c.
4
implementations usually create the CPUs, so have access
5
to its first CPU. Use that rather than the &first_cpu
6
global.
5
7
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
10
Reviewed-by: Samuel Tardieu <sam@rfc1149.net>
11
Message-id: 20250112225614.33723-4-philmd@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
13
---
11
target/arm/translate-a64.h | 12 ++++++++++++
14
hw/arm/b-l475e-iot01a.c | 2 +-
12
target/arm/sve.decode | 5 ++++-
15
hw/arm/microbit.c | 2 +-
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
16
hw/arm/mps2-tz.c | 2 +-
14
3 files changed, 54 insertions(+), 1 deletion(-)
17
hw/arm/mps2.c | 2 +-
18
hw/arm/msf2-som.c | 2 +-
19
hw/arm/musca.c | 2 +-
20
hw/arm/netduino2.c | 2 +-
21
hw/arm/netduinoplus2.c | 2 +-
22
hw/arm/olimex-stm32-h405.c | 2 +-
23
hw/arm/stellaris.c | 2 +-
24
hw/arm/stm32vldiscovery.c | 2 +-
25
11 files changed, 11 insertions(+), 11 deletions(-)
15
26
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
27
diff --git a/hw/arm/b-l475e-iot01a.c b/hw/arm/b-l475e-iot01a.c
17
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a64.h
29
--- a/hw/arm/b-l475e-iot01a.c
19
+++ b/target/arm/translate-a64.h
30
+++ b/hw/arm/b-l475e-iot01a.c
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
31
@@ -XXX,XX +XXX,XX @@ static void bl475e_init(MachineState *machine)
21
return s->vl;
32
sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_fatal);
33
34
sc = STM32L4X5_SOC_GET_CLASS(&s->soc);
35
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0,
36
+ armv7m_load_kernel(s->soc.armv7m.cpu, machine->kernel_filename, 0,
37
sc->flash_size);
38
39
if (object_class_by_name(TYPE_DM163)) {
40
diff --git a/hw/arm/microbit.c b/hw/arm/microbit.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/hw/arm/microbit.c
43
+++ b/hw/arm/microbit.c
44
@@ -XXX,XX +XXX,XX @@ static void microbit_init(MachineState *machine)
45
memory_region_add_subregion_overlap(&s->nrf51.container, NRF51_TWI_BASE,
46
mr, -1);
47
48
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
49
+ armv7m_load_kernel(s->nrf51.armv7m.cpu, machine->kernel_filename,
50
0, s->nrf51.flash_size);
22
}
51
}
23
52
24
+/* Return the byte size of the vector register, SVL / 8. */
53
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
25
+static inline int streaming_vec_reg_size(DisasContext *s)
54
index XXXXXXX..XXXXXXX 100644
26
+{
55
--- a/hw/arm/mps2-tz.c
27
+ return s->svl;
56
+++ b/hw/arm/mps2-tz.c
28
+}
57
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
29
+
58
mms->remap_irq);
30
/*
59
}
31
* Return the offset info CPUARMState of the predicate vector register Pn.
60
32
* Note for this purpose, FFR is P16.
61
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
62
+ armv7m_load_kernel(mms->iotkit.armv7m[0].cpu, machine->kernel_filename,
34
return s->vl >> 3;
63
0, boot_ram_size(mms));
35
}
64
}
36
65
37
+/* Return the byte size of the predicate register, SVL / 64. */
66
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
38
+static inline int streaming_pred_reg_size(DisasContext *s)
39
+{
40
+ return s->svl >> 3;
41
+}
42
+
43
/*
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
68
--- a/hw/arm/mps2.c
49
+++ b/target/arm/sve.decode
69
+++ b/hw/arm/mps2.c
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
70
@@ -XXX,XX +XXX,XX @@ static void mps2_common_init(MachineState *machine)
51
# SVE index generation (register start, register increment)
71
qdev_get_gpio_in(armv7m,
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
72
mmc->fpga_type == FPGA_AN511 ? 47 : 13));
53
73
54
-### SVE Stack Allocation Group
74
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
55
+### SVE / Streaming SVE Stack Allocation Group
75
+ armv7m_load_kernel(mms->armv7m.cpu, machine->kernel_filename,
56
76
0, 0x400000);
57
# SVE stack frame adjustment
77
}
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
78
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
79
diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
80
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
81
--- a/hw/arm/msf2-som.c
72
+++ b/target/arm/translate-sve.c
82
+++ b/hw/arm/msf2-som.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
83
@@ -XXX,XX +XXX,XX @@ static void emcraft_sf2_s2s010_init(MachineState *machine)
74
return true;
84
cs_line = qdev_get_gpio_in_named(spi_flash, SSI_GPIO_CS, 0);
85
sysbus_connect_irq(SYS_BUS_DEVICE(&soc->spi[0]), 1, cs_line);
86
87
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
88
+ armv7m_load_kernel(soc->armv7m.cpu, machine->kernel_filename,
89
0, soc->envm_size);
75
}
90
}
76
91
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
92
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
78
+{
93
index XXXXXXX..XXXXXXX 100644
79
+ if (!dc_isar_feature(aa64_sme, s)) {
94
--- a/hw/arm/musca.c
80
+ return false;
95
+++ b/hw/arm/musca.c
81
+ }
96
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
82
+ if (sme_enabled_check(s)) {
97
"cfg_sec_resp", 0));
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
98
}
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
99
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
100
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
86
+ }
101
+ armv7m_load_kernel(mms->sse.armv7m[0].cpu, machine->kernel_filename,
87
+ return true;
102
0, 0x2000000);
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
91
{
92
if (!dc_isar_feature(aa64_sve, s)) {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
94
return true;
95
}
103
}
96
104
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
105
diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c
98
+{
106
index XXXXXXX..XXXXXXX 100644
99
+ if (!dc_isar_feature(aa64_sme, s)) {
107
--- a/hw/arm/netduino2.c
100
+ return false;
108
+++ b/hw/arm/netduino2.c
101
+ }
109
@@ -XXX,XX +XXX,XX @@ static void netduino2_init(MachineState *machine)
102
+ if (sme_enabled_check(s)) {
110
qdev_connect_clock_in(dev, "sysclk", sysclk);
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
111
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
112
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
113
- armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
106
+ }
114
+ armv7m_load_kernel(STM32F205_SOC(dev)->armv7m.cpu, machine->kernel_filename,
107
+ return true;
115
0, FLASH_SIZE);
108
+}
109
+
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
}
116
117
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
diff --git a/hw/arm/netduinoplus2.c b/hw/arm/netduinoplus2.c
118
+{
119
index XXXXXXX..XXXXXXX 100644
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
--- a/hw/arm/netduinoplus2.c
120
+ return false;
121
+++ b/hw/arm/netduinoplus2.c
121
+ }
122
@@ -XXX,XX +XXX,XX @@ static void netduinoplus2_init(MachineState *machine)
122
+ if (sme_enabled_check(s)) {
123
qdev_connect_clock_in(dev, "sysclk", sysclk);
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
125
+ }
126
- armv7m_load_kernel(ARM_CPU(first_cpu),
126
+ return true;
127
+ armv7m_load_kernel(STM32F405_SOC(dev)->armv7m.cpu,
127
+}
128
machine->kernel_filename,
128
+
129
0, FLASH_SIZE);
129
/*
130
}
130
*** SVE Compute Vector Address Group
131
diff --git a/hw/arm/olimex-stm32-h405.c b/hw/arm/olimex-stm32-h405.c
131
*/
132
index XXXXXXX..XXXXXXX 100644
133
--- a/hw/arm/olimex-stm32-h405.c
134
+++ b/hw/arm/olimex-stm32-h405.c
135
@@ -XXX,XX +XXX,XX @@ static void olimex_stm32_h405_init(MachineState *machine)
136
qdev_connect_clock_in(dev, "sysclk", sysclk);
137
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
138
139
- armv7m_load_kernel(ARM_CPU(first_cpu),
140
+ armv7m_load_kernel(STM32F405_SOC(dev)->armv7m.cpu,
141
machine->kernel_filename,
142
0, FLASH_SIZE);
143
}
144
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/hw/arm/stellaris.c
147
+++ b/hw/arm/stellaris.c
148
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
149
create_unimplemented_device("hibernation", 0x400fc000, 0x1000);
150
create_unimplemented_device("flash-control", 0x400fd000, 0x1000);
151
152
- armv7m_load_kernel(ARM_CPU(first_cpu), ms->kernel_filename, 0, flash_size);
153
+ armv7m_load_kernel(ARMV7M(armv7m)->cpu, ms->kernel_filename, 0, flash_size);
154
}
155
156
/* FIXME: Figure out how to generate these from stellaris_boards. */
157
diff --git a/hw/arm/stm32vldiscovery.c b/hw/arm/stm32vldiscovery.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/hw/arm/stm32vldiscovery.c
160
+++ b/hw/arm/stm32vldiscovery.c
161
@@ -XXX,XX +XXX,XX @@ static void stm32vldiscovery_init(MachineState *machine)
162
qdev_connect_clock_in(dev, "sysclk", sysclk);
163
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
164
165
- armv7m_load_kernel(ARM_CPU(first_cpu),
166
+ armv7m_load_kernel(STM32F100_SOC(dev)->armv7m.cpu,
167
machine->kernel_filename,
168
0, FLASH_SIZE);
169
}
132
--
170
--
133
2.25.1
171
2.34.1
172
173
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
The value of the UFCR register is respected when echoing characters to the
4
terminal, but its reset value is reserved. Fix the reset value to the one
5
documented in the datasheet.
6
7
While at it move the related attribute out of the section of unimplemented
8
registers since its value is actually respected.
9
10
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
13
---
8
linux-user/aarch64/signal.c | 3 +++
14
include/hw/char/imx_serial.h | 2 +-
9
1 file changed, 3 insertions(+)
15
hw/char/imx_serial.c | 1 +
16
2 files changed, 2 insertions(+), 1 deletion(-)
10
17
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
18
diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h
12
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/signal.c
20
--- a/include/hw/char/imx_serial.h
14
+++ b/linux-user/aarch64/signal.c
21
+++ b/include/hw/char/imx_serial.h
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
22
@@ -XXX,XX +XXX,XX @@ struct IMXSerialState {
16
__get_user(extra_size,
23
uint32_t ucr1;
17
&((struct target_extra_context *)ctx)->size);
24
uint32_t ucr2;
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
25
uint32_t uts1;
19
+ if (!extra) {
26
+ uint32_t ufcr;
20
+ return 1;
27
21
+ }
28
/*
22
break;
29
* The registers below are implemented just so that the
23
30
* guest OS sees what it has written
24
default:
31
*/
32
uint32_t onems;
33
- uint32_t ufcr;
34
uint32_t ubmr;
35
uint32_t ubrc;
36
uint32_t ucr3;
37
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/char/imx_serial.c
40
+++ b/hw/char/imx_serial.c
41
@@ -XXX,XX +XXX,XX @@ static void imx_serial_reset(IMXSerialState *s)
42
s->ucr3 = 0x700;
43
s->ubmr = 0;
44
s->ubrc = 4;
45
+ s->ufcr = BIT(11) | BIT(0);
46
47
fifo32_reset(&s->rx_fifo);
48
timer_del(&s->ageing_timer);
25
--
49
--
26
2.25.1
50
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
Fold the return value setting into the goto, so each
3
Fixes characters to be "echoed" after each keystroke rather than after every
4
point of failure need not do both.
4
other since imx_serial_rx_fifo_ageing_timer_restart() would see ~UTS1_RXEMPTY
5
only after every other keystroke.
5
6
7
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
11
hw/char/imx_serial.c | 6 +++---
12
1 file changed, 11 insertions(+), 15 deletions(-)
12
1 file changed, 3 insertions(+), 3 deletions(-)
13
13
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/aarch64/signal.c
16
--- a/hw/char/imx_serial.c
17
+++ b/linux-user/aarch64/signal.c
17
+++ b/hw/char/imx_serial.c
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
18
@@ -XXX,XX +XXX,XX @@ static void imx_put_data(void *opaque, uint32_t value)
19
struct target_sve_context *sve = NULL;
19
if (fifo32_num_used(&s->rx_fifo) >= rxtl) {
20
uint64_t extra_datap = 0;
20
s->usr1 |= USR1_RRDY;
21
bool used_extra = false;
22
- bool err = false;
23
int vq = 0, sve_size = 0;
24
25
target_restore_general_frame(env, sf);
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
27
switch (magic) {
28
case 0:
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
21
}
84
-
22
-
85
- exit:
23
- imx_serial_rx_fifo_ageing_timer_restart(s);
86
unlock_user(extra, extra_datap, 0);
24
-
87
- return err;
25
s->usr2 |= USR2_RDR;
88
+ return 0;
26
s->uts1 &= ~UTS1_RXEMPTY;
27
if (value & URXD_BRK) {
28
s->usr2 |= USR2_BRCD;
29
}
89
+
30
+
90
+ err:
31
+ imx_serial_rx_fifo_ageing_timer_restart(s);
91
+ unlock_user(extra, extra_datap, 0);
32
+
92
+ return 1;
33
imx_update(s);
93
}
34
}
94
35
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
96
--
36
--
97
2.25.1
37
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
3
Fixes INTD and MSI interrupts poking the same IRQ line without keeping track of
4
We will reuse this for SME save and restore array insns.
4
each other's IRQ level. Furthermore, SoCs such as the i.MX 8M Plus don't share
5
the MSI IRQ with the INTx lines, so expose it as a dedicated pin.
5
6
7
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/translate-a64.h | 3 +++
11
include/hw/arm/fsl-imx6.h | 4 +++-
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
12
include/hw/arm/fsl-imx7.h | 4 +++-
13
2 files changed, 39 insertions(+), 12 deletions(-)
13
include/hw/pci-host/designware.h | 1 +
14
hw/arm/fsl-imx6.c | 13 ++++++++++++-
15
hw/arm/fsl-imx7.c | 13 ++++++++++++-
16
hw/pci-host/designware.c | 7 +++----
17
hw/arm/Kconfig | 2 ++
18
7 files changed, 36 insertions(+), 8 deletions(-)
14
19
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
20
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
16
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.h
22
--- a/include/hw/arm/fsl-imx6.h
18
+++ b/target/arm/translate-a64.h
23
+++ b/include/hw/arm/fsl-imx6.h
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
@@ -XXX,XX +XXX,XX @@
20
uint32_t rm_ofs, int64_t shift,
25
#include "hw/usb/chipidea.h"
21
uint32_t opr_sz, uint32_t max_sz);
26
#include "hw/usb/imx-usb-phy.h"
22
27
#include "hw/pci-host/designware.h"
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
28
+#include "hw/or-irq.h"
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
29
#include "exec/memory.h"
25
+
30
#include "cpu.h"
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
31
#include "qom/object.h"
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ struct FslIMX6State {
28
index XXXXXXX..XXXXXXX 100644
33
ChipideaState usb[FSL_IMX6_NUM_USBS];
29
--- a/target/arm/translate-sve.c
34
IMXFECState eth;
30
+++ b/target/arm/translate-sve.c
35
DesignwarePCIEHost pcie;
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
36
+ OrIRQState pcie4_msi_irq;
32
* The load should begin at the address Rn + IMM.
37
MemoryRegion rom;
33
*/
38
MemoryRegion caam;
34
39
MemoryRegion ocram;
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
40
@@ -XXX,XX +XXX,XX @@ struct FslIMX6State {
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
41
#define FSL_IMX6_PCIE1_IRQ 120
37
+ int len, int rn, int imm)
42
#define FSL_IMX6_PCIE2_IRQ 121
43
#define FSL_IMX6_PCIE3_IRQ 122
44
-#define FSL_IMX6_PCIE4_IRQ 123
45
+#define FSL_IMX6_PCIE4_MSI_IRQ 123
46
#define FSL_IMX6_DCIC1_IRQ 124
47
#define FSL_IMX6_DCIC2_IRQ 125
48
#define FSL_IMX6_MLB150_HIGH_IRQ 126
49
diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/include/hw/arm/fsl-imx7.h
52
+++ b/include/hw/arm/fsl-imx7.h
53
@@ -XXX,XX +XXX,XX @@
54
#include "hw/net/imx_fec.h"
55
#include "hw/pci-host/designware.h"
56
#include "hw/usb/chipidea.h"
57
+#include "hw/or-irq.h"
58
#include "cpu.h"
59
#include "qom/object.h"
60
#include "qemu/units.h"
61
@@ -XXX,XX +XXX,XX @@ struct FslIMX7State {
62
IMX7GPRState gpr;
63
ChipideaState usb[FSL_IMX7_NUM_USBS];
64
DesignwarePCIEHost pcie;
65
+ OrIRQState pcie4_msi_irq;
66
MemoryRegion rom;
67
MemoryRegion caam;
68
MemoryRegion ocram;
69
@@ -XXX,XX +XXX,XX @@ enum FslIMX7IRQs {
70
FSL_IMX7_PCI_INTA_IRQ = 125,
71
FSL_IMX7_PCI_INTB_IRQ = 124,
72
FSL_IMX7_PCI_INTC_IRQ = 123,
73
- FSL_IMX7_PCI_INTD_IRQ = 122,
74
+ FSL_IMX7_PCI_INTD_MSI_IRQ = 122,
75
76
FSL_IMX7_UART7_IRQ = 126,
77
78
diff --git a/include/hw/pci-host/designware.h b/include/hw/pci-host/designware.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/include/hw/pci-host/designware.h
81
+++ b/include/hw/pci-host/designware.h
82
@@ -XXX,XX +XXX,XX @@ struct DesignwarePCIEHost {
83
MemoryRegion io;
84
85
qemu_irq irqs[4];
86
+ qemu_irq msi;
87
} pci;
88
89
MemoryRegion mmio;
90
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/arm/fsl-imx6.c
93
+++ b/hw/arm/fsl-imx6.c
94
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_init(Object *obj)
95
object_initialize_child(obj, "eth", &s->eth, TYPE_IMX_ENET);
96
97
object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
98
+ object_initialize_child(obj, "pcie4-msi-irq", &s->pcie4_msi_irq,
99
+ TYPE_OR_IRQ);
100
}
101
102
static void fsl_imx6_realize(DeviceState *dev, Error **errp)
103
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
104
sysbus_realize(SYS_BUS_DEVICE(&s->pcie), &error_abort);
105
sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0, FSL_IMX6_PCIe_REG_ADDR);
106
107
+ object_property_set_int(OBJECT(&s->pcie4_msi_irq), "num-lines", 2,
108
+ &error_abort);
109
+ qdev_realize(DEVICE(&s->pcie4_msi_irq), NULL, &error_abort);
110
+
111
+ irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE4_MSI_IRQ);
112
+ qdev_connect_gpio_out(DEVICE(&s->pcie4_msi_irq), 0, irq);
113
+
114
irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE1_IRQ);
115
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0, irq);
116
irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE2_IRQ);
117
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1, irq);
118
irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE3_IRQ);
119
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2, irq);
120
- irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE4_IRQ);
121
+ irq = qdev_get_gpio_in(DEVICE(&s->pcie4_msi_irq), 0);
122
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3, irq);
123
+ irq = qdev_get_gpio_in(DEVICE(&s->pcie4_msi_irq), 1);
124
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 4, irq);
125
126
/*
127
* PCIe PHY
128
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/hw/arm/fsl-imx7.c
131
+++ b/hw/arm/fsl-imx7.c
132
@@ -XXX,XX +XXX,XX @@ static void fsl_imx7_init(Object *obj)
133
* PCIE
134
*/
135
object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
136
+ object_initialize_child(obj, "pcie4-msi-irq", &s->pcie4_msi_irq,
137
+ TYPE_OR_IRQ);
138
139
/*
140
* USBs
141
@@ -XXX,XX +XXX,XX @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp)
142
sysbus_realize(SYS_BUS_DEVICE(&s->pcie), &error_abort);
143
sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0, FSL_IMX7_PCIE_REG_ADDR);
144
145
+ object_property_set_int(OBJECT(&s->pcie4_msi_irq), "num-lines", 2,
146
+ &error_abort);
147
+ qdev_realize(DEVICE(&s->pcie4_msi_irq), NULL, &error_abort);
148
+
149
+ irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), FSL_IMX7_PCI_INTD_MSI_IRQ);
150
+ qdev_connect_gpio_out(DEVICE(&s->pcie4_msi_irq), 0, irq);
151
+
152
irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), FSL_IMX7_PCI_INTA_IRQ);
153
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0, irq);
154
irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), FSL_IMX7_PCI_INTB_IRQ);
155
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1, irq);
156
irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), FSL_IMX7_PCI_INTC_IRQ);
157
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2, irq);
158
- irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), FSL_IMX7_PCI_INTD_IRQ);
159
+ irq = qdev_get_gpio_in(DEVICE(&s->pcie4_msi_irq), 0);
160
sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3, irq);
161
+ irq = qdev_get_gpio_in(DEVICE(&s->pcie4_msi_irq), 1);
162
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 4, irq);
163
164
/*
165
* USBs
166
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
167
index XXXXXXX..XXXXXXX 100644
168
--- a/hw/pci-host/designware.c
169
+++ b/hw/pci-host/designware.c
170
@@ -XXX,XX +XXX,XX @@
171
#define DESIGNWARE_PCIE_ATU_DEVFN(x) (((x) >> 16) & 0xff)
172
#define DESIGNWARE_PCIE_ATU_UPPER_TARGET 0x91C
173
174
-#define DESIGNWARE_PCIE_IRQ_MSI 3
175
-
176
static DesignwarePCIEHost *
177
designware_pcie_root_to_host(DesignwarePCIERoot *root)
38
{
178
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
179
@@ -XXX,XX +XXX,XX @@ static void designware_pcie_root_msi_write(void *opaque, hwaddr addr,
40
int len_remain = len % 8;
180
root->msi.intr[0].status |= BIT(val) & root->msi.intr[0].enable;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
181
42
t0 = tcg_temp_new_i64();
182
if (root->msi.intr[0].status & ~root->msi.intr[0].mask) {
43
for (i = 0; i < len_align; i += 8) {
183
- qemu_set_irq(host->pci.irqs[DESIGNWARE_PCIE_IRQ_MSI], 1);
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
184
+ qemu_set_irq(host->pci.msi, 1);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
81
}
82
83
/*
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
85
default:
86
g_assert_not_reached();
87
}
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
91
}
185
}
92
}
186
}
93
187
94
/* Similarly for stores. */
188
@@ -XXX,XX +XXX,XX @@ static void designware_pcie_root_config_write(PCIDevice *d, uint32_t address,
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
189
case DESIGNWARE_PCIE_MSI_INTR0_STATUS:
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
190
root->msi.intr[0].status ^= val;
97
+ int len, int rn, int imm)
191
if (!root->msi.intr[0].status) {
98
{
192
- qemu_set_irq(host->pci.irqs[DESIGNWARE_PCIE_IRQ_MSI], 0);
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
193
+ qemu_set_irq(host->pci.msi, 0);
100
int len_remain = len % 8;
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
102
103
t0 = tcg_temp_new_i64();
104
for (i = 0; i < len_align; i += 8) {
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
109
}
194
}
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
195
break;
111
clean_addr = new_tmp_a64_local(s);
196
112
tcg_gen_mov_i64(clean_addr, t0);
197
@@ -XXX,XX +XXX,XX @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
113
198
for (i = 0; i < ARRAY_SIZE(s->pci.irqs); i++) {
114
+ if (base != cpu_env) {
199
sysbus_init_irq(sbd, &s->pci.irqs[i]);
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
118
+ }
119
+
120
gen_set_label(loop);
121
122
t0 = tcg_temp_new_i64();
123
tp = tcg_temp_new_ptr();
124
- tcg_gen_add_ptr(tp, cpu_env, i);
125
+ tcg_gen_add_ptr(tp, base, i);
126
tcg_gen_ld_i64(t0, tp, vofs);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
138
}
200
}
139
201
+ sysbus_init_irq(sbd, &s->pci.msi);
140
/* Predicate register stores can be any multiple of 2. */
202
141
if (len_remain) {
203
memory_region_init_io(&s->mmio,
142
t0 = tcg_temp_new_i64();
204
OBJECT(s),
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
205
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
206
index XXXXXXX..XXXXXXX 100644
145
207
--- a/hw/arm/Kconfig
146
switch (len_remain) {
208
+++ b/hw/arm/Kconfig
147
case 2:
209
@@ -XXX,XX +XXX,XX @@ config FSL_IMX6
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
210
select PL310 # cache controller
149
if (sve_access_check(s)) {
211
select PCI_EXPRESS_DESIGNWARE
150
int size = vec_full_reg_size(s);
212
select SDHCI
151
int off = vec_full_reg_offset(s, a->rd);
213
+ select OR_IRQ
152
- do_ldr(s, off, size, a->rn, a->imm * size);
214
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
215
config ASPEED_SOC
154
}
216
bool
155
return true;
217
@@ -XXX,XX +XXX,XX @@ config FSL_IMX7
156
}
218
select WDT_IMX2
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
219
select PCI_EXPRESS_DESIGNWARE
158
if (sve_access_check(s)) {
220
select SDHCI
159
int size = pred_full_reg_size(s);
221
+ select OR_IRQ
160
int off = pred_full_reg_offset(s, a->rd);
222
select UNIMP
161
- do_ldr(s, off, size, a->rn, a->imm * size);
223
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
224
config ARM_SMMUV3
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
184
--
225
--
185
2.25.1
226
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
3
Board schematic is useful to corroborate GPIOs/IRQs wiring.
4
because those functions accept only a Zreg register number.
5
For SME, we want to pass a pointer into ZA storage.
6
4
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250110160204.74997-2-philmd@linaro.org
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
8
[PMM: Use https:// URLs]
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/helper-sme.h | 82 +++++
11
hw/arm/stellaris.c | 8 ++++++++
13
target/arm/sme.decode | 9 +
12
1 file changed, 8 insertions(+)
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
17
13
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
14
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper-sme.h
16
--- a/hw/arm/stellaris.c
21
+++ b/target/arm/helper-sme.h
17
+++ b/hw/arm/stellaris.c
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
@@ -XXX,XX +XXX,XX @@ static void lm3s6965evb_init(MachineState *machine)
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
stellaris_init(machine, &stellaris_boards[1]);
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
129
@@ -XXX,XX +XXX,XX @@
130
131
#include "qemu/osdep.h"
132
#include "cpu.h"
133
+#include "internals.h"
134
#include "tcg/tcg-gvec-desc.h"
135
#include "exec/helper-proto.h"
136
+#include "exec/cpu_ldst.h"
137
+#include "exec/exec-all.h"
138
#include "qemu/int128.h"
139
#include "vec_internal.h"
140
+#include "sve_ldst_internal.h"
141
142
/* ResetSVEState */
143
void arm_reset_sve_state(CPUARMState *env)
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
145
}
20
}
146
21
147
#undef DO_MOVA_Z
148
+
149
+/*
22
+/*
150
+ * Clear elements in a tile slice comprising len bytes.
23
+ * Stellaris LM3S811 Evaluation Board Schematics:
24
+ * https://www.ti.com/lit/ug/symlink/spmu030.pdf
151
+ */
25
+ */
152
+
26
static void lm3s811evb_class_init(ObjectClass *oc, void *data)
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
27
{
154
+
28
MachineClass *mc = MACHINE_CLASS(oc);
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
29
@@ -XXX,XX +XXX,XX @@ static const TypeInfo lm3s811evb_type = {
156
+{
30
.class_init = lm3s811evb_class_init,
157
+ memset(ptr + off, 0, len);
31
};
158
+}
32
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
33
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
34
+ * Stellaris: LM3S6965 Evaluation Board Schematics:
35
+ * https://www.ti.com/lit/ug/symlink/spmu029.pdf
197
+ */
36
+ */
198
+
37
static void lm3s6965evb_class_init(ObjectClass *oc, void *data)
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
38
{
200
+
39
MachineClass *mc = MACHINE_CLASS(oc);
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); /* 64-bit half that goes at the lower address */ \
322
+ HOST(host + 8, ptr[!BE]); /* other half, 8 bytes on (as addr + 8 in _tlb) */ \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
817
--
40
--
818
2.25.1
41
2.34.1
42
43
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20250110160204.74997-3-philmd@linaro.org
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
7
---
8
linux-user/aarch64/target_cpu.h | 5 ++++-
8
hw/arm/stellaris.c | 6 +++---
9
1 file changed, 4 insertions(+), 1 deletion(-)
9
1 file changed, 3 insertions(+), 3 deletions(-)
10
10
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
11
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/target_cpu.h
13
--- a/hw/arm/stellaris.c
14
+++ b/linux-user/aarch64/target_cpu.h
14
+++ b/hw/arm/stellaris.c
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
15
@@ -XXX,XX +XXX,XX @@ static void ssys_update(ssys_state *s)
16
16
qemu_set_irq(s->irq, (s->int_status & s->int_mask) != 0);
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
18
{
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
20
+ /*
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
* different from AArch32 Linux, which uses TPIDRRO.
23
*/
24
env->cp15.tpidr_el[0] = newtls;
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
26
+ env->cp15.tpidr2_el0 = 0;
27
}
17
}
28
18
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
19
-static uint32_t pllcfg_sandstorm[16] = {
20
+static const uint32_t pllcfg_sandstorm[16] = {
21
0x31c0, /* 1 Mhz */
22
0x1ae0, /* 1.8432 Mhz */
23
0x18c0, /* 2 Mhz */
24
@@ -XXX,XX +XXX,XX @@ static uint32_t pllcfg_sandstorm[16] = {
25
0x585b /* 8.192 Mhz */
26
};
27
28
-static uint32_t pllcfg_fury[16] = {
29
+static const uint32_t pllcfg_fury[16] = {
30
0x3200, /* 1 Mhz */
31
0x1b20, /* 1.8432 Mhz */
32
0x1900, /* 2 Mhz */
33
@@ -XXX,XX +XXX,XX @@ static void stellaris_adc_init(Object *obj)
34
}
35
36
/* Board init. */
37
-static stellaris_board_info stellaris_boards[] = {
38
+static const stellaris_board_info stellaris_boards[] = {
39
{ "LM3S811EVB",
40
0,
41
0x0032000e,
30
--
42
--
31
2.25.1
43
2.34.1
44
45
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
3
There is nothing mapped at 0x40002000.
4
or records that are smaller than the header. We were silently
5
allowing these cases to pass, dropping the record.
6
4
5
I2C#0 is already mapped at 0x40021000.
6
7
Remove the invalid mapping added in commits aecfbbc97a2 & 394c8bbfb7a.
8
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20250110160204.74997-4-philmd@linaro.org
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
13
---
12
linux-user/aarch64/signal.c | 5 ++++-
14
hw/arm/stellaris.c | 2 --
13
1 file changed, 4 insertions(+), 1 deletion(-)
15
1 file changed, 2 deletions(-)
14
16
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
17
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/signal.c
19
--- a/hw/arm/stellaris.c
18
+++ b/linux-user/aarch64/signal.c
20
+++ b/hw/arm/stellaris.c
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
21
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
20
break;
22
* http://www.ti.com/lit/ds/symlink/lm3s6965.pdf
21
23
*
22
case TARGET_SVE_MAGIC:
24
* 40000000 wdtimer
23
+ if (sve || size < sizeof(struct target_sve_context)) {
25
- * 40002000 i2c (unimplemented)
24
+ goto err;
26
* 40004000 GPIO
25
+ }
27
* 40005000 GPIO
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
28
* 40006000 GPIO
27
vq = sve_vq(env);
29
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
30
/* Add dummy regions for the devices we don't implement yet,
29
- if (!sve && size == sve_size) {
31
* so guest accesses don't cause unlogged crashes.
30
+ if (size == sve_size) {
32
*/
31
sve = (struct target_sve_context *)ctx;
33
- create_unimplemented_device("i2c-0", 0x40002000, 0x1000);
32
break;
34
create_unimplemented_device("i2c-2", 0x40021000, 0x1000);
33
}
35
create_unimplemented_device("PWM", 0x40028000, 0x1000);
36
create_unimplemented_device("QEI-0", 0x4002c000, 0x1000);
34
--
37
--
35
2.25.1
38
2.34.1
39
40
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Note that SME remains effectively disabled for user-only,
3
Add definitions for the number of controllers.
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
6
4
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250110160204.74997-5-philmd@linaro.org
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
9
---
12
docs/system/arm/emulation.rst | 4 ++++
10
hw/arm/stellaris.c | 25 +++++++++++++++----------
13
target/arm/cpu64.c | 11 +++++++++++
11
1 file changed, 15 insertions(+), 10 deletions(-)
14
2 files changed, 15 insertions(+)
15
12
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
13
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/docs/system/arm/emulation.rst
15
--- a/hw/arm/stellaris.c
19
+++ b/docs/system/arm/emulation.rst
16
+++ b/hw/arm/stellaris.c
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
17
@@ -XXX,XX +XXX,XX @@
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
18
#define NUM_IRQ_LINES 64
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
19
#define NUM_PRIO_BITS 3
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
20
24
+- FEAT_SME (Scalable Matrix Extension)
21
+#define NUM_GPIO 7
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
22
+#define NUM_UART 4
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
23
+#define NUM_GPTM 4
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
24
+#define NUM_I2C 2
28
- FEAT_SPECRES (Speculation restriction instructions)
25
+
29
- FEAT_SSBS (Speculative Store Bypass Safe)
26
typedef const struct {
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
27
const char *name;
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
28
uint32_t did0;
32
index XXXXXXX..XXXXXXX 100644
29
@@ -XXX,XX +XXX,XX @@ static const stellaris_board_info stellaris_boards[] = {
33
--- a/target/arm/cpu64.c
30
34
+++ b/target/arm/cpu64.c
31
static void stellaris_init(MachineState *ms, stellaris_board_info *board)
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
32
{
33
- static const int uart_irq[] = {5, 6, 33, 34};
34
- static const int timer_irq[] = {19, 21, 23, 35};
35
- static const uint32_t gpio_addr[7] =
36
+ static const int uart_irq[NUM_UART] = {5, 6, 33, 34};
37
+ static const int timer_irq[NUM_GPTM] = {19, 21, 23, 35};
38
+ static const uint32_t gpio_addr[NUM_GPIO] =
39
{ 0x40004000, 0x40005000, 0x40006000, 0x40007000,
40
0x40024000, 0x40025000, 0x40026000};
41
- static const int gpio_irq[7] = {0, 1, 2, 3, 4, 30, 31};
42
+ static const int gpio_irq[NUM_GPIO] = {0, 1, 2, 3, 4, 30, 31};
43
44
/* Memory map of SoC devices, from
45
* Stellaris LM3S6965 Microcontroller Data Sheet (rev I)
46
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
36
*/
47
*/
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
48
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
49
Object *soc_container;
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
50
- DeviceState *gpio_dev[7], *armv7m, *nvic;
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
51
- qemu_irq gpio_in[7][8];
41
cpu->isar.id_aa64pfr1 = t;
52
- qemu_irq gpio_out[7][8];
42
53
+ DeviceState *gpio_dev[NUM_GPIO], *armv7m, *nvic;
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
54
+ qemu_irq gpio_in[NUM_GPIO][8];
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
55
+ qemu_irq gpio_out[NUM_GPIO][8];
45
cpu->isar.id_aa64dfr0 = t;
56
qemu_irq adc;
46
57
int sram_size;
47
+ t = cpu->isar.id_aa64smfr0;
58
int flash_size;
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
59
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
60
} else {
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
61
adc = NULL;
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
62
}
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
63
- for (i = 0; i < 4; i++) {
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
64
+ for (i = 0; i < NUM_GPTM; i++) {
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
65
if (board->dc2 & (0x10000 << i)) {
55
+ cpu->isar.id_aa64smfr0 = t;
66
SysBusDevice *sbd;
56
+
67
57
/* Replicate the same data to the 32-bit id registers. */
68
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
58
aa32_max_features(cpu);
69
}
70
71
72
- for (i = 0; i < 7; i++) {
73
+ for (i = 0; i < NUM_GPIO; i++) {
74
if (board->dc4 & (1 << i)) {
75
gpio_dev[i] = sysbus_create_simple("pl061_luminary", gpio_addr[i],
76
qdev_get_gpio_in(nvic,
77
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
78
}
79
}
80
81
- for (i = 0; i < 4; i++) {
82
+ for (i = 0; i < NUM_UART; i++) {
83
if (board->dc2 & (1 << i)) {
84
SysBusDevice *sbd;
59
85
60
--
86
--
61
2.25.1
87
2.34.1
88
89
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add definitions (DCx_periph) for the DeviceCapability bits,
4
replace direct bitmask checks with the DEV_CAP() macro,
5
which uses the extract/deposit API.
6
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20250110160204.74997-6-philmd@linaro.org
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
11
---
8
target/arm/helper-sme.h | 2 ++
12
hw/arm/stellaris.c | 37 +++++++++++++++++++++++++++++--------
9
target/arm/sme.decode | 4 ++++
13
1 file changed, 29 insertions(+), 8 deletions(-)
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
12
4 files changed, 44 insertions(+)
13
14
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
--- a/hw/arm/stellaris.c
17
+++ b/target/arm/helper-sme.h
18
+++ b/hw/arm/stellaris.c
18
@@ -XXX,XX +XXX,XX @@
19
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
22
+
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@
29
#
30
# This file is processed by scripts/decodetree.py
31
#
32
+
33
+### SME Misc
34
+
35
+ZERO 11000000 00 001 00000000000 imm:8
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
43
}
44
+
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
46
+{
47
+ uint32_t i;
48
+
49
+ /*
50
+ * Special case clearing the entire ZA space.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * parts of the ZA storage outside of SVL.
53
+ */
54
+ if (imm == 0xff) {
55
+ memset(env->zarray, 0, sizeof(env->zarray));
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sme.c
72
+++ b/target/arm/translate-sme.c
73
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
74
*/
20
*/
75
21
76
#include "decode-sme.c.inc"
22
#include "qemu/osdep.h"
23
+#include "qemu/bitops.h"
24
#include "qapi/error.h"
25
#include "hw/core/split-irq.h"
26
#include "hw/sysbus.h"
27
@@ -XXX,XX +XXX,XX @@
28
#define NUM_GPTM 4
29
#define NUM_I2C 2
30
31
+/*
32
+ * See Stellaris Data Sheet chapter 5.2.5 "System Control",
33
+ * Register 13 .. 17: Device Capabilities 0 .. 4 (DC0 .. DC4).
34
+ */
35
+#define DC1_WDT 3
36
+#define DC1_HIB 6
37
+#define DC1_MPU 7
38
+#define DC1_ADC 16
39
+#define DC1_PWM 20
40
+#define DC2_UART(n) (n)
41
+#define DC2_SSI 4
42
+#define DC2_QEI(n) (8 + n)
43
+#define DC2_I2C(n) (12 + 2 * n)
44
+#define DC2_GPTM(n) (16 + n)
45
+#define DC2_COMP(n) (24 + n)
46
+#define DC4_GPIO(n) (n)
47
+#define DC4_EMAC 28
77
+
48
+
49
+#define DEV_CAP(_dc, _cap) extract32(board->dc##_dc, DC##_dc##_##_cap, 1)
78
+
50
+
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
51
typedef const struct {
80
+{
52
const char *name;
81
+ if (!dc_isar_feature(aa64_sme, s)) {
53
uint32_t did0;
82
+ return false;
54
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
83
+ }
55
sysbus_mmio_map(SYS_BUS_DEVICE(ssys_dev), 0, 0x400fe000);
84
+ if (sme_za_enabled_check(s)) {
56
sysbus_connect_irq(SYS_BUS_DEVICE(ssys_dev), 0, qdev_get_gpio_in(nvic, 28));
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
57
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
58
- if (board->dc1 & (1 << 16)) {
87
+ }
59
+ if (DEV_CAP(1, ADC)) {
88
+ return true;
60
dev = sysbus_create_varargs(TYPE_STELLARIS_ADC, 0x40038000,
89
+}
61
qdev_get_gpio_in(nvic, 14),
62
qdev_get_gpio_in(nvic, 15),
63
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
64
adc = NULL;
65
}
66
for (i = 0; i < NUM_GPTM; i++) {
67
- if (board->dc2 & (0x10000 << i)) {
68
+ if (DEV_CAP(2, GPTM(i))) {
69
SysBusDevice *sbd;
70
71
dev = qdev_new(TYPE_STELLARIS_GPTM);
72
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
73
}
74
}
75
76
- if (board->dc1 & (1 << 3)) { /* watchdog present */
77
+ if (DEV_CAP(1, WDT)) {
78
dev = qdev_new(TYPE_LUMINARY_WATCHDOG);
79
object_property_add_child(soc_container, "wdg", OBJECT(dev));
80
qdev_connect_clock_in(dev, "WDOGCLK",
81
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
82
83
84
for (i = 0; i < NUM_GPIO; i++) {
85
- if (board->dc4 & (1 << i)) {
86
+ if (DEV_CAP(4, GPIO(i))) {
87
gpio_dev[i] = sysbus_create_simple("pl061_luminary", gpio_addr[i],
88
qdev_get_gpio_in(nvic,
89
gpio_irq[i]));
90
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
91
}
92
}
93
94
- if (board->dc2 & (1 << 12)) {
95
+ if (DEV_CAP(2, I2C(0))) {
96
dev = sysbus_create_simple(TYPE_STELLARIS_I2C, 0x40020000,
97
qdev_get_gpio_in(nvic, 8));
98
i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c");
99
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
100
}
101
102
for (i = 0; i < NUM_UART; i++) {
103
- if (board->dc2 & (1 << i)) {
104
+ if (DEV_CAP(2, UART(i))) {
105
SysBusDevice *sbd;
106
107
dev = qdev_new("pl011_luminary");
108
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
109
sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(nvic, uart_irq[i]));
110
}
111
}
112
- if (board->dc2 & (1 << 4)) {
113
+ if (DEV_CAP(2, SSI)) {
114
dev = sysbus_create_simple("pl022", 0x40008000,
115
qdev_get_gpio_in(nvic, 7));
116
if (board->peripherals & BP_OLED_SSI) {
117
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
118
qemu_irq_raise(gpio_out[GPIO_D][0]);
119
}
120
}
121
- if (board->dc4 & (1 << 28)) {
122
+ if (DEV_CAP(4, EMAC)) {
123
DeviceState *enet;
124
125
enet = qdev_new("stellaris_enet");
90
--
126
--
91
2.25.1
127
2.34.1
128
129
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Make sure to zero the currently reserved fields.
3
There are 2 I2C controllers, map them both, removing
4
the unimplemented one. Keep the OLED controller on the
5
first I2C bus.
4
6
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20250110160204.74997-7-philmd@linaro.org
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
10
[PMM: tweak to appease maybe-use-uninitialized warning]
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
12
---
10
linux-user/aarch64/signal.c | 9 ++++++++-
13
hw/arm/stellaris.c | 21 +++++++++++++--------
11
1 file changed, 8 insertions(+), 1 deletion(-)
14
1 file changed, 13 insertions(+), 8 deletions(-)
12
15
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
14
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
15
--- a/linux-user/aarch64/signal.c
18
--- a/hw/arm/stellaris.c
16
+++ b/linux-user/aarch64/signal.c
19
+++ b/hw/arm/stellaris.c
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
20
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
18
struct target_sve_context {
21
{ 0x40004000, 0x40005000, 0x40006000, 0x40007000,
19
struct target_aarch64_ctx head;
22
0x40024000, 0x40025000, 0x40026000};
20
uint16_t vl;
23
static const int gpio_irq[NUM_GPIO] = {0, 1, 2, 3, 4, 30, 31};
21
- uint16_t reserved[3];
24
+ static const uint32_t i2c_addr[NUM_I2C] = {0x40020000, 0x40021000};
22
+ uint16_t flags;
25
+ static const int i2c_irq[NUM_I2C] = {8, 37};
23
+ uint16_t reserved[2];
26
24
/* The actual SVE data immediately follows. It is laid out
27
/* Memory map of SoC devices, from
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
28
* Stellaris LM3S6965 Microcontroller Data Sheet (rev I)
26
* the original struct pointer.
29
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
30
qemu_irq adc;
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
31
int sram_size;
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
32
int flash_size;
30
33
- I2CBus *i2c;
31
+#define TARGET_SVE_SIG_FLAG_SM 1
34
+ DeviceState *i2c_dev[NUM_I2C] = { };
35
DeviceState *dev;
36
DeviceState *ssys_dev;
37
int i;
38
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
39
}
40
}
41
42
- if (DEV_CAP(2, I2C(0))) {
43
- dev = sysbus_create_simple(TYPE_STELLARIS_I2C, 0x40020000,
44
- qdev_get_gpio_in(nvic, 8));
45
- i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c");
46
- if (board->peripherals & BP_OLED_I2C) {
47
- i2c_slave_create_simple(i2c, "ssd0303", 0x3d);
48
+ for (i = 0; i < NUM_I2C; i++) {
49
+ if (DEV_CAP(2, I2C(i))) {
50
+ i2c_dev[i] = sysbus_create_simple(TYPE_STELLARIS_I2C, i2c_addr[i],
51
+ qdev_get_gpio_in(nvic,
52
+ i2c_irq[i]));
53
}
54
}
55
+ if (board->peripherals & BP_OLED_I2C) {
56
+ I2CBus *bus = (I2CBus *)qdev_get_child_bus(i2c_dev[0], "i2c");
32
+
57
+
33
struct target_rt_sigframe {
58
+ i2c_slave_create_simple(bus, "ssd0303", 0x3d);
34
struct target_siginfo info;
35
struct target_ucontext uc;
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
37
{
38
int i, j;
39
40
+ memset(sve, 0, sizeof(*sve));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
42
__put_user(size, &sve->head.size);
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
46
+ }
59
+ }
47
60
48
/* Note that SVE regs are stored as a byte stream, with each byte element
61
for (i = 0; i < NUM_UART; i++) {
49
* at a subsequent address. This corresponds to a little-endian store
62
if (DEV_CAP(2, UART(i))) {
63
@@ -XXX,XX +XXX,XX @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
64
/* Add dummy regions for the devices we don't implement yet,
65
* so guest accesses don't cause unlogged crashes.
66
*/
67
- create_unimplemented_device("i2c-2", 0x40021000, 0x1000);
68
create_unimplemented_device("PWM", 0x40028000, 0x1000);
69
create_unimplemented_device("QEI-0", 0x4002c000, 0x1000);
70
create_unimplemented_device("QEI-1", 0x4002d000, 0x1000);
50
--
71
--
51
2.25.1
72
2.34.1
73
74
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
This includes the build rules for the decoder, and the
3
We don't have any functional tests for this machine yet, thus let's
4
new file for translation, but excludes any instructions.
4
add a test with a MicroPython binary that is available online
5
(thanks to Joel Stanley for providing it, see:
6
https://www.mail-archive.com/qemu-devel@nongnu.org/msg606064.html ).
5
7
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
10
Message-id: 20250124101709.1591761-1-thuth@redhat.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
12
---
11
target/arm/translate-a64.h | 1 +
13
MAINTAINERS | 1 +
12
target/arm/sme.decode | 20 ++++++++++++++++++++
14
tests/functional/meson.build | 1 +
13
target/arm/translate-a64.c | 7 ++++++-
15
tests/functional/test_arm_microbit.py | 31 +++++++++++++++++++++++++++
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
16
3 files changed, 33 insertions(+)
15
target/arm/meson.build | 2 ++
17
create mode 100755 tests/functional/test_arm_microbit.py
16
5 files changed, 64 insertions(+), 1 deletion(-)
17
create mode 100644 target/arm/sme.decode
18
create mode 100644 target/arm/translate-sme.c
19
18
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
19
diff --git a/MAINTAINERS b/MAINTAINERS
21
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/translate-a64.h
21
--- a/MAINTAINERS
23
+++ b/target/arm/translate-a64.h
22
+++ b/MAINTAINERS
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
23
@@ -XXX,XX +XXX,XX @@ F: hw/*/microbit*.c
25
}
24
F: include/hw/*/nrf51*.h
26
25
F: include/hw/*/microbit*.h
27
bool disas_sve(DisasContext *, uint32_t);
26
F: tests/qtest/microbit-test.c
28
+bool disas_sme(DisasContext *, uint32_t);
27
+F: tests/functional/test_arm_microbit.py
29
28
F: docs/system/arm/nrf.rst
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
29
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
30
ARM PL011 Rust device
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
31
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
33
new file mode 100644
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tests/functional/meson.build
34
+++ b/tests/functional/meson.build
35
@@ -XXX,XX +XXX,XX @@ tests_arm_system_thorough = [
36
'arm_cubieboard',
37
'arm_emcraft_sf2',
38
'arm_integratorcp',
39
+ 'arm_microbit',
40
'arm_orangepi',
41
'arm_quanta_gsj',
42
'arm_raspi2',
43
diff --git a/tests/functional/test_arm_microbit.py b/tests/functional/test_arm_microbit.py
44
new file mode 100755
34
index XXXXXXX..XXXXXXX
45
index XXXXXXX..XXXXXXX
35
--- /dev/null
46
--- /dev/null
36
+++ b/target/arm/sme.decode
47
+++ b/tests/functional/test_arm_microbit.py
37
@@ -XXX,XX +XXX,XX @@
48
@@ -XXX,XX +XXX,XX @@
38
+# AArch64 SME instruction descriptions
49
+#!/usr/bin/env python3
39
+#
50
+#
40
+# Copyright (c) 2022 Linaro, Ltd
51
+# SPDX-License-Identifier: GPL-2.0-or-later
41
+#
52
+#
42
+# This library is free software; you can redistribute it and/or
53
+# Copyright 2025, The QEMU Project Developers.
43
+# modify it under the terms of the GNU Lesser General Public
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
54
+#
47
+# This library is distributed in the hope that it will be useful,
55
+# A functional test that runs MicroPython on the arm microbit machine.
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
54
+
56
+
55
+#
57
+from qemu_test import QemuSystemTest, Asset, exec_command_and_wait_for_pattern
56
+# This file is processed by scripts/decodetree.py
58
+from qemu_test import wait_for_console_pattern
57
+#
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
63
}
64
65
switch (extract32(insn, 25, 4)) {
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
67
+ case 0x0:
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
69
+ unallocated_encoding(s);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/translate-sme.c
81
@@ -XXX,XX +XXX,XX @@
82
+/*
83
+ * AArch64 SME translation
84
+ *
85
+ * Copyright (c) 2022 Linaro, Ltd
86
+ *
87
+ * This library is free software; you can redistribute it and/or
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
91
+ *
92
+ * This library is distributed in the hope that it will be useful,
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
99
+ */
100
+
101
+#include "qemu/osdep.h"
102
+#include "cpu.h"
103
+#include "tcg/tcg-op.h"
104
+#include "tcg/tcg-op-gvec.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
110
+
59
+
111
+
60
+
112
+/*
61
+class MicrobitMachine(QemuSystemTest):
113
+ * Include the generated decoder.
114
+ */
115
+
62
+
116
+#include "decode-sme.c.inc"
63
+ ASSET_MICRO = Asset('https://ozlabs.org/~joel/microbit-micropython.hex',
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
64
+ '021641f93dfb11767d4978dbb3ca7f475d1b13c69e7f4aec3382f212636bffd6')
118
index XXXXXXX..XXXXXXX 100644
65
+
119
--- a/target/arm/meson.build
66
+ def test_arm_microbit(self):
120
+++ b/target/arm/meson.build
67
+ self.set_machine('microbit')
121
@@ -XXX,XX +XXX,XX @@
68
+
122
gen = [
69
+ micropython = self.ASSET_MICRO.fetch()
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
70
+ self.vm.set_console()
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
71
+ self.vm.add_args('-device', f'loader,file={micropython}')
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
72
+ self.vm.launch()
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
73
+ wait_for_console_pattern(self, 'Type "help()" for more information.')
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
74
+ exec_command_and_wait_for_pattern(self, 'import machine as mch', '>>>')
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
75
+ exec_command_and_wait_for_pattern(self, 'mch.reset()', 'MicroPython')
129
'sme_helper.c',
76
+ wait_for_console_pattern(self, '>>>')
130
'translate-a64.c',
77
+
131
'translate-sve.c',
78
+if __name__ == '__main__':
132
+ 'translate-sme.c',
79
+ QemuSystemTest.main()
133
))
134
135
arm_softmmu_ss = ss.source_set()
136
--
80
--
137
2.25.1
81
2.34.1
82
83
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The pseudocode ResetSVEState() does:
2
FPSR = ZeroExtend(0x0800009f<31:0>, 64);
3
but QEMU's arm_reset_sve_state() called vfp_set_fpcr() by accident.
2
4
3
We can handle both exception entry and exception return by
5
Before the advent of FEAT_AFP, this was only setting a collection of
4
hooking into aarch64_sve_change_el.
6
RES0 bits, which vfp_set_fpcr() would then ignore, so the only effect
7
was that we didn't actually set the FPSR the way we are supposed to
8
do. Once FEAT_AFP is implemented, setting the bottom bits of FPCR
9
will change the floating point behaviour.
5
10
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Call vfp_set_fpsr(), as we ought to.
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
13
(Note for stable backports: commit 7f2a01e7368f9 moved this function
14
from sme_helper.c to helper.c, but it had the same bug before the
15
move too.)
16
17
Cc: qemu-stable@nongnu.org
18
Fixes: f84734b87461 ("target/arm: Implement SMSTART, SMSTOP")
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
Message-id: 20250124162836.2332150-4-peter.maydell@linaro.org
10
---
22
---
11
target/arm/helper.c | 15 +++++++++++++--
23
target/arm/helper.c | 2 +-
12
1 file changed, 13 insertions(+), 2 deletions(-)
24
1 file changed, 1 insertion(+), 1 deletion(-)
13
25
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
26
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
28
--- a/target/arm/helper.c
17
+++ b/target/arm/helper.c
29
+++ b/target/arm/helper.c
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
30
@@ -XXX,XX +XXX,XX @@ static void arm_reset_sve_state(CPUARMState *env)
19
return;
31
memset(env->vfp.zregs, 0, sizeof(env->vfp.zregs));
20
}
32
/* Recall that FFR is stored as pregs[16]. */
21
33
memset(env->vfp.pregs, 0, sizeof(env->vfp.pregs));
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
34
- vfp_set_fpcr(env, 0x0800009f);
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
35
+ vfp_set_fpsr(env, 0x0800009f);
24
+
36
}
25
+ /*
37
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
38
void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
27
+ * invoke ResetSVEState when taking an exception from, or
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
29
+ */
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
31
+ arm_reset_sve_state(env);
32
+ return;
33
+ }
34
+
35
/*
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
37
* at ELx, or not available because the EL is in AArch32 state, then
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
39
* we already have the correct register contents when encountering the
40
* vq0->vq0 transition between EL0->EL1.
41
*/
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
44
? sve_vqm1_for_el(env, old_el) : 0);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
47
? sve_vqm1_for_el(env, new_el) : 0);
48
49
--
39
--
50
2.25.1
40
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Use the FPSR_ named constants in vfp_exceptbits_from_host(),
2
rather than hardcoded magic numbers.
2
3
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20250124162836.2332150-5-peter.maydell@linaro.org
7
---
7
---
8
linux-user/elfload.c | 20 ++++++++++++++++++++
8
target/arm/vfp_helper.c | 12 ++++++------
9
1 file changed, 20 insertions(+)
9
1 file changed, 6 insertions(+), 6 deletions(-)
10
10
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
11
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/elfload.c
13
--- a/target/arm/vfp_helper.c
14
+++ b/linux-user/elfload.c
14
+++ b/target/arm/vfp_helper.c
15
@@ -XXX,XX +XXX,XX @@ enum {
15
@@ -XXX,XX +XXX,XX @@ static inline int vfp_exceptbits_from_host(int host_bits)
16
ARM_HWCAP2_A64_RNG = 1 << 16,
16
int target_bits = 0;
17
ARM_HWCAP2_A64_BTI = 1 << 17,
17
18
ARM_HWCAP2_A64_MTE = 1 << 18,
18
if (host_bits & float_flag_invalid) {
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
19
- target_bits |= 1;
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
20
+ target_bits |= FPSR_IOC;
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
21
}
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
22
if (host_bits & float_flag_divbyzero) {
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
23
- target_bits |= 2;
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
24
+ target_bits |= FPSR_DZC;
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
25
}
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
26
if (host_bits & float_flag_overflow) {
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
27
- target_bits |= 4;
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
28
+ target_bits |= FPSR_OFC;
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
29
}
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
30
if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
31
};
31
- target_bits |= 8;
32
32
+ target_bits |= FPSR_UFC;
33
#define ELF_HWCAP get_elf_hwcap()
33
}
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
34
if (host_bits & float_flag_inexact) {
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
35
- target_bits |= 0x10;
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
36
+ target_bits |= FPSR_IXC;
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
37
}
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
38
if (host_bits & float_flag_input_denormal) {
39
+ ARM_HWCAP2_A64_SME_F32F32 |
39
- target_bits |= 0x80;
40
+ ARM_HWCAP2_A64_SME_B16F32 |
40
+ target_bits |= FPSR_IDC;
41
+ ARM_HWCAP2_A64_SME_F16F32 |
41
}
42
+ ARM_HWCAP2_A64_SME_I8I32));
42
return target_bits;
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
43
}
49
--
44
--
50
2.25.1
45
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In vfp_exceptbits_from_host(), we accumulate the FPSR flags in
2
an "int", and our return type is also "int". However, the only
3
callsite returns the same information as a uint32_t, and
4
more generally we handle FPSR values in the code as uint32_t,
5
not int. Bring this function into line with that convention.
2
6
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
There is no behaviour change because none of the FPSR bits
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
we set in this function are bit 31. The input argument to
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
9
the function remains 'int' because that is the return type
10
of the softfloat get_float_exception_flags().
11
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20250124162836.2332150-6-peter.maydell@linaro.org
7
---
15
---
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
16
target/arm/vfp_helper.c | 4 ++--
9
1 file changed, 9 insertions(+)
17
1 file changed, 2 insertions(+), 2 deletions(-)
10
18
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
19
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
12
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/cpu_loop.c
21
--- a/target/arm/vfp_helper.c
14
+++ b/linux-user/aarch64/cpu_loop.c
22
+++ b/target/arm/vfp_helper.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
23
@@ -XXX,XX +XXX,XX @@
16
24
#ifdef CONFIG_TCG
17
switch (trapnr) {
25
18
case EXCP_SWI:
26
/* Convert host exception flags to vfp form. */
19
+ /*
27
-static inline int vfp_exceptbits_from_host(int host_bits)
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
28
+static inline uint32_t vfp_exceptbits_from_host(int host_bits)
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
29
{
22
+ */
30
- int target_bits = 0;
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
31
+ uint32_t target_bits = 0;
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
32
25
+ arm_rebuild_hflags(env);
33
if (host_bits & float_flag_invalid) {
26
+ arm_reset_sve_state(env);
34
target_bits |= FPSR_IOC;
27
+ }
28
ret = do_syscall(env,
29
env->xregs[8],
30
env->xregs[0],
31
--
35
--
32
2.25.1
36
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
We want to split the existing fp_status in the Arm CPUState into
2
separate float_status fields for AArch32 and AArch64. (This is
3
because new control bits defined by FEAT_AFP only have an effect for
4
AArch64, not AArch32.) To make this split we will:
5
* define new fp_status_a32 and fp_status_a64 which have
6
identical behaviour to the existing fp_status
7
* move existing uses of fp_status to fp_status_a32 or
8
fp_status_a64 as appropriate
9
* delete the old fp_status when it has no uses left
2
10
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
11
In this patch we add the new float_status fields.
4
12
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
We will also need to split fp_status_f16, but we will do that
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
as a separate series of patches.
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
15
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Message-id: 20250124162836.2332150-7-peter.maydell@linaro.org
9
---
19
---
10
target/arm/cpu.c | 11 +++++++++++
20
target/arm/cpu.h | 4 ++++
11
1 file changed, 11 insertions(+)
21
target/arm/tcg/translate.h | 12 ++++++++++++
22
target/arm/cpu.c | 2 ++
23
target/arm/vfp_helper.c | 12 ++++++++++++
24
4 files changed, 30 insertions(+)
12
25
26
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/cpu.h
29
+++ b/target/arm/cpu.h
30
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
31
/* There are a number of distinct float control structures:
32
*
33
* fp_status: is the "normal" fp status.
34
+ * fp_status_a32: is the "normal" fp status for AArch32 insns
35
+ * fp_status_a64: is the "normal" fp status for AArch64 insns
36
* fp_status_fp16: used for half-precision calculations
37
* standard_fp_status : the ARM "Standard FPSCR Value"
38
* standard_fp_status_fp16 : used for half-precision
39
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
40
* an explicit FPSCR read.
41
*/
42
float_status fp_status;
43
+ float_status fp_status_a32;
44
+ float_status fp_status_a64;
45
float_status fp_status_f16;
46
float_status standard_fp_status;
47
float_status standard_fp_status_f16;
48
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/tcg/translate.h
51
+++ b/target/arm/tcg/translate.h
52
@@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
53
*/
54
typedef enum ARMFPStatusFlavour {
55
FPST_FPCR,
56
+ FPST_A32,
57
+ FPST_A64,
58
FPST_FPCR_F16,
59
FPST_STD,
60
FPST_STD_F16,
61
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
62
*
63
* FPST_FPCR
64
* for non-FP16 operations controlled by the FPCR
65
+ * FPST_A32
66
+ * for AArch32 non-FP16 operations controlled by the FPCR
67
+ * FPST_A64
68
+ * for AArch64 non-FP16 operations controlled by the FPCR
69
* FPST_FPCR_F16
70
* for operations controlled by the FPCR where FPCR.FZ16 is to be used
71
* FPST_STD
72
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
73
case FPST_FPCR:
74
offset = offsetof(CPUARMState, vfp.fp_status);
75
break;
76
+ case FPST_A32:
77
+ offset = offsetof(CPUARMState, vfp.fp_status_a32);
78
+ break;
79
+ case FPST_A64:
80
+ offset = offsetof(CPUARMState, vfp.fp_status_a64);
81
+ break;
82
case FPST_FPCR_F16:
83
offset = offsetof(CPUARMState, vfp.fp_status_f16);
84
break;
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
85
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
86
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
87
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
88
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
89
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
18
CPACR_EL1, ZEN, 3);
90
set_default_nan_mode(1, &env->vfp.standard_fp_status);
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
91
set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
92
arm_set_default_fp_behaviours(&env->vfp.fp_status);
93
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
94
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
95
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
96
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16);
97
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
98
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/vfp_helper.c
101
+++ b/target/arm/vfp_helper.c
102
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
103
uint32_t i;
104
105
i = get_float_exception_flags(&env->vfp.fp_status);
106
+ i |= get_float_exception_flags(&env->vfp.fp_status_a32);
107
+ i |= get_float_exception_flags(&env->vfp.fp_status_a64);
108
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
109
/* FZ16 does not generate an input denormal exception. */
110
i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
111
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
112
* be the architecturally up-to-date exception flag information first.
113
*/
114
set_float_exception_flags(0, &env->vfp.fp_status);
115
+ set_float_exception_flags(0, &env->vfp.fp_status_a32);
116
+ set_float_exception_flags(0, &env->vfp.fp_status_a64);
117
set_float_exception_flags(0, &env->vfp.fp_status_f16);
118
set_float_exception_flags(0, &env->vfp.standard_fp_status);
119
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
120
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
121
break;
20
}
122
}
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
123
set_float_rounding_mode(i, &env->vfp.fp_status);
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
124
+ set_float_rounding_mode(i, &env->vfp.fp_status_a32);
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
125
+ set_float_rounding_mode(i, &env->vfp.fp_status_a64);
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
126
set_float_rounding_mode(i, &env->vfp.fp_status_f16);
25
+ CPACR_EL1, SMEN, 3);
127
}
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
128
if (changed & FPCR_FZ16) {
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
129
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
130
bool ftz_enabled = val & FPCR_FZ;
29
+ SMCR, FA64, 1);
131
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
30
+ }
132
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
31
+ }
133
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
32
/*
134
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
33
* Enable 48-bit address space (TODO: take reserved_va into account).
135
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
34
* Enable TBI0 but not TBI1.
136
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
137
}
138
if (changed & FPCR_DN) {
139
bool dnan_enabled = val & FPCR_DN;
140
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
141
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
142
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
143
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
144
}
145
}
35
--
146
--
36
2.25.1
147
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Switch from vfp.fp_status to vfp.fp_status_a64 for helpers which:
2
* directly reference an fp_status field
3
* are called only from the A64 decoder
4
* are not called inside a set_rmode/restore_rmode sequence
2
5
3
Move the checks out of the parsing loop and into the
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
restore function. This more closely mirrors the code
7
Message-id: 20250124162836.2332150-8-peter.maydell@linaro.org
5
structure in the kernel, and is slightly clearer.
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
target/arm/tcg/sme_helper.c | 2 +-
11
target/arm/tcg/vec_helper.c | 8 ++++----
12
2 files changed, 5 insertions(+), 5 deletions(-)
6
13
7
Reject rather than silently skip incorrect VL and SVE record sizes,
14
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
8
bringing our checks into line with those the kernel does.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
16
1 file changed, 35 insertions(+), 16 deletions(-)
17
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/linux-user/aarch64/signal.c
16
--- a/target/arm/tcg/sme_helper.c
21
+++ b/linux-user/aarch64/signal.c
17
+++ b/target/arm/tcg/sme_helper.c
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
18
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
23
}
19
* round-to-odd -- see above.
20
*/
21
fpst_f16 = env->vfp.fp_status_f16;
22
- fpst_std = env->vfp.fp_status;
23
+ fpst_std = env->vfp.fp_status_a64;
24
set_default_nan_mode(true, &fpst_std);
25
set_default_nan_mode(true, &fpst_f16);
26
fpst_odd = fpst_std;
27
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/vec_helper.c
30
+++ b/target/arm/tcg/vec_helper.c
31
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
32
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
33
CPUARMState *env, uint32_t desc)
34
{
35
- do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc,
36
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
37
get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
24
}
38
}
25
39
26
-static void target_restore_sve_record(CPUARMState *env,
40
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
27
- struct target_sve_context *sve, int vq)
41
intptr_t i, oprsz = simd_oprsz(desc);
28
+static bool target_restore_sve_record(CPUARMState *env,
42
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
29
+ struct target_sve_context *sve,
43
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
30
+ int size)
44
- float_status *status = &env->vfp.fp_status;
45
+ float_status *status = &env->vfp.fp_status_a64;
46
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
47
48
for (i = 0; i < oprsz; i += sizeof(float32)) {
49
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
50
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
51
CPUARMState *env, uint32_t desc)
31
{
52
{
32
- int i, j;
53
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
33
+ int i, j, vl, vq;
54
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
34
55
get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
37
+ return false;
38
+ }
39
+
40
+ __get_user(vl, &sve->vl);
41
+ vq = sve_vq(env);
42
+
43
+ /* Reject mismatched VL. */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
45
+ return false;
46
+ }
47
+
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
49
+ if (size <= sizeof(*sve)) {
50
+ return true;
51
+ }
52
+
53
+ /* Reject non-empty but incomplete record. */
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
55
+ return false;
56
+ }
57
+
58
+ /*
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
60
* at a subsequent address. This corresponds to a little-endian load
61
* of our 64-bit hunks.
62
*/
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
64
}
65
}
66
}
67
+ return true;
68
}
56
}
69
57
70
static int target_restore_sigframe(CPUARMState *env,
58
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
59
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
72
struct target_sve_context *sve = NULL;
60
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
73
uint64_t extra_datap = 0;
61
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
74
bool used_extra = false;
62
- float_status *status = &env->vfp.fp_status;
75
- int vq = 0, sve_size = 0;
63
+ float_status *status = &env->vfp.fp_status_a64;
76
+ int sve_size = 0;
64
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
77
65
78
target_restore_general_frame(env, sf);
66
for (i = 0; i < oprsz; i += 16) {
79
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
81
if (sve || size < sizeof(struct target_sve_context)) {
82
goto err;
83
}
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
85
- vq = sve_vq(env);
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
87
- if (size == sve_size) {
88
- sve = (struct target_sve_context *)ctx;
89
- break;
90
- }
91
- }
92
- goto err;
93
+ sve = (struct target_sve_context *)ctx;
94
+ sve_size = size;
95
+ break;
96
97
case TARGET_EXTRA_MAGIC:
98
if (extra || size != sizeof(struct target_extra_context)) {
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
100
}
101
102
/* SVE data, if present, overwrites FPSIMD data. */
103
- if (sve) {
104
- target_restore_sve_record(env, sve, vq);
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
106
+ goto err;
107
}
108
unlock_user(extra, extra_datap, 0);
109
return 0;
110
--
67
--
111
2.25.1
68
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In is_ebf(), we might be called for A64 or A32, but we have
2
the CPUARMState* so we can select fp_status_a64 or
3
fp_status_a32 accordingly.
2
4
3
This is an SVE instruction that operates using the SVE vector
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
length, but it is present only if SME is implemented.
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/vec_helper.c | 2 +-
9
1 file changed, 1 insertion(+), 1 deletion(-)
5
10
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper.h | 18 +++++++
12
target/arm/sve.decode | 5 ++
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
13
--- a/target/arm/tcg/vec_helper.c
20
+++ b/target/arm/helper.h
14
+++ b/target/arm/tcg/vec_helper.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
15
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
16
*/
23
void, ptr, ptr, ptr, ptr, ptr, i32)
17
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
24
18
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
19
- *statusp = env->vfp.fp_status;
26
+ void, ptr, ptr, ptr, ptr, i32)
20
+ *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32;
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
21
set_default_nan_mode(true, statusp);
28
+ void, ptr, ptr, ptr, ptr, i32)
22
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
23
if (ebf) {
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
66
}
67
+
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
69
+{
70
+ tcg_gen_smax_i32(d, a, n);
71
+ tcg_gen_smin_i32(d, d, m);
72
+}
73
+
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
75
+{
76
+ tcg_gen_smax_i64(d, a, n);
77
+ tcg_gen_smin_i64(d, d, m);
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
201
--
24
--
202
2.25.1
25
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Use fp_status_a32 in the vjcvt helper function; this is called only
2
from the A32/T32 decoder and is not used inside a
3
set_rmode/restore_rmode sequence.
2
4
3
This is an SVE instruction that operates using the SVE vector
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
length, but it is present only if SME is implemented.
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250124162836.2332150-9-peter.maydell@linaro.org
8
---
9
target/arm/vfp_helper.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
5
11
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper-sve.h | 2 ++
12
target/arm/sve.decode | 1 +
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
16
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
18
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-sve.h
14
--- a/target/arm/vfp_helper.c
20
+++ b/target/arm/helper-sve.h
15
+++ b/target/arm/vfp_helper.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
16
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
22
17
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
24
19
{
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
20
- uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
26
+
21
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32);
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
uint32_t result = pair;
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
uint32_t z = (pair >> 32) == 0;
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/sve.decode
33
+++ b/target/arm/sve.decode
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
39
40
# SVE vector splice (predicated, destructive)
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/sve_helper.c
45
+++ b/target/arm/sve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
47
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
49
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
51
+{
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
53
+ uint64_t *d = vd, *n = vn;
54
+ uint8_t *pg = vg;
55
+
56
+ for (i = 0; i < opr_sz; i += 2) {
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
63
+ }
64
+}
65
+
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
76
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
78
+
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
80
gen_helper_sve_splice, a, a->esz)
81
24
82
--
25
--
83
2.25.1
26
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The helpers vfp_cmps, vfp_cmpes, vfp_cmpd, vfp_cmped are used only from
2
the A32 decoder; the A64 decoder uses separate vfp_cmps_a64 etc helpers
3
(because for A64 we update the main NZCV flags and for A32 we update
4
the FPSCR NZCV flags). So we can make these helpers use the fp_status_a32
5
field instead of fp_status.
2
6
3
This is an SVE instruction that operates using the SVE vector
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
length, but it is present only if SME is implemented.
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20250124162836.2332150-10-peter.maydell@linaro.org
10
---
11
target/arm/vfp_helper.c | 4 ++--
12
1 file changed, 2 insertions(+), 2 deletions(-)
5
13
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sve.decode | 20 +++++++++++++
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
13
2 files changed, 77 insertions(+)
14
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sve.decode
16
--- a/target/arm/vfp_helper.c
18
+++ b/target/arm/sve.decode
17
+++ b/target/arm/vfp_helper.c
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
18
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
20
19
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
21
### SVE2 floating-point bfloat16 dot-product (indexed)
20
}
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
21
DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
23
+
22
-DO_VFP_cmp(s, float32, float32, fp_status)
24
+### SVE broadcast predicate element
23
-DO_VFP_cmp(d, float64, float64, fp_status)
25
+
24
+DO_VFP_cmp(s, float32, float32, fp_status_a32)
26
+&psel esz pd pn pm rv imm
25
+DO_VFP_cmp(d, float64, float64, fp_status_a32)
27
+%psel_rv 16:2 !function=plus_12
26
#undef DO_VFP_cmp
28
+%psel_imm_b 22:2 19:2
27
29
+%psel_imm_h 22:2 20:1
28
/* Integer to float and float to integer conversions */
30
+%psel_imm_s 22:2
31
+%psel_imm_d 23:1
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
51
+
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
53
+{
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
65
+ }
66
+
67
+ tmp = tcg_temp_new_i64();
68
+ dbit = tcg_temp_new_i64();
69
+ didx = tcg_temp_new_i64();
70
+ ptr = tcg_temp_new_ptr();
71
+
72
+ /* Compute the predicate element. */
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
74
+ if (is_power_of_2(elements)) {
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
78
+ }
79
+
80
+ /* Extract the predicate byte and bit indices. */
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
86
+ }
87
+
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
106
+ return true;
107
+}
108
--
29
--
109
2.25.1
30
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In the A32 decoder, use FPST_A32 rather than FPST_FPCR. By
2
doing an automated conversion of the whole file we avoid possibly
3
using more than one fpst value in a set_rmode/op/restore_rmode
4
sequence.
2
5
3
This is SMOPA, SUMOPA, USMOPA, UMOPA, for both Int8 and Int16.
6
Patch created with
7
perl -p -i -e 's/FPST_FPCR(?!_)/FPST_A32/g' target/arm/tcg/translate-vfp.c
4
8
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20250124162836.2332150-11-peter.maydell@linaro.org
9
---
12
---
10
target/arm/helper-sme.h | 16 ++++++++
13
target/arm/tcg/translate-vfp.c | 54 +++++++++++++++++-----------------
11
target/arm/sme.decode | 10 +++++
14
1 file changed, 27 insertions(+), 27 deletions(-)
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
15
15
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-sme.h
18
--- a/target/arm/tcg/translate-vfp.c
19
+++ b/target/arm/helper-sme.h
19
+++ b/target/arm/tcg/translate-vfp.c
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
20
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
21
if (sz == 1) {
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
22
fpst = fpstatus_ptr(FPST_FPCR_F16);
23
void, ptr, ptr, ptr, ptr, ptr, i32)
23
} else {
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
24
- fpst = fpstatus_ptr(FPST_FPCR);
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
25
+ fpst = fpstatus_ptr(FPST_A32);
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
26
}
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
27
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
28
tcg_rmode = gen_set_rmode(rounding, fpst);
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
30
if (sz == 1) {
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
31
fpst = fpstatus_ptr(FPST_FPCR_F16);
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
32
} else {
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
33
- fpst = fpstatus_ptr(FPST_FPCR);
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
34
+ fpst = fpstatus_ptr(FPST_A32);
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
35
}
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
36
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
37
tcg_shift = tcg_constant_i32(0);
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
38
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
39
f0 = tcg_temp_new_i32();
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
40
f1 = tcg_temp_new_i32();
41
index XXXXXXX..XXXXXXX 100644
41
fd = tcg_temp_new_i32();
42
--- a/target/arm/sme.decode
42
- fpst = fpstatus_ptr(FPST_FPCR);
43
+++ b/target/arm/sme.decode
43
+ fpst = fpstatus_ptr(FPST_A32);
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
45
45
vfp_load_reg32(f0, vn);
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
46
vfp_load_reg32(f1, vm);
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
47
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
48
+
48
f0 = tcg_temp_new_i64();
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
49
f1 = tcg_temp_new_i64();
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
50
fd = tcg_temp_new_i64();
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
51
- fpst = fpstatus_ptr(FPST_FPCR);
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
52
+ fpst = fpstatus_ptr(FPST_A32);
53
+
53
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
54
vfp_load_reg64(f0, vn);
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
55
vfp_load_reg64(f1, vm);
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
56
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
57
/* VFNMA, VFNMS */
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
58
gen_vfp_negs(vd, vd);
59
index XXXXXXX..XXXXXXX 100644
59
}
60
--- a/target/arm/sme_helper.c
60
- fpst = fpstatus_ptr(FPST_FPCR);
61
+++ b/target/arm/sme_helper.c
61
+ fpst = fpstatus_ptr(FPST_A32);
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
62
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
63
} while (row & 15);
63
vfp_store_reg32(vd, a->vd);
64
}
64
return true;
65
}
65
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
66
+
66
/* VFNMA, VFNMS */
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
67
gen_vfp_negd(vd, vd);
68
+
68
}
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
69
- fpst = fpstatus_ptr(FPST_FPCR);
70
+ uint8_t *pn, uint8_t *pm,
70
+ fpst = fpstatus_ptr(FPST_A32);
71
+ uint32_t desc, IMOPFn *fn)
71
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
72
+{
72
vfp_store_reg64(vd, a->vd);
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
73
return true;
74
+ bool neg = simd_data(desc);
74
@@ -XXX,XX +XXX,XX @@ static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
75
+
75
76
+ for (row = 0; row < oprsz; ++row) {
76
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
77
+ uint8_t pa = pn[H1(row)];
77
{
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
78
- gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR));
79
+ uint64_t n = zn[row];
79
+ gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32));
80
+
80
}
81
+ for (col = 0; col < oprsz; ++col) {
81
82
+ uint8_t pb = pm[H1(col)];
82
static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
83
+ uint64_t *a = &za_row[col];
83
{
84
+
84
- gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR));
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
85
+ gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32));
86
+ }
86
}
87
+ }
87
88
+}
88
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
89
+
89
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
90
return true;
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
91
}
92
+{ \
92
93
+ uint32_t sum0 = 0, sum1 = 0; \
93
- fpst = fpstatus_ptr(FPST_FPCR);
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
94
+ fpst = fpstatus_ptr(FPST_A32);
95
+ n &= expand_pred_b(p); \
95
ahp_mode = get_ahp_flag();
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
96
tmp = tcg_temp_new_i32();
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
97
/* The T bit tells us if we want the low or high 16 bits of Vm */
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
98
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
99
return true;
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
100
}
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
101
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
102
- fpst = fpstatus_ptr(FPST_FPCR);
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
103
+ fpst = fpstatus_ptr(FPST_A32);
104
+ if (neg) { \
104
ahp_mode = get_ahp_flag();
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
105
tmp = tcg_temp_new_i32();
106
+ } else { \
106
/* The T bit tells us if we want the low or high 16 bits of Vm */
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
107
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
108
+ } \
108
return true;
109
+ return ((uint64_t)sum1 << 32) | sum0; \
109
}
110
+}
110
111
+
111
- fpst = fpstatus_ptr(FPST_FPCR);
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
112
+ fpst = fpstatus_ptr(FPST_A32);
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
113
tmp = tcg_temp_new_i32();
114
+{ \
114
115
+ uint64_t sum = 0; \
115
vfp_load_reg32(tmp, a->vm);
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
116
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
117
+ n &= expand_pred_h(p); \
117
return true;
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
118
}
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
119
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
120
- fpst = fpstatus_ptr(FPST_FPCR);
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
121
+ fpst = fpstatus_ptr(FPST_A32);
122
+ return neg ? a - sum : a + sum; \
122
ahp_mode = get_ahp_flag();
123
+}
123
tmp = tcg_temp_new_i32();
124
+
124
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
125
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
126
return true;
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
127
}
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
128
129
+
129
- fpst = fpstatus_ptr(FPST_FPCR);
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
130
+ fpst = fpstatus_ptr(FPST_A32);
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
131
ahp_mode = get_ahp_flag();
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
132
tmp = tcg_temp_new_i32();
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
133
vm = tcg_temp_new_i64();
134
+
134
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
135
+#define DEF_IMOPH(NAME) \
135
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
136
tmp = tcg_temp_new_i32();
137
+ void *vpm, uint32_t desc) \
137
vfp_load_reg32(tmp, a->vm);
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
138
- fpst = fpstatus_ptr(FPST_FPCR);
139
+
139
+ fpst = fpstatus_ptr(FPST_A32);
140
+DEF_IMOPH(smopa_s)
140
gen_helper_rints(tmp, tmp, fpst);
141
+DEF_IMOPH(umopa_s)
141
vfp_store_reg32(tmp, a->vd);
142
+DEF_IMOPH(sumopa_s)
142
return true;
143
+DEF_IMOPH(usmopa_s)
143
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
144
+DEF_IMOPH(smopa_d)
144
145
+DEF_IMOPH(umopa_d)
145
tmp = tcg_temp_new_i64();
146
+DEF_IMOPH(sumopa_d)
146
vfp_load_reg64(tmp, a->vm);
147
+DEF_IMOPH(usmopa_d)
147
- fpst = fpstatus_ptr(FPST_FPCR);
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
148
+ fpst = fpstatus_ptr(FPST_A32);
149
index XXXXXXX..XXXXXXX 100644
149
gen_helper_rintd(tmp, tmp, fpst);
150
--- a/target/arm/translate-sme.c
150
vfp_store_reg64(tmp, a->vd);
151
+++ b/target/arm/translate-sme.c
151
return true;
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
152
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
153
153
154
/* TODO: FEAT_EBF16 */
154
tmp = tcg_temp_new_i32();
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
155
vfp_load_reg32(tmp, a->vm);
156
+
156
- fpst = fpstatus_ptr(FPST_FPCR);
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
157
+ fpst = fpstatus_ptr(FPST_A32);
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
158
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
159
gen_helper_rints(tmp, tmp, fpst);
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
160
gen_restore_rmode(tcg_rmode, fpst);
161
+
161
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
162
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
163
tmp = tcg_temp_new_i64();
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
164
vfp_load_reg64(tmp, a->vm);
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
165
- fpst = fpstatus_ptr(FPST_FPCR);
166
+ fpst = fpstatus_ptr(FPST_A32);
167
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
168
gen_helper_rintd(tmp, tmp, fpst);
169
gen_restore_rmode(tcg_rmode, fpst);
170
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
171
172
tmp = tcg_temp_new_i32();
173
vfp_load_reg32(tmp, a->vm);
174
- fpst = fpstatus_ptr(FPST_FPCR);
175
+ fpst = fpstatus_ptr(FPST_A32);
176
gen_helper_rints_exact(tmp, tmp, fpst);
177
vfp_store_reg32(tmp, a->vd);
178
return true;
179
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
180
181
tmp = tcg_temp_new_i64();
182
vfp_load_reg64(tmp, a->vm);
183
- fpst = fpstatus_ptr(FPST_FPCR);
184
+ fpst = fpstatus_ptr(FPST_A32);
185
gen_helper_rintd_exact(tmp, tmp, fpst);
186
vfp_store_reg64(tmp, a->vd);
187
return true;
188
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
189
vm = tcg_temp_new_i32();
190
vd = tcg_temp_new_i64();
191
vfp_load_reg32(vm, a->vm);
192
- gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_FPCR));
193
+ gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32));
194
vfp_store_reg64(vd, a->vd);
195
return true;
196
}
197
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
198
vd = tcg_temp_new_i32();
199
vm = tcg_temp_new_i64();
200
vfp_load_reg64(vm, a->vm);
201
- gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_FPCR));
202
+ gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32));
203
vfp_store_reg32(vd, a->vd);
204
return true;
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
207
208
vm = tcg_temp_new_i32();
209
vfp_load_reg32(vm, a->vm);
210
- fpst = fpstatus_ptr(FPST_FPCR);
211
+ fpst = fpstatus_ptr(FPST_A32);
212
if (a->s) {
213
/* i32 -> f32 */
214
gen_helper_vfp_sitos(vm, vm, fpst);
215
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
216
vm = tcg_temp_new_i32();
217
vd = tcg_temp_new_i64();
218
vfp_load_reg32(vm, a->vm);
219
- fpst = fpstatus_ptr(FPST_FPCR);
220
+ fpst = fpstatus_ptr(FPST_A32);
221
if (a->s) {
222
/* i32 -> f64 */
223
gen_helper_vfp_sitod(vd, vm, fpst);
224
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
225
vd = tcg_temp_new_i32();
226
vfp_load_reg32(vd, a->vd);
227
228
- fpst = fpstatus_ptr(FPST_FPCR);
229
+ fpst = fpstatus_ptr(FPST_A32);
230
shift = tcg_constant_i32(frac_bits);
231
232
/* Switch on op:U:sx bits */
233
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
234
vd = tcg_temp_new_i64();
235
vfp_load_reg64(vd, a->vd);
236
237
- fpst = fpstatus_ptr(FPST_FPCR);
238
+ fpst = fpstatus_ptr(FPST_A32);
239
shift = tcg_constant_i32(frac_bits);
240
241
/* Switch on op:U:sx bits */
242
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
243
return true;
244
}
245
246
- fpst = fpstatus_ptr(FPST_FPCR);
247
+ fpst = fpstatus_ptr(FPST_A32);
248
vm = tcg_temp_new_i32();
249
vfp_load_reg32(vm, a->vm);
250
251
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
252
return true;
253
}
254
255
- fpst = fpstatus_ptr(FPST_FPCR);
256
+ fpst = fpstatus_ptr(FPST_A32);
257
vm = tcg_temp_new_i64();
258
vd = tcg_temp_new_i32();
259
vfp_load_reg64(vm, a->vm);
166
--
260
--
167
2.25.1
261
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In the A64 decoder, use FPST_A64 rather than FPST_FPCR. By
2
doing an automated conversion of the whole file we avoid possibly
3
using more than one fpst value in a set_rmode/op/restore_rmode
4
sequence.
2
5
3
Mark these as non-streaming instructions, which should trap
6
Patch created with
4
if full a64 support is not enabled in streaming mode.
5
7
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
perl -p -i -e 's/FPST_FPCR(?!_)/FPST_A64/g' target/arm/tcg/translate-{a64,sve,sme}.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20250124162836.2332150-12-peter.maydell@linaro.org
10
---
13
---
11
target/arm/sme-fa64.decode | 2 --
14
target/arm/tcg/translate-a64.c | 70 +++++++++++-----------
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
15
target/arm/tcg/translate-sme.c | 4 +-
13
2 files changed, 15 insertions(+), 11 deletions(-)
16
target/arm/tcg/translate-sve.c | 106 ++++++++++++++++-----------------
17
3 files changed, 90 insertions(+), 90 deletions(-)
14
18
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
19
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
21
--- a/target/arm/tcg/translate-a64.c
18
+++ b/target/arm/sme-fa64.decode
22
+++ b/target/arm/tcg/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
23
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
24
int rm, bool is_fp16, int data,
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
25
gen_helper_gvec_3_ptr *fn)
22
26
{
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
30
vec_full_reg_offset(s, rn),
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
31
vec_full_reg_offset(s, rm), fpst,
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
33
int rm, int ra, bool is_fp16, int data,
34
gen_helper_gvec_4_ptr *fn)
35
{
36
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
37
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
38
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
39
vec_full_reg_offset(s, rn),
40
vec_full_reg_offset(s, rm),
41
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
42
if (fp_access_check(s)) {
43
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
44
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
45
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
46
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
47
write_fp_dreg(s, a->rd, t0);
48
}
49
break;
50
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
51
if (fp_access_check(s)) {
52
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
53
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
54
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
55
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
56
write_fp_sreg(s, a->rd, t0);
57
}
58
break;
59
@@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
60
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
61
TCGv_i64 t1 = tcg_constant_i64(0);
62
if (swap) {
63
- f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR));
64
+ f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
65
} else {
66
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
67
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
68
}
69
write_fp_dreg(s, a->rd, t0);
70
}
71
@@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
72
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
73
TCGv_i32 t1 = tcg_constant_i32(0);
74
if (swap) {
75
- f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR));
76
+ f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
77
} else {
78
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
79
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
80
}
81
write_fp_sreg(s, a->rd, t0);
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
84
TCGv_i64 t1 = tcg_temp_new_i64();
85
86
read_vec_element(s, t1, a->rm, a->idx, MO_64);
87
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
88
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
89
write_fp_dreg(s, a->rd, t0);
90
}
91
break;
92
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
93
TCGv_i32 t1 = tcg_temp_new_i32();
94
95
read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
96
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
97
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
98
write_fp_sreg(s, a->rd, t0);
99
}
100
break;
101
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
102
if (neg) {
103
gen_vfp_negd(t1, t1);
104
}
105
- gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
106
+ gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
107
write_fp_dreg(s, a->rd, t0);
108
}
109
break;
110
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
111
if (neg) {
112
gen_vfp_negs(t1, t1);
113
}
114
- gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
115
+ gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
116
write_fp_sreg(s, a->rd, t0);
117
}
118
break;
119
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
120
121
read_vec_element(s, t0, a->rn, 0, MO_64);
122
read_vec_element(s, t1, a->rn, 1, MO_64);
123
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
124
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
125
write_fp_dreg(s, a->rd, t0);
126
}
127
break;
128
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
129
130
read_vec_element_i32(s, t0, a->rn, 0, MO_32);
131
read_vec_element_i32(s, t1, a->rn, 1, MO_32);
132
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
133
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
134
write_fp_sreg(s, a->rd, t0);
135
}
136
break;
137
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
138
if (neg_n) {
139
gen_vfp_negd(tn, tn);
140
}
141
- fpst = fpstatus_ptr(FPST_FPCR);
142
+ fpst = fpstatus_ptr(FPST_A64);
143
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
144
write_fp_dreg(s, a->rd, ta);
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
147
if (neg_n) {
148
gen_vfp_negs(tn, tn);
149
}
150
- fpst = fpstatus_ptr(FPST_FPCR);
151
+ fpst = fpstatus_ptr(FPST_A64);
152
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
153
write_fp_sreg(s, a->rd, ta);
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
156
if (fp_access_check(s)) {
157
MemOp esz = a->esz;
158
int elts = (a->q ? 16 : 8) >> esz;
159
- TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
160
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
161
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
162
write_fp_sreg(s, a->rd, res);
163
}
164
@@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size,
165
bool cmp_with_zero, bool signal_all_nans)
166
{
167
TCGv_i64 tcg_flags = tcg_temp_new_i64();
168
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
169
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_A64);
170
171
if (size == MO_64) {
172
TCGv_i64 tcg_vn, tcg_vm;
173
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
174
return check == 0;
175
}
176
177
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
178
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
179
if (rmode >= 0) {
180
tcg_rmode = gen_set_rmode(rmode, fpst);
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
183
if (fp_access_check(s)) {
184
TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
185
TCGv_i64 tcg_rd = tcg_temp_new_i64();
186
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
187
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
188
189
gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
190
write_fp_dreg(s, a->rd, tcg_rd);
191
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
192
if (fp_access_check(s)) {
193
TCGv_i32 tmp = read_fp_sreg(s, a->rn);
194
TCGv_i32 ahp = get_ahp_flag();
195
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
196
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
197
198
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
199
/* write_fp_sreg is OK here because top half of result is zero */
200
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
201
if (fp_access_check(s)) {
202
TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
203
TCGv_i32 tcg_rd = tcg_temp_new_i32();
204
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
205
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
206
207
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
208
write_fp_sreg(s, a->rd, tcg_rd);
209
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
210
TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
211
TCGv_i32 tcg_rd = tcg_temp_new_i32();
212
TCGv_i32 ahp = get_ahp_flag();
213
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
214
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
215
216
gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
217
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
218
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
219
if (fp_access_check(s)) {
220
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
221
TCGv_i32 tcg_rd = tcg_temp_new_i32();
222
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
223
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64);
224
TCGv_i32 tcg_ahp = get_ahp_flag();
225
226
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
227
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
228
if (fp_access_check(s)) {
229
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
230
TCGv_i64 tcg_rd = tcg_temp_new_i64();
231
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
232
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64);
233
TCGv_i32 tcg_ahp = get_ahp_flag();
234
235
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
236
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
237
TCGv_i32 tcg_shift, tcg_single;
238
TCGv_i64 tcg_double;
239
240
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
241
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
242
tcg_shift = tcg_constant_i32(shift);
243
244
switch (esz) {
245
@@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
246
TCGv_ptr tcg_fpstatus;
247
TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
248
249
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
250
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
251
tcg_shift = tcg_constant_i32(shift);
252
tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
253
254
@@ -XXX,XX +XXX,XX @@ static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
255
}
256
if (fp_access_check(s)) {
257
TCGv_i64 t = read_fp_dreg(s, a->rn);
258
- TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
259
+ TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
260
261
gen_helper_fjcvtzs(t, t, fpstatus);
262
263
@@ -XXX,XX +XXX,XX @@ static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
264
* with von Neumann rounding (round to odd)
265
*/
266
TCGv_i32 tmp = tcg_temp_new_i32();
267
- gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_FPCR));
268
+ gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
269
tcg_gen_extu_i32_i64(d, tmp);
270
}
271
272
@@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
273
{
274
TCGv_i32 tcg_lo = tcg_temp_new_i32();
275
TCGv_i32 tcg_hi = tcg_temp_new_i32();
276
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
277
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
278
TCGv_i32 ahp = get_ahp_flag();
279
280
tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
281
@@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
282
static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
283
{
284
TCGv_i32 tmp = tcg_temp_new_i32();
285
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
286
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
287
288
gen_helper_vfp_fcvtsd(tmp, n, fpst);
289
tcg_gen_extu_i32_i64(d, tmp);
290
@@ -XXX,XX +XXX,XX @@ TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
291
292
static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
293
{
294
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
295
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
296
TCGv_i32 tmp = tcg_temp_new_i32();
297
gen_helper_bfcvt_pair(tmp, n, fpst);
298
tcg_gen_extu_i32_i64(d, tmp);
299
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
300
return check == 0;
301
}
302
303
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
304
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
305
if (rmode >= 0) {
306
tcg_rmode = gen_set_rmode(rmode, fpst);
307
}
308
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
309
return check == 0;
310
}
311
312
- fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
313
+ fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
314
tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
315
vec_full_reg_offset(s, rn), fpst,
316
is_q ? 16 : 8, vec_full_reg_size(s),
317
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
318
return true;
319
}
320
321
- fpst = fpstatus_ptr(FPST_FPCR);
322
+ fpst = fpstatus_ptr(FPST_A64);
323
if (a->esz == MO_64) {
324
/* 32 -> 64 bit fp conversion */
325
TCGv_i64 tcg_res[2];
326
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
29
index XXXXXXX..XXXXXXX 100644
327
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
328
--- a/target/arm/tcg/translate-sme.c
31
+++ b/target/arm/translate-sve.c
329
+++ b/target/arm/tcg/translate-sme.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
330
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
331
TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
34
NULL, gen_helper_sve2_pmull_d,
332
MO_32, gen_helper_sme_fmopa_h)
333
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
334
- MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
335
+ MO_32, FPST_A64, gen_helper_sme_fmopa_s)
336
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
337
- MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
338
+ MO_64, FPST_A64, gen_helper_sme_fmopa_d)
339
340
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
341
342
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
343
index XXXXXXX..XXXXXXX 100644
344
--- a/target/arm/tcg/translate-sve.c
345
+++ b/target/arm/tcg/translate-sve.c
346
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
347
arg_rr_esz *a, int data)
348
{
349
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
350
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
351
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
352
}
353
354
/* Invoke an out-of-line helper on 3 Zregs. */
355
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
356
arg_rrr_esz *a, int data)
357
{
358
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
359
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
360
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
361
}
362
363
/* Invoke an out-of-line helper on 4 Zregs. */
364
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
365
arg_rprr_esz *a)
366
{
367
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
368
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
369
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
370
}
371
372
/* Invoke a vector expander on two Zregs and an immediate. */
373
@@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
35
};
374
};
36
- if (a->esz == 0
375
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
376
(a->index << 1) | sub,
38
- : !dc_isar_feature(aa64_sve, s)) {
377
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
39
+
378
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
40
+ if (a->esz == 0) {
379
}
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
380
42
+ return false;
381
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
43
+ }
382
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
44
+ s->is_nonstreaming = true;
383
};
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
384
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
46
return false;
385
fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
47
}
386
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
387
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
388
389
/*
390
*** SVE Floating Point Fast Reduction Group
391
@@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
392
393
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
394
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
395
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
396
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
397
398
fn(temp, t_zn, t_pg, status, t_desc);
399
400
@@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
401
if (sve_access_check(s)) {
402
unsigned vsz = vec_full_reg_size(s);
403
TCGv_ptr status =
404
- fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
405
+ fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
406
407
tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
408
vec_full_reg_offset(s, a->rn),
409
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
410
};
411
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
412
ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
413
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
414
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
415
416
/*
417
*** SVE Floating Point Accumulating Reduction Group
418
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
419
t_pg = tcg_temp_new_ptr();
420
tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
421
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
422
- t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
423
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
424
t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
425
426
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
427
@@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
428
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
429
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
430
431
- status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
432
+ status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
433
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
434
fn(t_zd, t_zn, t_pg, scalar, status, desc);
435
}
436
@@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
437
}
438
if (sve_access_check(s)) {
439
unsigned vsz = vec_full_reg_size(s);
440
- TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
441
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
442
tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
443
vec_full_reg_offset(s, a->rn),
444
vec_full_reg_offset(s, a->rm),
445
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
446
};
447
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
448
a->rd, a->rn, a->rm, a->pg, a->rot,
449
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
450
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
451
452
#define DO_FMLA(NAME, name) \
453
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
454
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
455
}; \
456
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
457
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
458
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
459
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
460
461
DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
462
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
463
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
464
};
465
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
466
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
467
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
468
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
469
470
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
471
NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
472
};
473
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
474
a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
475
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
476
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
477
478
/*
479
*** SVE Floating Point Unary Operations Predicated Group
480
*/
481
482
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
483
- gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
484
+ gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
485
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
486
- gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
487
+ gen_helper_sve_fcvt_hs, a, 0, FPST_A64)
488
489
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
490
- gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
491
+ gen_helper_sve_bfcvt, a, 0, FPST_A64)
492
493
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
494
- gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
495
+ gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
496
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
497
- gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
498
+ gen_helper_sve_fcvt_hd, a, 0, FPST_A64)
499
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
500
- gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
501
+ gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
502
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
503
- gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
504
+ gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
505
506
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
507
gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
508
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
509
gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
510
511
TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
512
- gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
513
+ gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
514
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
515
- gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
516
+ gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64)
517
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
518
- gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
519
+ gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64)
520
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
521
- gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
522
+ gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64)
523
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
524
- gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
525
+ gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64)
526
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
527
- gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
528
+ gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64)
529
530
TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
531
- gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
532
+ gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64)
533
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
534
- gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
535
+ gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64)
536
537
static gen_helper_gvec_3_ptr * const frint_fns[] = {
538
NULL,
539
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = {
540
gen_helper_sve_frint_d
541
};
542
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
543
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
544
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
545
546
static gen_helper_gvec_3_ptr * const frintx_fns[] = {
547
NULL,
548
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = {
549
gen_helper_sve_frintx_d
550
};
551
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
552
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
553
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
554
555
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
556
ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
557
@@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
558
}
559
560
vsz = vec_full_reg_size(s);
561
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
562
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
563
tmode = gen_set_rmode(mode, status);
564
565
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
566
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
567
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
568
};
569
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
570
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
571
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
572
573
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
574
NULL, gen_helper_sve_fsqrt_h,
575
gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
576
};
577
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
578
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
579
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
580
581
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
582
gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
583
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
584
gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
585
586
TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
587
- gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
588
+ gen_helper_sve_scvt_ss, a, 0, FPST_A64)
589
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
590
- gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
591
+ gen_helper_sve_scvt_ds, a, 0, FPST_A64)
592
593
TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
594
- gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
595
+ gen_helper_sve_scvt_sd, a, 0, FPST_A64)
596
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
597
- gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
598
+ gen_helper_sve_scvt_dd, a, 0, FPST_A64)
599
600
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
601
gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
602
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
603
gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
604
605
TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
606
- gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
607
+ gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
608
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
609
- gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
610
+ gen_helper_sve_ucvt_ds, a, 0, FPST_A64)
611
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
612
- gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
613
+ gen_helper_sve_ucvt_sd, a, 0, FPST_A64)
614
615
TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
616
- gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
617
+ gen_helper_sve_ucvt_dd, a, 0, FPST_A64)
618
619
/*
620
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
621
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
622
51
*/
623
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
52
624
gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
625
- 0, FPST_FPCR)
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
626
+ 0, FPST_A64)
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
627
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
628
gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
629
- 0, FPST_FPCR)
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
630
+ 0, FPST_A64)
59
+ 0, FPST_FPCR)
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
62
+ 0, FPST_FPCR)
63
631
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
632
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
633
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
634
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
635
gen_gvec_rax1, a)
68
gen_helper_gvec_bfdot_idx, a)
636
69
637
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
638
- gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
71
- gen_helper_gvec_bfmmla, a, 0)
639
+ gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64)
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
640
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
73
+ gen_helper_gvec_bfmmla, a, 0)
641
- gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
74
642
+ gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
643
644
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
645
- gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
646
+ gen_helper_sve_bfcvtnt, a, 0, FPST_A64)
647
648
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
649
- gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
650
+ gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
651
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
652
- gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
653
+ gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64)
654
655
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
656
FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
657
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = {
658
gen_helper_flogb_s, gen_helper_flogb_d
659
};
660
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
661
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
662
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
663
664
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
665
{
666
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
667
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
76
{
668
{
669
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
670
- a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
671
+ a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
672
}
673
674
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
675
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
676
{
677
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
678
a->rd, a->rn, a->rm, a->ra,
679
- (a->index << 1) | sel, FPST_FPCR);
680
+ (a->index << 1) | sel, FPST_A64);
681
}
682
683
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
77
--
684
--
78
2.25.1
685
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Now we have moved all the uses of vfp.fp_status and FPST_FPCR
2
to either the A32 or A64 fields, we can remove these.
2
3
3
There's no reason to set CPACR_EL1.ZEN if SVE is disabled.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20250124162836.2332150-13-peter.maydell@linaro.org
7
---
8
target/arm/cpu.h | 2 --
9
target/arm/tcg/translate.h | 6 ------
10
target/arm/cpu.c | 1 -
11
target/arm/vfp_helper.c | 8 +-------
12
4 files changed, 1 insertion(+), 16 deletions(-)
4
13
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
index XXXXXXX..XXXXXXX 100644
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
16
--- a/target/arm/cpu.h
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
+++ b/target/arm/cpu.h
9
---
18
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
10
target/arm/cpu.c | 7 +++----
19
11
1 file changed, 3 insertions(+), 4 deletions(-)
20
/* There are a number of distinct float control structures:
12
21
*
22
- * fp_status: is the "normal" fp status.
23
* fp_status_a32: is the "normal" fp status for AArch32 insns
24
* fp_status_a64: is the "normal" fp status for AArch64 insns
25
* fp_status_fp16: used for half-precision calculations
26
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
27
* only thing which needs to read the exception flags being
28
* an explicit FPSCR read.
29
*/
30
- float_status fp_status;
31
float_status fp_status_a32;
32
float_status fp_status_a64;
33
float_status fp_status_f16;
34
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/tcg/translate.h
37
+++ b/target/arm/tcg/translate.h
38
@@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
39
* Enum for argument to fpstatus_ptr().
40
*/
41
typedef enum ARMFPStatusFlavour {
42
- FPST_FPCR,
43
FPST_A32,
44
FPST_A64,
45
FPST_FPCR_F16,
46
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
47
* been set up to point to the requested field in the CPU state struct.
48
* The options are:
49
*
50
- * FPST_FPCR
51
- * for non-FP16 operations controlled by the FPCR
52
* FPST_A32
53
* for AArch32 non-FP16 operations controlled by the FPCR
54
* FPST_A64
55
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
56
int offset;
57
58
switch (flavour) {
59
- case FPST_FPCR:
60
- offset = offsetof(CPUARMState, vfp.fp_status);
61
- break;
62
case FPST_A32:
63
offset = offsetof(CPUARMState, vfp.fp_status_a32);
64
break;
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
65
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
66
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
67
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
68
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
69
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
18
/* and to the FP/Neon instructions */
70
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
71
set_default_nan_mode(1, &env->vfp.standard_fp_status);
20
CPACR_EL1, FPEN, 3);
72
set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
21
- /* and to the SVE instructions */
73
- arm_set_default_fp_behaviours(&env->vfp.fp_status);
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
74
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
23
- CPACR_EL1, ZEN, 3);
75
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
24
- /* with reasonable vector length */
76
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
25
+ /* and to the SVE instructions, with default vector length */
77
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
26
if (cpu_isar_feature(aa64_sve, cpu)) {
78
index XXXXXXX..XXXXXXX 100644
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
79
--- a/target/arm/vfp_helper.c
28
+ CPACR_EL1, ZEN, 3);
80
+++ b/target/arm/vfp_helper.c
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
81
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
82
83
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
84
{
85
- uint32_t i;
86
+ uint32_t i = 0;
87
88
- i = get_float_exception_flags(&env->vfp.fp_status);
89
i |= get_float_exception_flags(&env->vfp.fp_status_a32);
90
i |= get_float_exception_flags(&env->vfp.fp_status_a64);
91
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
92
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
93
* values. The caller should have arranged for env->vfp.fpsr to
94
* be the architecturally up-to-date exception flag information first.
95
*/
96
- set_float_exception_flags(0, &env->vfp.fp_status);
97
set_float_exception_flags(0, &env->vfp.fp_status_a32);
98
set_float_exception_flags(0, &env->vfp.fp_status_a64);
99
set_float_exception_flags(0, &env->vfp.fp_status_f16);
100
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
101
i = float_round_to_zero;
102
break;
30
}
103
}
31
/*
104
- set_float_rounding_mode(i, &env->vfp.fp_status);
105
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
106
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
107
set_float_rounding_mode(i, &env->vfp.fp_status_f16);
108
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
109
}
110
if (changed & FPCR_FZ) {
111
bool ftz_enabled = val & FPCR_FZ;
112
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
113
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
114
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
115
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
116
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
117
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
118
}
119
if (changed & FPCR_DN) {
120
bool dnan_enabled = val & FPCR_DN;
121
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
122
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
123
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
124
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
32
--
125
--
33
2.25.1
126
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
As the first part of splitting the existing fp_status_f16
2
into separate float_status fields for AArch32 and AArch64
3
(so that we can make FEAT_AFP control bits apply only
4
for AArch64), define the two new fp_status_f16_a32 and
5
fp_status_f16_a64 fields, but don't use them yet.
2
6
3
This new behaviour is in the ARM pseudocode function
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
the trap would be delivered is in AArch64 mode.
7
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
detection ought to be trivially true, but the pseudocode still contains
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20250124162836.2332150-14-peter.maydell@linaro.org
24
---
10
---
25
target/arm/cpu.h | 7 +++
11
target/arm/cpu.h | 4 ++++
26
target/arm/translate.h | 4 ++
12
target/arm/tcg/translate.h | 12 ++++++++++++
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
13
target/arm/cpu.c | 2 ++
28
target/arm/helper.c | 41 +++++++++++++++++
14
target/arm/vfp_helper.c | 14 ++++++++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
15
4 files changed, 32 insertions(+)
30
target/arm/translate-vfp.c | 12 +++++
31
target/arm/translate.c | 2 +
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
35
16
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
37
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.h
19
--- a/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
20
+++ b/target/arm/cpu.h
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
41
* the same thing as the current security state of the processor!
22
* fp_status_a32: is the "normal" fp status for AArch32 insns
42
*/
23
* fp_status_a64: is the "normal" fp status for AArch64 insns
43
FIELD(TBFLAG_A32, NS, 10, 1)
24
* fp_status_fp16: used for half-precision calculations
44
+/*
25
+ * fp_status_fp16_a32: used for AArch32 half-precision calculations
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
26
+ * fp_status_fp16_a64: used for AArch64 half-precision calculations
46
+ * This requires an SME trap from AArch32 mode when using NEON.
27
* standard_fp_status : the ARM "Standard FPSCR Value"
47
+ */
28
* standard_fp_status_fp16 : used for half-precision
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
29
* calculations with the ARM "Standard FPSCR Value"
49
30
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
50
/*
31
float_status fp_status_a32;
51
* Bit usage when in AArch32 state, for M-profile only.
32
float_status fp_status_a64;
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
33
float_status fp_status_f16;
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
34
+ float_status fp_status_f16_a32;
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
35
+ float_status fp_status_f16_a64;
55
FIELD(TBFLAG_A64, SVL, 24, 4)
36
float_status standard_fp_status;
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
37
float_status standard_fp_status_f16;
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
38
58
39
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
59
/*
60
* Helpers for using the above.
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
62
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.h
41
--- a/target/arm/tcg/translate.h
64
+++ b/target/arm/translate.h
42
+++ b/target/arm/tcg/translate.h
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
43
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
66
bool pstate_sm;
44
FPST_A32,
67
/* True if PSTATE.ZA is set. */
45
FPST_A64,
68
bool pstate_za;
46
FPST_FPCR_F16,
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
47
+ FPST_A32_F16,
70
+ bool sme_trap_nonstreaming;
48
+ FPST_A64_F16,
71
+ /* True if the current instruction is non-streaming. */
49
FPST_STD,
72
+ bool is_nonstreaming;
50
FPST_STD_F16,
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
51
} ARMFPStatusFlavour;
74
bool mve_no_pred;
52
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
75
/*
53
* for AArch64 non-FP16 operations controlled by the FPCR
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
54
* FPST_FPCR_F16
77
new file mode 100644
55
* for operations controlled by the FPCR where FPCR.FZ16 is to be used
78
index XXXXXXX..XXXXXXX
56
+ * FPST_A32_F16
79
--- /dev/null
57
+ * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
80
+++ b/target/arm/sme-fa64.decode
58
+ * FPST_A64_F16
81
@@ -XXX,XX +XXX,XX @@
59
+ * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
82
+# AArch64 SME allowed instruction decoding
60
* FPST_STD
83
+#
61
* for A32/T32 Neon operations using the "standard FPSCR value"
84
+# Copyright (c) 2022 Linaro, Ltd
62
* FPST_STD_F16
85
+#
63
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
86
+# This library is free software; you can redistribute it and/or
64
case FPST_FPCR_F16:
87
+# modify it under the terms of the GNU Lesser General Public
65
offset = offsetof(CPUARMState, vfp.fp_status_f16);
88
+# License as published by the Free Software Foundation; either
66
break;
89
+# version 2.1 of the License, or (at your option) any later version.
67
+ case FPST_A32_F16:
90
+#
68
+ offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
91
+# This library is distributed in the hope that it will be useful,
69
+ break;
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
70
+ case FPST_A64_F16:
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
71
+ offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
94
+# Lesser General Public License for more details.
72
+ break;
95
+#
73
case FPST_STD:
96
+# You should have received a copy of the GNU Lesser General Public
74
offset = offsetof(CPUARMState, vfp.standard_fp_status);
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
75
break;
98
+
76
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
173
index XXXXXXX..XXXXXXX 100644
77
index XXXXXXX..XXXXXXX 100644
174
--- a/target/arm/helper.c
78
--- a/target/arm/cpu.c
175
+++ b/target/arm/helper.c
79
+++ b/target/arm/cpu.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
80
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
177
return 0;
81
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
82
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
83
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16);
84
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
85
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
86
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
87
88
#ifndef CONFIG_USER_ONLY
89
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/arm/vfp_helper.c
92
+++ b/target/arm/vfp_helper.c
93
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
94
/* FZ16 does not generate an input denormal exception. */
95
i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
96
& ~float_flag_input_denormal);
97
+ i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
98
+ & ~float_flag_input_denormal);
99
+ i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
100
+ & ~float_flag_input_denormal);
101
i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
102
& ~float_flag_input_denormal);
103
return vfp_exceptbits_from_host(i);
104
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
105
set_float_exception_flags(0, &env->vfp.fp_status_a32);
106
set_float_exception_flags(0, &env->vfp.fp_status_a64);
107
set_float_exception_flags(0, &env->vfp.fp_status_f16);
108
+ set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
109
+ set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
110
set_float_exception_flags(0, &env->vfp.standard_fp_status);
111
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
178
}
112
}
179
113
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
114
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
181
+static bool sme_fa64(CPUARMState *env, int el)
115
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
182
+{
116
set_float_rounding_mode(i, &env->vfp.fp_status_f16);
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
117
+ set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
184
+ return false;
118
+ set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
119
}
212
120
if (changed & FPCR_FZ16) {
213
+ /*
121
bool ftz_enabled = val & FPCR_FZ16;
214
+ * The SME exception we are testing for is raised via
122
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
123
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
124
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
217
+ */
125
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
218
+ if (el == 0
126
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
127
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
220
+ && (!arm_is_el2_enabled(env)
128
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
129
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
225
+ }
226
+
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
228
}
229
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
231
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
235
}
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
237
}
130
}
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
131
if (changed & FPCR_FZ) {
239
index XXXXXXX..XXXXXXX 100644
132
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
240
--- a/target/arm/translate-a64.c
133
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
241
+++ b/target/arm/translate-a64.c
134
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
135
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
243
* unallocated-encoding checks (otherwise the syndrome information
136
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
244
* for the resulting exception will be incorrect).
137
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
248
{
249
if (s->fp_excp_el) {
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
253
}
254
255
+static bool fp_access_check(DisasContext *s)
256
+{
257
+ if (!fp_access_check_only(s)) {
258
+ return false;
259
+ }
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
262
+ syn_smetrap(SME_ET_Streaming, false));
263
+ return false;
264
+ }
265
+ return true;
266
+}
267
+
268
/* Check that SVE access is enabled. If it is, return true.
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
277
return;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
279
return;
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
138
}
282
}
139
}
283
140
284
+/*
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/arm/translate.c
350
+++ b/target/arm/translate.c
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
354
}
355
+ dc->sme_trap_nonstreaming =
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
357
}
358
dc->cp_regs = cpu->cp_regs;
359
dc->features = env->features;
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
361
index XXXXXXX..XXXXXXX 100644
362
--- a/target/arm/meson.build
363
+++ b/target/arm/meson.build
364
@@ -XXX,XX +XXX,XX @@
365
gen = [
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
372
--
141
--
373
2.25.1
142
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
We directly use fp_status_f16 in a handful of helpers that
2
are AArch32-specific; switch to fp_status_f16_a32 for these.
2
3
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
SVE mode, and for SME present but SVE absent.
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20250124162836.2332150-15-peter.maydell@linaro.org
7
---
8
target/arm/tcg/vec_helper.c | 4 ++--
9
target/arm/vfp_helper.c | 2 +-
10
2 files changed, 3 insertions(+), 3 deletions(-)
5
11
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
12
1 file changed, 16 insertions(+), 6 deletions(-)
13
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-a64.c
14
--- a/target/arm/tcg/vec_helper.c
17
+++ b/target/arm/translate-a64.c
15
+++ b/target/arm/tcg/vec_helper.c
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
16
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
19
return true;
17
CPUARMState *env, uint32_t desc)
18
{
19
do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
20
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
21
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
20
}
22
}
21
23
22
-/* Check that SVE access is enabled. If it is, return true.
24
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
23
+/*
25
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
24
+ * Check that SVE access is enabled. If it is, return true.
26
CPUARMState *env, uint32_t desc)
25
* If not, emit code to generate an appropriate exception and return false.
26
+ * This function corresponds to CheckSVEEnabled().
27
*/
28
bool sve_access_check(DisasContext *s)
29
{
27
{
30
- if (s->sve_excp_el) {
28
do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
31
- assert(!s->sve_access_checked);
29
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
32
- s->sve_access_checked = true;
30
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
33
-
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
35
+ assert(dc_isar_feature(aa64_sme, s));
36
+ if (!sme_sm_enabled_check(s)) {
37
+ goto fail_exit;
38
+ }
39
+ } else if (s->sve_excp_el) {
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
41
syn_sve_access_trap(), s->sve_excp_el);
42
- return false;
43
+ goto fail_exit;
44
}
45
s->sve_access_checked = true;
46
return fp_access_check(s);
47
+
48
+ fail_exit:
49
+ /* Assert that we only raise one exception per instruction. */
50
+ assert(!s->sve_access_checked);
51
+ s->sve_access_checked = true;
52
+ return false;
53
}
31
}
54
32
55
/*
33
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
34
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/vfp_helper.c
37
+++ b/target/arm/vfp_helper.c
38
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
39
softfloat_to_vfp_compare(env, \
40
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
41
}
42
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
43
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
44
DO_VFP_cmp(s, float32, float32, fp_status_a32)
45
DO_VFP_cmp(d, float64, float64, fp_status_a32)
46
#undef DO_VFP_cmp
56
--
47
--
57
2.25.1
48
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
We directly use fp_status_f16 in a handful of helpers that are
2
AArch64-specific; switch to fp_status_f16_a64 for these.
2
3
3
We can reuse the SVE functions for implementing moves to/from
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
horizontal tile slices, but we need new ones for moves to/from
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
vertical tile slices.
6
Message-id: 20250124162836.2332150-16-peter.maydell@linaro.org
7
---
8
target/arm/tcg/sme_helper.c | 4 ++--
9
target/arm/tcg/vec_helper.c | 8 ++++----
10
2 files changed, 6 insertions(+), 6 deletions(-)
6
11
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/helper-sme.h | 12 +++
13
target/arm/helper-sve.h | 2 +
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
21
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
23
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper-sme.h
14
--- a/target/arm/tcg/sme_helper.c
25
+++ b/target/arm/helper-sme.h
15
+++ b/target/arm/tcg/sme_helper.c
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
16
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
17
float_status fpst_odd, fpst_std, fpst_f16;
28
18
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
19
/*
30
+
20
- * Make copies of fp_status and fp_status_f16, because this operation
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
21
+ * Make copies of the fp status fields we use, because this operation
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
* does not update the cumulative fp exception status. It also
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
* produces default NaNs. We also need a second copy of fp_status with
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
* round-to-odd -- see above.
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
*/
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
- fpst_f16 = env->vfp.fp_status_f16;
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
+ fpst_f16 = env->vfp.fp_status_f16_a64;
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
fpst_std = env->vfp.fp_status_a64;
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
set_default_nan_mode(true, &fpst_std);
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
set_default_nan_mode(true, &fpst_f16);
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
33
--- a/target/arm/tcg/vec_helper.c
45
+++ b/target/arm/helper-sve.h
34
+++ b/target/arm/tcg/vec_helper.c
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
47
void, ptr, ptr, ptr, ptr, i32)
36
CPUARMState *env, uint32_t desc)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
37
{
49
void, ptr, ptr, ptr, ptr, i32)
38
do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
39
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
51
+ void, ptr, ptr, ptr, ptr, i32)
40
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
41
}
62
42
63
+/* Return a newly allocated pointer to the predicate register. */
43
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
44
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
65
+{
45
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
46
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
47
float_status *status = &env->vfp.fp_status_a64;
68
+ return ret;
48
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
69
+}
49
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
70
+
50
71
bool disas_sve(DisasContext *, uint32_t);
51
for (i = 0; i < oprsz; i += sizeof(float32)) {
72
bool disas_sme(DisasContext *, uint32_t);
52
float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
73
53
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
54
CPUARMState *env, uint32_t desc)
75
index XXXXXXX..XXXXXXX 100644
55
{
76
--- a/target/arm/translate.h
56
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
77
+++ b/target/arm/translate.h
57
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
58
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
79
return x + 2;
80
}
59
}
81
60
82
+static inline int plus_12(DisasContext *s, int x)
61
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
83
+{
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
84
+ return x + 12;
63
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
85
+}
64
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
86
+
65
float_status *status = &env->vfp.fp_status_a64;
87
static inline int times_2(DisasContext *s, int x)
66
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
88
{
67
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
89
return x * 2;
68
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
69
for (i = 0; i < oprsz; i += 16) {
91
index XXXXXXX..XXXXXXX 100644
70
float16 mm_16 = *(float16 *)(vm + i + idx);
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
213
+ * the address arithmetic.
214
+ */
215
+ for (i = 0; i < oprsz; i++) {
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
286
}
287
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
445
--
71
--
446
2.25.1
72
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In the A32 decoder, use FPST_A32_F16 rather than FPST_FPCR_F16.
2
By doing an automated conversion of the whole file we avoid possibly
3
using more than one fpst value in a set_rmode/op/restore_rmode
4
sequence.
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Patch created with
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
7
perl -p -i -e 's/FPST_FPCR_F16(?!_)/FPST_A32_F16/g' target/arm/tcg/translate-vfp.c
8
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20250124162836.2332150-17-peter.maydell@linaro.org
7
---
12
---
8
target/arm/helper-sme.h | 2 ++
13
target/arm/tcg/translate-vfp.c | 24 ++++++++++++------------
9
target/arm/sme.decode | 1 +
14
1 file changed, 12 insertions(+), 12 deletions(-)
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
13
15
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
18
--- a/target/arm/tcg/translate-vfp.c
17
+++ b/target/arm/helper-sme.h
19
+++ b/target/arm/tcg/translate-vfp.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
}
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
21
23
if (sz == 1) {
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
24
- fpst = fpstatus_ptr(FPST_FPCR_F16);
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+ fpst = fpstatus_ptr(FPST_A32_F16);
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
26
} else {
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
fpst = fpstatus_ptr(FPST_A32);
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
28
}
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
28
index XXXXXXX..XXXXXXX 100644
30
}
29
--- a/target/arm/sme.decode
31
30
+++ b/target/arm/sme.decode
32
if (sz == 1) {
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
33
- fpst = fpstatus_ptr(FPST_FPCR_F16);
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
34
+ fpst = fpstatus_ptr(FPST_A32_F16);
33
35
} else {
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
36
fpst = fpstatus_ptr(FPST_A32);
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
37
}
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
37
index XXXXXXX..XXXXXXX 100644
39
/*
38
--- a/target/arm/sme_helper.c
40
* Do a half-precision operation. Functionally this is
39
+++ b/target/arm/sme_helper.c
41
* the same as do_vfp_3op_sp(), except:
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
42
- * - it uses the FPST_FPCR_F16
41
return pair;
43
+ * - it uses the FPST_A32_F16
44
* - it doesn't need the VFP vector handling (fp16 is a
45
* v8 feature, and in v8 VFP vectors don't exist)
46
* - it does the aa32_fp16_arith feature test
47
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
48
f0 = tcg_temp_new_i32();
49
f1 = tcg_temp_new_i32();
50
fd = tcg_temp_new_i32();
51
- fpst = fpstatus_ptr(FPST_FPCR_F16);
52
+ fpst = fpstatus_ptr(FPST_A32_F16);
53
54
vfp_load_reg16(f0, vn);
55
vfp_load_reg16(f1, vm);
56
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
57
/* VFNMA, VFNMS */
58
gen_vfp_negh(vd, vd);
59
}
60
- fpst = fpstatus_ptr(FPST_FPCR_F16);
61
+ fpst = fpstatus_ptr(FPST_A32_F16);
62
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
63
vfp_store_reg32(vd, a->vd);
64
return true;
65
@@ -XXX,XX +XXX,XX @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
66
67
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
68
{
69
- gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16));
70
+ gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16));
42
}
71
}
43
72
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
73
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
45
+ float_status *s_std, float_status *s_odd)
74
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
46
+{
75
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
76
tmp = tcg_temp_new_i32();
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
77
vfp_load_reg16(tmp, a->vm);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
78
- fpst = fpstatus_ptr(FPST_FPCR_F16);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
79
+ fpst = fpstatus_ptr(FPST_A32_F16);
51
+ float64 t64;
80
gen_helper_rinth(tmp, tmp, fpst);
52
+ float32 t32;
81
vfp_store_reg32(tmp, a->vd);
53
+
54
+ /*
55
+ * The ARM pseudocode function FPDot performs both multiplies
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
69
+}
70
+
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
120
{
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/arm/translate-sme.c
124
+++ b/target/arm/translate-sme.c
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
126
return true;
82
return true;
127
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
128
84
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
85
tmp = tcg_temp_new_i32();
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
86
vfp_load_reg16(tmp, a->vm);
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
87
- fpst = fpstatus_ptr(FPST_FPCR_F16);
88
+ fpst = fpstatus_ptr(FPST_A32_F16);
89
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
90
gen_helper_rinth(tmp, tmp, fpst);
91
gen_restore_rmode(tcg_rmode, fpst);
92
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
93
94
tmp = tcg_temp_new_i32();
95
vfp_load_reg16(tmp, a->vm);
96
- fpst = fpstatus_ptr(FPST_FPCR_F16);
97
+ fpst = fpstatus_ptr(FPST_A32_F16);
98
gen_helper_rinth_exact(tmp, tmp, fpst);
99
vfp_store_reg32(tmp, a->vd);
100
return true;
101
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
102
103
vm = tcg_temp_new_i32();
104
vfp_load_reg32(vm, a->vm);
105
- fpst = fpstatus_ptr(FPST_FPCR_F16);
106
+ fpst = fpstatus_ptr(FPST_A32_F16);
107
if (a->s) {
108
/* i32 -> f16 */
109
gen_helper_vfp_sitoh(vm, vm, fpst);
110
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
111
vd = tcg_temp_new_i32();
112
vfp_load_reg32(vd, a->vd);
113
114
- fpst = fpstatus_ptr(FPST_FPCR_F16);
115
+ fpst = fpstatus_ptr(FPST_A32_F16);
116
shift = tcg_constant_i32(frac_bits);
117
118
/* Switch on op:U:sx bits */
119
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
120
return true;
121
}
122
123
- fpst = fpstatus_ptr(FPST_FPCR_F16);
124
+ fpst = fpstatus_ptr(FPST_A32_F16);
125
vm = tcg_temp_new_i32();
126
vfp_load_reg16(vm, a->vm);
132
127
133
--
128
--
134
2.25.1
129
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In the A64 decoder, use FPST_A64_F16 rather than FPST_FPCR_F16.
2
By doing an automated conversion of the whole file we avoid possibly
3
using more than one fpst value in a set_rmode/op/restore_rmode
4
sequence.
2
5
3
Mark these as non-streaming instructions, which should trap
6
Patch created with
4
if full a64 support is not enabled in streaming mode.
7
perl -p -i -e 's/FPST_FPCR_F16(?!_)/FPST_A64_F16/g' target/arm/tcg/translate-{a64,sve,sme}.c
5
8
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20250124162836.2332150-18-peter.maydell@linaro.org
10
---
12
---
11
target/arm/sme-fa64.decode | 3 ---
13
target/arm/tcg/translate-a64.c | 32 ++++++++---------
12
target/arm/translate-sve.c | 15 +++++++++++----
14
target/arm/tcg/translate-sve.c | 66 +++++++++++++++++-----------------
13
2 files changed, 11 insertions(+), 7 deletions(-)
15
2 files changed, 49 insertions(+), 49 deletions(-)
14
16
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
17
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
19
--- a/target/arm/tcg/translate-a64.c
18
+++ b/target/arm/sme-fa64.decode
20
+++ b/target/arm/tcg/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
int rm, bool is_fp16, int data,
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
gen_helper_gvec_3_ptr *fn)
22
24
{
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
25
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
26
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
27
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
28
vec_full_reg_offset(s, rn),
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
29
vec_full_reg_offset(s, rm), fpst,
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
30
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
int rm, int ra, bool is_fp16, int data,
32
gen_helper_gvec_4_ptr *fn)
33
{
34
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
35
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
36
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
37
vec_full_reg_offset(s, rn),
38
vec_full_reg_offset(s, rm),
39
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
40
if (fp_access_check(s)) {
41
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
42
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
43
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
44
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
45
write_fp_sreg(s, a->rd, t0);
46
}
47
break;
48
@@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
49
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
50
TCGv_i32 t1 = tcg_constant_i32(0);
51
if (swap) {
52
- f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16));
53
+ f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
54
} else {
55
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
56
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
57
}
58
write_fp_sreg(s, a->rd, t0);
59
}
60
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
61
TCGv_i32 t1 = tcg_temp_new_i32();
62
63
read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
64
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
65
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
66
write_fp_sreg(s, a->rd, t0);
67
}
68
break;
69
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
70
gen_vfp_negh(t1, t1);
71
}
72
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
73
- fpstatus_ptr(FPST_FPCR_F16));
74
+ fpstatus_ptr(FPST_A64_F16));
75
write_fp_sreg(s, a->rd, t0);
76
}
77
break;
78
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
79
80
read_vec_element_i32(s, t0, a->rn, 0, MO_16);
81
read_vec_element_i32(s, t1, a->rn, 1, MO_16);
82
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
83
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
84
write_fp_sreg(s, a->rd, t0);
85
}
86
break;
87
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
88
if (neg_n) {
89
gen_vfp_negh(tn, tn);
90
}
91
- fpst = fpstatus_ptr(FPST_FPCR_F16);
92
+ fpst = fpstatus_ptr(FPST_A64_F16);
93
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
94
write_fp_sreg(s, a->rd, ta);
95
}
96
@@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
97
if (fp_access_check(s)) {
98
MemOp esz = a->esz;
99
int elts = (a->q ? 16 : 8) >> esz;
100
- TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
101
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
102
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
103
write_fp_sreg(s, a->rd, res);
104
}
105
@@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size,
106
bool cmp_with_zero, bool signal_all_nans)
107
{
108
TCGv_i64 tcg_flags = tcg_temp_new_i64();
109
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_A64);
110
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
111
112
if (size == MO_64) {
113
TCGv_i64 tcg_vn, tcg_vm;
114
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
115
return check == 0;
116
}
117
118
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
119
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
120
if (rmode >= 0) {
121
tcg_rmode = gen_set_rmode(rmode, fpst);
122
}
123
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
124
TCGv_i32 tcg_shift, tcg_single;
125
TCGv_i64 tcg_double;
126
127
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
128
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
129
tcg_shift = tcg_constant_i32(shift);
130
131
switch (esz) {
132
@@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
133
TCGv_ptr tcg_fpstatus;
134
TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
135
136
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
137
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
138
tcg_shift = tcg_constant_i32(shift);
139
tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
140
141
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
142
return check == 0;
143
}
144
145
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
146
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
147
if (rmode >= 0) {
148
tcg_rmode = gen_set_rmode(rmode, fpst);
149
}
150
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
151
return check == 0;
152
}
153
154
- fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
155
+ fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
156
tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
157
vec_full_reg_offset(s, rn), fpst,
158
is_q ? 16 : 8, vec_full_reg_size(s),
159
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
160
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
161
--- a/target/arm/tcg/translate-sve.c
32
+++ b/target/arm/translate-sve.c
162
+++ b/target/arm/tcg/translate-sve.c
163
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
164
arg_rr_esz *a, int data)
165
{
166
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
167
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
168
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
169
}
170
171
/* Invoke an out-of-line helper on 3 Zregs. */
172
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
173
arg_rrr_esz *a, int data)
174
{
175
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
176
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
177
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
178
}
179
180
/* Invoke an out-of-line helper on 4 Zregs. */
181
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
182
arg_rprr_esz *a)
183
{
184
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
185
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
186
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
187
}
188
189
/* Invoke a vector expander on two Zregs and an immediate. */
190
@@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
191
};
192
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
193
(a->index << 1) | sub,
194
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
195
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
196
}
197
198
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
199
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
200
};
201
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
202
fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
203
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
204
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
205
206
/*
207
*** SVE Floating Point Fast Reduction Group
208
@@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
209
210
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
211
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
212
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
213
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
214
215
fn(temp, t_zn, t_pg, status, t_desc);
216
217
@@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
218
if (sve_access_check(s)) {
219
unsigned vsz = vec_full_reg_size(s);
220
TCGv_ptr status =
221
- fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
222
+ fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
223
224
tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
225
vec_full_reg_offset(s, a->rn),
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
226
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
227
};
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
228
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
36
};
229
ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
230
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
231
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
43
232
44
/*
233
/*
45
*** SVE Floating Point Accumulating Reduction Group
234
*** SVE Floating Point Accumulating Reduction Group
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
235
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
236
t_pg = tcg_temp_new_ptr();
48
return false;
237
tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
49
}
238
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
50
+ s->is_nonstreaming = true;
239
- t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
51
if (!sve_access_check(s)) {
240
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
52
return true;
241
t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
53
}
242
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
243
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
55
DO_FP3(FADD_zzz, fadd)
244
@@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
56
DO_FP3(FSUB_zzz, fsub)
245
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
57
DO_FP3(FMUL_zzz, fmul)
246
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
58
-DO_FP3(FTSMUL, ftsmul)
247
59
DO_FP3(FRECPS, recps)
248
- status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_A64);
60
DO_FP3(FRSQRTS, rsqrts)
249
+ status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
61
250
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
62
#undef DO_FP3
251
fn(t_zd, t_zn, t_pg, scalar, status, desc);
63
252
}
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
253
@@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
65
+ NULL, gen_helper_gvec_ftsmul_h,
254
}
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
255
if (sve_access_check(s)) {
67
+};
256
unsigned vsz = vec_full_reg_size(s);
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
257
- TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
69
+ ftsmul_fns[a->esz], a, 0)
258
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
70
+
259
tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
260
vec_full_reg_offset(s, a->rn),
261
vec_full_reg_offset(s, a->rm),
262
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
263
};
264
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
265
a->rd, a->rn, a->rm, a->pg, a->rot,
266
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
267
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
268
269
#define DO_FMLA(NAME, name) \
270
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
271
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
272
}; \
273
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
274
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
275
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
276
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
277
278
DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
279
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
280
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
281
};
282
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
283
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
284
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
285
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
286
287
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
288
NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
289
};
290
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
291
a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
292
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
293
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
294
71
/*
295
/*
72
*** SVE Floating Point Arithmetic - Predicated Group
296
*** SVE Floating Point Unary Operations Predicated Group
73
*/
297
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
298
gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
299
300
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
301
- gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
302
+ gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16)
303
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
304
- gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
305
+ gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16)
306
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
307
- gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
308
+ gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16)
309
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
310
- gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
311
+ gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16)
312
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
313
- gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
314
+ gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16)
315
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
316
- gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
317
+ gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16)
318
319
TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
320
gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
321
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = {
322
gen_helper_sve_frint_d
323
};
324
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
325
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
326
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
327
328
static gen_helper_gvec_3_ptr * const frintx_fns[] = {
329
NULL,
330
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = {
331
gen_helper_sve_frintx_d
332
};
333
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
334
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
335
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
336
337
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
338
ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
339
@@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
340
}
341
342
vsz = vec_full_reg_size(s);
343
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64);
344
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
345
tmode = gen_set_rmode(mode, status);
346
347
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
348
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
349
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
350
};
351
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
352
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
353
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
354
355
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
356
NULL, gen_helper_sve_fsqrt_h,
357
gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
358
};
359
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
360
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
361
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
362
363
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
364
- gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
365
+ gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16)
366
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
367
- gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
368
+ gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16)
369
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
370
- gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
371
+ gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16)
372
373
TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
374
gen_helper_sve_scvt_ss, a, 0, FPST_A64)
375
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
376
gen_helper_sve_scvt_dd, a, 0, FPST_A64)
377
378
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
379
- gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
380
+ gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16)
381
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
382
- gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
383
+ gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16)
384
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
385
- gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
386
+ gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16)
387
388
TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
389
gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
390
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = {
391
gen_helper_flogb_s, gen_helper_flogb_d
392
};
393
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
394
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_A64)
395
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
396
397
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
398
{
74
--
399
--
75
2.25.1
400
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Now we have moved all the uses of vfp.fp_status_f16 and FPST_FPCR_F16
2
to the new A32 or A64 fields, we can remove these.
2
3
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20250124162836.2332150-19-peter.maydell@linaro.org
7
---
8
target/arm/cpu.h | 2 --
9
target/arm/tcg/translate.h | 6 ------
10
target/arm/cpu.c | 1 -
11
target/arm/vfp_helper.c | 7 -------
12
4 files changed, 16 deletions(-)
4
13
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
index XXXXXXX..XXXXXXX 100644
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
16
--- a/target/arm/cpu.h
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
+++ b/target/arm/cpu.h
9
---
18
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
10
target/arm/cpu.c | 17 ++++++++++++++++-
19
*
11
1 file changed, 16 insertions(+), 1 deletion(-)
20
* fp_status_a32: is the "normal" fp status for AArch32 insns
12
21
* fp_status_a64: is the "normal" fp status for AArch64 insns
22
- * fp_status_fp16: used for half-precision calculations
23
* fp_status_fp16_a32: used for AArch32 half-precision calculations
24
* fp_status_fp16_a64: used for AArch64 half-precision calculations
25
* standard_fp_status : the ARM "Standard FPSCR Value"
26
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
27
*/
28
float_status fp_status_a32;
29
float_status fp_status_a64;
30
- float_status fp_status_f16;
31
float_status fp_status_f16_a32;
32
float_status fp_status_f16_a64;
33
float_status standard_fp_status;
34
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/tcg/translate.h
37
+++ b/target/arm/tcg/translate.h
38
@@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
39
typedef enum ARMFPStatusFlavour {
40
FPST_A32,
41
FPST_A64,
42
- FPST_FPCR_F16,
43
FPST_A32_F16,
44
FPST_A64_F16,
45
FPST_STD,
46
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
47
* for AArch32 non-FP16 operations controlled by the FPCR
48
* FPST_A64
49
* for AArch64 non-FP16 operations controlled by the FPCR
50
- * FPST_FPCR_F16
51
- * for operations controlled by the FPCR where FPCR.FZ16 is to be used
52
* FPST_A32_F16
53
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
54
* FPST_A64_F16
55
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
56
case FPST_A64:
57
offset = offsetof(CPUARMState, vfp.fp_status_a64);
58
break;
59
- case FPST_FPCR_F16:
60
- offset = offsetof(CPUARMState, vfp.fp_status_f16);
61
- break;
62
case FPST_A32_F16:
63
offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
64
break;
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
65
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
66
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
67
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
68
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
69
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
18
int i;
70
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
19
int el = arm_current_el(env);
71
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
20
const char *ns_status;
72
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
21
+ bool sve;
73
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16);
22
74
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
75
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
24
for (i = 0; i < 32; i++) {
76
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
77
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
26
el,
78
index XXXXXXX..XXXXXXX 100644
27
psr & PSTATE_SP ? 'h' : 't');
79
--- a/target/arm/vfp_helper.c
28
80
+++ b/target/arm/vfp_helper.c
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
81
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
82
i |= get_float_exception_flags(&env->vfp.fp_status_a64);
31
+ env->svcr,
83
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
84
/* FZ16 does not generate an input denormal exception. */
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
85
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
34
+ }
86
- & ~float_flag_input_denormal);
35
if (cpu_isar_feature(aa64_bti, cpu)) {
87
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
88
& ~float_flag_input_denormal);
89
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
90
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
91
*/
92
set_float_exception_flags(0, &env->vfp.fp_status_a32);
93
set_float_exception_flags(0, &env->vfp.fp_status_a64);
94
- set_float_exception_flags(0, &env->vfp.fp_status_f16);
95
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
96
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
97
set_float_exception_flags(0, &env->vfp.standard_fp_status);
98
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
99
}
100
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
101
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
102
- set_float_rounding_mode(i, &env->vfp.fp_status_f16);
103
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
104
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
37
}
105
}
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
106
if (changed & FPCR_FZ16) {
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
107
bool ftz_enabled = val & FPCR_FZ16;
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
108
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
41
109
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
110
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
111
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
44
+ sve = sme_exception_el(env, el) == 0;
112
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
113
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
46
+ sve = sve_exception_el(env, el) == 0;
114
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
47
+ } else {
115
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
48
+ sve = false;
116
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
49
+ }
117
bool dnan_enabled = val & FPCR_DN;
50
+
118
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
51
+ if (sve) {
119
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
52
int j, zcr_len = sve_vqm1_for_el(env, el);
120
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
53
121
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
122
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
123
}
55
--
124
--
56
2.25.1
125
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark ADR as a non-streaming instruction, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Removing entries from sme-fa64.decode is an easy way to see
7
what remains to be done.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 7 +++++++
15
target/arm/sme-fa64.decode | 1 -
16
target/arm/translate-sve.c | 8 ++++----
17
3 files changed, 11 insertions(+), 5 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
26
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
29
+ { \
30
+ s->is_nonstreaming = true; \
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
+ }
33
+
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/sme-fa64.decode
38
+++ b/target/arm/sme-fa64.decode
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
42
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
53
}
54
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
63
64
/*
65
*** SVE Integer Misc - Unpredicated Group
66
--
67
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
49
}
50
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
58
--
59
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
34
NULL, gen_helper_sve_fexpa_h,
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
92
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
50
--
51
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
92
--
93
2.25.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Our float_flag_input_denormal exception flag is set when the fpu code
2
2
flushes an input denormal to zero. This is what many guest
3
These functions will be used to verify that the cpu
3
architectures (eg classic Arm behaviour) require, but it is not the
4
is in the correct state for a given instruction.
4
only denormal-related reason we might want to set an exception flag.
5
5
The x86 behaviour (which we do not currently model correctly) wants
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
to see an exception flag when a denormal input is *not* flushed to
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
zero and is actually used in an arithmetic operation. Arm's FEAT_AFP
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
8
also wants these semantics.
9
10
Rename float_flag_input_denormal to float_flag_input_denormal_flushed
11
to make it clearer when it is set and to allow us to add a new
12
float_flag_input_denormal_used next to it for the x86/FEAT_AFP
13
semantics.
14
15
Commit created with
16
for f in `git grep -l float_flag_input_denormal`; do sed -i -e 's/float_flag_input_denormal/float_flag_input_denormal_flushed/' $f; done
17
18
and manual editing of softfloat-types.h and softfloat.c to clean
19
up the indentation afterwards and to fix a comment which wasn't
20
using the full name of the flag.
21
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
Message-id: 20250124162836.2332150-20-peter.maydell@linaro.org
10
---
25
---
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
26
include/fpu/softfloat-types.h | 5 +++--
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
27
fpu/softfloat.c | 4 ++--
13
2 files changed, 55 insertions(+)
28
target/arm/tcg/sve_helper.c | 6 +++---
14
29
target/arm/vfp_helper.c | 10 +++++-----
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
30
target/i386/tcg/fpu_helper.c | 6 +++---
16
index XXXXXXX..XXXXXXX 100644
31
target/mips/tcg/msa_helper.c | 2 +-
17
--- a/target/arm/translate-a64.h
32
target/rx/op_helper.c | 2 +-
18
+++ b/target/arm/translate-a64.h
33
fpu/softfloat-parts.c.inc | 2 +-
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
34
8 files changed, 19 insertions(+), 18 deletions(-)
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
35
21
unsigned int imms, unsigned int immr);
36
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
22
bool sve_access_check(DisasContext *s);
37
index XXXXXXX..XXXXXXX 100644
23
+bool sme_enabled_check(DisasContext *s);
38
--- a/include/fpu/softfloat-types.h
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
39
+++ b/include/fpu/softfloat-types.h
25
+
40
@@ -XXX,XX +XXX,XX @@ enum {
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
41
float_flag_overflow = 0x0004,
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
42
float_flag_underflow = 0x0008,
28
+{
43
float_flag_inexact = 0x0010,
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
44
- float_flag_input_denormal = 0x0020,
30
+}
45
+ /* We flushed an input denormal to 0 (because of flush_inputs_to_zero) */
31
+
46
+ float_flag_input_denormal_flushed = 0x0020,
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
47
float_flag_output_denormal = 0x0040,
33
+static inline bool sme_za_enabled_check(DisasContext *s)
48
float_flag_invalid_isi = 0x0080, /* inf - inf */
34
+{
49
float_flag_invalid_imz = 0x0100, /* inf * 0 */
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
50
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
36
+}
51
bool tininess_before_rounding;
37
+
52
/* should denormalised results go to zero and set the inexact flag? */
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
53
bool flush_to_zero;
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
54
- /* should denormalised inputs go to zero and set the input_denormal flag? */
40
+{
55
+ /* should denormalised inputs go to zero and set input_denormal_flushed? */
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
56
bool flush_inputs_to_zero;
42
+}
57
bool default_nan_mode;
43
+
58
/*
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
59
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
60
index XXXXXXX..XXXXXXX 100644
46
bool tag_checked, int log2_size);
61
--- a/fpu/softfloat.c
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
62
+++ b/fpu/softfloat.c
48
index XXXXXXX..XXXXXXX 100644
63
@@ -XXX,XX +XXX,XX @@ this code that are retained.
49
--- a/target/arm/translate-a64.c
64
if (unlikely(soft_t ## _is_denormal(*a))) { \
50
+++ b/target/arm/translate-a64.c
65
*a = soft_t ## _set_sign(soft_t ## _zero, \
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
66
soft_t ## _is_neg(*a)); \
52
return true;
67
- float_raise(float_flag_input_denormal, s); \
68
+ float_raise(float_flag_input_denormal_flushed, s); \
69
} \
70
}
71
72
@@ -XXX,XX +XXX,XX @@ float128 float128_silence_nan(float128 a, float_status *status)
73
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
74
{
75
if (p.exp == 0 && p.frac != 0) {
76
- float_raise(float_flag_input_denormal, status);
77
+ float_raise(float_flag_input_denormal_flushed, status);
78
return true;
79
}
80
81
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/arm/tcg/sve_helper.c
84
+++ b/target/arm/tcg/sve_helper.c
85
@@ -XXX,XX +XXX,XX @@ static int16_t do_float16_logb_as_int(float16 a, float_status *s)
86
return -15 - clz32(frac);
87
}
88
/* flush to zero */
89
- float_raise(float_flag_input_denormal, s);
90
+ float_raise(float_flag_input_denormal_flushed, s);
91
}
92
} else if (unlikely(exp == 0x1f)) {
93
if (frac == 0) {
94
@@ -XXX,XX +XXX,XX @@ static int32_t do_float32_logb_as_int(float32 a, float_status *s)
95
return -127 - clz32(frac);
96
}
97
/* flush to zero */
98
- float_raise(float_flag_input_denormal, s);
99
+ float_raise(float_flag_input_denormal_flushed, s);
100
}
101
} else if (unlikely(exp == 0xff)) {
102
if (frac == 0) {
103
@@ -XXX,XX +XXX,XX @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s)
104
return -1023 - clz64(frac);
105
}
106
/* flush to zero */
107
- float_raise(float_flag_input_denormal, s);
108
+ float_raise(float_flag_input_denormal_flushed, s);
109
}
110
} else if (unlikely(exp == 0x7ff)) {
111
if (frac == 0) {
112
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/target/arm/vfp_helper.c
115
+++ b/target/arm/vfp_helper.c
116
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
117
if (host_bits & float_flag_inexact) {
118
target_bits |= FPSR_IXC;
119
}
120
- if (host_bits & float_flag_input_denormal) {
121
+ if (host_bits & float_flag_input_denormal_flushed) {
122
target_bits |= FPSR_IDC;
123
}
124
return target_bits;
125
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
126
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
127
/* FZ16 does not generate an input denormal exception. */
128
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
129
- & ~float_flag_input_denormal);
130
+ & ~float_flag_input_denormal_flushed);
131
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
132
- & ~float_flag_input_denormal);
133
+ & ~float_flag_input_denormal_flushed);
134
i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
135
- & ~float_flag_input_denormal);
136
+ & ~float_flag_input_denormal_flushed);
137
return vfp_exceptbits_from_host(i);
53
}
138
}
54
139
55
+/* This function corresponds to CheckSMEEnabled. */
140
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
56
+bool sme_enabled_check(DisasContext *s)
141
57
+{
142
/* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
58
+ /*
143
inexact = e_new & (float_flag_inexact |
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
144
- float_flag_input_denormal |
60
+ * to be zero when fp_excp_el has priority. This is because we need
145
+ float_flag_input_denormal_flushed |
61
+ * sme_excp_el by itself for cpregs access checks.
146
float_flag_invalid);
62
+ */
147
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
148
/* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
64
+ s->fp_access_checked = true;
149
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
65
+ return sme_access_check(s);
150
index XXXXXXX..XXXXXXX 100644
66
+ }
151
--- a/target/i386/tcg/fpu_helper.c
67
+ return fp_access_check_only(s);
152
+++ b/target/i386/tcg/fpu_helper.c
68
+}
153
@@ -XXX,XX +XXX,XX @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
69
+
154
(new_flags & float_flag_overflow ? FPUS_OE : 0) |
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
155
(new_flags & float_flag_underflow ? FPUS_UE : 0) |
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
156
(new_flags & float_flag_inexact ? FPUS_PE : 0) |
72
+{
157
- (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
73
+ if (!sme_enabled_check(s)) {
158
+ (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0)));
74
+ return false;
159
}
75
+ }
160
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
161
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
162
@@ -XXX,XX +XXX,XX @@ void helper_fxtract(CPUX86State *env)
78
+ syn_smetrap(SME_ET_NotStreaming, false));
163
int shift = clz64(temp.l.lower);
79
+ return false;
164
temp.l.lower <<= shift;
80
+ }
165
expdif = 1 - EXPBIAS - shift;
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
166
- float_raise(float_flag_input_denormal, &env->fp_status);
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
167
+ float_raise(float_flag_input_denormal_flushed, &env->fp_status);
83
+ syn_smetrap(SME_ET_InactiveZA, false));
168
} else {
84
+ return false;
169
expdif = EXPD(temp) - EXPBIAS;
85
+ }
170
}
86
+ return true;
171
@@ -XXX,XX +XXX,XX @@ void update_mxcsr_from_sse_status(CPUX86State *env)
87
+}
172
uint8_t flags = get_float_exception_flags(&env->sse_status);
88
+
173
/*
89
/*
174
* The MXCSR denormal flag has opposite semantics to
90
* This utility function is for doing register extension with an
175
- * float_flag_input_denormal (the softfloat code sets that flag
91
* optional shift. You will likely want to pass a temporary for the
176
+ * float_flag_input_denormal_flushed (the softfloat code sets that flag
177
* only when flushing input denormals to zero, but SSE sets it
178
* only when not flushing them to zero), so is not converted
179
* here.
180
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/target/mips/tcg/msa_helper.c
183
+++ b/target/mips/tcg/msa_helper.c
184
@@ -XXX,XX +XXX,XX @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
185
enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
186
187
/* Set Inexact (I) when flushing inputs to zero */
188
- if ((ieee_exception_flags & float_flag_input_denormal) &&
189
+ if ((ieee_exception_flags & float_flag_input_denormal_flushed) &&
190
(env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
191
if (action & CLEAR_IS_INEXACT) {
192
mips_exception_flags &= ~FP_INEXACT;
193
diff --git a/target/rx/op_helper.c b/target/rx/op_helper.c
194
index XXXXXXX..XXXXXXX 100644
195
--- a/target/rx/op_helper.c
196
+++ b/target/rx/op_helper.c
197
@@ -XXX,XX +XXX,XX @@ static void update_fpsw(CPURXState *env, float32 ret, uintptr_t retaddr)
198
if (xcpt & float_flag_inexact) {
199
SET_FPSW(X);
200
}
201
- if ((xcpt & (float_flag_input_denormal
202
+ if ((xcpt & (float_flag_input_denormal_flushed
203
| float_flag_output_denormal))
204
&& !FIELD_EX32(env->fpsw, FPSW, DN)) {
205
env->fpsw = FIELD_DP32(env->fpsw, FPSW, CE, 1);
206
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
207
index XXXXXXX..XXXXXXX 100644
208
--- a/fpu/softfloat-parts.c.inc
209
+++ b/fpu/softfloat-parts.c.inc
210
@@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
211
if (likely(frac_eqz(p))) {
212
p->cls = float_class_zero;
213
} else if (status->flush_inputs_to_zero) {
214
- float_raise(float_flag_input_denormal, status);
215
+ float_raise(float_flag_input_denormal_flushed, status);
216
p->cls = float_class_zero;
217
frac_clear(p);
218
} else {
92
--
219
--
93
2.25.1
220
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Our float_flag_output_denormal exception flag is set when
2
the fpu code flushes an output denormal to zero. Rename
3
it to float_flag_output_denormal_flushed:
4
* this keeps it parallel with the flag for flushing
5
input denormals, which we just renamed
6
* it makes it clearer that it doesn't mean "set when
7
the output is a denormal"
2
8
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
9
Commit created with
4
Restore SM and ZA state according to the records present on return.
10
for f in `git grep -l float_flag_output_denormal`; do sed -i -e 's/float_flag_output_denormal/float_flag_output_denormal_flushed/' $f; done
5
11
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20250124162836.2332150-21-peter.maydell@linaro.org
10
---
15
---
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
16
include/fpu/softfloat-types.h | 3 ++-
12
1 file changed, 154 insertions(+), 13 deletions(-)
17
fpu/softfloat.c | 2 +-
18
target/arm/vfp_helper.c | 2 +-
19
target/i386/tcg/fpu_helper.c | 2 +-
20
target/m68k/fpu_helper.c | 2 +-
21
target/mips/tcg/msa_helper.c | 2 +-
22
target/rx/op_helper.c | 2 +-
23
target/tricore/fpu_helper.c | 6 +++---
24
fpu/softfloat-parts.c.inc | 2 +-
25
9 files changed, 12 insertions(+), 11 deletions(-)
13
26
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
27
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
15
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/aarch64/signal.c
29
--- a/include/fpu/softfloat-types.h
17
+++ b/linux-user/aarch64/signal.c
30
+++ b/include/fpu/softfloat-types.h
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
31
@@ -XXX,XX +XXX,XX @@ enum {
19
32
float_flag_inexact = 0x0010,
20
#define TARGET_SVE_SIG_FLAG_SM 1
33
/* We flushed an input denormal to 0 (because of flush_inputs_to_zero) */
21
34
float_flag_input_denormal_flushed = 0x0020,
22
+#define TARGET_ZA_MAGIC 0x54366345
35
- float_flag_output_denormal = 0x0040,
23
+
36
+ /* We flushed an output denormal to 0 (because of flush_to_zero) */
24
+struct target_za_context {
37
+ float_flag_output_denormal_flushed = 0x0040,
25
+ struct target_aarch64_ctx head;
38
float_flag_invalid_isi = 0x0080, /* inf - inf */
26
+ uint16_t vl;
39
float_flag_invalid_imz = 0x0100, /* inf * 0 */
27
+ uint16_t reserved[3];
40
float_flag_invalid_idi = 0x0200, /* inf / inf */
28
+ /* The actual ZA data immediately follows. */
41
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
29
+};
42
index XXXXXXX..XXXXXXX 100644
30
+
43
--- a/fpu/softfloat.c
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
44
+++ b/fpu/softfloat.c
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
45
@@ -XXX,XX +XXX,XX @@ floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
46
}
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
47
if ( zExp <= 0 ) {
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
48
if (status->flush_to_zero) {
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
49
- float_raise(float_flag_output_denormal, status);
37
+
50
+ float_raise(float_flag_output_denormal_flushed, status);
38
struct target_rt_sigframe {
51
return packFloatx80(zSign, 0, 0);
39
struct target_siginfo info;
52
}
40
struct target_ucontext uc;
53
isTiny = status->tininess_before_rounding
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
54
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/arm/vfp_helper.c
57
+++ b/target/arm/vfp_helper.c
58
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
59
if (host_bits & float_flag_overflow) {
60
target_bits |= FPSR_OFC;
61
}
62
- if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
63
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) {
64
target_bits |= FPSR_UFC;
65
}
66
if (host_bits & float_flag_inexact) {
67
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/i386/tcg/fpu_helper.c
70
+++ b/target/i386/tcg/fpu_helper.c
71
@@ -XXX,XX +XXX,XX @@ void update_mxcsr_from_sse_status(CPUX86State *env)
72
(flags & float_flag_overflow ? FPUS_OE : 0) |
73
(flags & float_flag_underflow ? FPUS_UE : 0) |
74
(flags & float_flag_inexact ? FPUS_PE : 0) |
75
- (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
76
+ (flags & float_flag_output_denormal_flushed ? FPUS_UE | FPUS_PE :
77
0));
42
}
78
}
43
79
44
static void target_setup_sve_record(struct target_sve_context *sve,
80
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
45
- CPUARMState *env, int vq, int size)
81
index XXXXXXX..XXXXXXX 100644
46
+ CPUARMState *env, int size)
82
--- a/target/m68k/fpu_helper.c
47
{
83
+++ b/target/m68k/fpu_helper.c
48
- int i, j;
84
@@ -XXX,XX +XXX,XX @@ static int cpu_m68k_exceptbits_from_host(int host_bits)
49
+ int i, j, vq = sve_vq(env);
85
if (host_bits & float_flag_overflow) {
50
86
target_bits |= 0x40;
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
87
}
88
- if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
89
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) {
90
target_bits |= 0x20;
91
}
92
if (host_bits & float_flag_divbyzero) {
93
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/mips/tcg/msa_helper.c
96
+++ b/target/mips/tcg/msa_helper.c
97
@@ -XXX,XX +XXX,XX @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
98
}
99
100
/* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
101
- if ((ieee_exception_flags & float_flag_output_denormal) &&
102
+ if ((ieee_exception_flags & float_flag_output_denormal_flushed) &&
103
(env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
104
mips_exception_flags |= FP_INEXACT;
105
if (action & CLEAR_FS_UNDERFLOW) {
106
diff --git a/target/rx/op_helper.c b/target/rx/op_helper.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/rx/op_helper.c
109
+++ b/target/rx/op_helper.c
110
@@ -XXX,XX +XXX,XX @@ static void update_fpsw(CPURXState *env, float32 ret, uintptr_t retaddr)
111
SET_FPSW(X);
112
}
113
if ((xcpt & (float_flag_input_denormal_flushed
114
- | float_flag_output_denormal))
115
+ | float_flag_output_denormal_flushed))
116
&& !FIELD_EX32(env->fpsw, FPSW, DN)) {
117
env->fpsw = FIELD_DP32(env->fpsw, FPSW, CE, 1);
118
}
119
diff --git a/target/tricore/fpu_helper.c b/target/tricore/fpu_helper.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/tricore/fpu_helper.c
122
+++ b/target/tricore/fpu_helper.c
123
@@ -XXX,XX +XXX,XX @@ static inline uint8_t f_get_excp_flags(CPUTriCoreState *env)
124
& (float_flag_invalid
125
| float_flag_overflow
126
| float_flag_underflow
127
- | float_flag_output_denormal
128
+ | float_flag_output_denormal_flushed
129
| float_flag_divbyzero
130
| float_flag_inexact);
55
}
131
}
56
132
@@ -XXX,XX +XXX,XX @@ static void f_update_psw_flags(CPUTriCoreState *env, uint8_t flags)
57
+static void target_setup_za_record(struct target_za_context *za,
133
some_excp = 1;
58
+ CPUARMState *env, int size)
59
+{
60
+ int vq = sme_vq(env);
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
62
+ int i, j;
63
+
64
+ memset(za, 0, sizeof(*za));
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
66
+ __put_user(size, &za->head.size);
67
+ __put_user(vl, &za->vl);
68
+
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
70
+ return;
71
+ }
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
73
+
74
+ /*
75
+ * Note that ZA vectors are stored as a byte stream,
76
+ * with each byte element at a subsequent address.
77
+ */
78
+ for (i = 0; i < vl; ++i) {
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
80
+ for (j = 0; j < vq * 2; ++j) {
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
82
+ }
83
+ }
84
+}
85
+
86
static void target_restore_general_frame(CPUARMState *env,
87
struct target_rt_sigframe *sf)
88
{
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
90
91
static bool target_restore_sve_record(CPUARMState *env,
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
134
}
112
135
113
- __get_user(vl, &sve->vl);
136
- if (flags & float_flag_underflow || flags & float_flag_output_denormal) {
114
- vq = sve_vq(env);
137
+ if (flags & float_flag_underflow || flags & float_flag_output_denormal_flushed) {
115
+ /*
138
env->FPU_FU = 1 << 31;
116
+ * Note that we cannot use sve_vq() because that depends on the
139
some_excp = 1;
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
140
}
126
141
@@ -XXX,XX +XXX,XX @@ static void f_update_psw_flags(CPUTriCoreState *env, uint8_t flags)
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
142
some_excp = 1;
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
133
return true;
134
}
135
136
+static bool target_restore_za_record(CPUARMState *env,
137
+ struct target_za_context *za,
138
+ int size, int *svcr)
139
+{
140
+ int i, j, vl, vq;
141
+
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
143
+ return false;
144
+ }
145
+
146
+ __get_user(vl, &za->vl);
147
+ vq = sme_vq(env);
148
+
149
+ /* Reject mismatched VL. */
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
151
+ return false;
152
+ }
153
+
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
156
+ return true;
157
+ }
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
161
+ return false;
162
+ }
163
+
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
165
+
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
172
+ return true;
173
+}
174
+
175
static int target_restore_sigframe(CPUARMState *env,
176
struct target_rt_sigframe *sf)
177
{
178
struct target_aarch64_ctx *ctx, *extra = NULL;
179
struct target_fpsimd_context *fpsimd = NULL;
180
struct target_sve_context *sve = NULL;
181
+ struct target_za_context *za = NULL;
182
uint64_t extra_datap = 0;
183
bool used_extra = false;
184
int sve_size = 0;
185
+ int za_size = 0;
186
+ int svcr = 0;
187
188
target_restore_general_frame(env, sf);
189
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
191
sve_size = size;
192
break;
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
143
}
207
144
208
/* SVE data, if present, overwrites FPSIMD data. */
145
- if (flags & float_flag_inexact || flags & float_flag_output_denormal) {
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
146
+ if (flags & float_flag_inexact || flags & float_flag_output_denormal_flushed) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
147
env->PSW |= 1 << 26;
211
goto err;
148
some_excp = 1;
212
}
149
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
150
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
214
+ goto err;
151
index XXXXXXX..XXXXXXX 100644
215
+ }
152
--- a/fpu/softfloat-parts.c.inc
216
+ if (env->svcr != svcr) {
153
+++ b/fpu/softfloat-parts.c.inc
217
+ env->svcr = svcr;
154
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
218
+ arm_rebuild_hflags(env);
155
}
219
+ }
156
frac_shr(p, frac_shift);
220
unlock_user(extra, extra_datap, 0);
157
} else if (s->flush_to_zero) {
221
return 0;
158
- flags |= float_flag_output_denormal;
222
159
+ flags |= float_flag_output_denormal_flushed;
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
160
p->cls = float_class_zero;
224
.total_size = offsetof(struct target_rt_sigframe,
161
exp = 0;
225
uc.tuc_mcontext.__reserved),
162
frac_clear(p);
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
288
--
163
--
289
2.25.1
164
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In softfloat-types.h a comment documents that if the float_status
2
field flush_to_zero is set then we flush denormalised results to 0
3
and set the inexact flag. This isn't correct: the status flag that
4
we set when flush_to_zero causes us to flush an output to zero is
5
float_flag_output_denormal_flushed.
2
6
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Correct the comment.
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
8
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20250124162836.2332150-22-peter.maydell@linaro.org
7
---
12
---
8
target/arm/helper-sme.h | 2 ++
13
include/fpu/softfloat-types.h | 2 +-
9
target/arm/sme.decode | 2 ++
14
1 file changed, 1 insertion(+), 1 deletion(-)
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
13
15
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
18
--- a/include/fpu/softfloat-types.h
17
+++ b/target/arm/helper-sme.h
19
+++ b/include/fpu/softfloat-types.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
20
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
21
Float3NaNPropRule float_3nan_prop_rule;
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
22
FloatInfZeroNaNRule float_infzeronan_rule;
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
23
bool tininess_before_rounding;
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
24
- /* should denormalised results go to zero and set the inexact flag? */
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
25
+ /* should denormalised results go to zero and set output_denormal_flushed? */
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
26
bool flush_to_zero;
25
index XXXXXXX..XXXXXXX 100644
27
/* should denormalised inputs go to zero and set input_denormal_flushed? */
26
--- a/target/arm/sme.decode
28
bool flush_inputs_to_zero;
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
40
}
41
}
42
+
43
+/*
44
+ * Alter PAIR as needed for controlling predicates being false,
45
+ * and for NEG on an enabled row element.
46
+ */
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
48
+{
49
+ /*
50
+ * The pseudocode uses a conditional negate after the conditional zero.
51
+ * It is simpler here to unconditionally negate before conditional zero.
52
+ */
53
+ pair ^= neg;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
61
+}
62
+
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
64
+ void *vpm, uint32_t desc)
65
+{
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
68
+ uint16_t *pn = vpn, *pm = vpm;
69
+
70
+ for (row = 0; row < oprsz; ) {
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
114
+ return true;
115
+ }
116
+
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
131
+}
132
+
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
135
{
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
137
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
140
+
141
+/* TODO: FEAT_EBF16 */
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
143
--
29
--
144
2.25.1
30
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The advsimd_addh etc helpers defined in helper-a64.c are identical to
2
the vfp_addh etc helpers defined in helper-vfp.c: both take two
3
float16 inputs (in a uint32_t type) plus a float_status* and are
4
simple wrappers around the softfloat float16_* functions.
2
5
3
Mark these as non-streaming instructions, which should trap if full
6
(The duplication seems to be a historical accident: we added the
4
a64 support is not enabled in streaming mode. In this case, introduce
7
advsimd helpers in 2018 as part of the A64 implementation, and at
5
PRF_ns (prefetch non-streaming) to handle the checks.
8
that time there was no f16 emulation in A32. Then later we added the
9
A32 f16 handling by extending the existing VFP helper macros to
10
generate f16 versions as well as f32 and f64, and didn't realise we
11
could clean things up.)
6
12
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Remove the now-unnecessary advsimd helpers and make the places that
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
generated calls to them use the vfp helpers instead. Many of the
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
15
helper functions were already unused.
16
17
(The remaining advsimd_ helpers are those which don't have vfp
18
versions.)
19
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20250124162836.2332150-26-peter.maydell@linaro.org
11
---
23
---
12
target/arm/sme-fa64.decode | 3 ---
24
target/arm/tcg/helper-a64.h | 8 --------
13
target/arm/sve.decode | 10 +++++-----
25
target/arm/tcg/helper-a64.c | 9 ---------
14
target/arm/translate-sve.c | 11 +++++++++++
26
target/arm/tcg/translate-a64.c | 16 ++++++++--------
15
3 files changed, 16 insertions(+), 8 deletions(-)
27
3 files changed, 8 insertions(+), 25 deletions(-)
16
28
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
29
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
18
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/sme-fa64.decode
31
--- a/target/arm/tcg/helper-a64.h
20
+++ b/target/arm/sme-fa64.decode
32
+++ b/target/arm/tcg/helper-a64.h
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
33
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
34
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, fpst)
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
35
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
24
36
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
37
-DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
38
-DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
39
-DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
40
-DEF_HELPER_FLAGS_3(advsimd_minnumh, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
41
-DEF_HELPER_3(advsimd_addh, f16, f16, f16, fpst)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
42
-DEF_HELPER_3(advsimd_subh, f16, f16, f16, fpst)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
43
-DEF_HELPER_3(advsimd_mulh, f16, f16, f16, fpst)
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
44
-DEF_HELPER_3(advsimd_divh, f16, f16, f16, fpst)
45
DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, fpst)
46
DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, fpst)
47
DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, fpst)
48
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
33
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
50
--- a/target/arm/tcg/helper-a64.c
35
+++ b/target/arm/sve.decode
51
+++ b/target/arm/tcg/helper-a64.c
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
52
@@ -XXX,XX +XXX,XX @@ uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
37
@rpri_load_msz nreg=0
53
return float16_ ## name(a, b, fpst); \
38
54
}
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
55
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
56
-ADVSIMD_HALFOP(add)
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
57
-ADVSIMD_HALFOP(sub)
42
58
-ADVSIMD_HALFOP(mul)
43
# SVE 32-bit gather prefetch (vector plus immediate)
59
-ADVSIMD_HALFOP(div)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
60
-ADVSIMD_HALFOP(min)
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
61
-ADVSIMD_HALFOP(max)
46
62
-ADVSIMD_HALFOP(minnum)
47
# SVE contiguous prefetch (scalar plus immediate)
63
-ADVSIMD_HALFOP(maxnum)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
64
-
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
65
#define ADVSIMD_TWOHALFOP(name) \
50
@rpri_g_load esz=3
66
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \
51
67
float_status *fpst) \
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
68
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
69
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
70
--- a/target/arm/tcg/translate-a64.c
69
+++ b/target/arm/translate-sve.c
71
+++ b/target/arm/tcg/translate-a64.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
72
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmul = {
73
TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
74
75
static const FPScalar f_scalar_fmax = {
76
- gen_helper_advsimd_maxh,
77
+ gen_helper_vfp_maxh,
78
gen_helper_vfp_maxs,
79
gen_helper_vfp_maxd,
80
};
81
TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
82
83
static const FPScalar f_scalar_fmin = {
84
- gen_helper_advsimd_minh,
85
+ gen_helper_vfp_minh,
86
gen_helper_vfp_mins,
87
gen_helper_vfp_mind,
88
};
89
TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
90
91
static const FPScalar f_scalar_fmaxnm = {
92
- gen_helper_advsimd_maxnumh,
93
+ gen_helper_vfp_maxnumh,
94
gen_helper_vfp_maxnums,
95
gen_helper_vfp_maxnumd,
96
};
97
TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
98
99
static const FPScalar f_scalar_fminnm = {
100
- gen_helper_advsimd_minnumh,
101
+ gen_helper_vfp_minnumh,
102
gen_helper_vfp_minnums,
103
gen_helper_vfp_minnumd,
104
};
105
@@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
71
return true;
106
return true;
72
}
107
}
73
108
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
109
-TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
75
+{
110
-TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
76
+ if (!dc_isar_feature(aa64_sve, s)) {
111
-TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
77
+ return false;
112
-TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
78
+ }
113
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh)
79
+ /* Prefetch is a nop within QEMU. */
114
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh)
80
+ s->is_nonstreaming = true;
115
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh)
81
+ (void)sve_access_check(s);
116
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh)
82
+ return true;
117
83
+}
118
TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
84
+
119
TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
85
/*
86
* Move Prefix
87
*
88
--
120
--
89
2.25.1
121
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
We should be using the F16-specific float_status for conversions from
2
half-precision, because halfprec inputs never set Input Denormal.
2
3
3
Mark these as non-streaming instructions, which should trap
4
Without FEAT_AFP, using the wrong fpst here had no effect, because
4
if full a64 support is not enabled in streaming mode.
5
the only difference between the A64_F16 and A64 fpst is its handling
6
of flush-to-zero on input and output, and the helper functions
7
vfp_fcvt_f16_to_* and vfp_fcvt_*_to_f16 all explicitly squash the
8
relevant flushing flags, and flush_inputs_to_zero was the only way
9
that IDC could be set.
5
10
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
With FEAT_AFP, the FPCR.AH=1 behaviour sets IDC for
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
input_denormal_used, which we will only ignore in
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
13
vfp_get_fpsr_from_host() for the A64_F16 fpst; so it matters that we
14
use that one for f16 inputs (and the normal one for single/double to
15
f16 conversions).
16
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
Message-id: 20250124162836.2332150-27-peter.maydell@linaro.org
10
---
20
---
11
target/arm/sme-fa64.decode | 9 ---------
21
target/arm/tcg/translate-a64.c | 9 ++++++---
12
target/arm/translate-sve.c | 6 ++++++
22
target/arm/tcg/translate-sve.c | 4 ++--
13
2 files changed, 6 insertions(+), 9 deletions(-)
23
2 files changed, 8 insertions(+), 5 deletions(-)
14
24
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
25
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
27
--- a/target/arm/tcg/translate-a64.c
18
+++ b/target/arm/sme-fa64.decode
28
+++ b/target/arm/tcg/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
29
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
30
if (fp_access_check(s)) {
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
31
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
22
32
TCGv_i32 tcg_rd = tcg_temp_new_i32();
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
33
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64);
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
34
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
35
TCGv_i32 tcg_ahp = get_ahp_flag();
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
36
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
37
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
38
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
39
if (fp_access_check(s)) {
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
40
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
41
TCGv_i64 tcg_rd = tcg_temp_new_i64();
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
42
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64);
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
43
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
44
TCGv_i32 tcg_ahp = get_ahp_flag();
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
45
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
46
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
47
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
if (!dc_isar_feature(aa64_sve, s)) {
45
return false;
46
}
47
+ s->is_nonstreaming = true;
48
if (!sve_access_check(s)) {
49
return true;
48
return true;
50
}
49
}
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
50
52
if (!dc_isar_feature(aa64_sve, s)) {
51
- fpst = fpstatus_ptr(FPST_A64);
53
return false;
52
if (a->esz == MO_64) {
54
}
53
/* 32 -> 64 bit fp conversion */
55
+ s->is_nonstreaming = true;
54
TCGv_i64 tcg_res[2];
56
if (!sve_access_check(s)) {
55
TCGv_i32 tcg_op = tcg_temp_new_i32();
57
return true;
56
int srcelt = a->q ? 2 : 0;
58
}
57
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
58
+ fpst = fpstatus_ptr(FPST_A64);
60
if (!dc_isar_feature(aa64_sve2, s)) {
59
+
61
return false;
60
for (pass = 0; pass < 2; pass++) {
62
}
61
tcg_res[pass] = tcg_temp_new_i64();
63
+ s->is_nonstreaming = true;
62
read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
64
if (!sve_access_check(s)) {
63
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
65
return true;
64
TCGv_i32 tcg_res[4];
66
}
65
TCGv_i32 ahp = get_ahp_flag();
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
66
68
if (!dc_isar_feature(aa64_sve, s)) {
67
+ fpst = fpstatus_ptr(FPST_A64_F16);
69
return false;
68
+
70
}
69
for (pass = 0; pass < 4; pass++) {
71
+ s->is_nonstreaming = true;
70
tcg_res[pass] = tcg_temp_new_i32();
72
if (!sve_access_check(s)) {
71
read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
73
return true;
72
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
74
}
73
index XXXXXXX..XXXXXXX 100644
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
74
--- a/target/arm/tcg/translate-sve.c
76
if (!dc_isar_feature(aa64_sve, s)) {
75
+++ b/target/arm/tcg/translate-sve.c
77
return false;
76
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
78
}
77
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
79
+ s->is_nonstreaming = true;
78
gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
80
if (!sve_access_check(s)) {
79
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
81
return true;
80
- gen_helper_sve_fcvt_hs, a, 0, FPST_A64)
82
}
81
+ gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
82
84
if (!dc_isar_feature(aa64_sve2, s)) {
83
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
85
return false;
84
gen_helper_sve_bfcvt, a, 0, FPST_A64)
86
}
85
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
87
+ s->is_nonstreaming = true;
86
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
88
if (!sve_access_check(s)) {
87
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
89
return true;
88
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
90
}
89
- gen_helper_sve_fcvt_hd, a, 0, FPST_A64)
90
+ gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16)
91
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
92
gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
93
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
91
--
94
--
92
2.25.1
95
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 2 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
32
if (!dc_isar_feature(aa64_sve, s)) {
33
return false;
34
}
35
+ s->is_nonstreaming = true;
36
if (sve_access_check(s)) {
37
TCGv_i64 addr = new_tmp_a64(s);
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
48
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
-
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-sve.c
29
+++ b/target/arm/translate-sve.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
if (a->rm == 31) {
32
return false;
33
}
34
+ s->is_nonstreaming = true;
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
46
--
47
2.25.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
We can reuse the SVE functions for LDR and STR, passing in the
4
base of the ZA vector and a zero offset.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme.decode | 7 +++++++
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
13
2 files changed, 31 insertions(+)
14
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme.decode
18
+++ b/target/arm/sme.decode
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
20
&ldst rs=%mova_rs
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
22
&ldst esz=4 rs=%mova_rs
23
+
24
+&ldstr rv rn imm
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
26
+ &ldstr rv=%mova_rs
27
+
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-sme.c
33
+++ b/target/arm/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
tcg_temp_free_i64(addr);
36
return true;
37
}
38
+
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
40
+
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
42
+{
43
+ int svl = streaming_vec_reg_size(s);
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
46
+
47
+ if (!sme_za_enabled_check(s)) {
48
+ return true;
49
+ }
50
+
51
+ /* ZA[n] equates to ZA0H.B[n]. */
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
53
+
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
55
+
56
+ tcg_temp_free_ptr(base);
57
+ return true;
58
+}
59
+
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
62
--
63
2.25.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Hongren Zheng <i@zenithal.me>
2
2
3
When a USBPacket in the OUT direction has a larger payload
4
than the ep_out_buffer (of size 512), a buffer overflow
5
would occur.
6
7
It could be fixed by limiting the size of usb_packet_copy
8
to be at most buffer size. Further optimization gets rid
9
of the ep_out_buffer and directly uses ep_out as the target
10
buffer.
11
12
This is reported by a security researcher who artificially
13
constructed an OUT packet of size 2047. The report has gone
14
through the QEMU security process, and as this device is for
15
testing purposes and no deployment of it in a virtualization
16
environment is observed, it is triaged not to be a security bug.
17
18
Cc: qemu-stable@nongnu.org
19
Fixes: d7d34918551dc48 ("hw/usb: Add CanoKey Implementation")
20
Reported-by: Juan Jose Lopez Jaimez <thatjiaozi@gmail.com>
21
Signed-off-by: Hongren Zheng <i@zenithal.me>
22
Message-id: Z4TfMOrZz6IQYl_h@Sun
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
25
---
8
target/arm/helper-sme.h | 5 +++
26
hw/usb/canokey.h | 4 ----
9
target/arm/sme.decode | 11 +++++
27
hw/usb/canokey.c | 6 +++---
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
28
2 files changed, 3 insertions(+), 7 deletions(-)
11
target/arm/translate-sme.c | 31 +++++++++++++
12
4 files changed, 137 insertions(+)
13
29
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
30
diff --git a/hw/usb/canokey.h b/hw/usb/canokey.h
15
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
32
--- a/hw/usb/canokey.h
17
+++ b/target/arm/helper-sme.h
33
+++ b/hw/usb/canokey.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
34
@@ -XXX,XX +XXX,XX @@
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
#define CANOKEY_EP_NUM 3
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
/* BULK/INTR IN can be up to 1352 bytes, e.g. get key info */
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
#define CANOKEY_EP_IN_BUFFER_SIZE 2048
22
+
38
-/* BULK OUT can be up to 270 bytes, e.g. PIV import cert */
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
39
-#define CANOKEY_EP_OUT_BUFFER_SIZE 512
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
40
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
41
typedef enum {
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
42
CANOKEY_EP_IN_WAIT,
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
43
@@ -XXX,XX +XXX,XX @@ typedef struct CanoKeyState {
44
/* OUT pointer to canokey recv buffer */
45
uint8_t *ep_out[CANOKEY_EP_NUM];
46
uint32_t ep_out_size[CANOKEY_EP_NUM];
47
- /* For large BULK OUT, multiple write to ep_out is needed */
48
- uint8_t ep_out_buffer[CANOKEY_EP_NUM][CANOKEY_EP_OUT_BUFFER_SIZE];
49
50
/* Properties */
51
char *file; /* canokey-file */
52
diff --git a/hw/usb/canokey.c b/hw/usb/canokey.c
28
index XXXXXXX..XXXXXXX 100644
53
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
54
--- a/hw/usb/canokey.c
30
+++ b/target/arm/sme.decode
55
+++ b/hw/usb/canokey.c
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
56
@@ -XXX,XX +XXX,XX @@ static void canokey_handle_data(USBDevice *dev, USBPacket *p)
32
57
switch (p->pid) {
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
58
case USB_TOKEN_OUT:
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
59
trace_canokey_handle_data_out(ep_out, p->iov.size);
35
+
60
- usb_packet_copy(p, key->ep_out_buffer[ep_out], p->iov.size);
36
+### SME Add Vector to Array
61
out_pos = 0;
37
+
62
+ /* segment packet into (possibly multiple) ep_out */
38
+&adda zad zn pm pn
63
while (out_pos != p->iov.size) {
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
64
/*
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
65
* key->ep_out[ep_out] set by prepare_receive
41
+
66
@@ -XXX,XX +XXX,XX @@ static void canokey_handle_data(USBDevice *dev, USBPacket *p)
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
67
* to be the buffer length
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
68
*/
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
69
out_len = MIN(p->iov.size - out_pos, key->ep_out_size[ep_out]);
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
70
- memcpy(key->ep_out[ep_out],
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
71
- key->ep_out_buffer[ep_out] + out_pos, out_len);
47
index XXXXXXX..XXXXXXX 100644
72
+ /* usb_packet_copy would update the pos offset internally */
48
--- a/target/arm/sme_helper.c
73
+ usb_packet_copy(p, key->ep_out[ep_out], out_len);
49
+++ b/target/arm/sme_helper.c
74
out_pos += out_len;
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
75
/* update ep_out_size to actual len */
51
DO_ST(q, _le, MO_128)
76
key->ep_out_size[ep_out] = out_len;
52
53
#undef DO_ST
54
+
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
56
+ void *vpm, uint32_t desc)
57
+{
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
59
+ uint64_t *pn = vpn, *pm = vpm;
60
+ uint32_t *zda = vzda, *zn = vzn;
61
+
62
+ for (row = 0; row < oprsz; ) {
63
+ uint64_t pa = pn[row >> 4];
64
+ do {
65
+ if (pa & 1) {
66
+ for (col = 0; col < oprsz; ) {
67
+ uint64_t pb = pm[col >> 4];
68
+ do {
69
+ if (pb & 1) {
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
71
+ }
72
+ pb >>= 4;
73
+ } while (++col & 15);
74
+ }
75
+ }
76
+ pa >>= 4;
77
+ } while (++row & 15);
78
+ }
79
+}
80
+
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
83
+{
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
87
+
88
+ for (row = 0; row < oprsz; ++row) {
89
+ if (pn[H1(row)] & 1) {
90
+ for (col = 0; col < oprsz; ++col) {
91
+ if (pm[H1(col)] & 1) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
96
+ }
97
+}
98
+
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
101
+{
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
105
+
106
+ for (row = 0; row < oprsz; ) {
107
+ uint64_t pa = pn[row >> 4];
108
+ do {
109
+ if (pa & 1) {
110
+ uint32_t zn_row = zn[H4(row)];
111
+ for (col = 0; col < oprsz; ) {
112
+ uint64_t pb = pm[col >> 4];
113
+ do {
114
+ if (pb & 1) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
116
+ }
117
+ pb >>= 4;
118
+ } while (++col & 15);
119
+ }
120
+ }
121
+ pa >>= 4;
122
+ } while (++row & 15);
123
+ }
124
+}
125
+
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
127
+ void *vpm, uint32_t desc)
128
+{
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
132
+
133
+ for (row = 0; row < oprsz; ++row) {
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
177
+}
178
+
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
183
--
77
--
184
2.25.1
78
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Richard Henderson <richard.henderson@linaro.org>
2
1
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 5 +++
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
+
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
58
}
59
}
60
+
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
62
+ void *vpm, void *vst, uint32_t desc)
63
+{
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
65
+ uint32_t neg = simd_data(desc) << 31;
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+ float_status fpst;
68
+
69
+ /*
70
+ * Make a copy of float_status because this operation does not
71
+ * update the cumulative fp exception status. It also produces
72
+ * default nans.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
168
--
169
2.25.1
diff view generated by jsdifflib