1
The following changes since commit 5bab95dc74d43bbb28c6a96d24c810a664432057:
1
Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches
2
(plus a fix for a target/alpha latent bug that would otherwise
3
be revealed by the fpu changes), because 68 patches is already
4
longer than I prefer to send in at one time...
2
5
3
Merge tag 'pull-request-2024-01-24' of https://gitlab.com/thuth/qemu into staging (2024-01-25 12:33:42 +0000)
6
thanks
7
-- PMM
8
9
The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e:
10
11
Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500)
4
12
5
are available in the Git repository at:
13
are available in the Git repository at:
6
14
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240126
15
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211
8
16
9
for you to fetch changes up to 5e6be95ed1578c7cfac2082b39384d99fd912508:
17
for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690:
10
18
11
hw/arm: add PCIe to Freescale i.MX6 (2024-01-26 12:23:04 +0000)
19
target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000)
12
20
13
----------------------------------------------------------------
21
----------------------------------------------------------------
14
target-arm queue:
22
target-arm queue:
15
* Fix VNCR fault detection logic
23
* target/alpha: Don't corrupt error_code with unknown softfloat flags
16
* Fix A64 scalar SQSHRN and SQRSHRN
24
* target/arm: Implement FEAT_AFP and FEAT_RPRES
17
* Fix incorrect aa64_tidcp1 feature check
18
* hw/arm/virt.c: Remove newline from error_report() string
19
* hw/arm/musicpal: Convert to qemu_add_kbd_event_handler()
20
* hw/arm/allwinner-a10: Unconditionally map the USB Host controllers
21
* hw/arm/nseries: Unconditionally map the TUSB6010 USB Host controller
22
* hw/arm: Add EHCI/OHCI controllers to Allwinner R40 and Bananapi board
23
* hw/arm: Add AHCI/SATA controller to Allwinner R40 and Bananapi board
24
* hw/arm: Add watchdog timer to Allwinner R40 and Bananapi board
25
* arm: various include header cleanups
26
* cleanups to allow some files to be built only once
27
* fsl-imx6ul: Add various missing unimplemented devices
28
* docs/system/arm/virt.rst: Add note on CPU features off by default
29
* hw/char/imx_serial: Implement receive FIFO and ageing timer
30
* target/xtensa: fix OOB TLB entry access
31
* bswap.h: Fix const_le64() macro
32
* hw/arm: add PCIe to Freescale i.MX6
33
25
34
----------------------------------------------------------------
26
----------------------------------------------------------------
35
Guenter Roeck (4):
27
Peter Maydell (49):
36
hw/arm: Add EHCI/OHCI controllers to Allwinner R40 and Bananapi board
28
target/alpha: Don't corrupt error_code with unknown softfloat flags
37
hw/arm: Add AHCI/SATA controller to Allwinner R40 and Bananapi board
29
fpu: Add float_class_denormal
38
hw/arm: Add watchdog timer to Allwinner R40 and Bananapi board
30
fpu: Implement float_flag_input_denormal_used
39
fsl-imx6ul: Add various missing unimplemented devices
31
fpu: allow flushing of output denormals to be after rounding
32
target/arm: Define FPCR AH, FIZ, NEP bits
33
target/arm: Implement FPCR.FIZ handling
34
target/arm: Adjust FP behaviour for FPCR.AH = 1
35
target/arm: Adjust exception flag handling for AH = 1
36
target/arm: Add FPCR.AH to tbflags
37
target/arm: Set up float_status to use for FPCR.AH=1 behaviour
38
target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS
39
target/arm: Use FPST_FPCR_AH for BFCVT* insns
40
target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns
41
target/arm: Add FPCR.NEP to TBFLAGS
42
target/arm: Define and use new write_fp_*reg_merging() functions
43
target/arm: Handle FPCR.NEP for 3-input scalar operations
44
target/arm: Handle FPCR.NEP for BFCVT scalar
45
target/arm: Handle FPCR.NEP for 1-input scalar operations
46
target/arm: Handle FPCR.NEP in do_cvtf_scalar()
47
target/arm: Handle FPCR.NEP for scalar FABS and FNEG
48
target/arm: Handle FPCR.NEP for FCVTXN (scalar)
49
target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element
50
target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX
51
target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX
52
target/arm: Implement FPCR.AH semantics for FMAXV and FMINV
53
target/arm: Implement FPCR.AH semantics for FMINP and FMAXP
54
target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV
55
target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate
56
target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector
57
target/arm: Implement FPCR.AH handling of negation of NaN
58
target/arm: Implement FPCR.AH handling for scalar FABS and FABD
59
target/arm: Handle FPCR.AH in vector FABD
60
target/arm: Handle FPCR.AH in SVE FNEG
61
target/arm: Handle FPCR.AH in SVE FABS
62
target/arm: Handle FPCR.AH in SVE FABD
63
target/arm: Handle FPCR.AH in negation steps in SVE FCADD
64
target/arm: Handle FPCR.AH in negation steps in FCADD
65
target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns
66
target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns
67
target/arm: Handle FPCR.AH in negation step in FMLS (indexed)
68
target/arm: Handle FPCR.AH in negation in FMLS (vector)
69
target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector)
70
target/arm: Handle FPCR.AH in SVE FTSSEL
71
target/arm: Handle FPCR.AH in SVE FTMAD
72
target/arm: Enable FEAT_AFP for '-cpu max'
73
target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper
74
target/arm: Implement increased precision FRECPE
75
target/arm: Implement increased precision FRSQRTE
76
target/arm: Enable FEAT_RPRES for -cpu max
40
77
41
Gustavo Romero (1):
78
Richard Henderson (19):
42
docs/system/arm/virt.rst: Add note on CPU features off by default
79
target/arm: Handle FPCR.AH in vector FCMLA
80
target/arm: Handle FPCR.AH in FCMLA by index
81
target/arm: Handle FPCR.AH in SVE FCMLA
82
target/arm: Handle FPCR.AH in FMLSL (by element and vector)
83
target/arm: Handle FPCR.AH in SVE FMLSL (indexed)
84
target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors)
85
target/arm: Introduce CPUARMState.vfp.fp_status[]
86
target/arm: Remove standard_fp_status_f16
87
target/arm: Remove standard_fp_status
88
target/arm: Remove ah_fp_status_f16
89
target/arm: Remove ah_fp_status
90
target/arm: Remove fp_status_f16_a64
91
target/arm: Remove fp_status_f16_a32
92
target/arm: Remove fp_status_a64
93
target/arm: Remove fp_status_a32
94
target/arm: Simplify fp_status indexing in mve_helper.c
95
target/arm: Simplify DO_VFP_cmp in vfp_helper.c
96
target/arm: Read fz16 from env->vfp.fpcr
97
target/arm: Sink fp_status and fpcr access into do_fmlal*
43
98
44
Max Filippov (1):
99
docs/system/arm/emulation.rst | 2 +
45
target/xtensa: fix OOB TLB entry access
100
include/fpu/softfloat-helpers.h | 11 +
46
101
include/fpu/softfloat-types.h | 25 ++
47
Nikita Ostrenkov (1):
102
target/arm/cpu-features.h | 10 +
48
hw/arm: add PCIe to Freescale i.MX6
103
target/arm/cpu.h | 97 +++--
49
104
target/arm/helper.h | 26 ++
50
Peter Maydell (6):
105
target/arm/internals.h | 6 +
51
target/arm: Fix VNCR fault detection logic
106
target/arm/tcg/helper-a64.h | 13 +
52
hw/arm/virt.c: Remove newline from error_report() string
107
target/arm/tcg/helper-sve.h | 120 ++++++
53
hw/arm/musicpal: Convert to qemu_add_kbd_event_handler()
108
target/arm/tcg/translate-a64.h | 13 +
54
target/arm: Fix A64 scalar SQSHRN and SQRSHRN
109
target/arm/tcg/translate.h | 54 +--
55
bswap.h: Fix const_le64() macro
110
target/arm/tcg/vec_internal.h | 35 ++
56
target/arm: Fix incorrect aa64_tidcp1 feature check
111
target/mips/fpu_helper.h | 6 +
57
112
fpu/softfloat.c | 66 +++-
58
Philippe Mathieu-Daudé (20):
113
target/alpha/cpu.c | 7 +
59
hw/arm/allwinner-a10: Unconditionally map the USB Host controllers
114
target/alpha/fpu_helper.c | 2 +
60
hw/arm/nseries: Unconditionally map the TUSB6010 USB Host controller
115
target/arm/cpu.c | 46 +--
61
hw/arm/exynos4210: Include missing 'exec/tswap.h' header
116
target/arm/helper.c | 2 +-
62
hw/arm/xilinx_zynq: Include missing 'exec/tswap.h' header
117
target/arm/tcg/cpu64.c | 2 +
63
hw/arm/smmuv3: Include missing 'hw/registerfields.h' header
118
target/arm/tcg/helper-a64.c | 151 ++++----
64
hw/arm/xlnx-versal: Include missing 'cpu.h' header
119
target/arm/tcg/hflags.c | 13 +
65
target/arm/cpu-features: Include missing 'hw/registerfields.h' header
120
target/arm/tcg/mve_helper.c | 44 +--
66
target/arm/cpregs: Include missing 'hw/registerfields.h' header
121
target/arm/tcg/sme_helper.c | 4 +-
67
target/arm/cpregs: Include missing 'kvm-consts.h' header
122
target/arm/tcg/sve_helper.c | 367 ++++++++++++++-----
68
target/arm: Expose arm_cpu_mp_affinity() in 'multiprocessing.h' header
123
target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++--------
69
target/arm: Declare ARM_CPU_TYPE_NAME/SUFFIX in 'cpu-qom.h'
124
target/arm/tcg/translate-sve.c | 193 +++++++---
70
hw/cpu/a9mpcore: Build it only once
125
target/arm/tcg/vec_helper.c | 387 ++++++++++++++------
71
hw/misc/xlnx-versal-crl: Include generic 'cpu-qom.h' instead of 'cpu.h'
126
target/arm/vfp_helper.c | 374 +++++++++++++++----
72
hw/misc/xlnx-versal-crl: Build it only once
127
target/hppa/fpu_helper.c | 11 +
73
target/arm: Expose M-profile register bank index definitions
128
target/i386/tcg/fpu_helper.c | 8 +
74
hw/arm/armv7m: Make 'hw/intc/armv7m_nvic.h' a target agnostic header
129
target/mips/msa.c | 9 +
75
target/arm: Move ARM_CPU_IRQ/FIQ definitions to 'cpu-qom.h' header
130
target/ppc/cpu_init.c | 3 +
76
target/arm: Move e2h_access() helper around
131
target/rx/cpu.c | 8 +
77
target/arm: Move GTimer definitions to new 'gtimer.h' header
132
target/sh4/cpu.c | 8 +
78
hw/arm: Build various units only once
133
target/tricore/helper.c | 1 +
79
134
tests/fp/fp-bench.c | 1 +
80
Rayhan Faizel (1):
135
fpu/softfloat-parts.c.inc | 127 +++++--
81
hw/char/imx_serial: Implement receive FIFO and ageing timer
136
37 files changed, 2325 insertions(+), 709 deletions(-)
82
83
Richard Henderson (2):
84
target/arm: Rename arm_cpu_mp_affinity
85
target/arm: Create arm_cpu_mp_affinity
86
87
docs/system/arm/bananapi_m2u.rst | 5 +-
88
docs/system/arm/virt.rst | 13 ++++
89
hw/arm/smmuv3-internal.h | 1 +
90
include/hw/arm/allwinner-r40.h | 15 +++++
91
include/hw/arm/fsl-imx6.h | 44 +++++++------
92
include/hw/arm/fsl-imx6ul.h | 2 +
93
include/hw/arm/xlnx-versal.h | 1 +
94
include/hw/char/imx_serial.h | 20 +++++-
95
include/hw/intc/armv7m_nvic.h | 2 +-
96
include/hw/misc/xlnx-versal-crl.h | 2 +-
97
include/qemu/bswap.h | 16 ++---
98
target/arm/cpregs.h | 3 +
99
target/arm/cpu-features.h | 4 +-
100
target/arm/cpu-qom.h | 24 +++++++
101
target/arm/cpu.h | 34 +---------
102
target/arm/gtimer.h | 21 ++++++
103
target/arm/multiprocessing.h | 16 +++++
104
hw/arm/allwinner-a10.c | 50 ++++++--------
105
hw/arm/allwinner-h3.c | 2 +
106
hw/arm/allwinner-r40.c | 69 +++++++++++++++++++-
107
hw/arm/armv7m.c | 2 +
108
hw/arm/aspeed_ast2400.c | 1 +
109
hw/arm/aspeed_ast2600.c | 1 +
110
hw/arm/bcm2836.c | 2 +
111
hw/arm/collie.c | 1 -
112
hw/arm/exynos4210.c | 2 +
113
hw/arm/fsl-imx25.c | 1 +
114
hw/arm/fsl-imx31.c | 1 +
115
hw/arm/fsl-imx6.c | 26 ++++++++
116
hw/arm/fsl-imx6ul.c | 31 +++++++++
117
hw/arm/fsl-imx7.c | 1 +
118
hw/arm/gumstix.c | 1 -
119
hw/arm/highbank.c | 1 +
120
hw/arm/integratorcp.c | 2 +-
121
hw/arm/mainstone.c | 1 -
122
hw/arm/musicpal.c | 133 ++++++++++++++++++--------------------
123
hw/arm/npcm7xx.c | 3 +-
124
hw/arm/nseries.c | 4 +-
125
hw/arm/omap1.c | 1 +
126
hw/arm/omap2.c | 2 +-
127
hw/arm/omap_sx1.c | 1 -
128
hw/arm/palm.c | 1 -
129
hw/arm/realview.c | 1 +
130
hw/arm/sbsa-ref.c | 4 +-
131
hw/arm/spitz.c | 1 -
132
hw/arm/strongarm.c | 2 +-
133
hw/arm/versatilepb.c | 2 +-
134
hw/arm/vexpress.c | 2 +-
135
hw/arm/virt-acpi-build.c | 4 +-
136
hw/arm/virt.c | 15 +++--
137
hw/arm/xilinx_zynq.c | 3 +-
138
hw/arm/xlnx-versal-virt.c | 5 +-
139
hw/arm/xlnx-versal.c | 2 +
140
hw/arm/xlnx-zynqmp.c | 2 +
141
hw/arm/z2.c | 1 -
142
hw/char/imx_serial.c | 102 +++++++++++++++++++++++++----
143
hw/cpu/a15mpcore.c | 1 +
144
hw/cpu/a9mpcore.c | 2 +-
145
hw/misc/xlnx-versal-crl.c | 5 +-
146
target/arm/arm-powerctl.c | 3 +-
147
target/arm/cpu.c | 13 +++-
148
target/arm/helper.c | 30 +++++----
149
target/arm/hvf/hvf.c | 6 +-
150
target/arm/kvm.c | 1 +
151
target/arm/machine.c | 1 +
152
target/arm/tcg/psci.c | 3 +-
153
target/arm/tcg/tlb_helper.c | 2 +-
154
target/arm/tcg/translate-a64.c | 2 +-
155
target/xtensa/mmu_helper.c | 47 ++++++++++----
156
hw/arm/Kconfig | 6 ++
157
hw/arm/meson.build | 23 +++----
158
hw/cpu/meson.build | 2 +-
159
hw/misc/meson.build | 2 +-
160
73 files changed, 597 insertions(+), 261 deletions(-)
161
create mode 100644 target/arm/gtimer.h
162
create mode 100644 target/arm/multiprocessing.h
163
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
In do_cvttq() we set env->error_code with what is supposed to be a
2
set of FPCR exception bit values. However, if the set of float
3
exception flags we get back from softfloat for the conversion
4
includes a flag which is not one of the three we expect here
5
(invalid_cvti, invalid, inexact) then we will fall through the
6
if-ladder and set env->error_code to the unconverted softfloat
7
exception_flag value. This will then cause us to take a spurious
8
exception.
2
9
3
hw/misc/xlnx-versal-crl.c doesn't require "cpu.h"
10
This is harmless now, but when we add new floating point exception
4
anymore. By removing it, the unit becomes target
11
flags to softfloat it will cause problems. Add an else clause to the
5
agnostic: we can build it once. Update meson.
12
if-ladder to make it ignore any float exception flags it doesn't care
13
about.
6
14
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Specifically, without this fix, 'make check-tcg' will fail for Alpha
16
when the commit adding float_flag_input_denormal_used lands.
17
18
19
Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ")
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20240118200643.29037-15-philmd@linaro.org
22
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
23
---
12
hw/misc/xlnx-versal-crl.c | 1 -
24
target/alpha/fpu_helper.c | 2 ++
13
hw/misc/meson.build | 2 +-
25
1 file changed, 2 insertions(+)
14
2 files changed, 1 insertion(+), 2 deletions(-)
15
26
16
diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c
27
diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c
17
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/misc/xlnx-versal-crl.c
29
--- a/target/alpha/fpu_helper.c
19
+++ b/hw/misc/xlnx-versal-crl.c
30
+++ b/target/alpha/fpu_helper.c
20
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode)
21
#include "hw/register.h"
32
exc = FPCR_INV;
22
#include "hw/resettable.h"
33
} else if (exc & float_flag_inexact) {
23
34
exc = FPCR_INE;
24
-#include "target/arm/cpu.h"
35
+ } else {
25
#include "target/arm/arm-powerctl.h"
36
+ exc = 0;
26
#include "target/arm/multiprocessing.h"
37
}
27
#include "hw/misc/xlnx-versal-crl.h"
38
}
28
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
39
env->error_code = exc;
29
index XXXXXXX..XXXXXXX 100644
30
--- a/hw/misc/meson.build
31
+++ b/hw/misc/meson.build
32
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c'))
33
system_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c'))
34
system_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-crf.c'))
35
system_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-apu-ctrl.c'))
36
-specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-crl.c'))
37
system_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files(
38
+ 'xlnx-versal-crl.c',
39
'xlnx-versal-xramc.c',
40
'xlnx-versal-pmc-iou-slcr.c',
41
'xlnx-versal-cfu.c',
42
--
40
--
43
2.34.1
41
2.34.1
44
42
45
43
diff view generated by jsdifflib
New patch
1
Currently in softfloat we canonicalize input denormals and so the
2
code that implements floating point operations does not need to care
3
whether the input value was originally normal or denormal. However,
4
both x86 and Arm FEAT_AFP require that an exception flag is set if:
5
* an input is denormal
6
* that input is not squashed to zero
7
* that input is actually used in the calculation (e.g. we
8
did not find the other input was a NaN)
1
9
10
So we need to track that the input was a non-squashed denormal. To
11
do this we add a new value to the FloatClass enum. In this commit we
12
add the value and adjust the code everywhere that looks at FloatClass
13
values so that the new float_class_denormal behaves identically to
14
float_class_normal. We will add the code that does the "raise a new
15
float exception flag if an input was an unsquashed denormal and we
16
used it" in a subsequent commit.
17
18
There should be no behavioural change in this commit.
19
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
---
23
fpu/softfloat.c | 32 ++++++++++++++++++++++++++++---
24
fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++---------------
25
2 files changed, 54 insertions(+), 18 deletions(-)
26
27
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/fpu/softfloat.c
30
+++ b/fpu/softfloat.c
31
@@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s,
32
/*
33
* Classify a floating point number. Everything above float_class_qnan
34
* is a NaN so cls >= float_class_qnan is any NaN.
35
+ *
36
+ * Note that we canonicalize denormals, so most code should treat
37
+ * class_normal and class_denormal identically.
38
*/
39
40
typedef enum __attribute__ ((__packed__)) {
41
float_class_unclassified,
42
float_class_zero,
43
float_class_normal,
44
+ float_class_denormal, /* input was a non-squashed denormal */
45
float_class_inf,
46
float_class_qnan, /* all NaNs from here */
47
float_class_snan,
48
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) {
49
enum {
50
float_cmask_zero = float_cmask(float_class_zero),
51
float_cmask_normal = float_cmask(float_class_normal),
52
+ float_cmask_denormal = float_cmask(float_class_denormal),
53
float_cmask_inf = float_cmask(float_class_inf),
54
float_cmask_qnan = float_cmask(float_class_qnan),
55
float_cmask_snan = float_cmask(float_class_snan),
56
57
float_cmask_infzero = float_cmask_zero | float_cmask_inf,
58
float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
59
+ float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
60
};
61
62
/* Flags for parts_minmax. */
63
@@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c)
64
return c == float_class_qnan;
65
}
66
67
+/*
68
+ * Return true if the float_cmask has only normals in it
69
+ * (including input denormals that were canonicalized)
70
+ */
71
+static inline bool cmask_is_only_normals(int cmask)
72
+{
73
+ return !(cmask & ~float_cmask_anynorm);
74
+}
75
+
76
+static inline bool is_anynorm(FloatClass c)
77
+{
78
+ return float_cmask(c) & float_cmask_anynorm;
79
+}
80
+
81
/*
82
* Structure holding all of the decomposed parts of a float.
83
* The exponent is unbiased and the fraction is normalized.
84
@@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p,
85
*/
86
switch (p->cls) {
87
case float_class_normal:
88
+ case float_class_denormal:
89
if (unlikely(p->exp == 0)) {
90
/*
91
* The result is denormal for float32, but can be represented
92
@@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
93
94
switch (p->cls) {
95
case float_class_normal:
96
+ case float_class_denormal:
97
if (s->floatx80_rounding_precision == floatx80_precision_x) {
98
parts_uncanon_normal(p, s, fmt);
99
frac = p->frac_hi;
100
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
101
break;
102
103
case float_class_normal:
104
+ case float_class_denormal:
105
case float_class_zero:
106
break;
107
108
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
109
a->sign = b->sign;
110
a->exp = b->exp;
111
112
- if (a->cls == float_class_normal) {
113
+ if (is_anynorm(a->cls)) {
114
frac_truncjam(a, b);
115
} else if (is_nan(a->cls)) {
116
/* Discard the low bits of the NaN. */
117
@@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
118
return int128_zero();
119
120
case float_class_normal:
121
+ case float_class_denormal:
122
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
123
flags = float_flag_inexact;
124
}
125
@@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
126
return int128_zero();
127
128
case float_class_normal:
129
+ case float_class_denormal:
130
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
131
flags = float_flag_inexact;
132
if (p.cls == float_class_zero) {
133
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
134
float32_unpack_canonical(&xp, a, status);
135
if (unlikely(xp.cls != float_class_normal)) {
136
switch (xp.cls) {
137
+ case float_class_denormal:
138
+ break;
139
case float_class_snan:
140
case float_class_qnan:
141
parts_return_nan(&xp, status);
142
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
143
case float_class_zero:
144
return float32_one;
145
default:
146
- break;
147
+ g_assert_not_reached();
148
}
149
- g_assert_not_reached();
150
}
151
152
float_raise(float_flag_inexact, status);
153
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
154
index XXXXXXX..XXXXXXX 100644
155
--- a/fpu/softfloat-parts.c.inc
156
+++ b/fpu/softfloat-parts.c.inc
157
@@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
158
frac_clear(p);
159
} else {
160
int shift = frac_normalize(p);
161
- p->cls = float_class_normal;
162
+ p->cls = float_class_denormal;
163
p->exp = fmt->frac_shift - fmt->exp_bias
164
- shift + !fmt->m68k_denormal;
165
}
166
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
167
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
168
const FloatFmt *fmt)
169
{
170
- if (likely(p->cls == float_class_normal)) {
171
+ if (likely(is_anynorm(p->cls))) {
172
parts_uncanon_normal(p, s, fmt);
173
} else {
174
switch (p->cls) {
175
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
176
177
if (a->sign != b_sign) {
178
/* Subtraction */
179
- if (likely(ab_mask == float_cmask_normal)) {
180
+ if (likely(cmask_is_only_normals(ab_mask))) {
181
if (parts_sub_normal(a, b)) {
182
return a;
183
}
184
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
185
}
186
} else {
187
/* Addition */
188
- if (likely(ab_mask == float_cmask_normal)) {
189
+ if (likely(cmask_is_only_normals(ab_mask))) {
190
parts_add_normal(a, b);
191
return a;
192
}
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
194
}
195
196
if (b->cls == float_class_zero) {
197
- g_assert(a->cls == float_class_normal);
198
+ g_assert(is_anynorm(a->cls));
199
return a;
200
}
201
202
g_assert(a->cls == float_class_zero);
203
- g_assert(b->cls == float_class_normal);
204
+ g_assert(is_anynorm(b->cls));
205
return_b:
206
b->sign = b_sign;
207
return b;
208
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
209
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
210
bool sign = a->sign ^ b->sign;
211
212
- if (likely(ab_mask == float_cmask_normal)) {
213
+ if (likely(cmask_is_only_normals(ab_mask))) {
214
FloatPartsW tmp;
215
216
frac_mulw(&tmp, a, b);
217
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
218
a->sign ^= 1;
219
}
220
221
- if (unlikely(ab_mask != float_cmask_normal)) {
222
+ if (unlikely(!cmask_is_only_normals(ab_mask))) {
223
if (unlikely(ab_mask == float_cmask_infzero)) {
224
float_raise(float_flag_invalid | float_flag_invalid_imz, s);
225
goto d_nan;
226
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
227
}
228
229
g_assert(ab_mask & float_cmask_zero);
230
- if (c->cls == float_class_normal) {
231
+ if (is_anynorm(c->cls)) {
232
*a = *c;
233
goto return_normal;
234
}
235
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
236
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
237
bool sign = a->sign ^ b->sign;
238
239
- if (likely(ab_mask == float_cmask_normal)) {
240
+ if (likely(cmask_is_only_normals(ab_mask))) {
241
a->sign = sign;
242
a->exp -= b->exp + frac_div(a, b);
243
return a;
244
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
245
{
246
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
247
248
- if (likely(ab_mask == float_cmask_normal)) {
249
+ if (likely(cmask_is_only_normals(ab_mask))) {
250
frac_modrem(a, b, mod_quot);
251
return a;
252
}
253
@@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status,
254
255
if (unlikely(a->cls != float_class_normal)) {
256
switch (a->cls) {
257
+ case float_class_denormal:
258
+ break;
259
case float_class_snan:
260
case float_class_qnan:
261
parts_return_nan(a, status);
262
@@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
263
case float_class_inf:
264
break;
265
case float_class_normal:
266
+ case float_class_denormal:
267
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
268
float_raise(float_flag_inexact, s);
269
}
270
@@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
271
return 0;
272
273
case float_class_normal:
274
+ case float_class_denormal:
275
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
276
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
277
flags = float_flag_inexact;
278
@@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
279
return 0;
280
281
case float_class_normal:
282
+ case float_class_denormal:
283
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
284
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
285
flags = float_flag_inexact;
286
@@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
287
return 0;
288
289
case float_class_normal:
290
+ case float_class_denormal:
291
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
292
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
293
flags = float_flag_inexact;
294
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
295
a_exp = a->exp;
296
b_exp = b->exp;
297
298
- if (unlikely(ab_mask != float_cmask_normal)) {
299
+ if (unlikely(!cmask_is_only_normals(ab_mask))) {
300
switch (a->cls) {
301
case float_class_normal:
302
+ case float_class_denormal:
303
break;
304
case float_class_inf:
305
a_exp = INT16_MAX;
306
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
307
}
308
switch (b->cls) {
309
case float_class_normal:
310
+ case float_class_denormal:
311
break;
312
case float_class_inf:
313
b_exp = INT16_MAX;
314
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
315
{
316
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
317
318
- if (likely(ab_mask == float_cmask_normal)) {
319
+ if (likely(cmask_is_only_normals(ab_mask))) {
320
FloatRelation cmp;
321
322
if (a->sign != b->sign) {
323
@@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
324
case float_class_inf:
325
break;
326
case float_class_normal:
327
+ case float_class_denormal:
328
a->exp += MIN(MAX(n, -0x10000), 0x10000);
329
break;
330
default:
331
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
332
333
if (unlikely(a->cls != float_class_normal)) {
334
switch (a->cls) {
335
+ case float_class_denormal:
336
+ break;
337
case float_class_snan:
338
case float_class_qnan:
339
parts_return_nan(a, s);
340
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
341
}
342
return;
343
default:
344
- break;
345
+ g_assert_not_reached();
346
}
347
- g_assert_not_reached();
348
}
349
if (unlikely(a->sign)) {
350
goto d_nan;
351
--
352
2.34.1
diff view generated by jsdifflib
1
From: Nikita Ostrenkov <n.ostrenkov@gmail.com>
1
For the x86 and the Arm FEAT_AFP semantics, we need to be able to
2
tell the target code that the FPU operation has used an input
3
denormal. Implement this; when it happens we set the new
4
float_flag_input_denormal_used.
2
5
3
Signed-off-by: Nikita Ostrenkov <n.ostrenkov@gmail.com>
6
Note that we only set this when an input denormal is actually used by
4
Message-id: 20240108140325.1291-1-n.ostrenkov@gmail.com
7
the operation: if the operation results in Invalid Operation or
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Divide By Zero or the result is a NaN because some other input was a
9
NaN then we never needed to look at the input denormal and do not set
10
input_denormal_used.
11
12
We mostly do not need to adjust the hardfloat codepaths to deal with
13
this flag, because almost all hardfloat operations are already gated
14
on the input not being a denormal, and will fall back to softfloat
15
for a denormal input. The only exception is the comparison
16
operations, where we need to add the check for input denormals, which
17
must now fall back to softfloat where they did not before.
18
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
21
---
8
include/hw/arm/fsl-imx6.h | 44 ++++++++++++++++++++-------------------
22
include/fpu/softfloat-types.h | 7 ++++
9
hw/arm/fsl-imx6.c | 25 ++++++++++++++++++++++
23
fpu/softfloat.c | 38 +++++++++++++++++---
10
hw/arm/Kconfig | 2 ++
24
fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++-
11
3 files changed, 50 insertions(+), 21 deletions(-)
25
3 files changed, 107 insertions(+), 6 deletions(-)
12
26
13
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
27
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
14
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
15
--- a/include/hw/arm/fsl-imx6.h
29
--- a/include/fpu/softfloat-types.h
16
+++ b/include/hw/arm/fsl-imx6.h
30
+++ b/include/fpu/softfloat-types.h
17
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ enum {
18
#include "hw/net/imx_fec.h"
32
float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */
19
#include "hw/usb/chipidea.h"
33
float_flag_invalid_cvti = 0x1000, /* non-nan to integer */
20
#include "hw/usb/imx-usb-phy.h"
34
float_flag_invalid_snan = 0x2000, /* any operand was snan */
21
+#include "hw/pci-host/designware.h"
35
+ /*
22
#include "exec/memory.h"
36
+ * An input was denormal and we used it (without flushing it to zero).
23
#include "cpu.h"
37
+ * Not set if we do not actually use the denormal input (e.g.
24
#include "qom/object.h"
38
+ * because some other input was a NaN, or because the operation
25
@@ -XXX,XX +XXX,XX @@ struct FslIMX6State {
39
+ * wasn't actually carried out (divide-by-zero; invalid))
26
DeviceState parent_obj;
40
+ */
27
41
+ float_flag_input_denormal_used = 0x4000,
28
/*< public >*/
29
- ARMCPU cpu[FSL_IMX6_NUM_CPUS];
30
- A9MPPrivState a9mpcore;
31
- IMX6CCMState ccm;
32
- IMX6SRCState src;
33
- IMX7SNVSState snvs;
34
- IMXSerialState uart[FSL_IMX6_NUM_UARTS];
35
- IMXGPTState gpt;
36
- IMXEPITState epit[FSL_IMX6_NUM_EPITS];
37
- IMXI2CState i2c[FSL_IMX6_NUM_I2CS];
38
- IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS];
39
- SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS];
40
- IMXSPIState spi[FSL_IMX6_NUM_ECSPIS];
41
- IMX2WdtState wdt[FSL_IMX6_NUM_WDTS];
42
- IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS];
43
- ChipideaState usb[FSL_IMX6_NUM_USBS];
44
- IMXFECState eth;
45
- MemoryRegion rom;
46
- MemoryRegion caam;
47
- MemoryRegion ocram;
48
- MemoryRegion ocram_alias;
49
- uint32_t phy_num;
50
+ ARMCPU cpu[FSL_IMX6_NUM_CPUS];
51
+ A9MPPrivState a9mpcore;
52
+ IMX6CCMState ccm;
53
+ IMX6SRCState src;
54
+ IMX7SNVSState snvs;
55
+ IMXSerialState uart[FSL_IMX6_NUM_UARTS];
56
+ IMXGPTState gpt;
57
+ IMXEPITState epit[FSL_IMX6_NUM_EPITS];
58
+ IMXI2CState i2c[FSL_IMX6_NUM_I2CS];
59
+ IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS];
60
+ SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS];
61
+ IMXSPIState spi[FSL_IMX6_NUM_ECSPIS];
62
+ IMX2WdtState wdt[FSL_IMX6_NUM_WDTS];
63
+ IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS];
64
+ ChipideaState usb[FSL_IMX6_NUM_USBS];
65
+ IMXFECState eth;
66
+ DesignwarePCIEHost pcie;
67
+ MemoryRegion rom;
68
+ MemoryRegion caam;
69
+ MemoryRegion ocram;
70
+ MemoryRegion ocram_alias;
71
+ uint32_t phy_num;
72
};
42
};
73
43
74
44
/*
75
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
45
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
76
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
77
--- a/hw/arm/fsl-imx6.c
47
--- a/fpu/softfloat.c
78
+++ b/hw/arm/fsl-imx6.c
48
+++ b/fpu/softfloat.c
79
@@ -XXX,XX +XXX,XX @@
49
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
80
#include "qemu/osdep.h"
50
float16_params_ahp.frac_size + 1);
81
#include "qapi/error.h"
51
break;
82
#include "hw/arm/fsl-imx6.h"
52
83
+#include "hw/misc/unimp.h"
53
- case float_class_normal:
84
#include "hw/usb/imx-usb-phy.h"
54
case float_class_denormal:
85
#include "hw/boards.h"
55
+ float_raise(float_flag_input_denormal_used, s);
86
#include "hw/qdev-properties.h"
56
+ break;
87
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_init(Object *obj)
57
+ case float_class_normal:
88
58
case float_class_zero:
89
59
break;
90
object_initialize_child(obj, "eth", &s->eth, TYPE_IMX_ENET);
60
91
+
61
@@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s)
92
+ object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
62
if (is_nan(a->cls)) {
63
parts_return_nan(a, s);
64
}
65
+ if (a->cls == float_class_denormal) {
66
+ float_raise(float_flag_input_denormal_used, s);
67
+ }
93
}
68
}
94
69
95
static void fsl_imx6_realize(DeviceState *dev, Error **errp)
70
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
96
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
71
@@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s)
97
MachineState *ms = MACHINE(qdev_get_machine());
72
if (is_nan(a->cls)) {
98
FslIMX6State *s = FSL_IMX6(dev);
73
parts_return_nan(a, s);
99
uint16_t i;
74
}
100
+ qemu_irq irq;
75
+ if (a->cls == float_class_denormal) {
101
unsigned int smp_cpus = ms->smp.cpus;
76
+ float_raise(float_flag_input_denormal_used, s);
102
77
+ }
103
if (smp_cpus > FSL_IMX6_NUM_CPUS) {
78
}
104
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
79
105
FSL_IMX6_WDOGn_IRQ[i]));
80
#define parts_float_to_float(P, S) \
106
}
81
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
82
a->sign = b->sign;
83
a->exp = b->exp;
84
85
- if (is_anynorm(a->cls)) {
86
+ switch (a->cls) {
87
+ case float_class_denormal:
88
+ float_raise(float_flag_input_denormal_used, s);
89
+ /* fall through */
90
+ case float_class_normal:
91
frac_truncjam(a, b);
92
- } else if (is_nan(a->cls)) {
93
+ break;
94
+ case float_class_snan:
95
+ case float_class_qnan:
96
/* Discard the low bits of the NaN. */
97
a->frac = b->frac_hi;
98
parts_return_nan(a, s);
99
+ break;
100
+ default:
101
+ break;
102
}
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
106
if (is_nan(a->cls)) {
107
parts_return_nan(a, s);
108
}
109
+ if (a->cls == float_class_denormal) {
110
+ float_raise(float_flag_input_denormal_used, s);
111
+ }
112
}
113
114
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
115
@@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
116
goto soft;
117
}
118
119
- float32_input_flush2(&ua.s, &ub.s, s);
120
+ if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
121
+ /* We may need to set the input_denormal_used flag */
122
+ goto soft;
123
+ }
124
+
125
if (isgreaterequal(ua.h, ub.h)) {
126
if (isgreater(ua.h, ub.h)) {
127
return float_relation_greater;
128
@@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
129
goto soft;
130
}
131
132
- float64_input_flush2(&ua.s, &ub.s, s);
133
+ if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
134
+ /* We may need to set the input_denormal_used flag */
135
+ goto soft;
136
+ }
137
+
138
if (isgreaterequal(ua.h, ub.h)) {
139
if (isgreater(ua.h, ub.h)) {
140
return float_relation_greater;
141
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
142
index XXXXXXX..XXXXXXX 100644
143
--- a/fpu/softfloat-parts.c.inc
144
+++ b/fpu/softfloat-parts.c.inc
145
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
146
bool b_sign = b->sign ^ subtract;
147
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
107
148
108
+ /*
149
+ /*
109
+ * PCIe
150
+ * For addition and subtraction, we will consume an
151
+ * input denormal unless the other input is a NaN.
110
+ */
152
+ */
111
+ sysbus_realize(SYS_BUS_DEVICE(&s->pcie), &error_abort);
153
+ if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
112
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0, FSL_IMX6_PCIe_REG_ADDR);
154
+ float_cmask_denormal) {
113
+
155
+ float_raise(float_flag_input_denormal_used, s);
114
+ irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE1_IRQ);
156
+ }
115
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0, irq);
157
+
116
+ irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE2_IRQ);
158
if (a->sign != b_sign) {
117
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1, irq);
159
/* Subtraction */
118
+ irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE3_IRQ);
160
if (likely(cmask_is_only_normals(ab_mask))) {
119
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2, irq);
161
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
120
+ irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE4_IRQ);
162
if (likely(cmask_is_only_normals(ab_mask))) {
121
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3, irq);
163
FloatPartsW tmp;
164
165
+ if (ab_mask & float_cmask_denormal) {
166
+ float_raise(float_flag_input_denormal_used, s);
167
+ }
168
+
169
frac_mulw(&tmp, a, b);
170
frac_truncjam(a, &tmp);
171
172
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
173
}
174
175
/* Multiply by 0 or Inf */
176
+ if (ab_mask & float_cmask_denormal) {
177
+ float_raise(float_flag_input_denormal_used, s);
178
+ }
179
+
180
if (ab_mask & float_cmask_inf) {
181
a->cls = float_class_inf;
182
a->sign = sign;
183
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
184
if (flags & float_muladd_negate_result) {
185
a->sign ^= 1;
186
}
122
+
187
+
123
+ /*
188
+ /*
124
+ * PCIe PHY
189
+ * All result types except for "return the default NaN
190
+ * because this is an Invalid Operation" go through here;
191
+ * this matches the set of cases where we consumed a
192
+ * denormal input.
125
+ */
193
+ */
126
+ create_unimplemented_device("pcie-phy", FSL_IMX6_PCIe_ADDR,
194
+ if (abc_mask & float_cmask_denormal) {
127
+ FSL_IMX6_PCIe_SIZE);
195
+ float_raise(float_flag_input_denormal_used, s);
128
+
196
+ }
129
/* ROM memory */
197
return a;
130
if (!memory_region_init_rom(&s->rom, OBJECT(dev), "imx6.rom",
198
131
FSL_IMX6_ROM_SIZE, errp)) {
199
return_sub_zero:
132
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
200
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
133
index XXXXXXX..XXXXXXX 100644
201
bool sign = a->sign ^ b->sign;
134
--- a/hw/arm/Kconfig
202
135
+++ b/hw/arm/Kconfig
203
if (likely(cmask_is_only_normals(ab_mask))) {
136
@@ -XXX,XX +XXX,XX @@ config FSL_IMX31
204
+ if (ab_mask & float_cmask_denormal) {
137
205
+ float_raise(float_flag_input_denormal_used, s);
138
config FSL_IMX6
206
+ }
139
bool
207
a->sign = sign;
140
+ imply PCIE_DEVICES
208
a->exp -= b->exp + frac_div(a, b);
141
imply I2C_DEVICES
209
return a;
142
select A9MPCORE
210
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
143
select IMX
211
return parts_pick_nan(a, b, s);
144
@@ -XXX,XX +XXX,XX @@ config FSL_IMX6
212
}
145
select IMX_USBPHY
213
146
select WDT_IMX2
214
+ if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
147
select PL310 # cache controller
215
+ float_raise(float_flag_input_denormal_used, s);
148
+ select PCI_EXPRESS_DESIGNWARE
216
+ }
149
select SDHCI
217
+
150
218
a->sign = sign;
151
config ASPEED_SOC
219
220
/* Inf / X */
221
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
222
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
223
224
if (likely(cmask_is_only_normals(ab_mask))) {
225
+ if (ab_mask & float_cmask_denormal) {
226
+ float_raise(float_flag_input_denormal_used, s);
227
+ }
228
frac_modrem(a, b, mod_quot);
229
return a;
230
}
231
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
232
return a;
233
}
234
235
+ if (ab_mask & float_cmask_denormal) {
236
+ float_raise(float_flag_input_denormal_used, s);
237
+ }
238
+
239
/* N % Inf; 0 % N */
240
g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
241
return a;
242
@@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status,
243
if (unlikely(a->cls != float_class_normal)) {
244
switch (a->cls) {
245
case float_class_denormal:
246
+ if (!a->sign) {
247
+ /* -ve denormal will be InvalidOperation */
248
+ float_raise(float_flag_input_denormal_used, status);
249
+ }
250
break;
251
case float_class_snan:
252
case float_class_qnan:
253
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
254
if ((flags & (minmax_isnum | minmax_isnumber))
255
&& !(ab_mask & float_cmask_snan)
256
&& (ab_mask & ~float_cmask_qnan)) {
257
+ if (ab_mask & float_cmask_denormal) {
258
+ float_raise(float_flag_input_denormal_used, s);
259
+ }
260
return is_nan(a->cls) ? b : a;
261
}
262
263
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
264
return parts_pick_nan(a, b, s);
265
}
266
267
+ if (ab_mask & float_cmask_denormal) {
268
+ float_raise(float_flag_input_denormal_used, s);
269
+ }
270
+
271
a_exp = a->exp;
272
b_exp = b->exp;
273
274
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
275
if (likely(cmask_is_only_normals(ab_mask))) {
276
FloatRelation cmp;
277
278
+ if (ab_mask & float_cmask_denormal) {
279
+ float_raise(float_flag_input_denormal_used, s);
280
+ }
281
+
282
if (a->sign != b->sign) {
283
goto a_sign;
284
}
285
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
286
return float_relation_unordered;
287
}
288
289
+ if (ab_mask & float_cmask_denormal) {
290
+ float_raise(float_flag_input_denormal_used, s);
291
+ }
292
+
293
if (ab_mask & float_cmask_zero) {
294
if (ab_mask == float_cmask_zero) {
295
return float_relation_equal;
296
@@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
297
case float_class_zero:
298
case float_class_inf:
299
break;
300
- case float_class_normal:
301
case float_class_denormal:
302
+ float_raise(float_flag_input_denormal_used, s);
303
+ /* fall through */
304
+ case float_class_normal:
305
a->exp += MIN(MAX(n, -0x10000), 0x10000);
306
break;
307
default:
308
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
309
if (unlikely(a->cls != float_class_normal)) {
310
switch (a->cls) {
311
case float_class_denormal:
312
+ if (!a->sign) {
313
+ /* -ve denormal will be InvalidOperation */
314
+ float_raise(float_flag_input_denormal_used, s);
315
+ }
316
break;
317
case float_class_snan:
318
case float_class_qnan:
152
--
319
--
153
2.34.1
320
2.34.1
diff view generated by jsdifflib
New patch
1
1
Currently we handle flushing of output denormals in uncanon_normal
2
always before we deal with rounding. This works for architectures
3
that detect tininess before rounding, but is usually not the right
4
place when the architecture detects tininess after rounding. For
5
example, for x86 the SDM states that the MXCSR FTZ control bit causes
6
outputs to be flushed to zero "when it detects a floating-point
7
underflow condition". This means that we mustn't flush to zero if
8
the input is such that after rounding it is no longer tiny.
9
10
At least one of our guest architectures does underflow detection
11
after rounding but flushing of denormals before rounding (MIPS MSA);
12
this means we need to have a config knob for this that is separate
13
from our existing tininess_before_rounding setting.
14
15
Add an ftz_detection flag. For consistency with
16
tininess_before_rounding, we make it default to "detect ftz after
17
rounding"; this means that we need to explicitly set the flag to
18
"detect ftz before rounding" on every existing architecture that sets
19
flush_to_zero, so that this commit has no behaviour change.
20
(This means more code change here but for the long term a less
21
confusing API.)
22
23
For several architectures the current behaviour is either
24
definitely or possibly wrong; annotate those with TODO comments.
25
These architectures are definitely wrong (and should detect
26
ftz after rounding):
27
* x86
28
* Alpha
29
30
For these architectures the spec is unclear:
31
* MIPS (for non-MSA)
32
* RX
33
* SH4
34
35
PA-RISC makes ftz detection IMPDEF, but we aren't setting the
36
"tininess before rounding" setting that we ought to.
37
38
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
39
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
40
---
41
include/fpu/softfloat-helpers.h | 11 +++++++++++
42
include/fpu/softfloat-types.h | 18 ++++++++++++++++++
43
target/mips/fpu_helper.h | 6 ++++++
44
target/alpha/cpu.c | 7 +++++++
45
target/arm/cpu.c | 1 +
46
target/hppa/fpu_helper.c | 11 +++++++++++
47
target/i386/tcg/fpu_helper.c | 8 ++++++++
48
target/mips/msa.c | 9 +++++++++
49
target/ppc/cpu_init.c | 3 +++
50
target/rx/cpu.c | 8 ++++++++
51
target/sh4/cpu.c | 8 ++++++++
52
target/tricore/helper.c | 1 +
53
tests/fp/fp-bench.c | 1 +
54
fpu/softfloat-parts.c.inc | 21 +++++++++++++++------
55
14 files changed, 107 insertions(+), 6 deletions(-)
56
57
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/include/fpu/softfloat-helpers.h
60
+++ b/include/fpu/softfloat-helpers.h
61
@@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status)
62
status->flush_inputs_to_zero = val;
63
}
64
65
+static inline void set_float_ftz_detection(FloatFTZDetection d,
66
+ float_status *status)
67
+{
68
+ status->ftz_detection = d;
69
+}
70
+
71
static inline void set_default_nan_mode(bool val, float_status *status)
72
{
73
status->default_nan_mode = val;
74
@@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status)
75
return status->default_nan_mode;
76
}
77
78
+static inline FloatFTZDetection get_float_ftz_detection(const float_status *status)
79
+{
80
+ return status->ftz_detection;
81
+}
82
+
83
#endif /* SOFTFLOAT_HELPERS_H */
84
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
85
index XXXXXXX..XXXXXXX 100644
86
--- a/include/fpu/softfloat-types.h
87
+++ b/include/fpu/softfloat-types.h
88
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
89
float_infzeronan_suppress_invalid = (1 << 7),
90
} FloatInfZeroNaNRule;
91
92
+/*
93
+ * When flush_to_zero is set, should we detect denormal results to
94
+ * be flushed before or after rounding? For most architectures this
95
+ * should be set to match the tininess_before_rounding setting,
96
+ * but a few architectures, e.g. MIPS MSA, detect FTZ before
97
+ * rounding but tininess after rounding.
98
+ *
99
+ * This enum is arranged so that the default if the target doesn't
100
+ * configure it matches the default for tininess_before_rounding
101
+ * (i.e. "after rounding").
102
+ */
103
+typedef enum __attribute__((__packed__)) {
104
+ float_ftz_after_rounding = 0,
105
+ float_ftz_before_rounding = 1,
106
+} FloatFTZDetection;
107
+
108
/*
109
* Floating Point Status. Individual architectures may maintain
110
* several versions of float_status for different functions. The
111
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
112
bool tininess_before_rounding;
113
/* should denormalised results go to zero and set output_denormal_flushed? */
114
bool flush_to_zero;
115
+ /* do we detect and flush denormal results before or after rounding? */
116
+ FloatFTZDetection ftz_detection;
117
/* should denormalised inputs go to zero and set input_denormal_flushed? */
118
bool flush_inputs_to_zero;
119
bool default_nan_mode;
120
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/mips/fpu_helper.h
123
+++ b/target/mips/fpu_helper.h
124
@@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env)
125
*/
126
set_float_2nan_prop_rule(float_2nan_prop_s_ab,
127
&env->active_fpu.fp_status);
128
+ /*
129
+ * TODO: the spec doesn't say clearly whether FTZ happens before
130
+ * or after rounding for normal FPU operations.
131
+ */
132
+ set_float_ftz_detection(float_ftz_before_rounding,
133
+ &env->active_fpu.fp_status);
134
}
135
136
/* MSA */
137
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/target/alpha/cpu.c
140
+++ b/target/alpha/cpu.c
141
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj)
142
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
143
/* Default NaN: sign bit clear, msb frac bit set */
144
set_float_default_nan_pattern(0b01000000, &env->fp_status);
145
+ /*
146
+ * TODO: this is incorrect. The Alpha Architecture Handbook version 4
147
+ * section 4.7.7.11 says that we flush to zero for underflow cases, so
148
+ * this should be float_ftz_after_rounding to match the
149
+ * tininess_after_rounding (which is specified in section 4.7.5).
150
+ */
151
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
152
#if defined(CONFIG_USER_ONLY)
153
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
154
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
155
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
156
index XXXXXXX..XXXXXXX 100644
157
--- a/target/arm/cpu.c
158
+++ b/target/arm/cpu.c
159
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
160
static void arm_set_default_fp_behaviours(float_status *s)
161
{
162
set_float_detect_tininess(float_tininess_before_rounding, s);
163
+ set_float_ftz_detection(float_ftz_before_rounding, s);
164
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
165
set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
166
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
167
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
168
index XXXXXXX..XXXXXXX 100644
169
--- a/target/hppa/fpu_helper.c
170
+++ b/target/hppa/fpu_helper.c
171
@@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
172
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
173
/* Default NaN: sign bit clear, msb-1 frac bit set */
174
set_float_default_nan_pattern(0b00100000, &env->fp_status);
175
+ /*
176
+ * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
177
+ * enabled by FPSR.D happens before or after rounding. We pick "before"
178
+ * for consistency with tininess detection.
179
+ */
180
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
181
+ /*
182
+ * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should
183
+ * detect tininess before rounding, but we don't set that here so we
184
+ * get the default tininess after rounding.
185
+ */
186
}
187
188
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
189
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/target/i386/tcg/fpu_helper.c
192
+++ b/target/i386/tcg/fpu_helper.c
193
@@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env)
194
set_float_default_nan_pattern(0b11000000, &env->fp_status);
195
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
196
set_float_default_nan_pattern(0b11000000, &env->sse_status);
197
+ /*
198
+ * TODO: x86 does flush-to-zero detection after rounding (the SDM
199
+ * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
200
+ * when we detect underflow, which x86 does after rounding).
201
+ */
202
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
203
+ set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
204
+ set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
205
}
206
207
static inline uint8_t save_exception_flags(CPUX86State *env)
208
diff --git a/target/mips/msa.c b/target/mips/msa.c
209
index XXXXXXX..XXXXXXX 100644
210
--- a/target/mips/msa.c
211
+++ b/target/mips/msa.c
212
@@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env)
213
/* tininess detected after rounding.*/
214
set_float_detect_tininess(float_tininess_after_rounding,
215
&env->active_tc.msa_fp_status);
216
+ /*
217
+ * MSACSR.FS detects tiny results to flush to zero before rounding
218
+ * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD
219
+ * Architecture Module, Revision 1.1" section 3.5.4), even though it
220
+ * detects tininess after rounding for underflow purposes (section 3.4.2
221
+ * table 3.3).
222
+ */
223
+ set_float_ftz_detection(float_ftz_before_rounding,
224
+ &env->active_tc.msa_fp_status);
225
226
/*
227
* According to MIPS specifications, if one of the two operands is
228
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
229
index XXXXXXX..XXXXXXX 100644
230
--- a/target/ppc/cpu_init.c
231
+++ b/target/ppc/cpu_init.c
232
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
233
/* tininess for underflow is detected before rounding */
234
set_float_detect_tininess(float_tininess_before_rounding,
235
&env->fp_status);
236
+ /* Similarly for flush-to-zero */
237
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
238
+
239
/*
240
* PowerPC propagation rules:
241
* 1. A if it sNaN or qNaN
242
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
243
index XXXXXXX..XXXXXXX 100644
244
--- a/target/rx/cpu.c
245
+++ b/target/rx/cpu.c
246
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
247
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
248
/* Default NaN value: sign bit clear, set frac msb */
249
set_float_default_nan_pattern(0b01000000, &env->fp_status);
250
+ /*
251
+ * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear
252
+ * on whether flush-to-zero should happen before or after rounding, but
253
+ * section 1.3.2 says that it happens when underflow is detected, and
254
+ * implies that underflow is detected after rounding. So this may not
255
+ * be the correct setting.
256
+ */
257
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
258
}
259
260
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
261
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
262
index XXXXXXX..XXXXXXX 100644
263
--- a/target/sh4/cpu.c
264
+++ b/target/sh4/cpu.c
265
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
266
set_default_nan_mode(1, &env->fp_status);
267
/* sign bit clear, set all frac bits other than msb */
268
set_float_default_nan_pattern(0b00111111, &env->fp_status);
269
+ /*
270
+ * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether
271
+ * it detects tininess before or after rounding. Section 6.4 is clear
272
+ * that flush-to-zero happens when the result underflows, though, so
273
+ * either this should be "detect ftz after rounding" or else we should
274
+ * be setting "detect tininess before rounding".
275
+ */
276
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
277
}
278
279
static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
280
diff --git a/target/tricore/helper.c b/target/tricore/helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/tricore/helper.c
283
+++ b/target/tricore/helper.c
284
@@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env)
285
set_flush_inputs_to_zero(1, &env->fp_status);
286
set_flush_to_zero(1, &env->fp_status);
287
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
288
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
289
set_default_nan_mode(1, &env->fp_status);
290
/* Default NaN pattern: sign bit clear, frac msb set */
291
set_float_default_nan_pattern(0b01000000, &env->fp_status);
292
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
293
index XXXXXXX..XXXXXXX 100644
294
--- a/tests/fp/fp-bench.c
295
+++ b/tests/fp/fp-bench.c
296
@@ -XXX,XX +XXX,XX @@ static void run_bench(void)
297
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
298
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
299
set_float_default_nan_pattern(0b01000000, &soft_status);
300
+ set_float_ftz_detection(float_ftz_before_rounding, &soft_status);
301
302
f = bench_funcs[operation][precision];
303
g_assert(f);
304
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
305
index XXXXXXX..XXXXXXX 100644
306
--- a/fpu/softfloat-parts.c.inc
307
+++ b/fpu/softfloat-parts.c.inc
308
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
309
p->frac_lo &= ~round_mask;
310
}
311
frac_shr(p, frac_shift);
312
- } else if (s->flush_to_zero) {
313
+ } else if (s->flush_to_zero &&
314
+ s->ftz_detection == float_ftz_before_rounding) {
315
flags |= float_flag_output_denormal_flushed;
316
p->cls = float_class_zero;
317
exp = 0;
318
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
319
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
320
frac_shr(p, frac_shift);
321
322
- if (is_tiny && (flags & float_flag_inexact)) {
323
- flags |= float_flag_underflow;
324
- }
325
- if (exp == 0 && frac_eqz(p)) {
326
- p->cls = float_class_zero;
327
+ if (is_tiny) {
328
+ if (s->flush_to_zero) {
329
+ assert(s->ftz_detection == float_ftz_after_rounding);
330
+ flags |= float_flag_output_denormal_flushed;
331
+ p->cls = float_class_zero;
332
+ exp = 0;
333
+ frac_clear(p);
334
+ } else if (flags & float_flag_inexact) {
335
+ flags |= float_flag_underflow;
336
+ }
337
+ if (exp == 0 && frac_eqz(p)) {
338
+ p->cls = float_class_zero;
339
+ }
340
}
341
}
342
p->exp = exp;
343
--
344
2.34.1
diff view generated by jsdifflib
1
A typo in the implementation of isar_feature_aa64_tidcp1() means we
1
The Armv8.7 FEAT_AFP feature defines three new control bits in
2
were checking the field in the wrong ID register, so we might have
2
the FPCR:
3
provided the feature on CPUs that don't have it and not provided
3
* FPCR.AH: "alternate floating point mode"; this changes floating
4
it on CPUs that should have it. Correct this bug.
4
point behaviour in a variety of ways, including:
5
- the sign of a default NaN is 1, not 0
6
- if FPCR.FZ is also 1, denormals detected after rounding
7
with an unbounded exponent has been applied are flushed to zero
8
- FPCR.FZ does not cause denormalized inputs to be flushed to zero
9
- miscellaneous other corner-case behaviour changes
10
* FPCR.FIZ: flush denormalized numbers to zero on input for
11
most instructions
12
* FPCR.NEP: makes scalar SIMD operations merge the result with
13
higher vector elements in one of the source registers, instead
14
of zeroing the higher elements of the destination
5
15
6
Cc: qemu-stable@nongnu.org
16
This commit defines the new bits in the FPCR, and allows them to be
7
Fixes: 9cd0c0dec97be9 "target/arm: Implement FEAT_TIDCP1"
17
read or written when FEAT_AFP is implemented. Actual behaviour
8
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2120
18
changes will be implemented in subsequent commits.
19
20
Note that these are the first FPCR bits which don't appear in the
21
AArch32 FPSCR view of the register, and which share bit positions
22
with FPSR bits.
23
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20240123160333.958841-1-peter.maydell@linaro.org
12
---
26
---
13
target/arm/cpu-features.h | 2 +-
27
target/arm/cpu-features.h | 5 +++++
14
1 file changed, 1 insertion(+), 1 deletion(-)
28
target/arm/cpu.h | 3 +++
29
target/arm/vfp_helper.c | 11 ++++++++---
30
3 files changed, 16 insertions(+), 3 deletions(-)
15
31
16
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
32
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
17
index XXXXXXX..XXXXXXX 100644
33
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu-features.h
34
--- a/target/arm/cpu-features.h
19
+++ b/target/arm/cpu-features.h
35
+++ b/target/arm/cpu-features.h
20
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
36
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
21
37
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
38
}
39
40
+static inline bool isar_feature_aa64_afp(const ARMISARegisters *id)
41
+{
42
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0;
43
+}
44
+
22
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
45
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
23
{
46
{
24
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR1, TIDCP1) != 0;
47
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
25
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
48
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/cpu.h
51
+++ b/target/arm/cpu.h
52
@@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
53
*/
54
55
/* FPCR bits */
56
+#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */
57
+#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */
58
+#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */
59
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
60
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
61
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
62
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/vfp_helper.c
65
+++ b/target/arm/vfp_helper.c
66
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
67
if (!cpu_isar_feature(any_fp16, cpu)) {
68
val &= ~FPCR_FZ16;
69
}
70
+ if (!cpu_isar_feature(aa64_afp, cpu)) {
71
+ val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
72
+ }
73
74
if (!cpu_isar_feature(aa64_ebf16, cpu)) {
75
val &= ~FPCR_EBF;
76
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
77
* We don't implement trapped exception handling, so the
78
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
79
*
80
- * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF
81
- * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
82
+ * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
83
+ * FIZ, AH, and NEP.
84
+ * Len, Stride and LTPSIZE we just handled. Store those bits
85
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
86
* bits.
87
*/
88
- val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF;
89
+ val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
90
+ FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
91
env->vfp.fpcr &= ~mask;
92
env->vfp.fpcr |= val;
26
}
93
}
27
28
static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id)
29
--
94
--
30
2.34.1
95
2.34.1
diff view generated by jsdifflib
New patch
1
Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects
2
flushing of single and double precision denormal inputs to zero for
3
AArch64 floating point instructions. (For half-precision, the
4
existing FPCR.FZ16 control remains the only one.)
1
5
6
FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal
7
only because of FPCR.FIZ then we should *not* set the cumulative
8
exception bit FPSR.IDC.
9
10
FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only
11
applies when FPCR.AH is 0.
12
13
We can implement this by setting the "flush inputs to zero" state
14
appropriately when FPCR is written, and by not reflecting the
15
float_flag_input_denormal status flag into FPSR reads when it is the
16
result only of FPCR.FIZ.
17
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
---
21
target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++-------
22
1 file changed, 50 insertions(+), 10 deletions(-)
23
24
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/vfp_helper.c
27
+++ b/target/arm/vfp_helper.c
28
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
29
30
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
31
{
32
- uint32_t i = 0;
33
+ uint32_t a32_flags = 0, a64_flags = 0;
34
35
- i |= get_float_exception_flags(&env->vfp.fp_status_a32);
36
- i |= get_float_exception_flags(&env->vfp.fp_status_a64);
37
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
38
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
39
+ a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
40
/* FZ16 does not generate an input denormal exception. */
41
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
42
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
43
& ~float_flag_input_denormal_flushed);
44
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
45
+ a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
46
& ~float_flag_input_denormal_flushed);
47
- i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
48
+
49
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
50
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
51
& ~float_flag_input_denormal_flushed);
52
- return vfp_exceptbits_from_host(i);
53
+ /*
54
+ * Flushing an input denormal *only* because FPCR.FIZ == 1 does
55
+ * not set FPSR.IDC; if FPCR.FZ is also set then this takes
56
+ * precedence and IDC is set (see the FPUnpackBase pseudocode).
57
+ * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
58
+ * We only do this for the a64 flags because FIZ has no effect
59
+ * on AArch32 even if it is set.
60
+ */
61
+ if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
62
+ a64_flags &= ~float_flag_input_denormal_flushed;
63
+ }
64
+ return vfp_exceptbits_from_host(a32_flags | a64_flags);
65
}
66
67
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
68
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
69
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
70
}
71
72
+static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
73
+{
74
+ /*
75
+ * Synchronize any pending exception-flag information in the
76
+ * float_status values into env->vfp.fpsr, and then clear out
77
+ * the float_status data.
78
+ */
79
+ env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
80
+ vfp_clear_float_status_exc_flags(env);
81
+}
82
+
83
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
84
{
85
uint64_t changed = env->vfp.fpcr;
86
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
87
if (changed & FPCR_FZ) {
88
bool ftz_enabled = val & FPCR_FZ;
89
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
90
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
91
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
92
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
93
+ /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
94
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
95
+ }
96
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
97
+ /*
98
+ * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
99
+ * both FPCR.AH = 0 and FPCR.FZ = 1.
100
+ */
101
+ bool fitz_enabled = (val & FPCR_FIZ) ||
102
+ (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
103
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
104
}
105
if (changed & FPCR_DN) {
106
bool dnan_enabled = val & FPCR_DN;
107
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
108
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
109
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
110
}
111
+ /*
112
+ * If any bits changed that we look at in vfp_get_fpsr_from_host(),
113
+ * we must sync the float_status flags into vfp.fpsr now (under the
114
+ * old regime) before we update vfp.fpcr.
115
+ */
116
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
117
+ vfp_sync_and_clear_float_status_exc_flags(env);
118
+ }
119
}
120
121
#else
122
--
123
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH is set, various behaviours of AArch64 floating point
2
operations which are controlled by softfloat config settings change:
3
* tininess and ftz detection before/after rounding
4
* NaN propagation order
5
* result of 0 * Inf + NaN
6
* default NaN value
1
7
8
When the guest changes the value of the AH bit, switch these config
9
settings on the fp_status_a64 and fp_status_f16_a64 float_status
10
fields.
11
12
This requires us to make the arm_set_default_fp_behaviours() function
13
global, since we now need to call it from cpu.c and vfp_helper.c; we
14
move it to vfp_helper.c so it can be next to the new
15
arm_set_ah_fp_behaviours().
16
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
---
20
target/arm/internals.h | 4 +++
21
target/arm/cpu.c | 23 ----------------
22
target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++-
23
3 files changed, 61 insertions(+), 24 deletions(-)
24
25
diff --git a/target/arm/internals.h b/target/arm/internals.h
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/internals.h
28
+++ b/target/arm/internals.h
29
@@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env);
30
* all EL1" scope; this covers stage 1 and stage 2.
31
*/
32
int alle1_tlbmask(CPUARMState *env);
33
+
34
+/* Set the float_status behaviour to match the Arm defaults */
35
+void arm_set_default_fp_behaviours(float_status *s);
36
+
37
#endif
38
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/cpu.c
41
+++ b/target/arm/cpu.c
42
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
43
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
44
}
45
46
-/*
47
- * Set the float_status behaviour to match the Arm defaults:
48
- * * tininess-before-rounding
49
- * * 2-input NaN propagation prefers SNaN over QNaN, and then
50
- * operand A over operand B (see FPProcessNaNs() pseudocode)
51
- * * 3-input NaN propagation prefers SNaN over QNaN, and then
52
- * operand C over A over B (see FPProcessNaNs3() pseudocode,
53
- * but note that for QEMU muladd is a * b + c, whereas for
54
- * the pseudocode function the arguments are in the order c, a, b.
55
- * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
56
- * and the input NaN if it is signalling
57
- * * Default NaN has sign bit clear, msb frac bit set
58
- */
59
-static void arm_set_default_fp_behaviours(float_status *s)
60
-{
61
- set_float_detect_tininess(float_tininess_before_rounding, s);
62
- set_float_ftz_detection(float_ftz_before_rounding, s);
63
- set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
64
- set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
65
- set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
66
- set_float_default_nan_pattern(0b01000000, s);
67
-}
68
-
69
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
70
{
71
/* Reset a single ARMCPRegInfo register */
72
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/target/arm/vfp_helper.c
75
+++ b/target/arm/vfp_helper.c
76
@@ -XXX,XX +XXX,XX @@
77
#include "exec/helper-proto.h"
78
#include "internals.h"
79
#include "cpu-features.h"
80
+#include "fpu/softfloat.h"
81
#ifdef CONFIG_TCG
82
#include "qemu/log.h"
83
-#include "fpu/softfloat.h"
84
#endif
85
86
/* VFP support. We follow the convention used for VFP instructions:
87
Single precision routines have a "s" suffix, double precision a
88
"d" suffix. */
89
90
+/*
91
+ * Set the float_status behaviour to match the Arm defaults:
92
+ * * tininess-before-rounding
93
+ * * 2-input NaN propagation prefers SNaN over QNaN, and then
94
+ * operand A over operand B (see FPProcessNaNs() pseudocode)
95
+ * * 3-input NaN propagation prefers SNaN over QNaN, and then
96
+ * operand C over A over B (see FPProcessNaNs3() pseudocode,
97
+ * but note that for QEMU muladd is a * b + c, whereas for
98
+ * the pseudocode function the arguments are in the order c, a, b.
99
+ * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
100
+ * and the input NaN if it is signalling
101
+ * * Default NaN has sign bit clear, msb frac bit set
102
+ */
103
+void arm_set_default_fp_behaviours(float_status *s)
104
+{
105
+ set_float_detect_tininess(float_tininess_before_rounding, s);
106
+ set_float_ftz_detection(float_ftz_before_rounding, s);
107
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
108
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
109
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
110
+ set_float_default_nan_pattern(0b01000000, s);
111
+}
112
+
113
+/*
114
+ * Set the float_status behaviour to match the FEAT_AFP
115
+ * FPCR.AH=1 requirements:
116
+ * * tininess-after-rounding
117
+ * * 2-input NaN propagation prefers the first NaN
118
+ * * 3-input NaN propagation prefers a over b over c
119
+ * * 0 * Inf + NaN always returns the input NaN and doesn't
120
+ * set Invalid for a QNaN
121
+ * * default NaN has sign bit set, msb frac bit set
122
+ */
123
+static void arm_set_ah_fp_behaviours(float_status *s)
124
+{
125
+ set_float_detect_tininess(float_tininess_after_rounding, s);
126
+ set_float_ftz_detection(float_ftz_after_rounding, s);
127
+ set_float_2nan_prop_rule(float_2nan_prop_ab, s);
128
+ set_float_3nan_prop_rule(float_3nan_prop_abc, s);
129
+ set_float_infzeronan_rule(float_infzeronan_dnan_never |
130
+ float_infzeronan_suppress_invalid, s);
131
+ set_float_default_nan_pattern(0b11000000, s);
132
+}
133
+
134
#ifdef CONFIG_TCG
135
136
/* Convert host exception flags to vfp form. */
137
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
138
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
139
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
140
}
141
+ if (changed & FPCR_AH) {
142
+ bool ah_enabled = val & FPCR_AH;
143
+
144
+ if (ah_enabled) {
145
+ /* Change behaviours for A64 FP operations */
146
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
147
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
148
+ } else {
149
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
150
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
151
+ }
152
+ }
153
/*
154
* If any bits changed that we look at in vfp_get_fpsr_from_host(),
155
* we must sync the float_status flags into vfp.fpsr now (under the
156
--
157
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH = 1, some of the cumulative exception flags in the FPSR
2
behave slightly differently for A64 operations:
3
* IDC is set when a denormal input is used without flushing
4
* IXC (Inexact) is set when an output denormal is flushed to zero
1
5
6
Update vfp_get_fpsr_from_host() to do this.
7
8
Note that because half-precision operations never set IDC, we now
9
need to add float_flag_input_denormal_used to the set we mask out of
10
fp_status_f16_a64.
11
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
target/arm/vfp_helper.c | 17 ++++++++++++++---
16
1 file changed, 14 insertions(+), 3 deletions(-)
17
18
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/vfp_helper.c
21
+++ b/target/arm/vfp_helper.c
22
@@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s)
23
#ifdef CONFIG_TCG
24
25
/* Convert host exception flags to vfp form. */
26
-static inline uint32_t vfp_exceptbits_from_host(int host_bits)
27
+static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
28
{
29
uint32_t target_bits = 0;
30
31
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
32
if (host_bits & float_flag_input_denormal_flushed) {
33
target_bits |= FPSR_IDC;
34
}
35
+ /*
36
+ * With FPCR.AH, IDC is set when an input denormal is used,
37
+ * and flushing an output denormal to zero sets both IXC and UFC.
38
+ */
39
+ if (ah && (host_bits & float_flag_input_denormal_used)) {
40
+ target_bits |= FPSR_IDC;
41
+ }
42
+ if (ah && (host_bits & float_flag_output_denormal_flushed)) {
43
+ target_bits |= FPSR_IXC;
44
+ }
45
return target_bits;
46
}
47
48
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
49
50
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
51
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
52
- & ~float_flag_input_denormal_flushed);
53
+ & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
54
/*
55
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
56
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
57
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
58
if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
59
a64_flags &= ~float_flag_input_denormal_flushed;
60
}
61
- return vfp_exceptbits_from_host(a32_flags | a64_flags);
62
+ return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
63
+ vfp_exceptbits_from_host(a32_flags, false);
64
}
65
66
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
67
--
68
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
We are going to need to generate different code in some cases when
2
FPCR.AH is 1. For example:
3
* Floating point neg and abs must not flip the sign bit of NaNs
4
* some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various
5
BFCVT and BFM bfloat16 ops) need to use a different float_status
6
to the usual one
2
7
3
e2h_access() was added in commit bb5972e439 ("target/arm:
8
Encode FPCR.AH into the A64 tbflags, so we can refer to it at
4
Add VHE timer register redirection and aliasing") close to
9
translate time.
5
the generic_timer_cp_reginfo[] array, but isn't used until
6
vhe_reginfo[] definition. Move it closer to the other e2h
7
helpers.
8
10
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Because we now have a bit in FPCR that affects codegen, we can't mark
12
the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes
13
to it will now end the TB and trigger a regeneration of hflags.
14
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20240118200643.29037-19-philmd@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
17
---
14
target/arm/helper.c | 29 +++++++++++++++--------------
18
target/arm/cpu.h | 1 +
15
1 file changed, 15 insertions(+), 14 deletions(-)
19
target/arm/tcg/translate.h | 2 ++
20
target/arm/helper.c | 2 +-
21
target/arm/tcg/hflags.c | 4 ++++
22
target/arm/tcg/translate-a64.c | 1 +
23
5 files changed, 9 insertions(+), 1 deletion(-)
16
24
25
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/cpu.h
28
+++ b/target/arm/cpu.h
29
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1)
30
FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
31
/* Set if FEAT_NV2 RAM accesses are big-endian */
32
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
33
+FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
34
35
/*
36
* Helpers for using the above. Note that only the A64 accessors use
37
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/translate.h
40
+++ b/target/arm/tcg/translate.h
41
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
42
bool nv2_mem_e20;
43
/* True if NV2 enabled and NV2 RAM accesses are big-endian */
44
bool nv2_mem_be;
45
+ /* True if FPCR.AH is 1 (alternate floating point handling) */
46
+ bool fpcr_ah;
47
/*
48
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
49
* < 0, set by the current instruction.
17
diff --git a/target/arm/helper.c b/target/arm/helper.c
50
diff --git a/target/arm/helper.c b/target/arm/helper.c
18
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.c
52
--- a/target/arm/helper.c
20
+++ b/target/arm/helper.c
53
+++ b/target/arm/helper.c
21
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
54
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
22
},
55
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
23
};
56
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
24
57
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
25
-static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
58
- .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
26
- bool isread)
59
+ .access = PL0_RW, .type = ARM_CP_FPU,
27
-{
60
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
28
- if (arm_current_el(env) == 1) {
61
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
29
- /* This must be a FEAT_NV access */
62
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
30
- /* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */
63
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
31
- return CP_ACCESS_OK;
64
index XXXXXXX..XXXXXXX 100644
32
- }
65
--- a/target/arm/tcg/hflags.c
33
- if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
66
+++ b/target/arm/tcg/hflags.c
34
- return CP_ACCESS_TRAP;
67
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
35
- }
68
DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
36
- return CP_ACCESS_OK;
69
}
37
-}
70
38
-
71
+ if (env->vfp.fpcr & FPCR_AH) {
39
#else
72
+ DP_TBFLAG_A64(flags, AH, 1);
40
73
+ }
41
/*
42
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
43
};
44
45
#ifndef CONFIG_USER_ONLY
46
+
74
+
47
+static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
75
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
48
+ bool isread)
76
}
49
+{
77
50
+ if (arm_current_el(env) == 1) {
78
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
51
+ /* This must be a FEAT_NV access */
79
index XXXXXXX..XXXXXXX 100644
52
+ /* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */
80
--- a/target/arm/tcg/translate-a64.c
53
+ return CP_ACCESS_OK;
81
+++ b/target/arm/tcg/translate-a64.c
54
+ }
82
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
55
+ if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
83
dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
56
+ return CP_ACCESS_TRAP;
84
dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
57
+ }
85
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
58
+ return CP_ACCESS_OK;
86
+ dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
59
+}
87
dc->vec_len = 0;
60
+
88
dc->vec_stride = 0;
61
/* Test if system register redirection is to occur in the current state. */
89
dc->cp_regs = arm_cpu->cp_regs;
62
static bool redirect_for_e2h(CPUARMState *env)
63
{
64
--
90
--
65
2.34.1
91
2.34.1
66
67
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
When FPCR.AH is 1, the behaviour of some instructions changes:
2
2
* AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT
3
Add MMDC, OCOTP, QSPI, CAAM, and USBMISC as unimplemented devices.
3
* SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT
4
4
* SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which
5
This allows operating systems such as Linux to run on emulations such as
5
QEMU does not yet implement)
6
mcimx6ul-evk.
6
* FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS
7
7
8
Before commit 0cd4926b85 ("Refactor i.MX6UL processor code"), the affected
8
The behaviour change is:
9
memory ranges were covered by the unimplemented DAP device. The commit
9
* the instructions do not update the FPSR cumulative exception flags
10
reduced the DAP address range from 0x100000 to 4kB, and the emulation
10
* trapped floating point exceptions are disabled (a no-op for QEMU,
11
thus no longer covered the various unimplemented devices in the affected
11
which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE})
12
address range.
12
* rounding is always round-to-nearest-even regardless of FPCR.RMode
13
13
* denormalized inputs and outputs are always flushed to zero, as if
14
Fixes: 0cd4926b85 ("Refactor i.MX6UL processor code")
14
FPCR.{FZ,FIZ} is {1,1}
15
Cc: Jean-Christophe Dubois <jcd@tribudubois.net>
15
* FPCR.FZ16 is still honoured for half-precision inputs
16
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
16
17
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
17
(See the Arm ARM DDI0487L.a section A1.5.9.)
18
Message-id: 20240120005356.2599547-1-linux@roeck-us.net
18
19
We can provide all these behaviours with another pair of float_status fields
20
which we use only for these insns, when FPCR.AH is 1. These float_status
21
fields will always have:
22
* flush_to_zero and flush_inputs_to_zero set for the non-F16 field
23
* rounding mode set to round-to-nearest-even
24
and so the only FPCR fields they need to honour are DN and FZ16.
25
26
In this commit we only define the new fp_status fields and give them
27
the required behaviour when FPCR is updated. In subsequent commits
28
we will arrange to use this new fp_status field for the instructions
29
that should be affected by FPCR.AH in this way.
30
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
---
33
---
21
include/hw/arm/fsl-imx6ul.h | 2 ++
34
target/arm/cpu.h | 15 +++++++++++++++
22
hw/arm/fsl-imx6ul.c | 30 ++++++++++++++++++++++++++++++
35
target/arm/internals.h | 2 ++
23
2 files changed, 32 insertions(+)
36
target/arm/tcg/translate.h | 14 ++++++++++++++
24
37
target/arm/cpu.c | 4 ++++
25
diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h
38
target/arm/vfp_helper.c | 13 ++++++++++++-
26
index XXXXXXX..XXXXXXX 100644
39
5 files changed, 47 insertions(+), 1 deletion(-)
27
--- a/include/hw/arm/fsl-imx6ul.h
40
28
+++ b/include/hw/arm/fsl-imx6ul.h
41
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
29
@@ -XXX,XX +XXX,XX @@ enum FslIMX6ULMemoryMap {
42
index XXXXXXX..XXXXXXX 100644
30
FSL_IMX6UL_ENET1_ADDR = 0x02188000,
43
--- a/target/arm/cpu.h
31
44
+++ b/target/arm/cpu.h
32
FSL_IMX6UL_USBO2_USBMISC_ADDR = 0x02184800,
45
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
33
+ FSL_IMX6UL_USBO2_USBMISC_SIZE = 0x200,
46
* standard_fp_status : the ARM "Standard FPSCR Value"
34
+
47
* standard_fp_status_fp16 : used for half-precision
35
FSL_IMX6UL_USBO2_USB1_ADDR = 0x02184000,
48
* calculations with the ARM "Standard FPSCR Value"
36
FSL_IMX6UL_USBO2_USB2_ADDR = 0x02184200,
49
+ * ah_fp_status: used for the A64 insns which change behaviour
37
50
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
38
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
51
+ * and the reciprocal and square root estimate/step insns)
39
index XXXXXXX..XXXXXXX 100644
52
+ * ah_fp_status_f16: used for the A64 insns which change behaviour
40
--- a/hw/arm/fsl-imx6ul.c
53
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
41
+++ b/hw/arm/fsl-imx6ul.c
54
+ * and the reciprocal and square root estimate/step insns);
42
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
55
+ * for half-precision
43
create_unimplemented_device("a7mpcore-dap", FSL_IMX6UL_A7MPCORE_DAP_ADDR,
56
*
44
FSL_IMX6UL_A7MPCORE_DAP_SIZE);
57
* Half-precision operations are governed by a separate
45
58
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
59
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
60
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
61
* using a fixed value for it.
62
*
63
+ * The ah_fp_status is needed because some insns have different
64
+ * behaviour when FPCR.AH == 1: they don't update cumulative
65
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
66
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
67
+ * which means we need an ah_fp_status_f16 as well.
68
+ *
69
* To avoid having to transfer exception bits around, we simply
70
* say that the FPSCR cumulative exception flags are the logical
71
* OR of the flags in the four fp statuses. This relies on the
72
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
73
float_status fp_status_f16_a64;
74
float_status standard_fp_status;
75
float_status standard_fp_status_f16;
76
+ float_status ah_fp_status;
77
+ float_status ah_fp_status_f16;
78
79
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
80
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
81
diff --git a/target/arm/internals.h b/target/arm/internals.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/arm/internals.h
84
+++ b/target/arm/internals.h
85
@@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env);
86
87
/* Set the float_status behaviour to match the Arm defaults */
88
void arm_set_default_fp_behaviours(float_status *s);
89
+/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
90
+void arm_set_ah_fp_behaviours(float_status *s);
91
92
#endif
93
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/arm/tcg/translate.h
96
+++ b/target/arm/tcg/translate.h
97
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
98
FPST_A64,
99
FPST_A32_F16,
100
FPST_A64_F16,
101
+ FPST_AH,
102
+ FPST_AH_F16,
103
FPST_STD,
104
FPST_STD_F16,
105
} ARMFPStatusFlavour;
106
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
107
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
108
* FPST_A64_F16
109
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
110
+ * FPST_AH:
111
+ * for AArch64 operations which change behaviour when AH=1 (specifically,
112
+ * bfloat16 conversions and multiplies, and the reciprocal and square root
113
+ * estimate/step insns)
114
+ * FPST_AH_F16:
115
+ * ditto, but for half-precision operations
116
* FPST_STD
117
* for A32/T32 Neon operations using the "standard FPSCR value"
118
* FPST_STD_F16
119
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
120
case FPST_A64_F16:
121
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
122
break;
123
+ case FPST_AH:
124
+ offset = offsetof(CPUARMState, vfp.ah_fp_status);
125
+ break;
126
+ case FPST_AH_F16:
127
+ offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
128
+ break;
129
case FPST_STD:
130
offset = offsetof(CPUARMState, vfp.standard_fp_status);
131
break;
132
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/target/arm/cpu.c
135
+++ b/target/arm/cpu.c
136
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
137
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
138
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
139
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
140
+ arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
141
+ set_flush_to_zero(1, &env->vfp.ah_fp_status);
142
+ set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
143
+ arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
144
145
#ifndef CONFIG_USER_ONLY
146
if (kvm_enabled()) {
147
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/arm/vfp_helper.c
150
+++ b/target/arm/vfp_helper.c
151
@@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s)
152
* set Invalid for a QNaN
153
* * default NaN has sign bit set, msb frac bit set
154
*/
155
-static void arm_set_ah_fp_behaviours(float_status *s)
156
+void arm_set_ah_fp_behaviours(float_status *s)
157
{
158
set_float_detect_tininess(float_tininess_after_rounding, s);
159
set_float_ftz_detection(float_ftz_after_rounding, s);
160
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
161
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
162
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
163
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
46
+ /*
164
+ /*
47
+ * MMDC
165
+ * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
166
+ * they are used for insns that must not set the cumulative exception bits.
48
+ */
167
+ */
49
+ create_unimplemented_device("a7mpcore-mmdc", FSL_IMX6UL_MMDC_CFG_ADDR,
50
+ FSL_IMX6UL_MMDC_CFG_SIZE);
51
+
52
+ /*
53
+ * OCOTP
54
+ */
55
+ create_unimplemented_device("a7mpcore-ocotp", FSL_IMX6UL_OCOTP_CTRL_ADDR,
56
+ FSL_IMX6UL_OCOTP_CTRL_SIZE);
57
+
58
+ /*
59
+ * QSPI
60
+ */
61
+ create_unimplemented_device("a7mpcore-qspi", FSL_IMX6UL_QSPI_ADDR,
62
+ FSL_IMX6UL_QSPI_SIZE);
63
+
64
+ /*
65
+ * CAAM
66
+ */
67
+ create_unimplemented_device("a7mpcore-qspi", FSL_IMX6UL_CAAM_ADDR,
68
+ FSL_IMX6UL_CAAM_SIZE);
69
+
70
+ /*
71
+ * USBMISC
72
+ */
73
+ create_unimplemented_device("a7mpcore-usbmisc", FSL_IMX6UL_USBO2_USBMISC_ADDR,
74
+ FSL_IMX6UL_USBO2_USBMISC_SIZE);
75
+
168
+
76
/*
169
/*
77
* GPTs
170
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
78
*/
171
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
172
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
173
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
174
set_float_exception_flags(0, &env->vfp.standard_fp_status);
175
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
176
+ set_float_exception_flags(0, &env->vfp.ah_fp_status);
177
+ set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
178
}
179
180
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
181
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
182
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
183
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
184
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
185
+ set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
186
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
187
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
188
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
189
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
190
}
191
if (changed & FPCR_FZ) {
192
bool ftz_enabled = val & FPCR_FZ;
193
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
194
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
195
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
196
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
197
+ set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
198
+ set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
199
}
200
if (changed & FPCR_AH) {
201
bool ah_enabled = val & FPCR_AH;
79
--
202
--
80
2.34.1
203
2.34.1
81
82
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use
2
FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get
3
the required behaviour changes.
2
4
3
The USB Controllers are part of the chipset, thus are
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
always present and mapped in memory.
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.h | 13 ++++
9
target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++--------
10
target/arm/tcg/translate-sve.c | 30 ++++++---
11
3 files changed, 127 insertions(+), 35 deletions(-)
5
12
6
This is a migration compatibility break for the cubieboard
13
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
7
machine started with the '-usb none' option.
8
9
Reported-by: Guenter Roeck <linux@roeck-us.net>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
12
Tested-by: Guenter Roeck <linux@roeck-us.net>
13
Message-id: 20240119215106.45776-2-philmd@linaro.org
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
17
hw/arm/allwinner-a10.c | 49 +++++++++++++++++-------------------------
18
1 file changed, 20 insertions(+), 29 deletions(-)
19
20
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
21
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/arm/allwinner-a10.c
15
--- a/target/arm/tcg/translate-a64.h
23
+++ b/hw/arm/allwinner-a10.c
16
+++ b/target/arm/tcg/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ static void aw_a10_init(Object *obj)
17
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
25
18
return ret;
26
object_initialize_child(obj, "i2c0", &s->i2c0, TYPE_AW_I2C);
19
}
27
20
28
- if (machine_usb(current_machine)) {
21
+/*
29
- int i;
22
+ * Return the ARMFPStatusFlavour to use based on element size and
30
-
23
+ * whether FPCR.AH is set.
31
- for (i = 0; i < AW_A10_NUM_USB; i++) {
24
+ */
32
- object_initialize_child(obj, "ehci[*]", &s->ehci[i],
25
+static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz)
33
- TYPE_PLATFORM_EHCI);
26
+{
34
- object_initialize_child(obj, "ohci[*]", &s->ohci[i],
27
+ if (s->fpcr_ah) {
35
- TYPE_SYSBUS_OHCI);
28
+ return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
36
- }
29
+ } else {
37
+ for (size_t i = 0; i < AW_A10_NUM_USB; i++) {
30
+ return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
38
+ object_initialize_child(obj, "ehci[*]", &s->ehci[i],
31
+ }
39
+ TYPE_PLATFORM_EHCI);
32
+}
40
+ object_initialize_child(obj, "ohci[*]", &s->ohci[i], TYPE_SYSBUS_OHCI);
33
+
34
bool disas_sve(DisasContext *, uint32_t);
35
bool disas_sme(DisasContext *, uint32_t);
36
37
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/translate-a64.c
40
+++ b/target/arm/tcg/translate-a64.c
41
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
42
* an out-of-line helper.
43
*/
44
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
45
- int rm, bool is_fp16, int data,
46
+ int rm, ARMFPStatusFlavour fpsttype, int data,
47
gen_helper_gvec_3_ptr *fn)
48
{
49
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
50
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
51
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
52
vec_full_reg_offset(s, rn),
53
vec_full_reg_offset(s, rm), fpst,
54
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar {
55
void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
56
} FPScalar;
57
58
-static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
59
+static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
60
+ const FPScalar *f,
61
+ ARMFPStatusFlavour fpsttype)
62
{
63
switch (a->esz) {
64
case MO_64:
65
if (fp_access_check(s)) {
66
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
67
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
68
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
69
+ f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
70
write_fp_dreg(s, a->rd, t0);
71
}
72
break;
73
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
74
if (fp_access_check(s)) {
75
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
76
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
77
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
78
+ f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
79
write_fp_sreg(s, a->rd, t0);
80
}
81
break;
82
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
83
if (fp_access_check(s)) {
84
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
85
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
86
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
87
+ f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
88
write_fp_sreg(s, a->rd, t0);
89
}
90
break;
91
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
92
return true;
93
}
94
95
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
96
+{
97
+ return do_fp3_scalar_with_fpsttype(s, a, f,
98
+ a->esz == MO_16 ?
99
+ FPST_A64_F16 : FPST_A64);
100
+}
101
+
102
+static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
103
+{
104
+ return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz));
105
+}
106
+
107
static const FPScalar f_scalar_fadd = {
108
gen_helper_vfp_addh,
109
gen_helper_vfp_adds,
110
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = {
111
gen_helper_recpsf_f32,
112
gen_helper_recpsf_f64,
113
};
114
-TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
115
+TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps)
116
117
static const FPScalar f_scalar_frsqrts = {
118
gen_helper_rsqrtsf_f16,
119
gen_helper_rsqrtsf_f32,
120
gen_helper_rsqrtsf_f64,
121
};
122
-TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
123
+TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts)
124
125
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
126
const FPScalar *f, bool swap)
127
@@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
128
TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
129
TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
130
131
-static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
132
- gen_helper_gvec_3_ptr * const fns[3])
133
+static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
134
+ int data,
135
+ gen_helper_gvec_3_ptr * const fns[3],
136
+ ARMFPStatusFlavour fpsttype)
137
{
138
MemOp esz = a->esz;
139
int check = fp_access_check_vector_hsd(s, a->q, esz);
140
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
141
return check == 0;
41
}
142
}
42
143
43
object_initialize_child(obj, "mmc0", &s->mmc0, TYPE_AW_SDHOST_SUN4I);
144
- gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
44
@@ -XXX,XX +XXX,XX @@ static void aw_a10_realize(DeviceState *dev, Error **errp)
145
- esz == MO_16, data, fns[esz - 1]);
45
qdev_get_gpio_in(dev, 1),
146
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
46
115200, serial_hd(0), DEVICE_NATIVE_ENDIAN);
147
+ data, fns[esz - 1]);
47
148
return true;
48
- if (machine_usb(current_machine)) {
149
}
49
- int i;
150
50
+ for (size_t i = 0; i < AW_A10_NUM_USB; i++) {
151
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
51
+ g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i);
152
+ gen_helper_gvec_3_ptr * const fns[3])
52
153
+{
53
- for (i = 0; i < AW_A10_NUM_USB; i++) {
154
+ return do_fp3_vector_with_fpsttype(s, a, data, fns,
54
- g_autofree char *bus = g_strdup_printf("usb-bus.%d", i);
155
+ a->esz == MO_16 ?
55
+ object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable",
156
+ FPST_A64_F16 : FPST_A64);
56
+ true, &error_fatal);
157
+}
57
+ sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal);
158
+
58
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0,
159
+static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
59
+ AW_A10_EHCI_BASE + i * 0x8000);
160
+ gen_helper_gvec_3_ptr * const f[3])
60
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
161
+{
61
+ qdev_get_gpio_in(dev, 39 + i));
162
+ return do_fp3_vector_with_fpsttype(s, a, data, f,
62
163
+ select_ah_fpst(s, a->esz));
63
- object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable",
164
+}
64
- true, &error_fatal);
165
+
65
- sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal);
166
static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
66
- sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0,
167
gen_helper_gvec_fadd_h,
67
- AW_A10_EHCI_BASE + i * 0x8000);
168
gen_helper_gvec_fadd_s,
68
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
169
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
69
- qdev_get_gpio_in(dev, 39 + i));
170
gen_helper_gvec_recps_s,
70
-
171
gen_helper_gvec_recps_d,
71
- object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus,
172
};
72
- &error_fatal);
173
-TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
73
- sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal);
174
+TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps)
74
- sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0,
175
75
- AW_A10_OHCI_BASE + i * 0x8000);
176
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
76
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0,
177
gen_helper_gvec_rsqrts_h,
77
- qdev_get_gpio_in(dev, 64 + i));
178
gen_helper_gvec_rsqrts_s,
78
- }
179
gen_helper_gvec_rsqrts_d,
79
+ object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus,
180
};
80
+ &error_fatal);
181
-TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
81
+ sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal);
182
+TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts)
82
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0,
183
83
+ AW_A10_OHCI_BASE + i * 0x8000);
184
static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
84
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0,
185
gen_helper_gvec_faddp_h,
85
+ qdev_get_gpio_in(dev, 64 + i));
186
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
86
}
187
}
87
188
88
/* SD/MMC */
189
gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
190
- esz == MO_16, a->idx, fns[esz - 1]);
191
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
192
+ a->idx, fns[esz - 1]);
193
return true;
194
}
195
196
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 {
197
void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
198
} FPScalar1;
199
200
-static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
201
- const FPScalar1 *f, int rmode)
202
+static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
203
+ const FPScalar1 *f, int rmode,
204
+ ARMFPStatusFlavour fpsttype)
205
{
206
TCGv_i32 tcg_rmode = NULL;
207
TCGv_ptr fpst;
208
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
209
return check == 0;
210
}
211
212
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
213
+ fpst = fpstatus_ptr(fpsttype);
214
if (rmode >= 0) {
215
tcg_rmode = gen_set_rmode(rmode, fpst);
216
}
217
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
218
return true;
219
}
220
221
+static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
222
+ const FPScalar1 *f, int rmode)
223
+{
224
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
225
+ a->esz == MO_16 ?
226
+ FPST_A64_F16 : FPST_A64);
227
+}
228
+
229
+static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
230
+ const FPScalar1 *f, int rmode)
231
+{
232
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
233
+}
234
+
235
static const FPScalar1 f_scalar_fsqrt = {
236
gen_helper_vfp_sqrth,
237
gen_helper_vfp_sqrts,
238
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = {
239
gen_helper_recpe_f32,
240
gen_helper_recpe_f64,
241
};
242
-TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1)
243
+TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1)
244
245
static const FPScalar1 f_scalar_frecpx = {
246
gen_helper_frecpx_f16,
247
gen_helper_frecpx_f32,
248
gen_helper_frecpx_f64,
249
};
250
-TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1)
251
+TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
252
253
static const FPScalar1 f_scalar_frsqrte = {
254
gen_helper_rsqrte_f16,
255
gen_helper_rsqrte_f32,
256
gen_helper_rsqrte_f64,
257
};
258
-TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1)
259
+TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1)
260
261
static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
262
{
263
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
264
&f_scalar_frint64, FPROUNDING_ZERO)
265
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
266
267
-static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
268
- int rd, int rn, int data,
269
- gen_helper_gvec_2_ptr * const fns[3])
270
+static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
271
+ bool is_q, int rd, int rn, int data,
272
+ gen_helper_gvec_2_ptr * const fns[3],
273
+ ARMFPStatusFlavour fpsttype)
274
{
275
int check = fp_access_check_vector_hsd(s, is_q, esz);
276
TCGv_ptr fpst;
277
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
278
return check == 0;
279
}
280
281
- fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
282
+ fpst = fpstatus_ptr(fpsttype);
283
tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
284
vec_full_reg_offset(s, rn), fpst,
285
is_q ? 16 : 8, vec_full_reg_size(s),
286
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
287
return true;
288
}
289
290
+static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
291
+ int rd, int rn, int data,
292
+ gen_helper_gvec_2_ptr * const fns[3])
293
+{
294
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
295
+ esz == MO_16 ? FPST_A64_F16 :
296
+ FPST_A64);
297
+}
298
+
299
+static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
300
+ int rd, int rn, int data,
301
+ gen_helper_gvec_2_ptr * const fns[3])
302
+{
303
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
304
+ fns, select_ah_fpst(s, esz));
305
+}
306
+
307
static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
308
gen_helper_gvec_vcvt_sh,
309
gen_helper_gvec_vcvt_sf,
310
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = {
311
gen_helper_gvec_frecpe_s,
312
gen_helper_gvec_frecpe_d,
313
};
314
-TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
315
+TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
316
317
static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
318
gen_helper_gvec_frsqrte_h,
319
gen_helper_gvec_frsqrte_s,
320
gen_helper_gvec_frsqrte_d,
321
};
322
-TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
323
+TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
324
325
static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
326
{
327
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
328
index XXXXXXX..XXXXXXX 100644
329
--- a/target/arm/tcg/translate-sve.c
330
+++ b/target/arm/tcg/translate-sve.c
331
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
332
return true;
333
}
334
335
-static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
336
- arg_rr_esz *a, int data)
337
+static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
338
+ arg_rr_esz *a, int data)
339
{
340
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
341
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
342
+ select_ah_fpst(s, a->esz));
343
}
344
345
/* Invoke an out-of-line helper on 3 Zregs. */
346
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
347
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
348
}
349
350
+static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
351
+ arg_rrr_esz *a, int data)
352
+{
353
+ return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
354
+ select_ah_fpst(s, a->esz));
355
+}
356
+
357
/* Invoke an out-of-line helper on 4 Zregs. */
358
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
359
int rd, int rn, int rm, int ra, int data)
360
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
361
NULL, gen_helper_gvec_frecpe_h,
362
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
363
};
364
-TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
365
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0)
366
367
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
368
NULL, gen_helper_gvec_frsqrte_h,
369
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
370
};
371
-TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
372
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0)
373
374
/*
375
*** SVE Floating Point Compare with Zero Group
376
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
377
}; \
378
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
379
380
+#define DO_FP3_AH(NAME, name) \
381
+ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
382
+ NULL, gen_helper_gvec_##name##_h, \
383
+ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
384
+ }; \
385
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0)
386
+
387
DO_FP3(FADD_zzz, fadd)
388
DO_FP3(FSUB_zzz, fsub)
389
DO_FP3(FMUL_zzz, fmul)
390
-DO_FP3(FRECPS, recps)
391
-DO_FP3(FRSQRTS, rsqrts)
392
+DO_FP3_AH(FRECPS, recps)
393
+DO_FP3_AH(FRSQRTS, rsqrts)
394
395
#undef DO_FP3
396
397
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
398
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
399
};
400
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
401
- a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
402
+ a, 0, select_ah_fpst(s, a->esz))
403
404
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
405
NULL, gen_helper_sve_fsqrt_h,
89
--
406
--
90
2.34.1
407
2.34.1
91
92
diff view generated by jsdifflib
New patch
1
When FPCR.AH is 1, use FPST_AH for:
2
* AdvSIMD BFCVT, BFCVTN, BFCVTN2
3
* SVE BFCVT, BFCVTNT
1
4
5
so that they get the required behaviour changes.
6
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------
11
target/arm/tcg/translate-sve.c | 6 ++++--
12
2 files changed, 25 insertions(+), 8 deletions(-)
13
14
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/tcg/translate-a64.c
17
+++ b/target/arm/tcg/translate-a64.c
18
@@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
19
static const FPScalar1 f_scalar_bfcvt = {
20
.gen_s = gen_helper_bfcvt,
21
};
22
-TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1)
23
+TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1)
24
25
static const FPScalar1 f_scalar_frint32 = {
26
NULL,
27
@@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
28
tcg_gen_extu_i32_i64(d, tmp);
29
}
30
31
-static ArithOneOp * const f_vector_bfcvtn[] = {
32
- NULL,
33
- gen_bfcvtn_hs,
34
- NULL,
35
+static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
36
+{
37
+ TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
38
+ TCGv_i32 tmp = tcg_temp_new_i32();
39
+ gen_helper_bfcvt_pair(tmp, n, fpst);
40
+ tcg_gen_extu_i32_i64(d, tmp);
41
+}
42
+
43
+static ArithOneOp * const f_vector_bfcvtn[2][3] = {
44
+ {
45
+ NULL,
46
+ gen_bfcvtn_hs,
47
+ NULL,
48
+ }, {
49
+ NULL,
50
+ gen_bfcvtn_ah_hs,
51
+ NULL,
52
+ }
53
};
54
-TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn)
55
+TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
56
+ f_vector_bfcvtn[s->fpcr_ah])
57
58
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
59
{
60
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/tcg/translate-sve.c
63
+++ b/target/arm/tcg/translate-sve.c
64
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
65
gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
66
67
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
68
- gen_helper_sve_bfcvt, a, 0, FPST_A64)
69
+ gen_helper_sve_bfcvt, a, 0,
70
+ s->fpcr_ah ? FPST_AH : FPST_A64)
71
72
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
73
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
74
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
75
gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
76
77
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
78
- gen_helper_sve_bfcvtnt, a, 0, FPST_A64)
79
+ gen_helper_sve_bfcvtnt, a, 0,
80
+ s->fpcr_ah ? FPST_AH : FPST_A64)
81
82
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
83
gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
84
--
85
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH is 1, use FPST_AH for:
2
* AdvSIMD BFMLALB, BFMLALT
3
* SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT
1
4
5
so that they get the required behaviour changes.
6
7
We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour
8
rather than a bool is_fp16; existing callsites now select
9
FPST_A64_F16 vs FPST_A64 themselves rather than passing in
10
the boolean.
11
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
target/arm/tcg/translate-a64.c | 20 +++++++++++++-------
16
target/arm/tcg/translate-sve.c | 6 ++++--
17
2 files changed, 17 insertions(+), 9 deletions(-)
18
19
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/tcg/translate-a64.c
22
+++ b/target/arm/tcg/translate-a64.c
23
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
24
* an out-of-line helper.
25
*/
26
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
27
- int rm, int ra, bool is_fp16, int data,
28
+ int rm, int ra, ARMFPStatusFlavour fpsttype,
29
+ int data,
30
gen_helper_gvec_4_ptr *fn)
31
{
32
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
33
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
34
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
35
vec_full_reg_offset(s, rn),
36
vec_full_reg_offset(s, rm),
37
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
38
}
39
if (fp_access_check(s)) {
40
/* Q bit selects BFMLALB vs BFMLALT. */
41
- gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
42
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
43
+ s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
44
gen_helper_gvec_bfmlal);
45
}
46
return true;
47
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
48
}
49
50
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
51
- a->esz == MO_16, a->rot, fn[a->esz]);
52
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
53
+ a->rot, fn[a->esz]);
54
return true;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
58
}
59
60
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
61
- esz == MO_16, (a->idx << 1) | neg,
62
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
63
+ (a->idx << 1) | neg,
64
fns[esz - 1]);
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
68
}
69
if (fp_access_check(s)) {
70
/* Q bit selects BFMLALB vs BFMLALT. */
71
- gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
72
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
73
+ s->fpcr_ah ? FPST_AH : FPST_A64,
74
(a->idx << 1) | a->q,
75
gen_helper_gvec_bfmlal_idx);
76
}
77
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
78
}
79
if (fp_access_check(s)) {
80
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
81
- a->esz == MO_16, (a->idx << 2) | a->rot, fn);
82
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
83
+ (a->idx << 2) | a->rot, fn);
84
}
85
return true;
86
}
87
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/tcg/translate-sve.c
90
+++ b/target/arm/tcg/translate-sve.c
91
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
92
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
93
{
94
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
95
- a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
96
+ a->rd, a->rn, a->rm, a->ra, sel,
97
+ s->fpcr_ah ? FPST_AH : FPST_A64);
98
}
99
100
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
101
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
102
{
103
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
104
a->rd, a->rn, a->rm, a->ra,
105
- (a->index << 1) | sel, FPST_A64);
106
+ (a->index << 1) | sel,
107
+ s->fpcr_ah ? FPST_AH : FPST_A64);
108
}
109
110
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
111
--
112
2.34.1
diff view generated by jsdifflib
New patch
1
For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so
2
that instead of zeroing the high elements of a vector register when
3
we write the output of a scalar operation to it, we instead merge in
4
those elements from one of the source registers. Since this affects
5
the generated code, we need to put FPCR.NEP into the TBFLAGS.
1
6
7
FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64
8
is not implemented or not enabled; we can implement this logic in
9
rebuild_hflags_a64().
10
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
target/arm/cpu.h | 1 +
15
target/arm/tcg/translate.h | 2 ++
16
target/arm/tcg/hflags.c | 9 +++++++++
17
target/arm/tcg/translate-a64.c | 1 +
18
4 files changed, 13 insertions(+)
19
20
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/cpu.h
23
+++ b/target/arm/cpu.h
24
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
25
/* Set if FEAT_NV2 RAM accesses are big-endian */
26
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
27
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
28
+FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
29
30
/*
31
* Helpers for using the above. Note that only the A64 accessors use
32
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/tcg/translate.h
35
+++ b/target/arm/tcg/translate.h
36
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
37
bool nv2_mem_be;
38
/* True if FPCR.AH is 1 (alternate floating point handling) */
39
bool fpcr_ah;
40
+ /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */
41
+ bool fpcr_nep;
42
/*
43
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
44
* < 0, set by the current instruction.
45
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/tcg/hflags.c
48
+++ b/target/arm/tcg/hflags.c
49
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
50
if (env->vfp.fpcr & FPCR_AH) {
51
DP_TBFLAG_A64(flags, AH, 1);
52
}
53
+ if (env->vfp.fpcr & FPCR_NEP) {
54
+ /*
55
+ * In streaming-SVE without FA64, NEP behaves as if zero;
56
+ * compare pseudocode IsMerging()
57
+ */
58
+ if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) {
59
+ DP_TBFLAG_A64(flags, NEP, 1);
60
+ }
61
+ }
62
63
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
64
}
65
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/target/arm/tcg/translate-a64.c
68
+++ b/target/arm/tcg/translate-a64.c
69
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
70
dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
71
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
72
dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
73
+ dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
74
dc->vec_len = 0;
75
dc->vec_stride = 0;
76
dc->cp_regs = arm_cpu->cp_regs;
77
--
78
2.34.1
diff view generated by jsdifflib
1
Convert the musicpal key input device to use
1
For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the
2
qemu_add_kbd_event_handler(). This lets us simplify it because we no
2
behaviour of the writeback of the result for most SIMD scalar
3
longer need to track whether we're in the middle of a PS/2 multibyte
3
operations, so that instead of zeroing the upper part of the result
4
key sequence.
4
register it merges the upper elements from one of the input
5
5
registers.
6
In the conversion we move the keyboard handler registration from init
6
7
to realize, because devices shouldn't disturb the state of the
7
Provide new functions write_fp_*reg_merging() which can be used
8
simulation by doing things like registering input handlers until
8
instead of the existing write_fp_*reg() functions when we want this
9
they're realized, so that device objects can be introspected
9
"merge the result with one of the input registers if FPCR.NEP is
10
safely.
10
enabled" handling, and use them in do_fp3_scalar_with_fpsttype().
11
11
12
The behaviour where key-repeat is permitted for the arrow-keys only
12
Note that (as documented in the description of the FPCR.NEP bit)
13
is intentional (added in commit 7c6ce4baedfcd0c), so we retain it,
13
which input register to use as the merge source varies by
14
and add a comment to that effect.
14
instruction: for these 2-input scalar operations, the comparison
15
15
instructions take from Rm, not Rn.
16
This is a migration compatibility break for musicpal.
16
17
We'll extend this to also provide the merging behaviour for
18
the remaining scalar insns in subsequent commits.
17
19
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Tested-by: Alex Bennée <alex.bennee@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
21
Message-id: 20231103182750.855577-1-peter.maydell@linaro.org
22
---
22
---
23
hw/arm/musicpal.c | 131 +++++++++++++++++++++-------------------------
23
target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++--------
24
1 file changed, 61 insertions(+), 70 deletions(-)
24
1 file changed, 91 insertions(+), 26 deletions(-)
25
25
26
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
26
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
27
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/arm/musicpal.c
28
--- a/target/arm/tcg/translate-a64.c
29
+++ b/hw/arm/musicpal.c
29
+++ b/target/arm/tcg/translate-a64.c
30
@@ -XXX,XX +XXX,XX @@ static const TypeInfo musicpal_gpio_info = {
30
@@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
31
};
31
write_fp_dreg(s, reg, tmp);
32
32
}
33
/* Keyboard codes & masks */
33
34
-#define KEY_RELEASED 0x80
34
+/*
35
-#define KEY_CODE 0x7f
35
+ * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
36
-
36
+ * - if FPCR.NEP == 0, clear the high elements of reg
37
-#define KEYCODE_TAB 0x0f
37
+ * - if FPCR.NEP == 1, set the high elements of reg from mergereg
38
-#define KEYCODE_ENTER 0x1c
38
+ * (i.e. merge the result with those high elements)
39
-#define KEYCODE_F 0x21
39
+ * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
40
-#define KEYCODE_M 0x32
40
+ */
41
-
41
+static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
42
-#define KEYCODE_EXTENDED 0xe0
42
+ TCGv_i64 v)
43
-#define KEYCODE_UP 0x48
43
+{
44
-#define KEYCODE_DOWN 0x50
44
+ if (!s->fpcr_nep) {
45
-#define KEYCODE_LEFT 0x4b
45
+ write_fp_dreg(s, reg, v);
46
-#define KEYCODE_RIGHT 0x4d
46
+ return;
47
-
47
+ }
48
#define MP_KEY_WHEEL_VOL (1 << 0)
48
+
49
#define MP_KEY_WHEEL_VOL_INV (1 << 1)
50
#define MP_KEY_WHEEL_NAV (1 << 2)
51
@@ -XXX,XX +XXX,XX @@ struct musicpal_key_state {
52
SysBusDevice parent_obj;
53
/*< public >*/
54
55
- uint32_t kbd_extended;
56
uint32_t pressed_keys;
57
qemu_irq out[8];
58
};
59
60
-static void musicpal_key_event(void *opaque, int keycode)
61
+static void musicpal_key_event(DeviceState *dev, QemuConsole *src,
62
+ InputEvent *evt)
63
{
64
- musicpal_key_state *s = opaque;
65
+ musicpal_key_state *s = MUSICPAL_KEY(dev);
66
+ InputKeyEvent *key = evt->u.key.data;
67
+ int qcode = qemu_input_key_value_to_qcode(key->key);
68
uint32_t event = 0;
69
int i;
70
71
- if (keycode == KEYCODE_EXTENDED) {
72
- s->kbd_extended = 1;
73
- return;
74
+ switch (qcode) {
75
+ case Q_KEY_CODE_UP:
76
+ event = MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_NAV_INV;
77
+ break;
78
+
79
+ case Q_KEY_CODE_DOWN:
80
+ event = MP_KEY_WHEEL_NAV;
81
+ break;
82
+
83
+ case Q_KEY_CODE_LEFT:
84
+ event = MP_KEY_WHEEL_VOL | MP_KEY_WHEEL_VOL_INV;
85
+ break;
86
+
87
+ case Q_KEY_CODE_RIGHT:
88
+ event = MP_KEY_WHEEL_VOL;
89
+ break;
90
+
91
+ case Q_KEY_CODE_F:
92
+ event = MP_KEY_BTN_FAVORITS;
93
+ break;
94
+
95
+ case Q_KEY_CODE_TAB:
96
+ event = MP_KEY_BTN_VOLUME;
97
+ break;
98
+
99
+ case Q_KEY_CODE_RET:
100
+ event = MP_KEY_BTN_NAVIGATION;
101
+ break;
102
+
103
+ case Q_KEY_CODE_M:
104
+ event = MP_KEY_BTN_MENU;
105
+ break;
106
}
107
108
- if (s->kbd_extended) {
109
- switch (keycode & KEY_CODE) {
110
- case KEYCODE_UP:
111
- event = MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_NAV_INV;
112
- break;
113
-
114
- case KEYCODE_DOWN:
115
- event = MP_KEY_WHEEL_NAV;
116
- break;
117
-
118
- case KEYCODE_LEFT:
119
- event = MP_KEY_WHEEL_VOL | MP_KEY_WHEEL_VOL_INV;
120
- break;
121
-
122
- case KEYCODE_RIGHT:
123
- event = MP_KEY_WHEEL_VOL;
124
- break;
125
- }
126
- } else {
127
- switch (keycode & KEY_CODE) {
128
- case KEYCODE_F:
129
- event = MP_KEY_BTN_FAVORITS;
130
- break;
131
-
132
- case KEYCODE_TAB:
133
- event = MP_KEY_BTN_VOLUME;
134
- break;
135
-
136
- case KEYCODE_ENTER:
137
- event = MP_KEY_BTN_NAVIGATION;
138
- break;
139
-
140
- case KEYCODE_M:
141
- event = MP_KEY_BTN_MENU;
142
- break;
143
- }
144
- /* Do not repeat already pressed buttons */
145
- if (!(keycode & KEY_RELEASED) && (s->pressed_keys & event)) {
146
+ /*
49
+ /*
147
+ * We allow repeated wheel-events when the arrow keys are held down,
50
+ * Move from mergereg to reg; this sets the high elements and
148
+ * but do not repeat already-pressed buttons for the other key inputs.
51
+ * clears the bits above 128 as a side effect.
149
+ */
52
+ */
150
+ if (!(event & (MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_VOL))) {
53
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
151
+ if (key->down && (s->pressed_keys & event)) {
54
+ vec_full_reg_offset(s, mergereg),
152
event = 0;
55
+ 16, vec_full_reg_size(s));
56
+ tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
57
+}
58
+
59
+/*
60
+ * Write a single-prec result, but only clear the higher elements
61
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
62
+ */
63
+static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
64
+ TCGv_i32 v)
65
+{
66
+ if (!s->fpcr_nep) {
67
+ write_fp_sreg(s, reg, v);
68
+ return;
69
+ }
70
+
71
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
72
+ vec_full_reg_offset(s, mergereg),
73
+ 16, vec_full_reg_size(s));
74
+ tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
75
+}
76
+
77
+/*
78
+ * Write a half-prec result, but only clear the higher elements
79
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
80
+ * The caller must ensure that the top 16 bits of v are zero.
81
+ */
82
+static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
83
+ TCGv_i32 v)
84
+{
85
+ if (!s->fpcr_nep) {
86
+ write_fp_sreg(s, reg, v);
87
+ return;
88
+ }
89
+
90
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
91
+ vec_full_reg_offset(s, mergereg),
92
+ 16, vec_full_reg_size(s));
93
+ tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
94
+}
95
+
96
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
97
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
98
GVecGen2Fn *gvec_fn, int vece)
99
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar {
100
} FPScalar;
101
102
static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
103
- const FPScalar *f,
104
+ const FPScalar *f, int mergereg,
105
ARMFPStatusFlavour fpsttype)
106
{
107
switch (a->esz) {
108
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
109
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
110
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
111
f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
112
- write_fp_dreg(s, a->rd, t0);
113
+ write_fp_dreg_merging(s, a->rd, mergereg, t0);
153
}
114
}
154
}
115
break;
155
116
case MO_32:
156
if (event) {
117
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
157
/* Raise GPIO pin first if repeating a key */
118
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
158
- if (!(keycode & KEY_RELEASED) && (s->pressed_keys & event)) {
119
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
159
+ if (key->down && (s->pressed_keys & event)) {
120
f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
160
for (i = 0; i <= 7; i++) {
121
- write_fp_sreg(s, a->rd, t0);
161
if (event & (1 << i)) {
122
+ write_fp_sreg_merging(s, a->rd, mergereg, t0);
162
qemu_set_irq(s->out[i], 1);
163
@@ -XXX,XX +XXX,XX @@ static void musicpal_key_event(void *opaque, int keycode)
164
}
123
}
165
for (i = 0; i <= 7; i++) {
124
break;
166
if (event & (1 << i)) {
125
case MO_16:
167
- qemu_set_irq(s->out[i], !!(keycode & KEY_RELEASED));
126
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
168
+ qemu_set_irq(s->out[i], !key->down);
127
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
169
}
128
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
129
f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
130
- write_fp_sreg(s, a->rd, t0);
131
+ write_fp_hreg_merging(s, a->rd, mergereg, t0);
170
}
132
}
171
- if (keycode & KEY_RELEASED) {
133
break;
172
- s->pressed_keys &= ~event;
134
default:
173
- } else {
135
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
174
+ if (key->down) {
136
return true;
175
s->pressed_keys |= event;
176
+ } else {
177
+ s->pressed_keys &= ~event;
178
}
179
}
180
-
181
- s->kbd_extended = 0;
182
}
137
}
183
138
184
static void musicpal_key_init(Object *obj)
139
-static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
185
@@ -XXX,XX +XXX,XX @@ static void musicpal_key_init(Object *obj)
140
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
186
DeviceState *dev = DEVICE(sbd);
141
+ int mergereg)
187
musicpal_key_state *s = MUSICPAL_KEY(dev);
142
{
188
143
- return do_fp3_scalar_with_fpsttype(s, a, f,
189
- s->kbd_extended = 0;
144
+ return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
190
s->pressed_keys = 0;
145
a->esz == MO_16 ?
191
146
FPST_A64_F16 : FPST_A64);
192
qdev_init_gpio_out(dev, s->out, ARRAY_SIZE(s->out));
193
+}
194
195
- qemu_add_kbd_event_handler(musicpal_key_event, s);
196
+static const QemuInputHandler musicpal_key_handler = {
197
+ .name = "musicpal_key",
198
+ .mask = INPUT_EVENT_MASK_KEY,
199
+ .event = musicpal_key_event,
200
+};
201
+
202
+static void musicpal_key_realize(DeviceState *dev, Error **errp)
203
+{
204
+ qemu_input_handler_register(dev, &musicpal_key_handler);
205
}
147
}
206
148
207
static const VMStateDescription musicpal_key_vmsd = {
149
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
208
.name = "musicpal_key",
150
+static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
209
- .version_id = 1,
151
+ int mergereg)
210
- .minimum_version_id = 1,
152
{
211
+ .version_id = 2,
153
- return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz));
212
+ .minimum_version_id = 2,
154
+ return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
213
.fields = (const VMStateField[]) {
155
+ select_ah_fpst(s, a->esz));
214
- VMSTATE_UINT32(kbd_extended, musicpal_key_state),
215
VMSTATE_UINT32(pressed_keys, musicpal_key_state),
216
VMSTATE_END_OF_LIST()
217
}
218
@@ -XXX,XX +XXX,XX @@ static void musicpal_key_class_init(ObjectClass *klass, void *data)
219
DeviceClass *dc = DEVICE_CLASS(klass);
220
221
dc->vmsd = &musicpal_key_vmsd;
222
+ dc->realize = musicpal_key_realize;
223
}
156
}
224
157
225
static const TypeInfo musicpal_key_info = {
158
static const FPScalar f_scalar_fadd = {
159
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = {
160
gen_helper_vfp_adds,
161
gen_helper_vfp_addd,
162
};
163
-TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
164
+TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
165
166
static const FPScalar f_scalar_fsub = {
167
gen_helper_vfp_subh,
168
gen_helper_vfp_subs,
169
gen_helper_vfp_subd,
170
};
171
-TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
172
+TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
173
174
static const FPScalar f_scalar_fdiv = {
175
gen_helper_vfp_divh,
176
gen_helper_vfp_divs,
177
gen_helper_vfp_divd,
178
};
179
-TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
180
+TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
181
182
static const FPScalar f_scalar_fmul = {
183
gen_helper_vfp_mulh,
184
gen_helper_vfp_muls,
185
gen_helper_vfp_muld,
186
};
187
-TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
188
+TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
189
190
static const FPScalar f_scalar_fmax = {
191
gen_helper_vfp_maxh,
192
gen_helper_vfp_maxs,
193
gen_helper_vfp_maxd,
194
};
195
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
196
+TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn)
197
198
static const FPScalar f_scalar_fmin = {
199
gen_helper_vfp_minh,
200
gen_helper_vfp_mins,
201
gen_helper_vfp_mind,
202
};
203
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
204
+TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn)
205
206
static const FPScalar f_scalar_fmaxnm = {
207
gen_helper_vfp_maxnumh,
208
gen_helper_vfp_maxnums,
209
gen_helper_vfp_maxnumd,
210
};
211
-TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
212
+TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
213
214
static const FPScalar f_scalar_fminnm = {
215
gen_helper_vfp_minnumh,
216
gen_helper_vfp_minnums,
217
gen_helper_vfp_minnumd,
218
};
219
-TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
220
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
221
222
static const FPScalar f_scalar_fmulx = {
223
gen_helper_advsimd_mulxh,
224
gen_helper_vfp_mulxs,
225
gen_helper_vfp_mulxd,
226
};
227
-TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
228
+TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
229
230
static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
231
{
232
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = {
233
gen_fnmul_s,
234
gen_fnmul_d,
235
};
236
-TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
237
+TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn)
238
239
static const FPScalar f_scalar_fcmeq = {
240
gen_helper_advsimd_ceq_f16,
241
gen_helper_neon_ceq_f32,
242
gen_helper_neon_ceq_f64,
243
};
244
-TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
245
+TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
246
247
static const FPScalar f_scalar_fcmge = {
248
gen_helper_advsimd_cge_f16,
249
gen_helper_neon_cge_f32,
250
gen_helper_neon_cge_f64,
251
};
252
-TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
253
+TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
254
255
static const FPScalar f_scalar_fcmgt = {
256
gen_helper_advsimd_cgt_f16,
257
gen_helper_neon_cgt_f32,
258
gen_helper_neon_cgt_f64,
259
};
260
-TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
261
+TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
262
263
static const FPScalar f_scalar_facge = {
264
gen_helper_advsimd_acge_f16,
265
gen_helper_neon_acge_f32,
266
gen_helper_neon_acge_f64,
267
};
268
-TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
269
+TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
270
271
static const FPScalar f_scalar_facgt = {
272
gen_helper_advsimd_acgt_f16,
273
gen_helper_neon_acgt_f32,
274
gen_helper_neon_acgt_f64,
275
};
276
-TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
277
+TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
278
279
static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
280
{
281
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = {
282
gen_fabd_s,
283
gen_fabd_d,
284
};
285
-TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
286
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn)
287
288
static const FPScalar f_scalar_frecps = {
289
gen_helper_recpsf_f16,
290
gen_helper_recpsf_f32,
291
gen_helper_recpsf_f64,
292
};
293
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps)
294
+TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
295
296
static const FPScalar f_scalar_frsqrts = {
297
gen_helper_rsqrtsf_f16,
298
gen_helper_rsqrtsf_f32,
299
gen_helper_rsqrtsf_f64,
300
};
301
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts)
302
+TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
303
304
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
305
const FPScalar *f, bool swap)
226
--
306
--
227
2.34.1
307
2.34.1
228
229
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP for the 3-input scalar operations which use
2
do_fmla_scalar_idx() and do_fmadd(), by making them call the
3
appropriate write_fp_*reg_merging() functions.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 12 ++++++------
9
1 file changed, 6 insertions(+), 6 deletions(-)
10
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/tcg/translate-a64.c
14
+++ b/target/arm/tcg/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
16
gen_vfp_negd(t1, t1);
17
}
18
gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
19
- write_fp_dreg(s, a->rd, t0);
20
+ write_fp_dreg_merging(s, a->rd, a->rd, t0);
21
}
22
break;
23
case MO_32:
24
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
25
gen_vfp_negs(t1, t1);
26
}
27
gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
28
- write_fp_sreg(s, a->rd, t0);
29
+ write_fp_sreg_merging(s, a->rd, a->rd, t0);
30
}
31
break;
32
case MO_16:
33
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
34
}
35
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
36
fpstatus_ptr(FPST_A64_F16));
37
- write_fp_sreg(s, a->rd, t0);
38
+ write_fp_hreg_merging(s, a->rd, a->rd, t0);
39
}
40
break;
41
default:
42
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
43
}
44
fpst = fpstatus_ptr(FPST_A64);
45
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
46
- write_fp_dreg(s, a->rd, ta);
47
+ write_fp_dreg_merging(s, a->rd, a->ra, ta);
48
}
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
52
}
53
fpst = fpstatus_ptr(FPST_A64);
54
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
55
- write_fp_sreg(s, a->rd, ta);
56
+ write_fp_sreg_merging(s, a->rd, a->ra, ta);
57
}
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
61
}
62
fpst = fpstatus_ptr(FPST_A64_F16);
63
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
64
- write_fp_sreg(s, a->rd, ta);
65
+ write_fp_hreg_merging(s, a->rd, a->ra, ta);
66
}
67
break;
68
69
--
70
2.34.1
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
Currently we implement BFCVT scalar via do_fp1_scalar(). This works
2
even though BFCVT is a narrowing operation from 32 to 16 bits,
3
because we can use write_fp_sreg() for float16. However, FPCR.NEP
4
support requires that we use write_fp_hreg_merging() for float16
5
outputs, so we can't continue to borrow the non-narrowing
6
do_fp1_scalar() function for this. Split out trans_BFCVT_s()
7
into its own implementation that honours FPCR.NEP.
2
8
3
Allwinner R40 supports two USB host ports shared between a USB 2.0 EHCI
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
host controller and a USB 1.1 OHCI host controller. Add support for both
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
of them.
11
---
12
target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++----
13
1 file changed, 21 insertions(+), 4 deletions(-)
6
14
7
If machine USB support is not enabled, create unimplemented devices
15
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
8
for the USB memory ranges to avoid crashes when booting Linux.
9
10
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Message-id: 20240115182757.1095012-2-linux@roeck-us.net
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
docs/system/arm/bananapi_m2u.rst | 2 +-
16
include/hw/arm/allwinner-r40.h | 9 ++++++
17
hw/arm/allwinner-r40.c | 47 ++++++++++++++++++++++++++++++--
18
hw/arm/Kconfig | 2 ++
19
4 files changed, 57 insertions(+), 3 deletions(-)
20
21
diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst
22
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
23
--- a/docs/system/arm/bananapi_m2u.rst
17
--- a/target/arm/tcg/translate-a64.c
24
+++ b/docs/system/arm/bananapi_m2u.rst
18
+++ b/target/arm/tcg/translate-a64.c
25
@@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices:
19
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = {
26
* GMAC ethernet
27
* Clock Control Unit
28
* TWI (I2C)
29
+ * USB 2.0
30
31
Limitations
32
"""""""""""
33
@@ -XXX,XX +XXX,XX @@ Currently, Banana Pi M2U does *not* support the following features:
34
- Audio output
35
- Hardware Watchdog
36
- Real Time Clock
37
-- USB 2.0 interfaces
38
39
Also see the 'unimplemented' array in the Allwinner R40 SoC module
40
for a complete list of unimplemented I/O devices: ``./hw/arm/allwinner-r40.c``
41
diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/include/hw/arm/allwinner-r40.h
44
+++ b/include/hw/arm/allwinner-r40.h
45
@@ -XXX,XX +XXX,XX @@
46
#include "hw/i2c/allwinner-i2c.h"
47
#include "hw/net/allwinner_emac.h"
48
#include "hw/net/allwinner-sun8i-emac.h"
49
+#include "hw/usb/hcd-ohci.h"
50
+#include "hw/usb/hcd-ehci.h"
51
#include "target/arm/cpu.h"
52
#include "sysemu/block-backend.h"
53
54
@@ -XXX,XX +XXX,XX @@ enum {
55
AW_R40_DEV_MMC1,
56
AW_R40_DEV_MMC2,
57
AW_R40_DEV_MMC3,
58
+ AW_R40_DEV_EHCI1,
59
+ AW_R40_DEV_OHCI1,
60
+ AW_R40_DEV_EHCI2,
61
+ AW_R40_DEV_OHCI2,
62
AW_R40_DEV_CCU,
63
AW_R40_DEV_PIT,
64
AW_R40_DEV_UART0,
65
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(AwR40State, AW_R40)
66
* which are currently emulated by the R40 SoC code.
67
*/
68
#define AW_R40_NUM_MMCS 4
69
+#define AW_R40_NUM_USB 2
70
#define AW_R40_NUM_UARTS 8
71
72
struct AwR40State {
73
@@ -XXX,XX +XXX,XX @@ struct AwR40State {
74
AwSRAMCState sramc;
75
AwA10PITState timer;
76
AwSdHostState mmc[AW_R40_NUM_MMCS];
77
+ EHCISysBusState ehci[AW_R40_NUM_USB];
78
+ OHCISysBusState ohci[AW_R40_NUM_USB];
79
AwR40ClockCtlState ccu;
80
AwR40DramCtlState dramc;
81
AWI2CState i2c0;
82
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/hw/arm/allwinner-r40.c
85
+++ b/hw/arm/allwinner-r40.c
86
@@ -XXX,XX +XXX,XX @@
87
#include "qemu/bswap.h"
88
#include "qemu/module.h"
89
#include "qemu/units.h"
90
+#include "hw/boards.h"
91
#include "hw/qdev-core.h"
92
#include "hw/sysbus.h"
93
#include "hw/char/serial.h"
94
@@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = {
95
[AW_R40_DEV_MMC1] = 0x01c10000,
96
[AW_R40_DEV_MMC2] = 0x01c11000,
97
[AW_R40_DEV_MMC3] = 0x01c12000,
98
+ [AW_R40_DEV_EHCI1] = 0x01c19000,
99
+ [AW_R40_DEV_OHCI1] = 0x01c19400,
100
+ [AW_R40_DEV_EHCI2] = 0x01c1c000,
101
+ [AW_R40_DEV_OHCI2] = 0x01c1c400,
102
[AW_R40_DEV_CCU] = 0x01c20000,
103
[AW_R40_DEV_PIT] = 0x01c20c00,
104
[AW_R40_DEV_UART0] = 0x01c28000,
105
@@ -XXX,XX +XXX,XX @@ static struct AwR40Unimplemented r40_unimplemented[] = {
106
{ "crypto", 0x01c15000, 4 * KiB },
107
{ "spi2", 0x01c17000, 4 * KiB },
108
{ "sata", 0x01c18000, 4 * KiB },
109
- { "usb1-host", 0x01c19000, 4 * KiB },
110
+ { "usb1-phy", 0x01c19800, 2 * KiB },
111
{ "sid", 0x01c1b000, 4 * KiB },
112
- { "usb2-host", 0x01c1c000, 4 * KiB },
113
+ { "usb2-phy", 0x01c1c800, 2 * KiB },
114
{ "cs1", 0x01c1d000, 4 * KiB },
115
{ "spi3", 0x01c1f000, 4 * KiB },
116
{ "rtc", 0x01c20400, 1 * KiB },
117
@@ -XXX,XX +XXX,XX @@ enum {
118
AW_R40_GIC_SPI_MMC2 = 34,
119
AW_R40_GIC_SPI_MMC3 = 35,
120
AW_R40_GIC_SPI_EMAC = 55,
121
+ AW_R40_GIC_SPI_OHCI1 = 64,
122
+ AW_R40_GIC_SPI_OHCI2 = 65,
123
+ AW_R40_GIC_SPI_EHCI1 = 76,
124
+ AW_R40_GIC_SPI_EHCI2 = 78,
125
AW_R40_GIC_SPI_GMAC = 85,
126
};
20
};
127
21
TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
128
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj)
22
129
TYPE_AW_SDHOST_SUN50I_A64);
23
-static const FPScalar1 f_scalar_bfcvt = {
130
}
24
- .gen_s = gen_helper_bfcvt,
131
25
-};
132
+ for (size_t i = 0; i < AW_R40_NUM_USB; i++) {
26
-TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1)
133
+ object_initialize_child(obj, "ehci[*]", &s->ehci[i],
27
+static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
134
+ TYPE_PLATFORM_EHCI);
28
+{
135
+ object_initialize_child(obj, "ohci[*]", &s->ohci[i],
29
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
136
+ TYPE_SYSBUS_OHCI);
30
+ TCGv_i32 t32;
31
+ int check;
32
+
33
+ if (!dc_isar_feature(aa64_bf16, s)) {
34
+ return false;
137
+ }
35
+ }
138
+
36
+
139
object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C_SUN6I);
37
+ check = fp_access_check_scalar_hsd(s, a->esz);
140
141
object_initialize_child(obj, "emac", &s->emac, TYPE_AW_EMAC);
142
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp)
143
sysbus_realize(SYS_BUS_DEVICE(&s->ccu), &error_fatal);
144
sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccu), 0, s->memmap[AW_R40_DEV_CCU]);
145
146
+ /* USB */
147
+ for (size_t i = 0; i < AW_R40_NUM_USB; i++) {
148
+ g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i);
149
+
38
+
150
+ object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable", true,
39
+ if (check <= 0) {
151
+ &error_fatal);
40
+ return check == 0;
152
+ sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal);
153
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0,
154
+ allwinner_r40_memmap[i ? AW_R40_DEV_EHCI2
155
+ : AW_R40_DEV_EHCI1]);
156
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
157
+ qdev_get_gpio_in(DEVICE(&s->gic),
158
+ i ? AW_R40_GIC_SPI_EHCI2
159
+ : AW_R40_GIC_SPI_EHCI1));
160
+
161
+ object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus,
162
+ &error_fatal);
163
+ sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal);
164
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0,
165
+ allwinner_r40_memmap[i ? AW_R40_DEV_OHCI2
166
+ : AW_R40_DEV_OHCI1]);
167
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0,
168
+ qdev_get_gpio_in(DEVICE(&s->gic),
169
+ i ? AW_R40_GIC_SPI_OHCI2
170
+ : AW_R40_GIC_SPI_OHCI1));
171
+ }
41
+ }
172
+
42
+
173
/* SD/MMC */
43
+ t32 = read_fp_sreg(s, a->rn);
174
for (int i = 0; i < AW_R40_NUM_MMCS; i++) {
44
+ gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
175
qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->gic),
45
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
176
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
46
+ return true;
177
index XXXXXXX..XXXXXXX 100644
47
+}
178
--- a/hw/arm/Kconfig
48
179
+++ b/hw/arm/Kconfig
49
static const FPScalar1 f_scalar_frint32 = {
180
@@ -XXX,XX +XXX,XX @@ config ALLWINNER_R40
50
NULL,
181
select ARM_TIMER
182
select ARM_GIC
183
select UNIMP
184
+ select USB_OHCI
185
+ select USB_EHCI_SYSBUS
186
select SD
187
188
config RASPI
189
--
51
--
190
2.34.1
52
2.34.1
191
192
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP for the 1-input scalar operations.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------
7
1 file changed, 14 insertions(+), 12 deletions(-)
8
9
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/tcg/translate-a64.c
12
+++ b/target/arm/tcg/translate-a64.c
13
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
14
case MO_64:
15
t64 = read_fp_dreg(s, a->rn);
16
f->gen_d(t64, t64, fpst);
17
- write_fp_dreg(s, a->rd, t64);
18
+ write_fp_dreg_merging(s, a->rd, a->rd, t64);
19
break;
20
case MO_32:
21
t32 = read_fp_sreg(s, a->rn);
22
f->gen_s(t32, t32, fpst);
23
- write_fp_sreg(s, a->rd, t32);
24
+ write_fp_sreg_merging(s, a->rd, a->rd, t32);
25
break;
26
case MO_16:
27
t32 = read_fp_hreg(s, a->rn);
28
f->gen_h(t32, t32, fpst);
29
- write_fp_sreg(s, a->rd, t32);
30
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
31
break;
32
default:
33
g_assert_not_reached();
34
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
35
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
36
37
gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
38
- write_fp_dreg(s, a->rd, tcg_rd);
39
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
40
}
41
return true;
42
}
43
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
44
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
45
46
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
47
- /* write_fp_sreg is OK here because top half of result is zero */
48
- write_fp_sreg(s, a->rd, tmp);
49
+ /* write_fp_hreg_merging is OK here because top half of result is zero */
50
+ write_fp_hreg_merging(s, a->rd, a->rd, tmp);
51
}
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
55
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
56
57
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
58
- write_fp_sreg(s, a->rd, tcg_rd);
59
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
60
}
61
return true;
62
}
63
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
64
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
65
66
gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
67
- /* write_fp_sreg is OK here because top half of tcg_rd is zero */
68
- write_fp_sreg(s, a->rd, tcg_rd);
69
+ /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
70
+ write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
71
}
72
return true;
73
}
74
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
75
TCGv_i32 tcg_ahp = get_ahp_flag();
76
77
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
78
- write_fp_sreg(s, a->rd, tcg_rd);
79
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
80
}
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
84
TCGv_i32 tcg_ahp = get_ahp_flag();
85
86
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
87
- write_fp_dreg(s, a->rd, tcg_rd);
88
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
89
}
90
return true;
91
}
92
@@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
93
do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
94
a->esz, tcg_int, a->shift, a->rn, rmode);
95
96
- clear_vec(s, a->rd);
97
+ if (!s->fpcr_nep) {
98
+ clear_vec(s, a->rd);
99
+ }
100
write_vec_element(s, tcg_int, a->rd, 0, a->esz);
101
return true;
102
}
103
--
104
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP in the operations handled by do_cvtf_scalar().
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/arm/tcg/translate-a64.c | 6 +++---
7
1 file changed, 3 insertions(+), 3 deletions(-)
8
9
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/tcg/translate-a64.c
12
+++ b/target/arm/tcg/translate-a64.c
13
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
14
} else {
15
gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
16
}
17
- write_fp_dreg(s, rd, tcg_double);
18
+ write_fp_dreg_merging(s, rd, rd, tcg_double);
19
break;
20
21
case MO_32:
22
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
23
} else {
24
gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
25
}
26
- write_fp_sreg(s, rd, tcg_single);
27
+ write_fp_sreg_merging(s, rd, rd, tcg_single);
28
break;
29
30
case MO_16:
31
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
32
} else {
33
gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
34
}
35
- write_fp_sreg(s, rd, tcg_single);
36
+ write_fp_hreg_merging(s, rd, rd, tcg_single);
37
break;
38
39
default:
40
--
41
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP merging for scalar FABS and FNEG; this requires
2
an extra parameter to do_fp1_scalar_int(), since FMOV scalar
3
does not have the merging behaviour.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++-------
9
1 file changed, 20 insertions(+), 7 deletions(-)
10
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/tcg/translate-a64.c
14
+++ b/target/arm/tcg/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int {
16
} FPScalar1Int;
17
18
static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
19
- const FPScalar1Int *f)
20
+ const FPScalar1Int *f,
21
+ bool merging)
22
{
23
switch (a->esz) {
24
case MO_64:
25
if (fp_access_check(s)) {
26
TCGv_i64 t = read_fp_dreg(s, a->rn);
27
f->gen_d(t, t);
28
- write_fp_dreg(s, a->rd, t);
29
+ if (merging) {
30
+ write_fp_dreg_merging(s, a->rd, a->rd, t);
31
+ } else {
32
+ write_fp_dreg(s, a->rd, t);
33
+ }
34
}
35
break;
36
case MO_32:
37
if (fp_access_check(s)) {
38
TCGv_i32 t = read_fp_sreg(s, a->rn);
39
f->gen_s(t, t);
40
- write_fp_sreg(s, a->rd, t);
41
+ if (merging) {
42
+ write_fp_sreg_merging(s, a->rd, a->rd, t);
43
+ } else {
44
+ write_fp_sreg(s, a->rd, t);
45
+ }
46
}
47
break;
48
case MO_16:
49
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
50
if (fp_access_check(s)) {
51
TCGv_i32 t = read_fp_hreg(s, a->rn);
52
f->gen_h(t, t);
53
- write_fp_sreg(s, a->rd, t);
54
+ if (merging) {
55
+ write_fp_hreg_merging(s, a->rd, a->rd, t);
56
+ } else {
57
+ write_fp_sreg(s, a->rd, t);
58
+ }
59
}
60
break;
61
default:
62
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = {
63
tcg_gen_mov_i32,
64
tcg_gen_mov_i64,
65
};
66
-TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
67
+TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
68
69
static const FPScalar1Int f_scalar_fabs = {
70
gen_vfp_absh,
71
gen_vfp_abss,
72
gen_vfp_absd,
73
};
74
-TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
75
+TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true)
76
77
static const FPScalar1Int f_scalar_fneg = {
78
gen_vfp_negh,
79
gen_vfp_negs,
80
gen_vfp_negd,
81
};
82
-TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
83
+TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true)
84
85
typedef struct FPScalar1 {
86
void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
87
--
88
2.34.1
diff view generated by jsdifflib
New patch
1
Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar)
2
is always double-to-single and must honour FPCR.NEP. Implement this
3
directly in a trans function rather than using
4
do_2misc_narrow_scalar().
1
5
6
We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for
7
the FCVTXN (vector) insn, so we move those down in the file to
8
where they are used.
9
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------
14
1 file changed, 28 insertions(+), 15 deletions(-)
15
16
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/tcg/translate-a64.c
19
+++ b/target/arm/tcg/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = {
21
};
22
TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
23
24
-static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
25
+static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
26
{
27
- /*
28
- * 64 bit to 32 bit float conversion
29
- * with von Neumann rounding (round to odd)
30
- */
31
- TCGv_i32 tmp = tcg_temp_new_i32();
32
- gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
33
- tcg_gen_extu_i32_i64(d, tmp);
34
+ if (fp_access_check(s)) {
35
+ /*
36
+ * 64 bit to 32 bit float conversion
37
+ * with von Neumann rounding (round to odd)
38
+ */
39
+ TCGv_i64 src = read_fp_dreg(s, a->rn);
40
+ TCGv_i32 dst = tcg_temp_new_i32();
41
+ gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
42
+ write_fp_sreg_merging(s, a->rd, a->rd, dst);
43
+ }
44
+ return true;
45
}
46
47
-static ArithOneOp * const f_scalar_fcvtxn[] = {
48
- NULL,
49
- NULL,
50
- gen_fcvtxn_sd,
51
-};
52
-TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)
53
-
54
#undef WRAP_ENV
55
56
static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
57
@@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
58
tcg_gen_extu_i32_i64(d, tmp);
59
}
60
61
+static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
62
+{
63
+ /*
64
+ * 64 bit to 32 bit float conversion
65
+ * with von Neumann rounding (round to odd)
66
+ */
67
+ TCGv_i32 tmp = tcg_temp_new_i32();
68
+ gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
69
+ tcg_gen_extu_i32_i64(d, tmp);
70
+}
71
+
72
static ArithOneOp * const f_vector_fcvtn[] = {
73
NULL,
74
gen_fcvtn_hs,
75
gen_fcvtn_sd,
76
};
77
+static ArithOneOp * const f_scalar_fcvtxn[] = {
78
+ NULL,
79
+ NULL,
80
+ gen_fcvtxn_sd,
81
+};
82
TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
83
TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
84
85
--
86
2.34.1
diff view generated by jsdifflib
New patch
1
do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by
2
element instructions; these both need to merge the result with the Rn
3
register when FPCR.NEP is set.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 6 +++---
9
1 file changed, 3 insertions(+), 3 deletions(-)
10
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/tcg/translate-a64.c
14
+++ b/target/arm/tcg/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
16
17
read_vec_element(s, t1, a->rm, a->idx, MO_64);
18
f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
19
- write_fp_dreg(s, a->rd, t0);
20
+ write_fp_dreg_merging(s, a->rd, a->rn, t0);
21
}
22
break;
23
case MO_32:
24
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
25
26
read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
27
f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
28
- write_fp_sreg(s, a->rd, t0);
29
+ write_fp_sreg_merging(s, a->rd, a->rn, t0);
30
}
31
break;
32
case MO_16:
33
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
34
35
read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
36
f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
37
- write_fp_sreg(s, a->rd, t0);
38
+ write_fp_hreg_merging(s, a->rd, a->rn, t0);
39
}
40
break;
41
default:
42
--
43
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH == 1, floating point FMIN and FMAX have some odd special
2
cases:
1
3
4
* comparing two zeroes (even of different sign) or comparing a NaN
5
with anything always returns the second argument (possibly
6
squashed to zero)
7
* denormal outputs are not squashed to zero regardless of FZ or FZ16
8
9
Implement these semantics in new helper functions and select them at
10
translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns.
11
(We will convert the other FMAX and FMIN insns in subsequent
12
commits.)
13
14
Note that FMINNM and FMAXNM are not affected.
15
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
---
19
target/arm/tcg/helper-a64.h | 7 +++++++
20
target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++
21
target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++--
22
3 files changed, 64 insertions(+), 2 deletions(-)
23
24
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/tcg/helper-a64.h
27
+++ b/target/arm/tcg/helper-a64.h
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst)
29
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
30
DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
31
32
+DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst)
33
+DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst)
34
+DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst)
35
+DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst)
36
+DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst)
37
+DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst)
38
+
39
DEF_HELPER_2(exception_return, void, env, i64)
40
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
41
42
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/helper-a64.c
45
+++ b/target/arm/tcg/helper-a64.c
46
@@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
47
return r;
48
}
49
50
+/*
51
+ * AH=1 min/max have some odd special cases:
52
+ * comparing two zeroes (regardless of sign), (NaN, anything),
53
+ * or (anything, NaN) should return the second argument (possibly
54
+ * squashed to zero).
55
+ * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
56
+ */
57
+#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
58
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
59
+ { \
60
+ bool save; \
61
+ CTYPE r; \
62
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
63
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
64
+ if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
65
+ return b; \
66
+ } \
67
+ if (FLOATTYPE ## _is_any_nan(a) || \
68
+ FLOATTYPE ## _is_any_nan(b)) { \
69
+ float_raise(float_flag_invalid, fpst); \
70
+ return b; \
71
+ } \
72
+ save = get_flush_to_zero(fpst); \
73
+ set_flush_to_zero(false, fpst); \
74
+ r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
75
+ set_flush_to_zero(save, fpst); \
76
+ return r; \
77
+ }
78
+
79
+AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
80
+AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
81
+AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
82
+AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
83
+AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
84
+AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
85
+
86
/* 64-bit versions of the CRC helpers. Note that although the operation
87
* (and the prototypes of crc32c() and crc32() mean that only the bottom
88
* 32 bits of the accumulator and result are used, we pass and return
89
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/arm/tcg/translate-a64.c
92
+++ b/target/arm/tcg/translate-a64.c
93
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
94
select_ah_fpst(s, a->esz));
95
}
96
97
+/* Some insns need to call different helpers when FPCR.AH == 1 */
98
+static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
99
+ const FPScalar *fnormal,
100
+ const FPScalar *fah,
101
+ int mergereg)
102
+{
103
+ return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
104
+}
105
+
106
static const FPScalar f_scalar_fadd = {
107
gen_helper_vfp_addh,
108
gen_helper_vfp_adds,
109
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = {
110
gen_helper_vfp_maxs,
111
gen_helper_vfp_maxd,
112
};
113
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn)
114
+static const FPScalar f_scalar_fmax_ah = {
115
+ gen_helper_vfp_ah_maxh,
116
+ gen_helper_vfp_ah_maxs,
117
+ gen_helper_vfp_ah_maxd,
118
+};
119
+TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
120
121
static const FPScalar f_scalar_fmin = {
122
gen_helper_vfp_minh,
123
gen_helper_vfp_mins,
124
gen_helper_vfp_mind,
125
};
126
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn)
127
+static const FPScalar f_scalar_fmin_ah = {
128
+ gen_helper_vfp_ah_minh,
129
+ gen_helper_vfp_ah_mins,
130
+ gen_helper_vfp_ah_mind,
131
+};
132
+TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
133
134
static const FPScalar f_scalar_fmaxnm = {
135
gen_helper_vfp_maxnumh,
136
--
137
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by
2
creating new _ah_ versions of the gvec helpers which invoke the
3
scalar vfp_ah_min* and vfp_ah_max* helpers on each element.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
9
target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++--
10
target/arm/tcg/vec_helper.c | 8 ++++++++
11
3 files changed, 41 insertions(+), 2 deletions(-)
12
13
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/tcg/helper-sve.h
16
+++ b/target/arm/tcg/helper-sve.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
18
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
19
void, ptr, ptr, ptr, fpst, i32)
20
21
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
22
+ void, ptr, ptr, ptr, fpst, i32)
23
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
24
+ void, ptr, ptr, ptr, fpst, i32)
25
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, fpst, i32)
27
+
28
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, fpst, i32)
30
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, fpst, i32)
32
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, fpst, i32)
34
+
35
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
36
i64, ptr, ptr, fpst, i32)
37
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
38
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/tcg/translate-a64.c
41
+++ b/target/arm/tcg/translate-a64.c
42
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
43
FPST_A64_F16 : FPST_A64);
44
}
45
46
+static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
47
+ gen_helper_gvec_3_ptr * const fnormal[3],
48
+ gen_helper_gvec_3_ptr * const fah[3])
49
+{
50
+ return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
51
+}
52
+
53
static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
54
gen_helper_gvec_3_ptr * const f[3])
55
{
56
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
57
gen_helper_gvec_fmax_s,
58
gen_helper_gvec_fmax_d,
59
};
60
-TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
61
+static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
62
+ gen_helper_gvec_ah_fmax_h,
63
+ gen_helper_gvec_ah_fmax_s,
64
+ gen_helper_gvec_ah_fmax_d,
65
+};
66
+TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
67
68
static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
69
gen_helper_gvec_fmin_h,
70
gen_helper_gvec_fmin_s,
71
gen_helper_gvec_fmin_d,
72
};
73
-TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
74
+static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
75
+ gen_helper_gvec_ah_fmin_h,
76
+ gen_helper_gvec_ah_fmin_s,
77
+ gen_helper_gvec_ah_fmin_d,
78
+};
79
+TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
80
81
static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
82
gen_helper_gvec_fmaxnum_h,
83
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/target/arm/tcg/vec_helper.c
86
+++ b/target/arm/tcg/vec_helper.c
87
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
88
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
89
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
90
91
+DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
92
+DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
93
+DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
94
+
95
+DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16)
96
+DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32)
97
+DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64)
98
+
99
#endif
100
#undef DO_3OP
101
102
--
103
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for FMAXV and FMINV. These are the
2
"recursively reduce all lanes of a vector to a scalar result" insns;
3
we just need to use the _ah_ helper for the reduction step when
4
FPCR.AH == 1.
1
5
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++----------
10
1 file changed, 18 insertions(+), 10 deletions(-)
11
12
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/translate-a64.c
15
+++ b/target/arm/tcg/translate-a64.c
16
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
17
}
18
19
static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
20
- NeonGenTwoSingleOpFn *fn)
21
+ NeonGenTwoSingleOpFn *fnormal,
22
+ NeonGenTwoSingleOpFn *fah)
23
{
24
if (fp_access_check(s)) {
25
MemOp esz = a->esz;
26
int elts = (a->q ? 16 : 8) >> esz;
27
TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
28
- TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
29
+ TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
30
+ s->fpcr_ah ? fah : fnormal);
31
write_fp_sreg(s, a->rd, res);
32
}
33
return true;
34
}
35
36
-TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh)
37
-TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh)
38
-TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh)
39
-TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh)
40
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
41
+ gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
42
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
43
+ gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
44
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
45
+ gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
46
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
47
+ gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
48
49
-TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
50
-TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
51
-TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
52
-TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
53
+TRANS(FMAXNMV_s, do_fp_reduction, a,
54
+ gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
55
+TRANS(FMINNMV_s, do_fp_reduction, a,
56
+ gen_helper_vfp_minnums, gen_helper_vfp_minnums)
57
+TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
58
+TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
59
60
/*
61
* Floating-point Immediate
62
--
63
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the pairwise floating
2
point minimum/maximum insns FMINP and FMAXP.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
8
target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++----
9
target/arm/tcg/vec_helper.c | 10 ++++++++++
10
3 files changed, 45 insertions(+), 4 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
35
i64, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/translate-a64.c
40
+++ b/target/arm/tcg/translate-a64.c
41
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
42
gen_helper_gvec_fmaxp_s,
43
gen_helper_gvec_fmaxp_d,
44
};
45
-TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
46
+static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
47
+ gen_helper_gvec_ah_fmaxp_h,
48
+ gen_helper_gvec_ah_fmaxp_s,
49
+ gen_helper_gvec_ah_fmaxp_d,
50
+};
51
+TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
52
53
static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
54
gen_helper_gvec_fminp_h,
55
gen_helper_gvec_fminp_s,
56
gen_helper_gvec_fminp_d,
57
};
58
-TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
59
+static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
60
+ gen_helper_gvec_ah_fminp_h,
61
+ gen_helper_gvec_ah_fminp_s,
62
+ gen_helper_gvec_ah_fminp_d,
63
+};
64
+TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
65
66
static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
67
gen_helper_gvec_fmaxnump_h,
68
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
69
return true;
70
}
71
72
+static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
73
+ const FPScalar *fnormal,
74
+ const FPScalar *fah)
75
+{
76
+ return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
77
+}
78
+
79
TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
80
-TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
81
-TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
82
+TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
83
+TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
84
TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
85
TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
86
87
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/tcg/vec_helper.c
90
+++ b/target/arm/tcg/vec_helper.c
91
@@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
92
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
93
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
94
95
+#ifdef TARGET_AARCH64
96
+DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2)
97
+DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4)
98
+DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, )
99
+
100
+DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2)
101
+DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4)
102
+DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, )
103
+#endif
104
+
105
#undef DO_3OP_PAIR
106
107
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
108
--
109
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the SVE FMAXV and FMINV
2
vector-reduction-to-scalar max/min operations.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 +++++++++++
8
target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++-------------
9
target/arm/tcg/translate-sve.c | 16 +++++++++++--
10
3 files changed, 55 insertions(+), 18 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG,
18
i64, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG,
21
+ i64, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG,
23
+ i64, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG,
25
+ i64, ptr, ptr, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG,
28
+ i64, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG,
30
+ i64, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG,
32
+ i64, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
35
i64, i64, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
42
uintptr_t half = n / 2; \
43
TYPE lo = NAME##_reduce(data, status, half); \
44
TYPE hi = NAME##_reduce(data + half, status, half); \
45
- return TYPE##_##FUNC(lo, hi, status); \
46
+ return FUNC(lo, hi, status); \
47
} \
48
} \
49
uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
50
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
51
return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
52
}
53
54
-DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
55
-DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
56
-DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
57
+DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero)
58
+DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero)
59
+DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero)
60
61
/* Identity is floatN_default_nan, without the function call. */
62
-DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
63
-DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
64
-DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
65
+DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00)
66
+DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000)
67
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
68
69
-DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
70
-DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
71
-DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
72
+DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00)
73
+DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000)
74
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
75
76
-DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
77
-DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
78
-DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
79
+DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity)
80
+DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity)
81
+DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity)
82
83
-DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
84
-DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
85
-DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
86
+DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity))
87
+DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity))
88
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity))
89
+
90
+DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity)
91
+DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity)
92
+DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity)
93
+
94
+DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh,
95
+ float16_chs(float16_infinity))
96
+DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs,
97
+ float32_chs(float32_infinity))
98
+DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd,
99
+ float64_chs(float64_infinity))
100
101
#undef DO_REDUCE
102
103
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/tcg/translate-sve.c
106
+++ b/target/arm/tcg/translate-sve.c
107
@@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
108
}; \
109
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
110
111
+#define DO_VPZ_AH(NAME, name) \
112
+ static gen_helper_fp_reduce * const name##_fns[4] = { \
113
+ NULL, gen_helper_sve_##name##_h, \
114
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
115
+ }; \
116
+ static gen_helper_fp_reduce * const name##_ah_fns[4] = { \
117
+ NULL, gen_helper_sve_ah_##name##_h, \
118
+ gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \
119
+ }; \
120
+ TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \
121
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
122
+
123
DO_VPZ(FADDV, faddv)
124
DO_VPZ(FMINNMV, fminnmv)
125
DO_VPZ(FMAXNMV, fmaxnmv)
126
-DO_VPZ(FMINV, fminv)
127
-DO_VPZ(FMAXV, fmaxv)
128
+DO_VPZ_AH(FMINV, fminv)
129
+DO_VPZ_AH(FMAXV, fmaxv)
130
131
#undef DO_VPZ
132
133
--
134
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations
2
that take an immediate as the second operand.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++--
10
3 files changed, 45 insertions(+), 2 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, i64, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, i64, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, i64, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, i64, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, i64, fpst, i32)
29
+DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, i64, fpst, i32)
31
+DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, i64, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG,
35
void, ptr, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
42
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
43
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
44
45
+DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh)
46
+DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs)
47
+DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd)
48
+
49
+DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh)
50
+DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins)
51
+DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind)
52
+
53
/* Fully general two-operand expander, controlled by a predicate,
54
* With the extra float_status parameter.
55
*/
56
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/tcg/translate-sve.c
59
+++ b/target/arm/tcg/translate-sve.c
60
@@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
61
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
62
name##_const[a->esz][a->imm], name##_fns[a->esz])
63
64
+#define DO_FP_AH_IMM(NAME, name, const0, const1) \
65
+ static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
66
+ NULL, gen_helper_sve_##name##_h, \
67
+ gen_helper_sve_##name##_s, \
68
+ gen_helper_sve_##name##_d \
69
+ }; \
70
+ static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \
71
+ NULL, gen_helper_sve_ah_##name##_h, \
72
+ gen_helper_sve_ah_##name##_s, \
73
+ gen_helper_sve_ah_##name##_d \
74
+ }; \
75
+ static uint64_t const name##_const[4][2] = { \
76
+ { -1, -1 }, \
77
+ { float16_##const0, float16_##const1 }, \
78
+ { float32_##const0, float32_##const1 }, \
79
+ { float64_##const0, float64_##const1 }, \
80
+ }; \
81
+ TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
82
+ name##_const[a->esz][a->imm], \
83
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
84
+
85
DO_FP_IMM(FADD, fadds, half, one)
86
DO_FP_IMM(FSUB, fsubs, half, one)
87
DO_FP_IMM(FMUL, fmuls, half, two)
88
DO_FP_IMM(FSUBR, fsubrs, half, one)
89
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
90
DO_FP_IMM(FMINNM, fminnms, zero, one)
91
-DO_FP_IMM(FMAX, fmaxs, zero, one)
92
-DO_FP_IMM(FMIN, fmins, zero, one)
93
+DO_FP_AH_IMM(FMAX, fmaxs, zero, one)
94
+DO_FP_AH_IMM(FMIN, fmins, zero, one)
95
96
#undef DO_FP_IMM
97
98
--
99
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the SVE FMAX and FMIN
2
operations that take two vector operands.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 17 +++++++++++++++--
10
3 files changed, 37 insertions(+), 2 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
35
void, ptr, ptr, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
42
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
43
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
44
45
+DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
46
+DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
47
+DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
48
+
49
+DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
50
+DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
51
+DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
52
+
53
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
54
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
55
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
56
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/tcg/translate-sve.c
59
+++ b/target/arm/tcg/translate-sve.c
60
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
61
}; \
62
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
63
64
+#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \
65
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
66
+ NULL, gen_helper_##name##_h, \
67
+ gen_helper_##name##_s, gen_helper_##name##_d \
68
+ }; \
69
+ static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
70
+ NULL, gen_helper_##ah_name##_h, \
71
+ gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
72
+ }; \
73
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
74
+ s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
75
+ name##_zpzz_fns[a->esz], a)
76
+
77
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
78
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
79
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
80
-DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
81
-DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
82
+DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
83
+DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
84
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
85
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
86
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
87
--
88
2.34.1
diff view generated by jsdifflib
1
From: Rayhan Faizel <rayhan.faizel@gmail.com>
1
FPCR.AH == 1 mandates that negation of a NaN value should not flip
2
2
its sign bit. This means we can no longer use gen_vfp_neg*()
3
This patch implements a 32 half word FIFO as per imx serial device
3
everywhere but must instead generate slightly more complex code when
4
specifications. If a non empty FIFO is below the trigger level, an
4
FPCR.AH is set.
5
ageing timer will tick for a duration of 8 characters. On expiry,
5
6
AGTIM will be set triggering an interrupt. AGTIM timer resets when
6
Make this change for the scalar FNEG and for those places in
7
there is activity in the receive FIFO.
7
translate-a64.c which were previously directly calling
8
8
gen_vfp_neg*().
9
Otherwise, RRDY is set when trigger level is exceeded. The receive
9
10
trigger level is 8 in newer kernel versions and 1 in older ones.
10
This change in semantics also affects any other instruction whose
11
11
pseudocode calls FPNeg(); in following commits we extend this
12
This change will break migration compatibility for the imx boards.
12
change to the other affected instructions.
13
13
14
Signed-off-by: Rayhan Faizel <rayhan.faizel@gmail.com>
15
Message-id: 20240125151931.83494-1-rayhan.faizel@gmail.com
16
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
17
[PMM: commit message tidyups]
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
---
16
---
20
include/hw/char/imx_serial.h | 20 ++++++-
17
target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++---
21
hw/char/imx_serial.c | 102 ++++++++++++++++++++++++++++++-----
18
1 file changed, 114 insertions(+), 11 deletions(-)
22
2 files changed, 108 insertions(+), 14 deletions(-)
19
23
20
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
24
diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h
25
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
26
--- a/include/hw/char/imx_serial.h
22
--- a/target/arm/tcg/translate-a64.c
27
+++ b/include/hw/char/imx_serial.h
23
+++ b/target/arm/tcg/translate-a64.c
28
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
29
#include "hw/sysbus.h"
25
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
30
#include "chardev/char-fe.h"
31
#include "qom/object.h"
32
+#include "qemu/fifo32.h"
33
34
#define TYPE_IMX_SERIAL "imx.serial"
35
OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL)
36
37
+#define FIFO_SIZE 32
38
+
39
#define URXD_CHARRDY (1<<15) /* character read is valid */
40
#define URXD_ERR (1<<14) /* Character has error */
41
+#define URXD_OVRRUN (1<<13) /* 32nd character in RX FIFO */
42
#define URXD_FRMERR (1<<12) /* Character has frame error */
43
#define URXD_BRK (1<<11) /* Break received */
44
45
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL)
46
#define UCR1_TXMPTYEN (1<<6) /* Tx Empty Interrupt Enable */
47
#define UCR1_UARTEN (1<<0) /* UART Enable */
48
49
+#define UCR2_ATEN (1<<3) /* Ageing Timer Enable */
50
#define UCR2_TXEN (1<<2) /* Transmitter enable */
51
#define UCR2_RXEN (1<<1) /* Receiver enable */
52
#define UCR2_SRST (1<<0) /* Reset complete */
53
54
#define UCR4_DREN BIT(0) /* Receive Data Ready interrupt enable */
55
+#define UCR4_OREN BIT(1) /* Overrun interrupt enable */
56
#define UCR4_TCEN BIT(3) /* TX complete interrupt enable */
57
#define UCR4_WKEN BIT(7) /* WAKE interrupt enable */
58
59
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL)
60
#define UTS1_TXFULL (1<<4)
61
#define UTS1_RXFULL (1<<3)
62
63
+#define TL_MASK 0x3f
64
+
65
+ /* Bit time in nanoseconds assuming maximum baud rate of 115200 */
66
+#define BIT_TIME_NS 8681
67
+
68
+/* Assume 8 bits per character */
69
+#define NUM_BITS 8
70
+
71
+/* Ageing timer triggers after 8 characters */
72
+#define AGE_DURATION_NS (8 * NUM_BITS * BIT_TIME_NS)
73
+
74
struct IMXSerialState {
75
/*< private >*/
76
SysBusDevice parent_obj;
77
78
/*< public >*/
79
MemoryRegion iomem;
80
- int32_t readbuff;
81
+ QEMUTimer ageing_timer;
82
+ Fifo32 rx_fifo;
83
84
uint32_t usr1;
85
uint32_t usr2;
86
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/hw/char/imx_serial.c
89
+++ b/hw/char/imx_serial.c
90
@@ -XXX,XX +XXX,XX @@
91
#include "migration/vmstate.h"
92
#include "qemu/log.h"
93
#include "qemu/module.h"
94
+#include "qemu/fifo32.h"
95
96
#ifndef DEBUG_IMX_UART
97
#define DEBUG_IMX_UART 0
98
@@ -XXX,XX +XXX,XX @@
99
100
static const VMStateDescription vmstate_imx_serial = {
101
.name = TYPE_IMX_SERIAL,
102
- .version_id = 2,
103
- .minimum_version_id = 2,
104
+ .version_id = 3,
105
+ .minimum_version_id = 3,
106
.fields = (const VMStateField[]) {
107
- VMSTATE_INT32(readbuff, IMXSerialState),
108
+ VMSTATE_FIFO32(rx_fifo, IMXSerialState),
109
+ VMSTATE_TIMER(ageing_timer, IMXSerialState),
110
VMSTATE_UINT32(usr1, IMXSerialState),
111
VMSTATE_UINT32(usr2, IMXSerialState),
112
VMSTATE_UINT32(ucr1, IMXSerialState),
113
@@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s)
114
* following:
115
*/
116
usr1 = s->usr1 & s->ucr1 & (USR1_TRDY | USR1_RRDY);
117
+ /*
118
+ * Interrupt if AGTIM is set (ageing timer interrupt in RxFIFO)
119
+ */
120
+ usr1 |= (s->ucr2 & UCR2_ATEN) ? (s->usr1 & USR1_AGTIM) : 0;
121
/*
122
* Bits that we want in USR2 are not as conveniently laid out,
123
* unfortunately.
124
@@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s)
125
mask = (s->ucr1 & UCR1_TXMPTYEN) ? USR2_TXFE : 0;
126
/*
127
* TCEN and TXDC are both bit 3
128
+ * ORE and OREN are both bit 1
129
* RDR and DREN are both bit 0
130
*/
131
- mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN);
132
+ mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN | UCR4_OREN);
133
134
usr2 = s->usr2 & mask;
135
136
qemu_set_irq(s->irq, usr1 || usr2);
137
}
26
}
138
27
139
+static void imx_serial_rx_fifo_push(IMXSerialState *s, uint32_t value)
28
+/*
140
+{
29
+ * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
141
+ uint32_t pushed_value = value;
30
+ * These functions implement
142
+ if (fifo32_is_full(&s->rx_fifo)) {
31
+ * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
143
+ /* Set ORE if FIFO is already full */
32
+ * which for float32 is
144
+ s->usr2 |= USR2_ORE;
33
+ * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
34
+ * and similarly for the other float sizes.
35
+ */
36
+static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
37
+{
38
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
39
+
40
+ gen_vfp_negh(chs_s, s);
41
+ gen_vfp_absh(abs_s, s);
42
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
43
+ abs_s, tcg_constant_i32(0x7c00),
44
+ s, chs_s);
45
+}
46
+
47
+static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
48
+{
49
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
50
+
51
+ gen_vfp_negs(chs_s, s);
52
+ gen_vfp_abss(abs_s, s);
53
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
54
+ abs_s, tcg_constant_i32(0x7f800000UL),
55
+ s, chs_s);
56
+}
57
+
58
+static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
59
+{
60
+ TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
61
+
62
+ gen_vfp_negd(chs_s, s);
63
+ gen_vfp_absd(abs_s, s);
64
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
65
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
66
+ s, chs_s);
67
+}
68
+
69
+static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
70
+{
71
+ if (dc->fpcr_ah) {
72
+ gen_vfp_ah_negh(d, s);
145
+ } else {
73
+ } else {
146
+ if (fifo32_num_used(&s->rx_fifo) == FIFO_SIZE - 1) {
74
+ gen_vfp_negh(d, s);
147
+ /* Set OVRRUN on 32nd character in FIFO */
148
+ pushed_value |= URXD_ERR | URXD_OVRRUN;
149
+ }
150
+ fifo32_push(&s->rx_fifo, pushed_value);
151
+ }
75
+ }
152
+}
76
+}
153
+
77
+
154
+static uint32_t imx_serial_rx_fifo_pop(IMXSerialState *s)
78
+static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
155
+{
79
+{
156
+ if (fifo32_is_empty(&s->rx_fifo)) {
80
+ if (dc->fpcr_ah) {
157
+ return 0;
81
+ gen_vfp_ah_negs(d, s);
82
+ } else {
83
+ gen_vfp_negs(d, s);
158
+ }
84
+ }
159
+ return fifo32_pop(&s->rx_fifo);
85
+}
160
+}
86
+
161
+
87
+static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
162
+static void imx_serial_rx_fifo_ageing_timer_int(void *opaque)
88
+{
163
+{
89
+ if (dc->fpcr_ah) {
164
+ IMXSerialState *s = (IMXSerialState *) opaque;
90
+ gen_vfp_ah_negd(d, s);
165
+ s->usr1 |= USR1_AGTIM;
166
+ imx_update(s);
167
+}
168
+
169
+static void imx_serial_rx_fifo_ageing_timer_restart(void *opaque)
170
+{
171
+ /*
172
+ * Ageing timer starts ticking when
173
+ * RX FIFO is non empty and below trigger level.
174
+ * Timer is reset if new character is received or
175
+ * a FIFO read occurs.
176
+ * Timer triggers an interrupt when duration of
177
+ * 8 characters has passed (assuming 115200 baudrate).
178
+ */
179
+ IMXSerialState *s = (IMXSerialState *) opaque;
180
+
181
+ if (!(s->usr1 & USR1_RRDY) && !(s->uts1 & UTS1_RXEMPTY)) {
182
+ timer_mod_ns(&s->ageing_timer,
183
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + AGE_DURATION_NS);
184
+ } else {
91
+ } else {
185
+ timer_del(&s->ageing_timer);
92
+ gen_vfp_negd(d, s);
186
+ }
93
+ }
187
+}
94
+}
188
+
95
+
189
static void imx_serial_reset(IMXSerialState *s)
96
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
190
{
97
* than the 32 bit equivalent.
191
98
*/
192
@@ -XXX,XX +XXX,XX @@ static void imx_serial_reset(IMXSerialState *s)
99
@@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
193
s->ucr3 = 0x700;
100
gen_vfp_negd(d, d);
194
s->ubmr = 0;
195
s->ubrc = 4;
196
- s->readbuff = URXD_ERR;
197
+
198
+ fifo32_reset(&s->rx_fifo);
199
+ timer_del(&s->ageing_timer);
200
}
101
}
201
102
202
static void imx_serial_reset_at_boot(DeviceState *dev)
103
+static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
203
@@ -XXX,XX +XXX,XX @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
104
+{
204
unsigned size)
105
+ gen_helper_vfp_mulh(d, n, m, s);
205
{
106
+ gen_vfp_ah_negh(d, d);
206
IMXSerialState *s = (IMXSerialState *)opaque;
107
+}
207
- uint32_t c;
108
+
208
+ uint32_t c, rx_used;
109
+static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
209
+ uint8_t rxtl = s->ufcr & TL_MASK;
110
+{
210
111
+ gen_helper_vfp_muls(d, n, m, s);
211
DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset);
112
+ gen_vfp_ah_negs(d, d);
212
113
+}
213
switch (offset >> 2) {
114
+
214
case 0x0: /* URXD */
115
+static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
215
- c = s->readbuff;
116
+{
216
+ c = imx_serial_rx_fifo_pop(s);
117
+ gen_helper_vfp_muld(d, n, m, s);
217
if (!(s->uts1 & UTS1_RXEMPTY)) {
118
+ gen_vfp_ah_negd(d, d);
218
/* Character is valid */
119
+}
219
c |= URXD_CHARRDY;
120
+
220
- s->usr1 &= ~USR1_RRDY;
121
static const FPScalar f_scalar_fnmul = {
221
- s->usr2 &= ~USR2_RDR;
122
gen_fnmul_h,
222
- s->uts1 |= UTS1_RXEMPTY;
123
gen_fnmul_s,
223
+ rx_used = fifo32_num_used(&s->rx_fifo);
124
gen_fnmul_d,
224
+ /* Clear RRDY if below threshold */
125
};
225
+ if (rx_used < rxtl) {
126
-TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn)
226
+ s->usr1 &= ~USR1_RRDY;
127
+static const FPScalar f_scalar_ah_fnmul = {
227
+ }
128
+ gen_fnmul_ah_h,
228
+ if (rx_used == 0) {
129
+ gen_fnmul_ah_s,
229
+ s->usr2 &= ~USR2_RDR;
130
+ gen_fnmul_ah_d,
230
+ s->uts1 |= UTS1_RXEMPTY;
131
+};
231
+ }
132
+TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
232
imx_update(s);
133
233
+ imx_serial_rx_fifo_ageing_timer_restart(s);
134
static const FPScalar f_scalar_fcmeq = {
234
qemu_chr_fe_accept_input(&s->chr);
135
gen_helper_advsimd_ceq_f16,
235
}
136
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
236
return c;
137
237
@@ -XXX,XX +XXX,XX @@ static void imx_serial_write(void *opaque, hwaddr offset,
138
read_vec_element(s, t2, a->rm, a->idx, MO_64);
238
static int imx_can_receive(void *opaque)
139
if (neg) {
239
{
140
- gen_vfp_negd(t1, t1);
240
IMXSerialState *s = (IMXSerialState *)opaque;
141
+ gen_vfp_maybe_ah_negd(s, t1, t1);
241
- return !(s->usr1 & USR1_RRDY);
142
}
242
+ return s->ucr2 & UCR2_RXEN && fifo32_num_used(&s->rx_fifo) < FIFO_SIZE;
143
gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
144
write_fp_dreg_merging(s, a->rd, a->rd, t0);
145
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
146
147
read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
148
if (neg) {
149
- gen_vfp_negs(t1, t1);
150
+ gen_vfp_maybe_ah_negs(s, t1, t1);
151
}
152
gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
153
write_fp_sreg_merging(s, a->rd, a->rd, t0);
154
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
155
156
read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
157
if (neg) {
158
- gen_vfp_negh(t1, t1);
159
+ gen_vfp_maybe_ah_negh(s, t1, t1);
160
}
161
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
162
fpstatus_ptr(FPST_A64_F16));
163
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
164
TCGv_i64 ta = read_fp_dreg(s, a->ra);
165
166
if (neg_a) {
167
- gen_vfp_negd(ta, ta);
168
+ gen_vfp_maybe_ah_negd(s, ta, ta);
169
}
170
if (neg_n) {
171
- gen_vfp_negd(tn, tn);
172
+ gen_vfp_maybe_ah_negd(s, tn, tn);
173
}
174
fpst = fpstatus_ptr(FPST_A64);
175
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
176
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
177
TCGv_i32 ta = read_fp_sreg(s, a->ra);
178
179
if (neg_a) {
180
- gen_vfp_negs(ta, ta);
181
+ gen_vfp_maybe_ah_negs(s, ta, ta);
182
}
183
if (neg_n) {
184
- gen_vfp_negs(tn, tn);
185
+ gen_vfp_maybe_ah_negs(s, tn, tn);
186
}
187
fpst = fpstatus_ptr(FPST_A64);
188
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
189
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
190
TCGv_i32 ta = read_fp_hreg(s, a->ra);
191
192
if (neg_a) {
193
- gen_vfp_negh(ta, ta);
194
+ gen_vfp_maybe_ah_negh(s, ta, ta);
195
}
196
if (neg_n) {
197
- gen_vfp_negh(tn, tn);
198
+ gen_vfp_maybe_ah_negh(s, tn, tn);
199
}
200
fpst = fpstatus_ptr(FPST_A64_F16);
201
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
202
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
203
return true;
243
}
204
}
244
205
245
static void imx_put_data(void *opaque, uint32_t value)
206
+static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
246
{
207
+ const FPScalar1Int *fnormal,
247
IMXSerialState *s = (IMXSerialState *)opaque;
208
+ const FPScalar1Int *fah)
248
+ uint8_t rxtl = s->ufcr & TL_MASK;
209
+{
249
210
+ return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
250
DPRINTF("received char\n");
211
+}
251
+ imx_serial_rx_fifo_push(s, value);
212
+
252
+ if (fifo32_num_used(&s->rx_fifo) >= rxtl) {
213
static const FPScalar1Int f_scalar_fmov = {
253
+ s->usr1 |= USR1_RRDY;
214
tcg_gen_mov_i32,
254
+ }
215
tcg_gen_mov_i32,
255
+
216
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = {
256
+ imx_serial_rx_fifo_ageing_timer_restart(s);
217
gen_vfp_negs,
257
218
gen_vfp_negd,
258
- s->usr1 |= USR1_RRDY;
219
};
259
s->usr2 |= USR2_RDR;
220
-TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true)
260
s->uts1 &= ~UTS1_RXEMPTY;
221
+static const FPScalar1Int f_scalar_ah_fneg = {
261
- s->readbuff = value;
222
+ gen_vfp_ah_negh,
262
if (value & URXD_BRK) {
223
+ gen_vfp_ah_negs,
263
s->usr2 |= USR2_BRCD;
224
+ gen_vfp_ah_negd,
264
}
225
+};
265
@@ -XXX,XX +XXX,XX @@ static void imx_serial_realize(DeviceState *dev, Error **errp)
226
+TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
266
{
227
267
IMXSerialState *s = IMX_SERIAL(dev);
228
typedef struct FPScalar1 {
268
229
void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
269
+ fifo32_create(&s->rx_fifo, FIFO_SIZE);
270
+ timer_init_ns(&s->ageing_timer, QEMU_CLOCK_VIRTUAL,
271
+ imx_serial_rx_fifo_ageing_timer_int, s);
272
+
273
DPRINTF("char dev for uart: %p\n", qemu_chr_fe_get_driver(&s->chr));
274
275
qemu_chr_fe_set_handlers(&s->chr, imx_can_receive, imx_receive,
276
--
230
--
277
2.34.1
231
2.34.1
diff view generated by jsdifflib
New patch
1
FPCR.AH == 1 mandates that taking the absolute value of a NaN should
2
not change its sign bit. This means we can no longer use
3
gen_vfp_abs*() everywhere but must instead generate slightly more
4
complex code when FPCR.AH is set.
1
5
6
Implement these semantics for scalar FABS and FABD. This change also
7
affects all other instructions whose pseudocode calls FPAbs(); we
8
will extend the change to those instructions in following commits.
9
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++-
14
1 file changed, 67 insertions(+), 2 deletions(-)
15
16
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/tcg/translate-a64.c
19
+++ b/target/arm/tcg/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
21
s, chs_s);
22
}
23
24
+/*
25
+ * These functions implement
26
+ * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
27
+ * which for float32 is
28
+ * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
29
+ * and similarly for the other float sizes.
30
+ */
31
+static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
32
+{
33
+ TCGv_i32 abs_s = tcg_temp_new_i32();
34
+
35
+ gen_vfp_absh(abs_s, s);
36
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
37
+ abs_s, tcg_constant_i32(0x7c00),
38
+ s, abs_s);
39
+}
40
+
41
+static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
42
+{
43
+ TCGv_i32 abs_s = tcg_temp_new_i32();
44
+
45
+ gen_vfp_abss(abs_s, s);
46
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
47
+ abs_s, tcg_constant_i32(0x7f800000UL),
48
+ s, abs_s);
49
+}
50
+
51
+static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
52
+{
53
+ TCGv_i64 abs_s = tcg_temp_new_i64();
54
+
55
+ gen_vfp_absd(abs_s, s);
56
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
57
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
58
+ s, abs_s);
59
+}
60
+
61
static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
62
{
63
if (dc->fpcr_ah) {
64
@@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
65
gen_vfp_absd(d, d);
66
}
67
68
+static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
69
+{
70
+ gen_helper_vfp_subh(d, n, m, s);
71
+ gen_vfp_ah_absh(d, d);
72
+}
73
+
74
+static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
75
+{
76
+ gen_helper_vfp_subs(d, n, m, s);
77
+ gen_vfp_ah_abss(d, d);
78
+}
79
+
80
+static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
81
+{
82
+ gen_helper_vfp_subd(d, n, m, s);
83
+ gen_vfp_ah_absd(d, d);
84
+}
85
+
86
static const FPScalar f_scalar_fabd = {
87
gen_fabd_h,
88
gen_fabd_s,
89
gen_fabd_d,
90
};
91
-TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn)
92
+static const FPScalar f_scalar_ah_fabd = {
93
+ gen_fabd_ah_h,
94
+ gen_fabd_ah_s,
95
+ gen_fabd_ah_d,
96
+};
97
+TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
98
99
static const FPScalar f_scalar_frecps = {
100
gen_helper_recpsf_f16,
101
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = {
102
gen_vfp_abss,
103
gen_vfp_absd,
104
};
105
-TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true)
106
+static const FPScalar1Int f_scalar_ah_fabs = {
107
+ gen_vfp_ah_absh,
108
+ gen_vfp_ah_abss,
109
+ gen_vfp_ah_absd,
110
+};
111
+TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
112
113
static const FPScalar1Int f_scalar_fneg = {
114
gen_vfp_negh,
115
--
116
2.34.1
diff view generated by jsdifflib
New patch
1
Split the handling of vector FABD so that it calls a different set
2
of helpers when FPCR.AH is 1, which implement the "no negation of
3
the sign of a NaN" semantics.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/helper.h | 4 ++++
9
target/arm/tcg/translate-a64.c | 7 ++++++-
10
target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++
11
3 files changed, 33 insertions(+), 1 deletion(-)
12
13
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.h
16
+++ b/target/arm/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
18
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
19
DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
20
21
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
23
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
24
+
25
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
26
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
27
DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
28
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/tcg/translate-a64.c
31
+++ b/target/arm/tcg/translate-a64.c
32
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
33
gen_helper_gvec_fabd_s,
34
gen_helper_gvec_fabd_d,
35
};
36
-TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
37
+static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
38
+ gen_helper_gvec_ah_fabd_h,
39
+ gen_helper_gvec_ah_fabd_s,
40
+ gen_helper_gvec_ah_fabd_d,
41
+};
42
+TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
43
44
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
45
gen_helper_gvec_recps_h,
46
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/vec_helper.c
49
+++ b/target/arm/tcg/vec_helper.c
50
@@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
51
return float64_abs(float64_sub(op1, op2, stat));
52
}
53
54
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
55
+static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat)
56
+{
57
+ float16 r = float16_sub(op1, op2, stat);
58
+ return float16_is_any_nan(r) ? r : float16_abs(r);
59
+}
60
+
61
+static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat)
62
+{
63
+ float32 r = float32_sub(op1, op2, stat);
64
+ return float32_is_any_nan(r) ? r : float32_abs(r);
65
+}
66
+
67
+static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat)
68
+{
69
+ float64 r = float64_sub(op1, op2, stat);
70
+ return float64_is_any_nan(r) ? r : float64_abs(r);
71
+}
72
+
73
/*
74
* Reciprocal step. These are the AArch32 version which uses a
75
* non-fused multiply-and-subtract.
76
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16)
77
DO_3OP(gvec_fabd_s, float32_abd, float32)
78
DO_3OP(gvec_fabd_d, float64_abd, float64)
79
80
+DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16)
81
+DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32)
82
+DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64)
83
+
84
DO_3OP(gvec_fceq_h, float16_ceq, float16)
85
DO_3OP(gvec_fceq_s, float32_ceq, float32)
86
DO_3OP(gvec_fceq_d, float64_ceq, float64)
87
--
88
2.34.1
diff view generated by jsdifflib
New patch
1
Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN"
2
semantics.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 4 ++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 7 ++++++-
10
3 files changed, 18 insertions(+), 1 deletion(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
20
+DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
+DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
+
24
DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/sve_helper.c
30
+++ b/target/arm/tcg/sve_helper.c
31
@@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
32
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
33
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
34
35
+#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N))
36
+#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N))
37
+#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N))
38
+
39
+DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H)
40
+DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S)
41
+DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D)
42
+
43
#define DO_NOT(N) (~N)
44
45
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
46
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/translate-sve.c
49
+++ b/target/arm/tcg/translate-sve.c
50
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = {
51
NULL, gen_helper_sve_fneg_h,
52
gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
53
};
54
-TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
55
+static gen_helper_gvec_3 * const fneg_ah_fns[4] = {
56
+ NULL, gen_helper_sve_ah_fneg_h,
57
+ gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d,
58
+};
59
+TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz,
60
+ s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0)
61
62
static gen_helper_gvec_3 * const sxtb_fns[4] = {
63
NULL, gen_helper_sve_sxtb_h,
64
--
65
2.34.1
diff view generated by jsdifflib
New patch
1
Make SVE FABS honour the FPCR.AH "don't change the sign of a NaN"
2
semantics.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 4 ++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 7 ++++++-
10
3 files changed, 18 insertions(+), 1 deletion(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
20
+DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
+DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
+
24
DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/sve_helper.c
30
+++ b/target/arm/tcg/sve_helper.c
31
@@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
32
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
33
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
34
35
+#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N))
36
+#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N))
37
+#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N))
38
+
39
+DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H)
40
+DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S)
41
+DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D)
42
+
43
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
44
45
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
46
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/translate-sve.c
49
+++ b/target/arm/tcg/translate-sve.c
50
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = {
51
NULL, gen_helper_sve_fabs_h,
52
gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
53
};
54
-TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
55
+static gen_helper_gvec_3 * const fabs_ah_fns[4] = {
56
+ NULL, gen_helper_sve_ah_fabs_h,
57
+ gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d,
58
+};
59
+TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz,
60
+ s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0)
61
62
static gen_helper_gvec_3 * const fneg_fns[4] = {
63
NULL, gen_helper_sve_fneg_h,
64
--
65
2.34.1
diff view generated by jsdifflib
New patch
1
Make the SVE FABD insn honour the FPCR.AH "don't change the sign
2
of a NaN" semantics.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 7 +++++++
8
target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++
9
target/arm/tcg/translate-sve.c | 2 +-
10
3 files changed, 30 insertions(+), 1 deletion(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, fpst, i32)
26
+
27
DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG,
28
void, ptr, ptr, ptr, ptr, fpst, i32)
29
DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG,
30
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/tcg/sve_helper.c
33
+++ b/target/arm/tcg/sve_helper.c
34
@@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
35
return float64_abs(float64_sub(a, b, s));
36
}
37
38
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
39
+static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat)
40
+{
41
+ float16 r = float16_sub(op1, op2, stat);
42
+ return float16_is_any_nan(r) ? r : float16_abs(r);
43
+}
44
+
45
+static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat)
46
+{
47
+ float32 r = float32_sub(op1, op2, stat);
48
+ return float32_is_any_nan(r) ? r : float32_abs(r);
49
+}
50
+
51
+static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat)
52
+{
53
+ float64 r = float64_sub(op1, op2, stat);
54
+ return float64_is_any_nan(r) ? r : float64_abs(r);
55
+}
56
+
57
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
58
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
59
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
60
+DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h)
61
+DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s)
62
+DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d)
63
64
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
65
{
66
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/tcg/translate-sve.c
69
+++ b/target/arm/tcg/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
71
DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
72
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
73
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
74
-DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
75
+DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
76
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
77
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
78
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
79
--
80
2.34.1
diff view generated by jsdifflib
New patch
1
The negation steps in FCADD must honour FPCR.AH's "don't change the
2
sign of a NaN" semantics. Implement this in the same way we did for
3
the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field
4
passed to the helper and using that to decide whether to negate the
5
values.
1
6
7
The construction of neg_imag and neg_real was done to make it easy
8
to apply both in parallel with two simple logical operations. This
9
changed with FPCR.AH, which is more complex than that. Switch to
10
an approach that follows the pseudocode more closely, by extracting
11
the 'rot=1' parameter from the SIMD data field and changing the
12
sign of the appropriate input value.
13
14
Note that there was a naming issue with neg_imag and neg_real.
15
They were named backward, with neg_imag being non-zero for rot=1,
16
and vice versa. This was combined with reversed usage within the
17
loop, so that the negation in the end turned out correct.
18
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
---
22
target/arm/tcg/vec_internal.h | 17 ++++++++++++++
23
target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++----------
24
target/arm/tcg/translate-sve.c | 2 +-
25
3 files changed, 48 insertions(+), 13 deletions(-)
26
27
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/vec_internal.h
30
+++ b/target/arm/tcg/vec_internal.h
31
@@ -XXX,XX +XXX,XX @@
32
#ifndef TARGET_ARM_VEC_INTERNAL_H
33
#define TARGET_ARM_VEC_INTERNAL_H
34
35
+#include "fpu/softfloat.h"
36
+
37
/*
38
* Note that vector data is stored in host-endian 64-bit chunks,
39
* so addressing units smaller than that needs a host-endian fixup.
40
@@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
41
*/
42
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
43
44
+static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
45
+{
46
+ return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
47
+}
48
+
49
+static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
50
+{
51
+ return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
52
+}
53
+
54
+static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
55
+{
56
+ return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
57
+}
58
+
59
#endif /* TARGET_ARM_VEC_INTERNAL_H */
60
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/tcg/sve_helper.c
63
+++ b/target/arm/tcg/sve_helper.c
64
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
65
{
66
intptr_t j, i = simd_oprsz(desc);
67
uint64_t *g = vg;
68
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
69
- float16 neg_real = float16_chs(neg_imag);
70
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
71
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
72
73
do {
74
uint64_t pg = g[(i - 1) >> 6];
75
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
76
i -= 2 * sizeof(float16);
77
78
e0 = *(float16 *)(vn + H1_2(i));
79
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
80
+ e1 = *(float16 *)(vm + H1_2(j));
81
e2 = *(float16 *)(vn + H1_2(j));
82
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
83
+ e3 = *(float16 *)(vm + H1_2(i));
84
+
85
+ if (rot) {
86
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
87
+ } else {
88
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
89
+ }
90
91
if (likely((pg >> (i & 63)) & 1)) {
92
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
93
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
94
{
95
intptr_t j, i = simd_oprsz(desc);
96
uint64_t *g = vg;
97
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
98
- float32 neg_real = float32_chs(neg_imag);
99
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
100
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
101
102
do {
103
uint64_t pg = g[(i - 1) >> 6];
104
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
105
i -= 2 * sizeof(float32);
106
107
e0 = *(float32 *)(vn + H1_2(i));
108
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
109
+ e1 = *(float32 *)(vm + H1_2(j));
110
e2 = *(float32 *)(vn + H1_2(j));
111
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
112
+ e3 = *(float32 *)(vm + H1_2(i));
113
+
114
+ if (rot) {
115
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
116
+ } else {
117
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
118
+ }
119
120
if (likely((pg >> (i & 63)) & 1)) {
121
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
122
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
123
{
124
intptr_t j, i = simd_oprsz(desc);
125
uint64_t *g = vg;
126
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
127
- float64 neg_real = float64_chs(neg_imag);
128
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
129
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
130
131
do {
132
uint64_t pg = g[(i - 1) >> 6];
133
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
134
i -= 2 * sizeof(float64);
135
136
e0 = *(float64 *)(vn + H1_2(i));
137
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
138
+ e1 = *(float64 *)(vm + H1_2(j));
139
e2 = *(float64 *)(vn + H1_2(j));
140
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
141
+ e3 = *(float64 *)(vm + H1_2(i));
142
+
143
+ if (rot) {
144
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
145
+ } else {
146
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
147
+ }
148
149
if (likely((pg >> (i & 63)) & 1)) {
150
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
151
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/arm/tcg/translate-sve.c
154
+++ b/target/arm/tcg/translate-sve.c
155
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
156
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
157
};
158
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
159
- a->rd, a->rn, a->rm, a->pg, a->rot,
160
+ a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
161
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
162
163
#define DO_FMLA(NAME, name) \
164
--
165
2.34.1
diff view generated by jsdifflib
1
In commit 1b7bc9b5c8bf374dd we changed handle_vec_simd_sqshrn() so
1
The negation steps in FCADD must honour FPCR.AH's "don't change the
2
that instead of starting with a 0 value and depositing in each new
2
sign of a NaN" semantics. Implement this by encoding FPCR.AH into
3
element from the narrowing operation, it instead started with the raw
3
the SIMD data field passed to the helper and using that to decide
4
result of the narrowing operation of the first element.
4
whether to negate the values.
5
5
6
This is fine in the vector case, because the deposit operations for
6
The construction of neg_imag and neg_real was done to make it easy
7
the second and subsequent elements will always overwrite any higher
7
to apply both in parallel with two simple logical operations. This
8
bits that might have been in the first element's result value in
8
changed with FPCR.AH, which is more complex than that. Switch to
9
tcg_rd. However in the scalar case we only go through this loop
9
an approach closer to the pseudocode, where we extract the rot
10
once. The effect is that for a signed narrowing operation, if the
10
parameter from the SIMD data word and negate the appropriate
11
result is negative then we will now return a value where the bits
11
input value.
12
above the first element are incorrectly 1 (because the narrowfn
13
returns a sign-extended result, not one that is truncated to the
14
element size).
15
12
16
Fix this by using an extract operation to get exactly the correct
17
bits of the output of the narrowfn for element 1, instead of a
18
plain move.
19
20
Cc: qemu-stable@nongnu.org
21
Fixes: 1b7bc9b5c8bf374dd3 ("target/arm: Avoid tcg_const_ptr in handle_vec_simd_sqshrn")
22
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2089
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
25
Message-id: 20240123153416.877308-1-peter.maydell@linaro.org
26
---
15
---
27
target/arm/tcg/translate-a64.c | 2 +-
16
target/arm/tcg/translate-a64.c | 10 +++++--
28
1 file changed, 1 insertion(+), 1 deletion(-)
17
target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++---------------
18
2 files changed, 38 insertions(+), 26 deletions(-)
29
19
30
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
20
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
31
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/tcg/translate-a64.c
22
--- a/target/arm/tcg/translate-a64.c
33
+++ b/target/arm/tcg/translate-a64.c
23
+++ b/target/arm/tcg/translate-a64.c
34
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
24
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
35
narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
25
gen_helper_gvec_fcadds,
36
tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
26
gen_helper_gvec_fcaddd,
37
if (i == 0) {
27
};
38
- tcg_gen_mov_i64(tcg_final, tcg_rd);
28
-TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
39
+ tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
29
-TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
40
} else {
30
+/*
41
tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
31
+ * Encode FPCR.AH into the data so the helper knows whether the
42
}
32
+ * negations it does should avoid flipping the sign bit on a NaN
33
+ */
34
+TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
35
+ f_vector_fcadd)
36
+TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
37
+ f_vector_fcadd)
38
39
static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
40
{
41
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/tcg/vec_helper.c
44
+++ b/target/arm/tcg/vec_helper.c
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
46
float16 *d = vd;
47
float16 *n = vn;
48
float16 *m = vm;
49
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
50
- uint32_t neg_imag = neg_real ^ 1;
51
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
52
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
53
uintptr_t i;
54
55
- /* Shift boolean to the sign bit so we can xor to negate. */
56
- neg_real <<= 15;
57
- neg_imag <<= 15;
58
-
59
for (i = 0; i < opr_sz / 2; i += 2) {
60
float16 e0 = n[H2(i)];
61
- float16 e1 = m[H2(i + 1)] ^ neg_imag;
62
+ float16 e1 = m[H2(i + 1)];
63
float16 e2 = n[H2(i + 1)];
64
- float16 e3 = m[H2(i)] ^ neg_real;
65
+ float16 e3 = m[H2(i)];
66
+
67
+ if (rot) {
68
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
69
+ } else {
70
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
71
+ }
72
73
d[H2(i)] = float16_add(e0, e1, fpst);
74
d[H2(i + 1)] = float16_add(e2, e3, fpst);
75
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
76
float32 *d = vd;
77
float32 *n = vn;
78
float32 *m = vm;
79
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
80
- uint32_t neg_imag = neg_real ^ 1;
81
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
82
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
83
uintptr_t i;
84
85
- /* Shift boolean to the sign bit so we can xor to negate. */
86
- neg_real <<= 31;
87
- neg_imag <<= 31;
88
-
89
for (i = 0; i < opr_sz / 4; i += 2) {
90
float32 e0 = n[H4(i)];
91
- float32 e1 = m[H4(i + 1)] ^ neg_imag;
92
+ float32 e1 = m[H4(i + 1)];
93
float32 e2 = n[H4(i + 1)];
94
- float32 e3 = m[H4(i)] ^ neg_real;
95
+ float32 e3 = m[H4(i)];
96
+
97
+ if (rot) {
98
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
99
+ } else {
100
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
101
+ }
102
103
d[H4(i)] = float32_add(e0, e1, fpst);
104
d[H4(i + 1)] = float32_add(e2, e3, fpst);
105
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
106
float64 *d = vd;
107
float64 *n = vn;
108
float64 *m = vm;
109
- uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
110
- uint64_t neg_imag = neg_real ^ 1;
111
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
112
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
113
uintptr_t i;
114
115
- /* Shift boolean to the sign bit so we can xor to negate. */
116
- neg_real <<= 63;
117
- neg_imag <<= 63;
118
-
119
for (i = 0; i < opr_sz / 8; i += 2) {
120
float64 e0 = n[i];
121
- float64 e1 = m[i + 1] ^ neg_imag;
122
+ float64 e1 = m[i + 1];
123
float64 e2 = n[i + 1];
124
- float64 e3 = m[i] ^ neg_real;
125
+ float64 e3 = m[i];
126
+
127
+ if (rot) {
128
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
129
+ } else {
130
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
131
+ }
132
133
d[i] = float64_add(e0, e1, fpst);
134
d[i + 1] = float64_add(e2, e3, fpst);
43
--
135
--
44
2.34.1
136
2.34.1
diff view generated by jsdifflib
New patch
1
Handle the FPCR.AH semantics that we do not change the sign of an
2
input NaN in the FRECPS and FRSQRTS scalar insns, by providing
3
new helper functions that do the CHS part of the operation
4
differently.
1
5
6
Since the extra helper functions would be very repetitive if written
7
out longhand, we condense them and the existing non-AH helpers into
8
being emitted via macros.
9
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/arm/tcg/helper-a64.h | 6 ++
14
target/arm/tcg/vec_internal.h | 18 ++++++
15
target/arm/tcg/helper-a64.c | 115 ++++++++++++---------------------
16
target/arm/tcg/translate-a64.c | 25 +++++--
17
4 files changed, 83 insertions(+), 81 deletions(-)
18
19
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/tcg/helper-a64.h
22
+++ b/target/arm/tcg/helper-a64.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
24
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
25
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
26
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
27
+DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
28
+DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
29
+DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
30
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
31
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
32
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
33
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
34
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
35
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
36
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
37
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
38
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
39
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/tcg/vec_internal.h
42
+++ b/target/arm/tcg/vec_internal.h
43
@@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
44
*/
45
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
46
47
+/*
48
+ * Negate as for FPCR.AH=1 -- do not negate NaNs.
49
+ */
50
+static inline float16 float16_ah_chs(float16 a)
51
+{
52
+ return float16_is_any_nan(a) ? a : float16_chs(a);
53
+}
54
+
55
+static inline float32 float32_ah_chs(float32 a)
56
+{
57
+ return float32_is_any_nan(a) ? a : float32_chs(a);
58
+}
59
+
60
+static inline float64 float64_ah_chs(float64 a)
61
+{
62
+ return float64_is_any_nan(a) ? a : float64_chs(a);
63
+}
64
+
65
static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
66
{
67
return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
68
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/tcg/helper-a64.c
71
+++ b/target/arm/tcg/helper-a64.c
72
@@ -XXX,XX +XXX,XX @@
73
#ifdef CONFIG_USER_ONLY
74
#include "user/page-protection.h"
75
#endif
76
+#include "vec_internal.h"
77
78
/* C2.4.7 Multiply and divide */
79
/* special cases for 0 and LLONG_MIN are mandated by the standard */
80
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
81
return -float64_lt(b, a, fpst);
82
}
83
84
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
85
+/*
86
+ * Reciprocal step and sqrt step. Note that unlike the A32/T32
87
* versions, these do a fully fused multiply-add or
88
* multiply-add-and-halve.
89
+ * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
90
*/
91
-
92
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
93
-{
94
- a = float16_squash_input_denormal(a, fpst);
95
- b = float16_squash_input_denormal(b, fpst);
96
-
97
- a = float16_chs(a);
98
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
99
- (float16_is_infinity(b) && float16_is_zero(a))) {
100
- return float16_two;
101
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
102
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
103
+ { \
104
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
105
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
106
+ a = FLOATTYPE ## _ ## CHSFN(a); \
107
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
108
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
109
+ return FLOATTYPE ## _two; \
110
+ } \
111
+ return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
112
}
113
- return float16_muladd(a, b, float16_two, 0, fpst);
114
-}
115
116
-float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
117
-{
118
- a = float32_squash_input_denormal(a, fpst);
119
- b = float32_squash_input_denormal(b, fpst);
120
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
121
+DO_RECPS(recpsf_f32, float32, float32, chs)
122
+DO_RECPS(recpsf_f64, float64, float64, chs)
123
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
124
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
125
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
126
127
- a = float32_chs(a);
128
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
129
- (float32_is_infinity(b) && float32_is_zero(a))) {
130
- return float32_two;
131
- }
132
- return float32_muladd(a, b, float32_two, 0, fpst);
133
-}
134
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
135
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
136
+ { \
137
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
138
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
139
+ a = FLOATTYPE ## _ ## CHSFN(a); \
140
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
141
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
142
+ return FLOATTYPE ## _one_point_five; \
143
+ } \
144
+ return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
145
+ -1, 0, fpst); \
146
+ } \
147
148
-float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
149
-{
150
- a = float64_squash_input_denormal(a, fpst);
151
- b = float64_squash_input_denormal(b, fpst);
152
-
153
- a = float64_chs(a);
154
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
155
- (float64_is_infinity(b) && float64_is_zero(a))) {
156
- return float64_two;
157
- }
158
- return float64_muladd(a, b, float64_two, 0, fpst);
159
-}
160
-
161
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
162
-{
163
- a = float16_squash_input_denormal(a, fpst);
164
- b = float16_squash_input_denormal(b, fpst);
165
-
166
- a = float16_chs(a);
167
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
168
- (float16_is_infinity(b) && float16_is_zero(a))) {
169
- return float16_one_point_five;
170
- }
171
- return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
172
-}
173
-
174
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
175
-{
176
- a = float32_squash_input_denormal(a, fpst);
177
- b = float32_squash_input_denormal(b, fpst);
178
-
179
- a = float32_chs(a);
180
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
181
- (float32_is_infinity(b) && float32_is_zero(a))) {
182
- return float32_one_point_five;
183
- }
184
- return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
185
-}
186
-
187
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
188
-{
189
- a = float64_squash_input_denormal(a, fpst);
190
- b = float64_squash_input_denormal(b, fpst);
191
-
192
- a = float64_chs(a);
193
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
194
- (float64_is_infinity(b) && float64_is_zero(a))) {
195
- return float64_one_point_five;
196
- }
197
- return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
198
-}
199
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
200
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
201
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
202
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
203
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
204
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
205
206
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
207
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
208
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
209
index XXXXXXX..XXXXXXX 100644
210
--- a/target/arm/tcg/translate-a64.c
211
+++ b/target/arm/tcg/translate-a64.c
212
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
213
FPST_A64_F16 : FPST_A64);
214
}
215
216
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
217
- int mergereg)
218
+static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
219
+ const FPScalar *fnormal, const FPScalar *fah,
220
+ int mergereg)
221
{
222
- return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
223
- select_ah_fpst(s, a->esz));
224
+ return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
225
+ mergereg, select_ah_fpst(s, a->esz));
226
}
227
228
/* Some insns need to call different helpers when FPCR.AH == 1 */
229
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = {
230
gen_helper_recpsf_f32,
231
gen_helper_recpsf_f64,
232
};
233
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
234
+static const FPScalar f_scalar_ah_frecps = {
235
+ gen_helper_recpsf_ah_f16,
236
+ gen_helper_recpsf_ah_f32,
237
+ gen_helper_recpsf_ah_f64,
238
+};
239
+TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
240
+ &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
241
242
static const FPScalar f_scalar_frsqrts = {
243
gen_helper_rsqrtsf_f16,
244
gen_helper_rsqrtsf_f32,
245
gen_helper_rsqrtsf_f64,
246
};
247
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
248
+static const FPScalar f_scalar_ah_frsqrts = {
249
+ gen_helper_rsqrtsf_ah_f16,
250
+ gen_helper_rsqrtsf_ah_f32,
251
+ gen_helper_rsqrtsf_ah_f64,
252
+};
253
+TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
254
+ &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
255
256
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
257
const FPScalar *f, bool swap)
258
--
259
2.34.1
diff view generated by jsdifflib
New patch
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics
2
in the vector versions of FRECPS and FRSQRTS, by implementing
3
new vector wrappers that call the _ah_ scalar helpers.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
9
target/arm/tcg/translate-a64.c | 21 ++++++++++++++++-----
10
target/arm/tcg/translate-sve.c | 7 ++++++-
11
target/arm/tcg/vec_helper.c | 8 ++++++++
12
4 files changed, 44 insertions(+), 6 deletions(-)
13
14
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/tcg/helper-sve.h
17
+++ b/target/arm/tcg/helper-sve.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
19
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
20
void, ptr, ptr, ptr, fpst, i32)
21
22
+DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, fpst, i32)
26
+DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG,
27
+ void, ptr, ptr, ptr, fpst, i32)
28
+
29
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, fpst, i32)
33
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG,
34
+ void, ptr, ptr, ptr, fpst, i32)
35
+
36
DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
37
void, ptr, ptr, ptr, fpst, i32)
38
DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
39
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/tcg/translate-a64.c
42
+++ b/target/arm/tcg/translate-a64.c
43
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
44
return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
45
}
46
47
-static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
48
- gen_helper_gvec_3_ptr * const f[3])
49
+static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
50
+ gen_helper_gvec_3_ptr * const fnormal[3],
51
+ gen_helper_gvec_3_ptr * const fah[3])
52
{
53
- return do_fp3_vector_with_fpsttype(s, a, data, f,
54
+ return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
55
select_ah_fpst(s, a->esz));
56
}
57
58
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
59
gen_helper_gvec_recps_s,
60
gen_helper_gvec_recps_d,
61
};
62
-TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps)
63
+static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
64
+ gen_helper_gvec_ah_recps_h,
65
+ gen_helper_gvec_ah_recps_s,
66
+ gen_helper_gvec_ah_recps_d,
67
+};
68
+TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
69
70
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
71
gen_helper_gvec_rsqrts_h,
72
gen_helper_gvec_rsqrts_s,
73
gen_helper_gvec_rsqrts_d,
74
};
75
-TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts)
76
+static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
77
+ gen_helper_gvec_ah_rsqrts_h,
78
+ gen_helper_gvec_ah_rsqrts_s,
79
+ gen_helper_gvec_ah_rsqrts_d,
80
+};
81
+TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
82
83
static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
84
gen_helper_gvec_faddp_h,
85
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/arm/tcg/translate-sve.c
88
+++ b/target/arm/tcg/translate-sve.c
89
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
90
NULL, gen_helper_gvec_##name##_h, \
91
gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
92
}; \
93
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0)
94
+ static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \
95
+ NULL, gen_helper_gvec_ah_##name##_h, \
96
+ gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \
97
+ }; \
98
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \
99
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0)
100
101
DO_FP3(FADD_zzz, fadd)
102
DO_FP3(FSUB_zzz, fsub)
103
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/tcg/vec_helper.c
106
+++ b/target/arm/tcg/vec_helper.c
107
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
108
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
109
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
110
111
+DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16)
112
+DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32)
113
+DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64)
114
+
115
+DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16)
116
+DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32)
117
+DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64)
118
+
119
DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
120
DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
121
DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
122
--
123
2.34.1
diff view generated by jsdifflib
New patch
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS
2
(indexed). We do this by creating 6 new helpers, which allow us to
3
do the negation either by XOR (for AH=0) or by muladd flags
4
(for AH=1).
1
5
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
[PMM: Mostly from RTH's patch; error in index order into fns[][]
8
fixed]
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
target/arm/helper.h | 14 ++++++++++++++
12
target/arm/tcg/translate-a64.c | 17 +++++++++++------
13
target/arm/tcg/translate-sve.c | 31 +++++++++++++++++--------------
14
target/arm/tcg/vec_helper.c | 24 +++++++++++++++---------
15
4 files changed, 57 insertions(+), 29 deletions(-)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, fpst, i32)
24
25
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, fpst, i32)
27
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, fpst, i32)
31
+
32
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, fpst, i32)
34
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, fpst, i32)
36
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, fpst, i32)
38
+
39
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
40
void, ptr, ptr, ptr, ptr, i32)
41
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
42
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/translate-a64.c
45
+++ b/target/arm/tcg/translate-a64.c
46
@@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
47
48
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
49
{
50
- static gen_helper_gvec_4_ptr * const fns[3] = {
51
- gen_helper_gvec_fmla_idx_h,
52
- gen_helper_gvec_fmla_idx_s,
53
- gen_helper_gvec_fmla_idx_d,
54
+ static gen_helper_gvec_4_ptr * const fns[3][3] = {
55
+ { gen_helper_gvec_fmla_idx_h,
56
+ gen_helper_gvec_fmla_idx_s,
57
+ gen_helper_gvec_fmla_idx_d },
58
+ { gen_helper_gvec_fmls_idx_h,
59
+ gen_helper_gvec_fmls_idx_s,
60
+ gen_helper_gvec_fmls_idx_d },
61
+ { gen_helper_gvec_ah_fmls_idx_h,
62
+ gen_helper_gvec_ah_fmls_idx_s,
63
+ gen_helper_gvec_ah_fmls_idx_d },
64
};
65
MemOp esz = a->esz;
66
int check = fp_access_check_vector_hsd(s, a->q, esz);
67
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
68
69
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
70
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
71
- (a->idx << 1) | neg,
72
- fns[esz - 1]);
73
+ a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
74
return true;
75
}
76
77
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/arm/tcg/translate-sve.c
80
+++ b/target/arm/tcg/translate-sve.c
81
@@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
82
*** SVE Floating Point Multiply-Add Indexed Group
83
*/
84
85
-static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
86
-{
87
- static gen_helper_gvec_4_ptr * const fns[4] = {
88
- NULL,
89
- gen_helper_gvec_fmla_idx_h,
90
- gen_helper_gvec_fmla_idx_s,
91
- gen_helper_gvec_fmla_idx_d,
92
- };
93
- return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
94
- (a->index << 1) | sub,
95
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
96
-}
97
+static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
98
+ NULL, gen_helper_gvec_fmla_idx_h,
99
+ gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
100
+};
101
+TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
102
+ fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
103
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
104
105
-TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
106
-TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
107
+static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
108
+ { NULL, NULL },
109
+ { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
110
+ { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
111
+ { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
112
+};
113
+TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
114
+ fmls_idx_fns[a->esz][s->fpcr_ah],
115
+ a->rd, a->rn, a->rm, a->ra, a->index,
116
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
117
118
/*
119
*** SVE Floating Point Multiply Indexed Group
120
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/tcg/vec_helper.c
123
+++ b/target/arm/tcg/vec_helper.c
124
@@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
125
126
#undef DO_FMUL_IDX
127
128
-#define DO_FMLA_IDX(NAME, TYPE, H) \
129
+#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
130
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
131
float_status *stat, uint32_t desc) \
132
{ \
133
intptr_t i, j, oprsz = simd_oprsz(desc); \
134
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
135
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
136
- intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
137
+ intptr_t idx = simd_data(desc); \
138
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
139
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
140
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
141
TYPE mm = m[H(i + idx)]; \
142
for (j = 0; j < segment; j++) { \
143
- d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
144
- mm, a[i + j], 0, stat); \
145
+ d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
146
+ a[i + j], NEGF, stat); \
147
} \
148
} \
149
clear_tail(d, oprsz, simd_maxsz(desc)); \
150
}
151
152
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
153
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
154
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
155
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
156
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
157
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
158
+
159
+DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
160
+DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
161
+DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
162
+
163
+DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
164
+DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
165
+DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
166
167
#undef DO_FMLA_IDX
168
169
--
170
2.34.1
diff view generated by jsdifflib
New patch
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics
2
in FMLS (vector), by implementing a new set of helpers for
3
the AH=1 case.
1
4
5
The float_muladd_negate_product flag produces the same result
6
as negating either of the multiplication operands, assuming
7
neither of the operands are NaNs. But since FEAT_AFP does not
8
negate NaNs, this behaviour is exactly what we need.
9
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/arm/helper.h | 4 ++++
14
target/arm/tcg/translate-a64.c | 7 ++++++-
15
target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++
16
3 files changed, 32 insertions(+), 1 deletion(-)
17
18
diff --git a/target/arm/helper.h b/target/arm/helper.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper.h
21
+++ b/target/arm/helper.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
23
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
24
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
25
26
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
27
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
28
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
29
+
30
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
31
void, ptr, ptr, ptr, fpst, i32)
32
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
33
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/tcg/translate-a64.c
36
+++ b/target/arm/tcg/translate-a64.c
37
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
38
gen_helper_gvec_vfms_s,
39
gen_helper_gvec_vfms_d,
40
};
41
-TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
42
+static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
43
+ gen_helper_gvec_ah_vfms_h,
44
+ gen_helper_gvec_ah_vfms_s,
45
+ gen_helper_gvec_ah_vfms_d,
46
+};
47
+TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
48
49
static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
50
gen_helper_gvec_fceq_h,
51
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/tcg/vec_helper.c
54
+++ b/target/arm/tcg/vec_helper.c
55
@@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
56
return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
57
}
58
59
+static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
60
+ float_status *stat)
61
+{
62
+ return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
63
+}
64
+
65
+static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
66
+ float_status *stat)
67
+{
68
+ return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat);
69
+}
70
+
71
+static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2,
72
+ float_status *stat)
73
+{
74
+ return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat);
75
+}
76
+
77
#define DO_MULADD(NAME, FUNC, TYPE) \
78
void HELPER(NAME)(void *vd, void *vn, void *vm, \
79
float_status *stat, uint32_t desc) \
80
@@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
81
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
82
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
83
84
+DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
85
+DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
86
+DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
87
+
88
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
89
* For AdvSIMD, there is of course only one such vector segment.
90
*/
91
--
92
2.34.1
diff view generated by jsdifflib
New patch
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the
2
SVE FMLS (vector) insns, by providing new helpers for the AH=1 case
3
which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions
4
that do the work.
1
5
6
The float*_muladd functions have a flags argument that can
7
perform optional negation of various operands. We don't use
8
that for "normal" arm fmla, because the muladd flags are not
9
applied when an input is a NaN. But since FEAT_AFP does not
10
negate NaNs, this behaviour is exactly what we need.
11
12
The non-AH helpers pass in a zero flags argument and control the
13
negation via the neg1 and neg3 arguments; the AH helpers always pass
14
in neg1 and neg3 as zero and control the negation via the flags
15
argument. This allows us to avoid conditional branches within the
16
inner loop.
17
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
---
21
target/arm/tcg/helper-sve.h | 21 ++++++++
22
target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++-------
23
target/arm/tcg/translate-sve.c | 18 ++++---
24
3 files changed, 114 insertions(+), 24 deletions(-)
25
26
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/tcg/helper-sve.h
29
+++ b/target/arm/tcg/helper-sve.h
30
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
31
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
32
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
33
34
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
36
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
38
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
40
+
41
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
42
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
43
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
44
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
45
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
46
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
47
+
48
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
49
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
50
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
52
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
53
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
54
+
55
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
56
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
57
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
58
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/tcg/sve_helper.c
61
+++ b/target/arm/tcg/sve_helper.c
62
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
63
64
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
65
float_status *status, uint32_t desc,
66
- uint16_t neg1, uint16_t neg3)
67
+ uint16_t neg1, uint16_t neg3, int flags)
68
{
69
intptr_t i = simd_oprsz(desc);
70
uint64_t *g = vg;
71
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
72
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
73
e2 = *(uint16_t *)(vm + H1_2(i));
74
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
75
- r = float16_muladd(e1, e2, e3, 0, status);
76
+ r = float16_muladd(e1, e2, e3, flags, status);
77
*(uint16_t *)(vd + H1_2(i)) = r;
78
}
79
} while (i & 63);
80
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
81
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
82
void *vg, float_status *status, uint32_t desc)
83
{
84
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
85
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
86
}
87
88
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
89
void *vg, float_status *status, uint32_t desc)
90
{
91
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
92
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
93
}
94
95
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
96
void *vg, float_status *status, uint32_t desc)
97
{
98
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
99
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
100
}
101
102
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
103
void *vg, float_status *status, uint32_t desc)
104
{
105
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
106
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
107
+}
108
+
109
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
110
+ void *vg, float_status *status, uint32_t desc)
111
+{
112
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
113
+ float_muladd_negate_product);
114
+}
115
+
116
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
117
+ void *vg, float_status *status, uint32_t desc)
118
+{
119
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
120
+ float_muladd_negate_product | float_muladd_negate_c);
121
+}
122
+
123
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
124
+ void *vg, float_status *status, uint32_t desc)
125
+{
126
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
127
+ float_muladd_negate_c);
128
}
129
130
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
131
float_status *status, uint32_t desc,
132
- uint32_t neg1, uint32_t neg3)
133
+ uint32_t neg1, uint32_t neg3, int flags)
134
{
135
intptr_t i = simd_oprsz(desc);
136
uint64_t *g = vg;
137
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
138
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
139
e2 = *(uint32_t *)(vm + H1_4(i));
140
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
141
- r = float32_muladd(e1, e2, e3, 0, status);
142
+ r = float32_muladd(e1, e2, e3, flags, status);
143
*(uint32_t *)(vd + H1_4(i)) = r;
144
}
145
} while (i & 63);
146
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
147
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
148
void *vg, float_status *status, uint32_t desc)
149
{
150
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
151
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
152
}
153
154
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
155
void *vg, float_status *status, uint32_t desc)
156
{
157
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
158
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
159
}
160
161
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
162
void *vg, float_status *status, uint32_t desc)
163
{
164
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
165
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
166
}
167
168
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
169
void *vg, float_status *status, uint32_t desc)
170
{
171
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
172
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
173
+}
174
+
175
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
176
+ void *vg, float_status *status, uint32_t desc)
177
+{
178
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
179
+ float_muladd_negate_product);
180
+}
181
+
182
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
183
+ void *vg, float_status *status, uint32_t desc)
184
+{
185
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
186
+ float_muladd_negate_product | float_muladd_negate_c);
187
+}
188
+
189
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
190
+ void *vg, float_status *status, uint32_t desc)
191
+{
192
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
193
+ float_muladd_negate_c);
194
}
195
196
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
197
float_status *status, uint32_t desc,
198
- uint64_t neg1, uint64_t neg3)
199
+ uint64_t neg1, uint64_t neg3, int flags)
200
{
201
intptr_t i = simd_oprsz(desc);
202
uint64_t *g = vg;
203
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
204
e1 = *(uint64_t *)(vn + i) ^ neg1;
205
e2 = *(uint64_t *)(vm + i);
206
e3 = *(uint64_t *)(va + i) ^ neg3;
207
- r = float64_muladd(e1, e2, e3, 0, status);
208
+ r = float64_muladd(e1, e2, e3, flags, status);
209
*(uint64_t *)(vd + i) = r;
210
}
211
} while (i & 63);
212
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
213
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
214
void *vg, float_status *status, uint32_t desc)
215
{
216
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
217
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
218
}
219
220
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
221
void *vg, float_status *status, uint32_t desc)
222
{
223
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
224
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
225
}
226
227
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
228
void *vg, float_status *status, uint32_t desc)
229
{
230
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
231
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
232
}
233
234
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
235
void *vg, float_status *status, uint32_t desc)
236
{
237
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
238
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
239
+}
240
+
241
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
242
+ void *vg, float_status *status, uint32_t desc)
243
+{
244
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
245
+ float_muladd_negate_product);
246
+}
247
+
248
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
249
+ void *vg, float_status *status, uint32_t desc)
250
+{
251
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
252
+ float_muladd_negate_product | float_muladd_negate_c);
253
+}
254
+
255
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
256
+ void *vg, float_status *status, uint32_t desc)
257
+{
258
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
259
+ float_muladd_negate_c);
260
}
261
262
/* Two operand floating-point comparison controlled by a predicate.
263
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
264
index XXXXXXX..XXXXXXX 100644
265
--- a/target/arm/tcg/translate-sve.c
266
+++ b/target/arm/tcg/translate-sve.c
267
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
268
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
269
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
270
271
-#define DO_FMLA(NAME, name) \
272
+#define DO_FMLA(NAME, name, ah_name) \
273
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
274
NULL, gen_helper_sve_##name##_h, \
275
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
276
}; \
277
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
278
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
279
+ NULL, gen_helper_sve_##ah_name##_h, \
280
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
281
+ }; \
282
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
283
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
284
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
285
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
286
287
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
288
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
289
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
290
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
291
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
292
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
293
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
294
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
295
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
296
297
#undef DO_FMLA
298
299
--
300
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
The negation step in the SVE FTSSEL insn mustn't negate a NaN when
2
FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field
3
and use that to determine whether to do the negation.
2
4
3
target/arm/cpregs.h uses the CP_REG_ARCH_* definitions
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
from "target/arm/kvm-consts.h". Include it in order to
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
avoid the following error when refactoring unrelated headers:
7
---
8
target/arm/tcg/sve_helper.c | 18 +++++++++++++++---
9
target/arm/tcg/translate-sve.c | 4 ++--
10
2 files changed, 17 insertions(+), 5 deletions(-)
6
11
7
target/arm/cpregs.h:191:18: error: use of undeclared identifier 'CP_REG_ARCH_MASK'
12
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
8
if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
9
^
10
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20240118200643.29037-8-philmd@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
16
target/arm/cpregs.h | 1 +
17
1 file changed, 1 insertion(+)
18
19
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/cpregs.h
14
--- a/target/arm/tcg/sve_helper.c
22
+++ b/target/arm/cpregs.h
15
+++ b/target/arm/tcg/sve_helper.c
23
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
24
#define TARGET_ARM_CPREGS_H
17
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
25
18
{
26
#include "hw/registerfields.h"
19
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
27
+#include "target/arm/kvm-consts.h"
20
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
21
uint16_t *d = vd, *n = vn, *m = vm;
22
for (i = 0; i < opr_sz; i += 1) {
23
uint16_t nn = n[i];
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
25
if (mm & 1) {
26
nn = float16_one;
27
}
28
- d[i] = nn ^ (mm & 2) << 14;
29
+ if (mm & 2) {
30
+ nn = float16_maybe_ah_chs(nn, fpcr_ah);
31
+ }
32
+ d[i] = nn;
33
}
34
}
35
36
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
37
{
38
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
39
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
40
uint32_t *d = vd, *n = vn, *m = vm;
41
for (i = 0; i < opr_sz; i += 1) {
42
uint32_t nn = n[i];
43
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
44
if (mm & 1) {
45
nn = float32_one;
46
}
47
- d[i] = nn ^ (mm & 2) << 30;
48
+ if (mm & 2) {
49
+ nn = float32_maybe_ah_chs(nn, fpcr_ah);
50
+ }
51
+ d[i] = nn;
52
}
53
}
54
55
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
56
{
57
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
58
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
59
uint64_t *d = vd, *n = vn, *m = vm;
60
for (i = 0; i < opr_sz; i += 1) {
61
uint64_t nn = n[i];
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
63
if (mm & 1) {
64
nn = float64_one;
65
}
66
- d[i] = nn ^ (mm & 2) << 62;
67
+ if (mm & 2) {
68
+ nn = float64_maybe_ah_chs(nn, fpcr_ah);
69
+ }
70
+ d[i] = nn;
71
}
72
}
73
74
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/tcg/translate-sve.c
77
+++ b/target/arm/tcg/translate-sve.c
78
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
79
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
80
};
81
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
82
- fexpa_fns[a->esz], a->rd, a->rn, 0)
83
+ fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah)
84
85
static gen_helper_gvec_3 * const ftssel_fns[4] = {
86
NULL, gen_helper_sve_ftssel_h,
87
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
88
};
89
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
90
- ftssel_fns[a->esz], a, 0)
91
+ ftssel_fns[a->esz], a, s->fpcr_ah)
28
92
29
/*
93
/*
30
* ARMCPRegInfo type field bits:
94
*** SVE Predicate Logical Operations Group
31
--
95
--
32
2.34.1
96
2.34.1
33
34
diff view generated by jsdifflib
1
From: Max Filippov <jcmvbkbc@gmail.com>
1
The negation step in the SVE FTMAD insn mustn't negate a NaN when
2
FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field,
3
so we can select the correct behaviour.
2
4
3
r[id]tlb[01], [iw][id]tlb opcodes use TLB way index passed in a register
5
Because the operand is known to be negative, negating the operand
4
by the guest. The host uses 3 bits of the index for ITLB indexing and 4
6
is the same as taking the absolute value. Defer this to the muladd
5
bits for DTLB, but there's only 7 entries in the ITLB array and 10 in
7
operation via flags, so that it happens after NaN detection, which
6
the DTLB array, so a malicious guest may trigger out-of-bound access to
8
is correct for FPCR.AH.
7
these arrays.
8
9
9
Change split_tlb_entry_spec return type to bool to indicate whether TLB
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
way passed to it is valid. Change get_tlb_entry to return NULL in case
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
invalid TLB way is requested. Add assertion to xtensa_tlb_get_entry that
12
---
12
requested TLB way and entry indices are valid. Add checks to the
13
target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++--------
13
[rwi]tlb helpers that requested TLB way is valid and return 0 or do
14
target/arm/tcg/translate-sve.c | 3 ++-
14
nothing when it's not.
15
2 files changed, 35 insertions(+), 10 deletions(-)
15
16
16
Cc: qemu-stable@nongnu.org
17
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
17
Fixes: b67ea0cd7441 ("target-xtensa: implement memory protection options")
18
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
19
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
20
Message-id: 20231215120307.545381-1-jcmvbkbc@gmail.com
21
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
22
---
23
target/xtensa/mmu_helper.c | 47 ++++++++++++++++++++++++++++----------
24
1 file changed, 35 insertions(+), 12 deletions(-)
25
26
diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
27
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
28
--- a/target/xtensa/mmu_helper.c
19
--- a/target/arm/tcg/sve_helper.c
29
+++ b/target/xtensa/mmu_helper.c
20
+++ b/target/arm/tcg/sve_helper.c
30
@@ -XXX,XX +XXX,XX @@ static void split_tlb_entry_spec_way(const CPUXtensaState *env, uint32_t v,
21
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
31
* Split TLB address into TLB way, entry index and VPN (with index).
22
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
32
* See ISA, 4.6.5.5 - 4.6.5.8 for the TLB addressing format
23
};
33
*/
24
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
34
-static void split_tlb_entry_spec(CPUXtensaState *env, uint32_t v, bool dtlb,
25
- intptr_t x = simd_data(desc);
35
- uint32_t *vpn, uint32_t *wi, uint32_t *ei)
26
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
36
+static bool split_tlb_entry_spec(CPUXtensaState *env, uint32_t v, bool dtlb,
27
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
37
+ uint32_t *vpn, uint32_t *wi, uint32_t *ei)
28
float16 *d = vd, *n = vn, *m = vm;
38
{
29
+
39
if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) {
30
for (i = 0; i < opr_sz; i++) {
40
*wi = v & (dtlb ? 0xf : 0x7);
31
float16 mm = m[i];
41
- split_tlb_entry_spec_way(env, v, dtlb, vpn, *wi, ei);
32
intptr_t xx = x;
42
+ if (*wi < (dtlb ? env->config->dtlb.nways : env->config->itlb.nways)) {
33
+ int flags = 0;
43
+ split_tlb_entry_spec_way(env, v, dtlb, vpn, *wi, ei);
34
+
44
+ return true;
35
if (float16_is_neg(mm)) {
45
+ } else {
36
- mm = float16_abs(mm);
46
+ return false;
37
+ if (fpcr_ah) {
47
+ }
38
+ flags = float_muladd_negate_product;
48
} else {
39
+ } else {
49
*vpn = v & REGION_PAGE_MASK;
40
+ mm = float16_abs(mm);
50
*wi = 0;
41
+ }
51
*ei = (v >> 29) & 0x7;
42
xx += 8;
52
+ return true;
43
}
44
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s);
45
+ d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
53
}
46
}
54
}
47
}
55
48
56
static xtensa_tlb_entry *xtensa_tlb_get_entry(CPUXtensaState *env, bool dtlb,
49
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
57
unsigned wi, unsigned ei)
50
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
58
{
51
};
59
+ const xtensa_tlb *tlb = dtlb ? &env->config->dtlb : &env->config->itlb;
52
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
53
- intptr_t x = simd_data(desc);
54
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
55
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
56
float32 *d = vd, *n = vn, *m = vm;
60
+
57
+
61
+ assert(wi < tlb->nways && ei < tlb->way_size[wi]);
58
for (i = 0; i < opr_sz; i++) {
62
return dtlb ?
59
float32 mm = m[i];
63
env->dtlb[wi] + ei :
60
intptr_t xx = x;
64
env->itlb[wi] + ei;
61
+ int flags = 0;
65
@@ -XXX,XX +XXX,XX @@ static xtensa_tlb_entry *get_tlb_entry(CPUXtensaState *env,
62
+
66
uint32_t wi;
63
if (float32_is_neg(mm)) {
67
uint32_t ei;
64
- mm = float32_abs(mm);
68
65
+ if (fpcr_ah) {
69
- split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei);
66
+ flags = float_muladd_negate_product;
70
- if (pwi) {
67
+ } else {
71
- *pwi = wi;
68
+ mm = float32_abs(mm);
72
+ if (split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei)) {
69
+ }
73
+ if (pwi) {
70
xx += 8;
74
+ *pwi = wi;
71
}
75
+ }
72
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s);
76
+ return xtensa_tlb_get_entry(env, dtlb, wi, ei);
73
+ d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
77
+ } else {
78
+ return NULL;
79
}
74
}
80
- return xtensa_tlb_get_entry(env, dtlb, wi, ei);
81
}
75
}
82
76
83
static void xtensa_tlb_set_entry_mmu(const CPUXtensaState *env,
77
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
84
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rtlb0)(CPUXtensaState *env, uint32_t v, uint32_t dtlb)
78
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
85
if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) {
79
};
86
uint32_t wi;
80
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
87
const xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, &wi);
81
- intptr_t x = simd_data(desc);
88
- return (entry->vaddr & get_vpn_mask(env, dtlb, wi)) | entry->asid;
82
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
83
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
84
float64 *d = vd, *n = vn, *m = vm;
89
+
85
+
90
+ if (entry) {
86
for (i = 0; i < opr_sz; i++) {
91
+ return (entry->vaddr & get_vpn_mask(env, dtlb, wi)) | entry->asid;
87
float64 mm = m[i];
92
+ } else {
88
intptr_t xx = x;
93
+ return 0;
89
+ int flags = 0;
94
+ }
90
+
95
} else {
91
if (float64_is_neg(mm)) {
96
return v & REGION_PAGE_MASK;
92
- mm = float64_abs(mm);
93
+ if (fpcr_ah) {
94
+ flags = float_muladd_negate_product;
95
+ } else {
96
+ mm = float64_abs(mm);
97
+ }
98
xx += 8;
99
}
100
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s);
101
+ d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
97
}
102
}
98
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rtlb0)(CPUXtensaState *env, uint32_t v, uint32_t dtlb)
99
uint32_t HELPER(rtlb1)(CPUXtensaState *env, uint32_t v, uint32_t dtlb)
100
{
101
const xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, NULL);
102
- return entry->paddr | entry->attr;
103
+
104
+ if (entry) {
105
+ return entry->paddr | entry->attr;
106
+ } else {
107
+ return 0;
108
+ }
109
}
103
}
110
104
111
void HELPER(itlb)(CPUXtensaState *env, uint32_t v, uint32_t dtlb)
105
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
112
@@ -XXX,XX +XXX,XX @@ void HELPER(itlb)(CPUXtensaState *env, uint32_t v, uint32_t dtlb)
106
index XXXXXXX..XXXXXXX 100644
113
if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) {
107
--- a/target/arm/tcg/translate-sve.c
114
uint32_t wi;
108
+++ b/target/arm/tcg/translate-sve.c
115
xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, &wi);
109
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
116
- if (entry->variable && entry->asid) {
110
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
117
+ if (entry && entry->variable && entry->asid) {
111
};
118
tlb_flush_page(env_cpu(env), entry->vaddr);
112
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
119
entry->asid = 0;
113
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
120
}
114
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm,
121
@@ -XXX,XX +XXX,XX @@ void HELPER(wtlb)(CPUXtensaState *env, uint32_t p, uint32_t v, uint32_t dtlb)
115
+ a->imm | (s->fpcr_ah << 3),
122
uint32_t vpn;
116
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
123
uint32_t wi;
117
124
uint32_t ei;
118
/*
125
- split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei);
126
- xtensa_tlb_set_entry(env, dtlb, wi, ei, vpn, p);
127
+ if (split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei)) {
128
+ xtensa_tlb_set_entry(env, dtlb, wi, ei, vpn, p);
129
+ }
130
}
131
132
/*!
133
--
119
--
134
2.34.1
120
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
"target/arm/cpu.h" is target specific, any file including it
3
The negation step in FCMLA mustn't negate a NaN when FPCR.AH
4
becomes target specific too, thus this is the same for any file
4
is set. Handle this by passing FPCR.AH to the helper via the
5
including "hw/misc/xlnx-versal-crl.h".
5
SIMD data field, and use this to select whether to do the
6
negation via XOR or via the muladd negate_product flag.
6
7
7
"hw/misc/xlnx-versal-crl.h" doesn't require any target specific
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
definition however, only the target-agnostic QOM definitions
9
Message-id: 20250129013857.135256-26-richard.henderson@linaro.org
9
from "target/arm/cpu-qom.h". Include the latter header to avoid
10
[PMM: Expanded commit message]
10
tainting unnecessary objects as target-specific.
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20240118200643.29037-14-philmd@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
13
---
17
include/hw/misc/xlnx-versal-crl.h | 2 +-
14
target/arm/tcg/translate-a64.c | 2 +-
18
hw/misc/xlnx-versal-crl.c | 1 +
15
target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++--------------
19
2 files changed, 2 insertions(+), 1 deletion(-)
16
2 files changed, 40 insertions(+), 28 deletions(-)
20
17
21
diff --git a/include/hw/misc/xlnx-versal-crl.h b/include/hw/misc/xlnx-versal-crl.h
18
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
22
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
23
--- a/include/hw/misc/xlnx-versal-crl.h
20
--- a/target/arm/tcg/translate-a64.c
24
+++ b/include/hw/misc/xlnx-versal-crl.h
21
+++ b/target/arm/tcg/translate-a64.c
25
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
26
23
27
#include "hw/sysbus.h"
24
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
28
#include "hw/register.h"
25
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
29
-#include "target/arm/cpu.h"
26
- a->rot, fn[a->esz]);
30
+#include "target/arm/cpu-qom.h"
27
+ a->rot | (s->fpcr_ah << 2), fn[a->esz]);
31
28
return true;
32
#define TYPE_XLNX_VERSAL_CRL "xlnx-versal-crl"
29
}
33
OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCRL, XLNX_VERSAL_CRL)
30
34
diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c
31
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
35
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
36
--- a/hw/misc/xlnx-versal-crl.c
33
--- a/target/arm/tcg/vec_helper.c
37
+++ b/hw/misc/xlnx-versal-crl.c
34
+++ b/target/arm/tcg/vec_helper.c
38
@@ -XXX,XX +XXX,XX @@
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
39
#include "hw/register.h"
36
uintptr_t opr_sz = simd_oprsz(desc);
40
#include "hw/resettable.h"
37
float16 *d = vd, *n = vn, *m = vm, *a = va;
41
38
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
42
+#include "target/arm/cpu.h"
39
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
43
#include "target/arm/arm-powerctl.h"
40
- uint32_t neg_real = flip ^ neg_imag;
44
#include "target/arm/multiprocessing.h"
41
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
45
#include "hw/misc/xlnx-versal-crl.h"
42
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
43
+ uint32_t negf_real = flip ^ negf_imag;
44
+ float16 negx_imag, negx_real;
45
uintptr_t i;
46
47
- /* Shift boolean to the sign bit so we can xor to negate. */
48
- neg_real <<= 15;
49
- neg_imag <<= 15;
50
+ /* With AH=0, use negx; with AH=1 use negf. */
51
+ negx_real = (negf_real & ~fpcr_ah) << 15;
52
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
53
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
54
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
55
56
for (i = 0; i < opr_sz / 2; i += 2) {
57
float16 e2 = n[H2(i + flip)];
58
- float16 e1 = m[H2(i + flip)] ^ neg_real;
59
+ float16 e1 = m[H2(i + flip)] ^ negx_real;
60
float16 e4 = e2;
61
- float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
62
+ float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag;
63
64
- d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
65
- d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
66
+ d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst);
67
+ d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst);
68
}
69
clear_tail(d, opr_sz, simd_maxsz(desc));
70
}
71
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
72
uintptr_t opr_sz = simd_oprsz(desc);
73
float32 *d = vd, *n = vn, *m = vm, *a = va;
74
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
75
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
76
- uint32_t neg_real = flip ^ neg_imag;
77
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
78
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
79
+ uint32_t negf_real = flip ^ negf_imag;
80
+ float32 negx_imag, negx_real;
81
uintptr_t i;
82
83
- /* Shift boolean to the sign bit so we can xor to negate. */
84
- neg_real <<= 31;
85
- neg_imag <<= 31;
86
+ /* With AH=0, use negx; with AH=1 use negf. */
87
+ negx_real = (negf_real & ~fpcr_ah) << 31;
88
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
89
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
90
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
91
92
for (i = 0; i < opr_sz / 4; i += 2) {
93
float32 e2 = n[H4(i + flip)];
94
- float32 e1 = m[H4(i + flip)] ^ neg_real;
95
+ float32 e1 = m[H4(i + flip)] ^ negx_real;
96
float32 e4 = e2;
97
- float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
98
+ float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag;
99
100
- d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
101
- d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
102
+ d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst);
103
+ d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst);
104
}
105
clear_tail(d, opr_sz, simd_maxsz(desc));
106
}
107
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
108
uintptr_t opr_sz = simd_oprsz(desc);
109
float64 *d = vd, *n = vn, *m = vm, *a = va;
110
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
111
- uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
112
- uint64_t neg_real = flip ^ neg_imag;
113
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
114
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
115
+ uint32_t negf_real = flip ^ negf_imag;
116
+ float64 negx_real, negx_imag;
117
uintptr_t i;
118
119
- /* Shift boolean to the sign bit so we can xor to negate. */
120
- neg_real <<= 63;
121
- neg_imag <<= 63;
122
+ /* With AH=0, use negx; with AH=1 use negf. */
123
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
124
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
125
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
126
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
127
128
for (i = 0; i < opr_sz / 8; i += 2) {
129
float64 e2 = n[i + flip];
130
- float64 e1 = m[i + flip] ^ neg_real;
131
+ float64 e1 = m[i + flip] ^ negx_real;
132
float64 e4 = e2;
133
- float64 e3 = m[i + 1 - flip] ^ neg_imag;
134
+ float64 e3 = m[i + 1 - flip] ^ negx_imag;
135
136
- d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
137
- d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
138
+ d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst);
139
+ d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst);
140
}
141
clear_tail(d, opr_sz, simd_maxsz(desc));
142
}
46
--
143
--
47
2.34.1
144
2.34.1
48
49
diff view generated by jsdifflib
1
From: Gustavo Romero <gustavo.romero@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add a note on CPU features that are off by default in `virt` machines.
3
The negation step in FCMLA by index mustn't negate a NaN when
4
Some CPU features will remain off even if a CPU-capable CPU (e.g.,
4
FPCR.AH is set. Use the same approach as vector FCMLA of
5
`-cpu max`) is selected because they require support in both the CPU
5
passing in FPCR.AH and using it to select whether to negate
6
itself and in the wider system. Therefore, the user, besides selecting a
6
by XOR or by the muladd negate_product flag.
7
CPU that supports such features, must also turn on the feature using a
8
machine option.
9
7
10
Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20240122211215.95073-1-gustavo.romero@linaro.org
9
Message-id: 20250129013857.135256-27-richard.henderson@linaro.org
10
[PMM: Expanded commit message]
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
13
---
15
docs/system/arm/virt.rst | 13 +++++++++++++
14
target/arm/tcg/translate-a64.c | 2 +-
16
1 file changed, 13 insertions(+)
15
target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++--------------
16
2 files changed, 27 insertions(+), 19 deletions(-)
17
17
18
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
18
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
19
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
20
--- a/docs/system/arm/virt.rst
20
--- a/target/arm/tcg/translate-a64.c
21
+++ b/docs/system/arm/virt.rst
21
+++ b/target/arm/tcg/translate-a64.c
22
@@ -XXX,XX +XXX,XX @@ Supported guest CPU types:
22
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
23
Note that the default is ``cortex-a15``, so for an AArch64 guest you must
23
if (fp_access_check(s)) {
24
specify a CPU type.
24
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
25
25
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
26
+Also, please note that passing ``max`` CPU (i.e. ``-cpu max``) won't
26
- (a->idx << 2) | a->rot, fn);
27
+enable all the CPU features for a given ``virt`` machine. Where a CPU
27
+ (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
28
+architectural feature requires support in both the CPU itself and in the
28
}
29
+wider system (e.g. the MTE feature), it may not be enabled by default,
29
return true;
30
+but instead requires a machine option to enable it.
30
}
31
+
31
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
32
+For example, MTE support must be enabled with ``-machine virt,mte=on``,
32
index XXXXXXX..XXXXXXX 100644
33
+as well as by selecting an MTE-capable CPU (e.g., ``max``) with the
33
--- a/target/arm/tcg/vec_helper.c
34
+``-cpu`` option.
34
+++ b/target/arm/tcg/vec_helper.c
35
+
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
36
+See the machine-specific options below, or check them for a given machine
36
uintptr_t opr_sz = simd_oprsz(desc);
37
+by passing the ``help`` suboption, like: ``-machine virt-9.0,help``.
37
float16 *d = vd, *n = vn, *m = vm, *a = va;
38
+
38
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
39
Graphics output is available, but unlike the x86 PC machine types
39
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
40
there is no default display device enabled: you should select one from
40
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
41
the Display devices section of "-device help". The recommended option
41
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
42
- uint32_t neg_real = flip ^ neg_imag;
43
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
44
+ uint32_t negf_real = flip ^ negf_imag;
45
intptr_t elements = opr_sz / sizeof(float16);
46
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
47
+ float16 negx_imag, negx_real;
48
intptr_t i, j;
49
50
- /* Shift boolean to the sign bit so we can xor to negate. */
51
- neg_real <<= 15;
52
- neg_imag <<= 15;
53
+ /* With AH=0, use negx; with AH=1 use negf. */
54
+ negx_real = (negf_real & ~fpcr_ah) << 15;
55
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
56
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
57
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
58
59
for (i = 0; i < elements; i += eltspersegment) {
60
float16 mr = m[H2(i + 2 * index + 0)];
61
float16 mi = m[H2(i + 2 * index + 1)];
62
- float16 e1 = neg_real ^ (flip ? mi : mr);
63
- float16 e3 = neg_imag ^ (flip ? mr : mi);
64
+ float16 e1 = negx_real ^ (flip ? mi : mr);
65
+ float16 e3 = negx_imag ^ (flip ? mr : mi);
66
67
for (j = i; j < i + eltspersegment; j += 2) {
68
float16 e2 = n[H2(j + flip)];
69
float16 e4 = e2;
70
71
- d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
72
- d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
73
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
74
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
75
}
76
}
77
clear_tail(d, opr_sz, simd_maxsz(desc));
78
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
79
uintptr_t opr_sz = simd_oprsz(desc);
80
float32 *d = vd, *n = vn, *m = vm, *a = va;
81
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
82
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
83
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
84
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
85
- uint32_t neg_real = flip ^ neg_imag;
86
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
87
+ uint32_t negf_real = flip ^ negf_imag;
88
intptr_t elements = opr_sz / sizeof(float32);
89
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
90
+ float32 negx_imag, negx_real;
91
intptr_t i, j;
92
93
- /* Shift boolean to the sign bit so we can xor to negate. */
94
- neg_real <<= 31;
95
- neg_imag <<= 31;
96
+ /* With AH=0, use negx; with AH=1 use negf. */
97
+ negx_real = (negf_real & ~fpcr_ah) << 31;
98
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
99
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
100
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
101
102
for (i = 0; i < elements; i += eltspersegment) {
103
float32 mr = m[H4(i + 2 * index + 0)];
104
float32 mi = m[H4(i + 2 * index + 1)];
105
- float32 e1 = neg_real ^ (flip ? mi : mr);
106
- float32 e3 = neg_imag ^ (flip ? mr : mi);
107
+ float32 e1 = negx_real ^ (flip ? mi : mr);
108
+ float32 e3 = negx_imag ^ (flip ? mr : mi);
109
110
for (j = i; j < i + eltspersegment; j += 2) {
111
float32 e2 = n[H4(j + flip)];
112
float32 e4 = e2;
113
114
- d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
115
- d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
116
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
117
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
118
}
119
}
120
clear_tail(d, opr_sz, simd_maxsz(desc));
42
--
121
--
43
2.34.1
122
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
hw/cpu/a9mpcore.c doesn't require "cpu.h" anymore.
3
The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is
4
By removing it, the unit becomes target agnostic:
4
set. Use the same approach as we did for A64 FCMLA of passing in
5
we can build it once. Update meson.
5
FPCR.AH and using it to select whether to negate by XOR or by the
6
muladd negate_product flag.
6
7
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20250129013857.135256-28-richard.henderson@linaro.org
9
Message-id: 20240118200643.29037-13-philmd@linaro.org
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
---
12
hw/cpu/a9mpcore.c | 2 +-
13
target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++-------------
13
hw/cpu/meson.build | 2 +-
14
target/arm/tcg/translate-sve.c | 2 +-
14
2 files changed, 2 insertions(+), 2 deletions(-)
15
2 files changed, 43 insertions(+), 28 deletions(-)
15
16
16
diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c
17
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
17
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/cpu/a9mpcore.c
19
--- a/target/arm/tcg/sve_helper.c
19
+++ b/hw/cpu/a9mpcore.c
20
+++ b/target/arm/tcg/sve_helper.c
20
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
21
#include "hw/irq.h"
22
void *vg, float_status *status, uint32_t desc)
22
#include "hw/qdev-properties.h"
23
{
23
#include "hw/core/cpu.h"
24
intptr_t j, i = simd_oprsz(desc);
24
-#include "cpu.h"
25
- unsigned rot = simd_data(desc);
25
+#include "target/arm/cpu-qom.h"
26
- bool flip = rot & 1;
26
27
- float16 neg_imag, neg_real;
27
#define A9_GIC_NUM_PRIORITY_BITS 5
28
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
28
29
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
29
diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build
30
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
31
+ uint32_t negf_real = flip ^ negf_imag;
32
+ float16 negx_imag, negx_real;
33
uint64_t *g = vg;
34
35
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
36
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
37
+ /* With AH=0, use negx; with AH=1 use negf. */
38
+ negx_real = (negf_real & ~fpcr_ah) << 15;
39
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
40
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
41
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
42
43
do {
44
uint64_t pg = g[(i - 1) >> 6];
45
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
46
mi = *(float16 *)(vm + H1_2(j));
47
48
e2 = (flip ? ni : nr);
49
- e1 = (flip ? mi : mr) ^ neg_real;
50
+ e1 = (flip ? mi : mr) ^ negx_real;
51
e4 = e2;
52
- e3 = (flip ? mr : mi) ^ neg_imag;
53
+ e3 = (flip ? mr : mi) ^ negx_imag;
54
55
if (likely((pg >> (i & 63)) & 1)) {
56
d = *(float16 *)(va + H1_2(i));
57
- d = float16_muladd(e2, e1, d, 0, status);
58
+ d = float16_muladd(e2, e1, d, negf_real, status);
59
*(float16 *)(vd + H1_2(i)) = d;
60
}
61
if (likely((pg >> (j & 63)) & 1)) {
62
d = *(float16 *)(va + H1_2(j));
63
- d = float16_muladd(e4, e3, d, 0, status);
64
+ d = float16_muladd(e4, e3, d, negf_imag, status);
65
*(float16 *)(vd + H1_2(j)) = d;
66
}
67
} while (i & 63);
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
69
void *vg, float_status *status, uint32_t desc)
70
{
71
intptr_t j, i = simd_oprsz(desc);
72
- unsigned rot = simd_data(desc);
73
- bool flip = rot & 1;
74
- float32 neg_imag, neg_real;
75
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
76
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
77
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
78
+ uint32_t negf_real = flip ^ negf_imag;
79
+ float32 negx_imag, negx_real;
80
uint64_t *g = vg;
81
82
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
83
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
84
+ /* With AH=0, use negx; with AH=1 use negf. */
85
+ negx_real = (negf_real & ~fpcr_ah) << 31;
86
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
87
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
88
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
89
90
do {
91
uint64_t pg = g[(i - 1) >> 6];
92
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
93
mi = *(float32 *)(vm + H1_2(j));
94
95
e2 = (flip ? ni : nr);
96
- e1 = (flip ? mi : mr) ^ neg_real;
97
+ e1 = (flip ? mi : mr) ^ negx_real;
98
e4 = e2;
99
- e3 = (flip ? mr : mi) ^ neg_imag;
100
+ e3 = (flip ? mr : mi) ^ negx_imag;
101
102
if (likely((pg >> (i & 63)) & 1)) {
103
d = *(float32 *)(va + H1_2(i));
104
- d = float32_muladd(e2, e1, d, 0, status);
105
+ d = float32_muladd(e2, e1, d, negf_real, status);
106
*(float32 *)(vd + H1_2(i)) = d;
107
}
108
if (likely((pg >> (j & 63)) & 1)) {
109
d = *(float32 *)(va + H1_2(j));
110
- d = float32_muladd(e4, e3, d, 0, status);
111
+ d = float32_muladd(e4, e3, d, negf_imag, status);
112
*(float32 *)(vd + H1_2(j)) = d;
113
}
114
} while (i & 63);
115
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
116
void *vg, float_status *status, uint32_t desc)
117
{
118
intptr_t j, i = simd_oprsz(desc);
119
- unsigned rot = simd_data(desc);
120
- bool flip = rot & 1;
121
- float64 neg_imag, neg_real;
122
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
123
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
124
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
125
+ uint32_t negf_real = flip ^ negf_imag;
126
+ float64 negx_imag, negx_real;
127
uint64_t *g = vg;
128
129
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
130
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
131
+ /* With AH=0, use negx; with AH=1 use negf. */
132
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
133
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
134
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
135
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
136
137
do {
138
uint64_t pg = g[(i - 1) >> 6];
139
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
140
mi = *(float64 *)(vm + H1_2(j));
141
142
e2 = (flip ? ni : nr);
143
- e1 = (flip ? mi : mr) ^ neg_real;
144
+ e1 = (flip ? mi : mr) ^ negx_real;
145
e4 = e2;
146
- e3 = (flip ? mr : mi) ^ neg_imag;
147
+ e3 = (flip ? mr : mi) ^ negx_imag;
148
149
if (likely((pg >> (i & 63)) & 1)) {
150
d = *(float64 *)(va + H1_2(i));
151
- d = float64_muladd(e2, e1, d, 0, status);
152
+ d = float64_muladd(e2, e1, d, negf_real, status);
153
*(float64 *)(vd + H1_2(i)) = d;
154
}
155
if (likely((pg >> (j & 63)) & 1)) {
156
d = *(float64 *)(va + H1_2(j));
157
- d = float64_muladd(e4, e3, d, 0, status);
158
+ d = float64_muladd(e4, e3, d, negf_imag, status);
159
*(float64 *)(vd + H1_2(j)) = d;
160
}
161
} while (i & 63);
162
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
163
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/cpu/meson.build
164
--- a/target/arm/tcg/translate-sve.c
32
+++ b/hw/cpu/meson.build
165
+++ b/target/arm/tcg/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ system_ss.add(files('core.c', 'cluster.c'))
166
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
34
167
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
35
system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
168
};
36
system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
169
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
37
-specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c'))
170
- a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
38
+system_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c'))
171
+ a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
39
specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c'))
172
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
173
174
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
40
--
175
--
41
2.34.1
176
2.34.1
42
43
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Various files in hw/arm/ don't require "cpu.h" anymore.
3
Handle FPCR.AH's requirement to not negate the sign of a NaN
4
Except virt-acpi-build.c, all of them don't require any
4
in FMLSL by element and vector, using the usual trick of
5
ARM specific knowledge anymore and can be built once as
5
negating by XOR when AH=0 and by muladd flags when AH=1.
6
target agnostic units. Update meson accordingly.
7
6
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Since we have the CPUARMState* in the helper anyway, we can
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
look directly at env->vfp.fpcr and don't need to pass in the
10
Message-id: 20240118200643.29037-21-philmd@linaro.org
9
FPCR.AH value via the SIMD data word.
10
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20250129013857.135256-31-richard.henderson@linaro.org
13
[PMM: commit message tweaked]
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
16
---
13
hw/arm/collie.c | 1 -
17
target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++-------------
14
hw/arm/gumstix.c | 1 -
18
1 file changed, 46 insertions(+), 25 deletions(-)
15
hw/arm/integratorcp.c | 1 -
16
hw/arm/mainstone.c | 1 -
17
hw/arm/musicpal.c | 1 -
18
hw/arm/omap2.c | 1 -
19
hw/arm/omap_sx1.c | 1 -
20
hw/arm/palm.c | 1 -
21
hw/arm/spitz.c | 1 -
22
hw/arm/strongarm.c | 1 -
23
hw/arm/versatilepb.c | 1 -
24
hw/arm/vexpress.c | 1 -
25
hw/arm/virt-acpi-build.c | 1 -
26
hw/arm/xilinx_zynq.c | 1 -
27
hw/arm/xlnx-versal-virt.c | 1 -
28
hw/arm/z2.c | 1 -
29
hw/arm/meson.build | 23 ++++++++++++-----------
30
17 files changed, 12 insertions(+), 27 deletions(-)
31
19
32
diff --git a/hw/arm/collie.c b/hw/arm/collie.c
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
33
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/arm/collie.c
22
--- a/target/arm/tcg/vec_helper.c
35
+++ b/hw/arm/collie.c
23
+++ b/target/arm/tcg/vec_helper.c
36
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
37
#include "hw/arm/boot.h"
38
#include "hw/block/flash.h"
39
#include "exec/address-spaces.h"
40
-#include "cpu.h"
41
#include "qom/object.h"
42
#include "qemu/error-report.h"
43
44
diff --git a/hw/arm/gumstix.c b/hw/arm/gumstix.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/hw/arm/gumstix.c
47
+++ b/hw/arm/gumstix.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "hw/boards.h"
50
#include "exec/address-spaces.h"
51
#include "sysemu/qtest.h"
52
-#include "cpu.h"
53
54
#define CONNEX_FLASH_SIZE (16 * MiB)
55
#define CONNEX_RAM_SIZE (64 * MiB)
56
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/hw/arm/integratorcp.c
59
+++ b/hw/arm/integratorcp.c
60
@@ -XXX,XX +XXX,XX @@
61
62
#include "qemu/osdep.h"
63
#include "qapi/error.h"
64
-#include "cpu.h"
65
#include "hw/sysbus.h"
66
#include "migration/vmstate.h"
67
#include "hw/boards.h"
68
diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/hw/arm/mainstone.c
71
+++ b/hw/arm/mainstone.c
72
@@ -XXX,XX +XXX,XX @@
73
#include "hw/block/flash.h"
74
#include "hw/sysbus.h"
75
#include "exec/address-spaces.h"
76
-#include "cpu.h"
77
78
/* Device addresses */
79
#define MST_FPGA_PHYS    0x08000000
80
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/hw/arm/musicpal.c
83
+++ b/hw/arm/musicpal.c
84
@@ -XXX,XX +XXX,XX @@
85
#include "qemu/osdep.h"
86
#include "qemu/units.h"
87
#include "qapi/error.h"
88
-#include "cpu.h"
89
#include "hw/sysbus.h"
90
#include "migration/vmstate.h"
91
#include "hw/arm/boot.h"
92
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/arm/omap2.c
95
+++ b/hw/arm/omap2.c
96
@@ -XXX,XX +XXX,XX @@
97
#include "qemu/osdep.h"
98
#include "qemu/error-report.h"
99
#include "qapi/error.h"
100
-#include "cpu.h"
101
#include "exec/address-spaces.h"
102
#include "sysemu/blockdev.h"
103
#include "sysemu/qtest.h"
104
diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/hw/arm/omap_sx1.c
107
+++ b/hw/arm/omap_sx1.c
108
@@ -XXX,XX +XXX,XX @@
109
#include "hw/block/flash.h"
110
#include "sysemu/qtest.h"
111
#include "exec/address-spaces.h"
112
-#include "cpu.h"
113
#include "qemu/cutils.h"
114
#include "qemu/error-report.h"
115
116
diff --git a/hw/arm/palm.c b/hw/arm/palm.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/hw/arm/palm.c
119
+++ b/hw/arm/palm.c
120
@@ -XXX,XX +XXX,XX @@
121
#include "hw/input/tsc2xxx.h"
122
#include "hw/irq.h"
123
#include "hw/loader.h"
124
-#include "cpu.h"
125
#include "qemu/cutils.h"
126
#include "qom/object.h"
127
#include "qemu/error-report.h"
128
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/hw/arm/spitz.c
131
+++ b/hw/arm/spitz.c
132
@@ -XXX,XX +XXX,XX @@
133
#include "hw/adc/max111x.h"
134
#include "migration/vmstate.h"
135
#include "exec/address-spaces.h"
136
-#include "cpu.h"
137
#include "qom/object.h"
138
#include "audio/audio.h"
139
140
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
141
index XXXXXXX..XXXXXXX 100644
142
--- a/hw/arm/strongarm.c
143
+++ b/hw/arm/strongarm.c
144
@@ -XXX,XX +XXX,XX @@
145
*/
25
*/
146
26
147
#include "qemu/osdep.h"
27
static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
148
-#include "cpu.h"
28
- uint32_t desc, bool fz16)
149
#include "hw/irq.h"
29
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
150
#include "hw/qdev-properties.h"
30
{
151
#include "hw/qdev-properties-system.h"
31
intptr_t i, oprsz = simd_oprsz(desc);
152
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
32
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
153
index XXXXXXX..XXXXXXX 100644
33
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
154
--- a/hw/arm/versatilepb.c
34
int is_q = oprsz == 16;
155
+++ b/hw/arm/versatilepb.c
35
uint64_t n_4, m_4;
156
@@ -XXX,XX +XXX,XX @@
36
157
37
- /* Pre-load all of the f16 data, avoiding overlap issues. */
158
#include "qemu/osdep.h"
38
- n_4 = load4_f16(vn, is_q, is_2);
159
#include "qapi/error.h"
39
+ /*
160
-#include "cpu.h"
40
+ * Pre-load all of the f16 data, avoiding overlap issues.
161
#include "hw/sysbus.h"
41
+ * Negate all inputs for AH=0 FMLSL at once.
162
#include "migration/vmstate.h"
42
+ */
163
#include "hw/arm/boot.h"
43
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
164
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
44
m_4 = load4_f16(vm, is_q, is_2);
165
index XXXXXXX..XXXXXXX 100644
45
166
--- a/hw/arm/vexpress.c
46
- /* Negate all inputs for FMLSL at once. */
167
+++ b/hw/arm/vexpress.c
47
- if (is_s) {
168
@@ -XXX,XX +XXX,XX @@
48
- n_4 ^= 0x8000800080008000ull;
169
#include "qemu/osdep.h"
49
- }
170
#include "qapi/error.h"
50
-
171
#include "qemu/datadir.h"
51
for (i = 0; i < oprsz / 4; i++) {
172
-#include "cpu.h"
52
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
173
#include "hw/sysbus.h"
53
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
174
#include "hw/arm/boot.h"
54
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
175
#include "hw/arm/primecell.h"
55
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
176
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
56
}
177
index XXXXXXX..XXXXXXX 100644
57
clear_tail(d, oprsz, simd_maxsz(desc));
178
--- a/hw/arm/virt-acpi-build.c
58
}
179
+++ b/hw/arm/virt-acpi-build.c
59
@@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
180
@@ -XXX,XX +XXX,XX @@
60
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
181
#include "qemu/error-report.h"
61
CPUARMState *env, uint32_t desc)
182
#include "trace.h"
62
{
183
#include "hw/core/cpu.h"
63
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
184
-#include "target/arm/cpu.h"
64
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
185
#include "hw/acpi/acpi-defs.h"
65
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
186
#include "hw/acpi/acpi.h"
66
+
187
#include "hw/nvram/fw_cfg_acpi.h"
67
+ do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
188
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
68
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
189
index XXXXXXX..XXXXXXX 100644
69
}
190
--- a/hw/arm/xilinx_zynq.c
70
191
+++ b/hw/arm/xilinx_zynq.c
71
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
192
@@ -XXX,XX +XXX,XX @@
72
CPUARMState *env, uint32_t desc)
193
#include "qemu/osdep.h"
73
{
194
#include "qemu/units.h"
74
- do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
195
#include "qapi/error.h"
75
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
196
-#include "cpu.h"
76
+ uint64_t negx = 0;
197
#include "hw/sysbus.h"
77
+ int negf = 0;
198
#include "hw/arm/boot.h"
78
+
199
#include "net/net.h"
79
+ if (is_s) {
200
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
80
+ if (env->vfp.fpcr & FPCR_AH) {
201
index XXXXXXX..XXXXXXX 100644
81
+ negf = float_muladd_negate_product;
202
--- a/hw/arm/xlnx-versal-virt.c
82
+ } else {
203
+++ b/hw/arm/xlnx-versal-virt.c
83
+ negx = 0x8000800080008000ull;
204
@@ -XXX,XX +XXX,XX @@
84
+ }
205
#include "hw/boards.h"
85
+ }
206
#include "hw/sysbus.h"
86
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
207
#include "hw/arm/fdt.h"
87
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
208
-#include "cpu.h"
88
}
209
#include "hw/qdev-properties.h"
89
210
#include "hw/arm/xlnx-versal.h"
90
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
211
#include "hw/arm/boot.h"
91
}
212
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
92
213
index XXXXXXX..XXXXXXX 100644
93
static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
214
--- a/hw/arm/z2.c
94
- uint32_t desc, bool fz16)
215
+++ b/hw/arm/z2.c
95
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
216
@@ -XXX,XX +XXX,XX @@
96
{
217
#include "hw/audio/wm8750.h"
97
intptr_t i, oprsz = simd_oprsz(desc);
218
#include "audio/audio.h"
98
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
219
#include "exec/address-spaces.h"
99
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
220
-#include "cpu.h"
100
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
221
#include "qom/object.h"
101
int is_q = oprsz == 16;
222
#include "qapi/error.h"
102
uint64_t n_4;
223
103
float32 m_1;
224
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
104
225
index XXXXXXX..XXXXXXX 100644
105
- /* Pre-load all of the f16 data, avoiding overlap issues. */
226
--- a/hw/arm/meson.build
106
- n_4 = load4_f16(vn, is_q, is_2);
227
+++ b/hw/arm/meson.build
107
-
228
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
108
- /* Negate all inputs for FMLSL at once. */
229
arm_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mainstone.c'))
109
- if (is_s) {
230
arm_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
110
- n_4 ^= 0x8000800080008000ull;
231
arm_ss.add(when: 'CONFIG_MUSICPAL', if_true: files('musicpal.c'))
111
- }
232
-arm_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c'))
112
-
233
arm_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
113
+ /*
234
arm_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
114
+ * Pre-load all of the f16 data, avoiding overlap issues.
235
arm_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
115
+ * Negate all inputs for AH=0 FMLSL at once.
236
arm_ss.add(when: 'CONFIG_NSERIES', if_true: files('nseries.c'))
116
+ */
237
-arm_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
117
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
238
-arm_ss.add(when: 'CONFIG_CHEETAH', if_true: files('palm.c'))
118
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
239
-arm_ss.add(when: 'CONFIG_GUMSTIX', if_true: files('gumstix.c'))
119
240
-arm_ss.add(when: 'CONFIG_SPITZ', if_true: files('spitz.c'))
120
for (i = 0; i < oprsz / 4; i++) {
241
-arm_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c'))
121
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
242
arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c'))
122
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
243
arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c'))
123
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
244
arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c'))
124
}
245
arm_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: files('stm32vldiscovery.c'))
125
clear_tail(d, oprsz, simd_maxsz(desc));
246
-arm_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c'))
126
}
247
-arm_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
127
@@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
248
-arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
128
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
249
arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
129
CPUARMState *env, uint32_t desc)
250
arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
130
{
251
131
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
252
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
132
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
253
arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c'))
133
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
254
arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c'))
134
+
255
arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
135
+ do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
256
-arm_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c', 'omap2.c'))
136
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
257
-arm_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c'))
137
}
258
+arm_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c'))
138
259
arm_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c'))
139
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
260
arm_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c'))
140
CPUARMState *env, uint32_t desc)
261
arm_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 'bananapi_m2u.c'))
141
{
262
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
142
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
263
arm_ss.add(when: 'CONFIG_XEN', if_true: files('xen_arm.c'))
143
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
264
144
+ uint64_t negx = 0;
265
system_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c'))
145
+ int negf = 0;
266
+system_ss.add(when: 'CONFIG_CHEETAH', if_true: files('palm.c'))
146
+
267
+system_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c'))
147
+ if (is_s) {
268
system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c'))
148
+ if (env->vfp.fpcr & FPCR_AH) {
269
+system_ss.add(when: 'CONFIG_GUMSTIX', if_true: files('gumstix.c'))
149
+ negf = float_muladd_negate_product;
270
+system_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c'))
150
+ } else {
271
+system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap2.c'))
151
+ negx = 0x8000800080008000ull;
272
system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c'))
152
+ }
273
+system_ss.add(when: 'CONFIG_SPITZ', if_true: files('spitz.c'))
153
+ }
274
+system_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c'))
154
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
275
+system_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
155
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
276
system_ss.add(when: 'CONFIG_TOSA', if_true: files('tosa.c'))
156
}
277
+system_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
157
278
+system_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
279
+system_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c'))
280
281
hw_arch += {'arm': arm_ss}
282
--
158
--
283
2.34.1
159
2.34.1
284
285
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
include/hw/arm/xlnx-versal.h uses the ARMCPU structure which
3
Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE
4
is defined in the "target/arm/cpu.h" header. Include it in
4
FMLSL (indexed), using the usual trick of negating by XOR when AH=0
5
order to avoid this error when refactoring unrelated headers:
5
and by muladd flags when AH=1.
6
6
7
In file included from hw/arm/xlnx-versal-virt.c:20:
7
Since we have the CPUARMState* in the helper anyway, we can
8
include/hw/arm/xlnx-versal.h:62:23: error: array has incomplete element type 'ARMCPU' (aka 'struct ArchCPU')
8
look directly at env->vfp.fpcr and don't need to pass in the
9
ARMCPU cpu[XLNX_VERSAL_NR_ACPUS];
9
FPCR.AH value via the SIMD data word.
10
^
11
10
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20250129013857.135256-32-richard.henderson@linaro.org
14
Message-id: 20240118200643.29037-5-philmd@linaro.org
13
[PMM: commit message tweaked]
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
16
---
17
include/hw/arm/xlnx-versal.h | 1 +
17
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
18
1 file changed, 1 insertion(+)
18
1 file changed, 12 insertions(+), 3 deletions(-)
19
19
20
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
21
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/arm/xlnx-versal.h
22
--- a/target/arm/tcg/vec_helper.c
23
+++ b/include/hw/arm/xlnx-versal.h
23
+++ b/target/arm/tcg/vec_helper.c
24
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
25
#include "hw/net/xlnx-versal-canfd.h"
25
CPUARMState *env, uint32_t desc)
26
#include "hw/misc/xlnx-versal-cfu.h"
26
{
27
#include "hw/misc/xlnx-versal-cframe-reg.h"
27
intptr_t i, j, oprsz = simd_oprsz(desc);
28
+#include "target/arm/cpu.h"
28
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
29
29
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
30
#define TYPE_XLNX_VERSAL "xlnx-versal"
30
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
31
OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
31
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
32
float_status *status = &env->vfp.fp_status_a64;
33
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
34
+ int negx = 0, negf = 0;
35
+
36
+ if (is_s) {
37
+ if (env->vfp.fpcr & FPCR_AH) {
38
+ negf = float_muladd_negate_product;
39
+ } else {
40
+ negx = 0x8000;
41
+ }
42
+ }
43
44
for (i = 0; i < oprsz; i += 16) {
45
float16 mm_16 = *(float16 *)(vm + i + idx);
46
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
47
48
for (j = 0; j < 16; j += sizeof(float32)) {
49
- float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
50
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx;
51
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
52
float32 aa = *(float32 *)(va + H1_4(i + j));
53
54
*(float32 *)(vd + H1_4(i + j)) =
55
- float32_muladd(nn, mm, aa, 0, status);
56
+ float32_muladd(nn, mm, aa, negf, status);
57
}
58
}
59
}
32
--
60
--
33
2.34.1
61
2.34.1
34
35
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Now that we can access the M-profile bank index
3
Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE
4
definitions from the target-agnostic "cpu-qom.h"
4
FMLSL (vectors), using the usual trick of negating by XOR when AH=0
5
header, we don't need the huge "cpu.h" anymore
5
and by muladd flags when AH=1.
6
(except in hw/arm/armv7m.c). Reduce its inclusion
7
to the source unit.
8
6
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Since we have the CPUARMState* in the helper anyway, we can
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
look directly at env->vfp.fpcr and don't need to pass in the
11
Message-id: 20240118200643.29037-17-philmd@linaro.org
9
FPCR.AH value via the SIMD data word.
10
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20250129013857.135256-33-richard.henderson@linaro.org
13
[PMM: tweaked commit message]
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
16
---
14
include/hw/intc/armv7m_nvic.h | 2 +-
17
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
15
hw/arm/armv7m.c | 1 +
18
1 file changed, 12 insertions(+), 3 deletions(-)
16
2 files changed, 2 insertions(+), 1 deletion(-)
17
19
18
diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
19
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/intc/armv7m_nvic.h
22
--- a/target/arm/tcg/vec_helper.c
21
+++ b/include/hw/intc/armv7m_nvic.h
23
+++ b/target/arm/tcg/vec_helper.c
22
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
23
#ifndef HW_ARM_ARMV7M_NVIC_H
25
CPUARMState *env, uint32_t desc)
24
#define HW_ARM_ARMV7M_NVIC_H
26
{
25
27
intptr_t i, oprsz = simd_oprsz(desc);
26
-#include "target/arm/cpu.h"
28
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
27
+#include "target/arm/cpu-qom.h"
29
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
28
#include "hw/sysbus.h"
30
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
29
#include "hw/timer/armv7m_systick.h"
31
float_status *status = &env->vfp.fp_status_a64;
30
#include "qom/object.h"
32
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
31
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
33
+ int negx = 0, negf = 0;
32
index XXXXXXX..XXXXXXX 100644
34
+
33
--- a/hw/arm/armv7m.c
35
+ if (is_s) {
34
+++ b/hw/arm/armv7m.c
36
+ if (env->vfp.fpcr & FPCR_AH) {
35
@@ -XXX,XX +XXX,XX @@
37
+ negf = float_muladd_negate_product;
36
#include "qemu/module.h"
38
+ } else {
37
#include "qemu/log.h"
39
+ negx = 0x8000;
38
#include "target/arm/idau.h"
40
+ }
39
+#include "target/arm/cpu.h"
41
+ }
40
#include "target/arm/cpu-features.h"
42
41
#include "migration/vmstate.h"
43
for (i = 0; i < oprsz; i += sizeof(float32)) {
44
- float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
45
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx;
46
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
47
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
48
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
49
float32 aa = *(float32 *)(va + H1_4(i));
50
51
- *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
52
+ *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status);
53
}
54
}
42
55
43
--
56
--
44
2.34.1
57
2.34.1
45
46
diff view generated by jsdifflib
1
The const_le64() macro introduced in commit 845d80a8c7b187 turns out
1
Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we
2
to have a bug which means that on big-endian systems the compiler
2
can enable FEAT_AFP for '-cpu max', and document that we support it.
3
complains if the argument isn't already a 64-bit type. This hasn't
4
caused a problem yet, because there are no in-tree uses, but it
5
means it's not possible for anybody to add one without it failing CI.
6
3
7
This example is from an attempted use of it with the argument '0',
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
from the s390 CI runner's gcc:
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
docs/system/arm/emulation.rst | 1 +
8
target/arm/tcg/cpu64.c | 1 +
9
2 files changed, 2 insertions(+)
9
10
10
../block/blklogwrites.c: In function ‘blk_log_writes_co_do_log’:
11
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
11
../include/qemu/bswap.h:148:36: error: left shift count >= width of
12
type [-Werror=shift-count-overflow]
13
148 | ((((_x) & 0x00000000000000ffU) << 56) | \
14
| ^~
15
../block/blklogwrites.c:409:27: note: in expansion of macro ‘const_le64’
16
409 | .nr_entries = const_le64(0),
17
| ^~~~~~~~~~
18
../include/qemu/bswap.h:149:36: error: left shift count >= width of
19
type [-Werror=shift-count-overflow]
20
149 | (((_x) & 0x000000000000ff00U) << 40) | \
21
| ^~
22
../block/blklogwrites.c:409:27: note: in expansion of macro ‘const_le64’
23
409 | .nr_entries = const_le64(0),
24
| ^~~~~~~~~~
25
cc1: all warnings being treated as errors
26
27
Fix this by making all the constants in the macro have the ULL
28
suffix. This will cause them all to be 64-bit integers, which means
29
the result of the logical & will also be an unsigned 64-bit type,
30
even if the input to the macro is a smaller type, and so the shifts
31
will be in range.
32
33
Fixes: 845d80a8c7b187 ("qemu/bswap: Add const_le64()")
34
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
35
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
36
Reviewed-by: Thomas Huth <thuth@redhat.com>
37
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
38
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
39
Message-id: 20240122173735.472951-1-peter.maydell@linaro.org
40
---
41
include/qemu/bswap.h | 16 ++++++++--------
42
1 file changed, 8 insertions(+), 8 deletions(-)
43
44
diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
45
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
46
--- a/include/qemu/bswap.h
13
--- a/docs/system/arm/emulation.rst
47
+++ b/include/qemu/bswap.h
14
+++ b/docs/system/arm/emulation.rst
48
@@ -XXX,XX +XXX,XX @@ CPU_CONVERT(le, 64, uint64_t)
15
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
49
*/
16
- FEAT_AA64EL3 (Support for AArch64 at EL3)
50
#if HOST_BIG_ENDIAN
17
- FEAT_AdvSIMD (Advanced SIMD Extension)
51
# define const_le64(_x) \
18
- FEAT_AES (AESD and AESE instructions)
52
- ((((_x) & 0x00000000000000ffU) << 56) | \
19
+- FEAT_AFP (Alternate floating-point behavior)
53
- (((_x) & 0x000000000000ff00U) << 40) | \
20
- FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
54
- (((_x) & 0x0000000000ff0000U) << 24) | \
21
- FEAT_ASID16 (16 bit ASID)
55
- (((_x) & 0x00000000ff000000U) << 8) | \
22
- FEAT_BBM at level 2 (Translation table break-before-make levels)
56
- (((_x) & 0x000000ff00000000U) >> 8) | \
23
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
57
- (((_x) & 0x0000ff0000000000U) >> 24) | \
24
index XXXXXXX..XXXXXXX 100644
58
- (((_x) & 0x00ff000000000000U) >> 40) | \
25
--- a/target/arm/tcg/cpu64.c
59
- (((_x) & 0xff00000000000000U) >> 56))
26
+++ b/target/arm/tcg/cpu64.c
60
+ ((((_x) & 0x00000000000000ffULL) << 56) | \
27
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
61
+ (((_x) & 0x000000000000ff00ULL) << 40) | \
28
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
62
+ (((_x) & 0x0000000000ff0000ULL) << 24) | \
29
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
63
+ (((_x) & 0x00000000ff000000ULL) << 8) | \
30
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
64
+ (((_x) & 0x000000ff00000000ULL) >> 8) | \
31
+ t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */
65
+ (((_x) & 0x0000ff0000000000ULL) >> 24) | \
32
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
66
+ (((_x) & 0x00ff000000000000ULL) >> 40) | \
33
t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
67
+ (((_x) & 0xff00000000000000ULL) >> 56))
34
cpu->isar.id_aa64mmfr1 = t;
68
# define const_le32(_x) \
69
((((_x) & 0x000000ffU) << 24) | \
70
(((_x) & 0x0000ff00U) << 8) | \
71
--
35
--
72
2.34.1
36
2.34.1
73
74
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
FEAT_RPRES implements an "increased precision" variant of the single
2
precision FRECPE and FRSQRTE instructions from an 8 bit to a 12
3
bit mantissa. This applies only when FPCR.AH == 1. Note that the
4
halfprec and double versions of these insns retain the 8 bit
5
precision regardless.
2
6
3
target/arm/cpu-features.h uses the FIELD_EX32() macro
7
In this commit we add all the plumbing to make these instructions
4
defined in "hw/registerfields.h". Include it in order
8
call a new helper function when the increased-precision is in
5
to avoid this error when refactoring unrelated headers:
9
effect. In the following commit we will provide the actual change
10
in behaviour in the helpers.
6
11
7
target/arm/cpu-features.h:44:12: error: call to undeclared function 'FIELD_EX32';
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
9
return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
10
^
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20240118200643.29037-6-philmd@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
14
---
17
target/arm/cpu-features.h | 2 ++
15
target/arm/cpu-features.h | 5 +++++
18
1 file changed, 2 insertions(+)
16
target/arm/helper.h | 4 ++++
17
target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++----
18
target/arm/tcg/translate-sve.c | 16 ++++++++++++++--
19
target/arm/tcg/vec_helper.c | 2 ++
20
target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++--
21
6 files changed, 85 insertions(+), 8 deletions(-)
19
22
20
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
23
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
21
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/cpu-features.h
25
--- a/target/arm/cpu-features.h
23
+++ b/target/arm/cpu-features.h
26
+++ b/target/arm/cpu-features.h
24
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
25
#ifndef TARGET_ARM_FEATURES_H
28
return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
26
#define TARGET_ARM_FEATURES_H
29
}
27
30
28
+#include "hw/registerfields.h"
31
+static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
29
+
32
+{
33
+ return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES);
34
+}
35
+
36
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
37
{
38
/* We always set the AdvSIMD and FP fields identically. */
39
diff --git a/target/arm/helper.h b/target/arm/helper.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/helper.h
42
+++ b/target/arm/helper.h
43
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst)
44
45
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
46
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
47
+DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
48
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
49
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
50
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
51
+DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
52
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
53
DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
54
DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
55
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
56
57
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
58
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
59
+DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
60
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
61
62
DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
63
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
64
+DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
65
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
66
67
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
68
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/tcg/translate-a64.c
71
+++ b/target/arm/tcg/translate-a64.c
72
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = {
73
gen_helper_recpe_f32,
74
gen_helper_recpe_f64,
75
};
76
-TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1)
77
+static const FPScalar1 f_scalar_frecpe_rpres = {
78
+ gen_helper_recpe_f16,
79
+ gen_helper_recpe_rpres_f32,
80
+ gen_helper_recpe_f64,
81
+};
82
+TRANS(FRECPE_s, do_fp1_scalar_ah, a,
83
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
84
+ &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
85
86
static const FPScalar1 f_scalar_frecpx = {
87
gen_helper_frecpx_f16,
88
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = {
89
gen_helper_rsqrte_f32,
90
gen_helper_rsqrte_f64,
91
};
92
-TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1)
93
+static const FPScalar1 f_scalar_frsqrte_rpres = {
94
+ gen_helper_rsqrte_f16,
95
+ gen_helper_rsqrte_rpres_f32,
96
+ gen_helper_rsqrte_f64,
97
+};
98
+TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
99
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
100
+ &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
101
102
static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
103
{
104
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = {
105
gen_helper_gvec_frecpe_s,
106
gen_helper_gvec_frecpe_d,
107
};
108
-TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
109
+static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
110
+ gen_helper_gvec_frecpe_h,
111
+ gen_helper_gvec_frecpe_rpres_s,
112
+ gen_helper_gvec_frecpe_d,
113
+};
114
+TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
115
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
116
117
static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
118
gen_helper_gvec_frsqrte_h,
119
gen_helper_gvec_frsqrte_s,
120
gen_helper_gvec_frsqrte_d,
121
};
122
-TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
123
+static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
124
+ gen_helper_gvec_frsqrte_h,
125
+ gen_helper_gvec_frsqrte_rpres_s,
126
+ gen_helper_gvec_frsqrte_d,
127
+};
128
+TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
129
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
130
131
static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
132
{
133
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/target/arm/tcg/translate-sve.c
136
+++ b/target/arm/tcg/translate-sve.c
137
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
138
NULL, gen_helper_gvec_frecpe_h,
139
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
140
};
141
-TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0)
142
+static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = {
143
+ NULL, gen_helper_gvec_frecpe_h,
144
+ gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d,
145
+};
146
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
147
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
148
+ frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0)
149
150
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
151
NULL, gen_helper_gvec_frsqrte_h,
152
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
153
};
154
-TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0)
155
+static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = {
156
+ NULL, gen_helper_gvec_frsqrte_h,
157
+ gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d,
158
+};
159
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
160
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
161
+ frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0)
162
30
/*
163
/*
31
* Naming convention for isar_feature functions:
164
*** SVE Floating Point Compare with Zero Group
32
* Functions which test 32-bit ID registers should have _aa32_ in
165
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/target/arm/tcg/vec_helper.c
168
+++ b/target/arm/tcg/vec_helper.c
169
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \
170
171
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
172
DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
173
+DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32)
174
DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
175
176
DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
177
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
178
+DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32)
179
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
180
181
DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
182
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/target/arm/vfp_helper.c
185
+++ b/target/arm/vfp_helper.c
186
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
187
return make_float16(f16_val);
188
}
189
190
-float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
191
+/*
192
+ * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
193
+ * which is used when FPCR.AH == 1.
194
+ */
195
+static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
196
{
197
float32 f32 = float32_squash_input_denormal(input, fpst);
198
uint32_t f32_val = float32_val(f32);
199
@@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
200
return make_float32(f32_val);
201
}
202
203
+float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
204
+{
205
+ return do_recpe_f32(input, fpst, false);
206
+}
207
+
208
+float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
209
+{
210
+ return do_recpe_f32(input, fpst, true);
211
+}
212
+
213
float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
214
{
215
float64 f64 = float64_squash_input_denormal(input, fpst);
216
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
217
return make_float16(val);
218
}
219
220
-float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
221
+/*
222
+ * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
223
+ * which is used when FPCR.AH == 1.
224
+ */
225
+static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
226
{
227
float32 f32 = float32_squash_input_denormal(input, s);
228
uint32_t val = float32_val(f32);
229
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
230
return make_float32(val);
231
}
232
233
+float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
234
+{
235
+ return do_rsqrte_f32(input, s, false);
236
+}
237
+
238
+float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
239
+{
240
+ return do_rsqrte_f32(input, s, true);
241
+}
242
+
243
float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
244
{
245
float64 f64 = float64_squash_input_denormal(input, s);
33
--
246
--
34
2.34.1
247
2.34.1
35
36
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
Implement the increased precision variation of FRECPE. In the
2
pseudocode this corresponds to the handling of the
3
"increasedprecision" boolean in the FPRecipEstimate() and
4
RecipEstimate() functions.
2
5
3
target/arm/cpregs.h uses the FIELD() macro defined in
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
"hw/registerfields.h". Include it in order to avoid the following error when
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
refactoring unrelated headers:
8
---
9
target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------
10
1 file changed, 46 insertions(+), 8 deletions(-)
6
11
7
target/arm/cpregs.h:347:30: error: expected identifier
12
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
8
FIELD(HFGRTR_EL2, AFSR0_EL1, 0, 1)
9
^
10
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20240118200643.29037-7-philmd@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
16
target/arm/cpregs.h | 2 ++
17
1 file changed, 2 insertions(+)
18
19
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/cpregs.h
14
--- a/target/arm/vfp_helper.c
22
+++ b/target/arm/cpregs.h
15
+++ b/target/arm/vfp_helper.c
23
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input)
24
#ifndef TARGET_ARM_CPREGS_H
17
return r;
25
#define TARGET_ARM_CPREGS_H
18
}
26
19
27
+#include "hw/registerfields.h"
20
+/*
21
+ * Increased precision version:
22
+ * input is a 13 bit fixed point number
23
+ * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
24
+ * result range 4096 .. 8191 for a number from 1.0 to 2.0
25
+ */
26
+static int recip_estimate_incprec(int input)
27
+{
28
+ int a, b, r;
29
+ assert(2048 <= input && input < 4096);
30
+ a = (input * 2) + 1;
31
+ /*
32
+ * The pseudocode expresses this as an operation on infinite
33
+ * precision reals where it calculates 2^25 / a and then looks
34
+ * at the error between that and the rounded-down-to-integer
35
+ * value to see if it should instead round up. We instead
36
+ * follow the same approach as the pseudocode for the 8-bit
37
+ * precision version, and calculate (2 * (2^25 / a)) as an
38
+ * integer so we can do the "add one and halve" to round it.
39
+ * So the 1 << 26 here is correct.
40
+ */
41
+ b = (1 << 26) / a;
42
+ r = (b + 1) >> 1;
43
+ assert(4096 <= r && r < 8192);
44
+ return r;
45
+}
28
+
46
+
29
/*
47
/*
30
* ARMCPRegInfo type field bits:
48
* Common wrapper to call recip_estimate
49
*
50
@@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input)
51
* callee.
31
*/
52
*/
53
54
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
55
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
56
+ bool increasedprecision)
57
{
58
uint32_t scaled, estimate;
59
uint64_t result_frac;
60
@@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
61
}
62
}
63
64
- /* scaled = UInt('1':fraction<51:44>) */
65
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
66
- estimate = recip_estimate(scaled);
67
+ if (increasedprecision) {
68
+ /* scaled = UInt('1':fraction<51:41>) */
69
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
70
+ estimate = recip_estimate_incprec(scaled);
71
+ } else {
72
+ /* scaled = UInt('1':fraction<51:44>) */
73
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
74
+ estimate = recip_estimate(scaled);
75
+ }
76
77
result_exp = exp_off - *exp;
78
- result_frac = deposit64(0, 44, 8, estimate);
79
+ if (increasedprecision) {
80
+ result_frac = deposit64(0, 40, 12, estimate);
81
+ } else {
82
+ result_frac = deposit64(0, 44, 8, estimate);
83
+ }
84
if (result_exp == 0) {
85
result_frac = deposit64(result_frac >> 1, 51, 1, 1);
86
} else if (result_exp == -1) {
87
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
88
}
89
90
f64_frac = call_recip_estimate(&f16_exp, 29,
91
- ((uint64_t) f16_frac) << (52 - 10));
92
+ ((uint64_t) f16_frac) << (52 - 10), false);
93
94
/* result = sign : result_exp<4:0> : fraction<51:42> */
95
f16_val = deposit32(0, 15, 1, f16_sign);
96
@@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
97
}
98
99
f64_frac = call_recip_estimate(&f32_exp, 253,
100
- ((uint64_t) f32_frac) << (52 - 23));
101
+ ((uint64_t) f32_frac) << (52 - 23), rpres);
102
103
/* result = sign : result_exp<7:0> : fraction<51:29> */
104
f32_val = deposit32(0, 31, 1, f32_sign);
105
@@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
106
return float64_set_sign(float64_zero, float64_is_neg(f64));
107
}
108
109
- f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
110
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
111
112
/* result = sign : result_exp<10:0> : fraction<51:0>; */
113
f64_val = deposit64(0, 63, 1, f64_sign);
32
--
114
--
33
2.34.1
115
2.34.1
34
35
diff view generated by jsdifflib
1
error_report() strings should not include trailing newlines; remove
1
Implement the increased precision variation of FRSQRTE. In the
2
the newline from the error we print when devices won't fit into the
2
pseudocode this corresponds to the handling of the
3
address space of the CPU.
3
"increasedprecision" boolean in the FPRSqrtEstimate() and
4
4
RecipSqrtEstimate() functions.
5
This commit also fixes the accidental hardcoded tabs that were in
6
this line, since we have to touch the line anyway.
7
5
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20240118131649.2726375-1-peter.maydell@linaro.org
11
---
8
---
12
hw/arm/virt.c | 4 ++--
9
target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++-------
13
1 file changed, 2 insertions(+), 2 deletions(-)
10
1 file changed, 64 insertions(+), 13 deletions(-)
14
11
15
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
12
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/arm/virt.c
14
--- a/target/arm/vfp_helper.c
18
+++ b/hw/arm/virt.c
15
+++ b/target/arm/vfp_helper.c
19
@@ -XXX,XX +XXX,XX @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
16
@@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a)
20
/* Base address of the high IO region */
17
return estimate;
21
memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB);
18
}
22
if (memtop > BIT_ULL(pa_bits)) {
19
23
-     error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes\n",
20
+static int do_recip_sqrt_estimate_incprec(int a)
24
-             pa_bits, memtop - BIT_ULL(pa_bits));
21
+{
25
+ error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes",
22
+ /*
26
+ pa_bits, memtop - BIT_ULL(pa_bits));
23
+ * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
27
exit(EXIT_FAILURE);
24
+ * in terms of an infinite-precision floating point calculation of a
25
+ * square root. We implement this using the same kind of pure integer
26
+ * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
27
+ */
28
+ int64_t b, estimate;
29
30
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
31
+ assert(1024 <= a && a < 4096);
32
+ if (a < 2048) {
33
+ a = a * 2 + 1;
34
+ } else {
35
+ a = (a >> 1) << 1;
36
+ a = (a + 1) * 2;
37
+ }
38
+ b = 8192;
39
+ while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
40
+ b += 1;
41
+ }
42
+ estimate = (b + 1) / 2;
43
+
44
+ assert(4096 <= estimate && estimate < 8192);
45
+
46
+ return estimate;
47
+}
48
+
49
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
50
+ bool increasedprecision)
51
{
52
int estimate;
53
uint32_t scaled;
54
@@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
55
frac = extract64(frac, 0, 51) << 1;
28
}
56
}
29
if (base < device_memory_base) {
57
58
- if (*exp & 1) {
59
- /* scaled = UInt('01':fraction<51:45>) */
60
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
61
+ if (increasedprecision) {
62
+ if (*exp & 1) {
63
+ /* scaled = UInt('01':fraction<51:42>) */
64
+ scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
65
+ } else {
66
+ /* scaled = UInt('1':fraction<51:41>) */
67
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
68
+ }
69
+ estimate = do_recip_sqrt_estimate_incprec(scaled);
70
} else {
71
- /* scaled = UInt('1':fraction<51:44>) */
72
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
73
+ if (*exp & 1) {
74
+ /* scaled = UInt('01':fraction<51:45>) */
75
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
76
+ } else {
77
+ /* scaled = UInt('1':fraction<51:44>) */
78
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
79
+ }
80
+ estimate = do_recip_sqrt_estimate(scaled);
81
}
82
- estimate = do_recip_sqrt_estimate(scaled);
83
84
*exp = (exp_off - *exp) / 2;
85
- return extract64(estimate, 0, 8) << 44;
86
+ if (increasedprecision) {
87
+ return extract64(estimate, 0, 12) << 40;
88
+ } else {
89
+ return extract64(estimate, 0, 8) << 44;
90
+ }
91
}
92
93
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
94
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
95
96
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
97
98
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
99
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
100
101
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
102
val = deposit32(0, 15, 1, f16_sign);
103
@@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
104
105
f64_frac = ((uint64_t) f32_frac) << 29;
106
107
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
108
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
109
110
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
111
+ /*
112
+ * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
113
+ * or for increased precision
114
+ * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
115
+ */
116
val = deposit32(0, 31, 1, f32_sign);
117
val = deposit32(val, 23, 8, f32_exp);
118
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
119
+ if (rpres) {
120
+ val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
121
+ } else {
122
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
123
+ }
124
return make_float32(val);
125
}
126
127
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
128
return float64_zero;
129
}
130
131
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
132
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
133
134
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
135
val = deposit64(0, 61, 1, f64_sign);
30
--
136
--
31
2.34.1
137
2.34.1
32
33
diff view generated by jsdifflib
1
In arm_deliver_fault() we check for whether the fault is caused
1
Now the emulation is complete, we can enable FEAT_RPRES for the 'max'
2
by a data abort due to an access to a FEAT_NV2 sysreg in the
2
CPU type.
3
memory pointed to by the VNCR. Unfortunately part of the
4
condition checks the wrong argument to the function, meaning
5
that it would spuriously trigger, resulting in some instruction
6
aborts being taken to the wrong EL and reported incorrectly.
7
3
8
Use the right variable in the condition.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
docs/system/arm/emulation.rst | 1 +
8
target/arm/tcg/cpu64.c | 1 +
9
2 files changed, 2 insertions(+)
9
10
10
Fixes: 674e5345275d425 ("target/arm: Report VNCR_EL2 based faults correctly")
11
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
11
Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
14
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
15
Message-id: 20240116165605.2523055-1-peter.maydell@linaro.org
16
---
17
target/arm/tcg/tlb_helper.c | 2 +-
18
1 file changed, 1 insertion(+), 1 deletion(-)
19
20
diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c
21
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/tcg/tlb_helper.c
13
--- a/docs/system/arm/emulation.rst
23
+++ b/target/arm/tcg/tlb_helper.c
14
+++ b/docs/system/arm/emulation.rst
24
@@ -XXX,XX +XXX,XX @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
15
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
25
* (and indeed syndrome does not have the EC field in it,
16
- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
26
* because we masked that out in disas_set_insn_syndrome())
17
- FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental)
27
*/
18
- FEAT_RNG (Random number generator)
28
- bool is_vncr = (mmu_idx != MMU_INST_FETCH) &&
19
+- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE)
29
+ bool is_vncr = (access_type != MMU_INST_FETCH) &&
20
- FEAT_S2FWB (Stage 2 forced Write-Back)
30
(env->exception.syndrome & ARM_EL_VNCR);
21
- FEAT_SB (Speculation Barrier)
31
22
- FEAT_SEL2 (Secure EL2)
32
if (is_vncr) {
23
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/tcg/cpu64.c
26
+++ b/target/arm/tcg/cpu64.c
27
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
28
cpu->isar.id_aa64isar1 = t;
29
30
t = cpu->isar.id_aa64isar2;
31
+ t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */
32
t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
33
t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
34
t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */
33
--
35
--
34
2.34.1
36
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
hw/arm/smmuv3-internal.h uses the REG32() and FIELD()
3
Move ARMFPStatusFlavour to cpu.h with which to index
4
macros defined in "hw/registerfields.h". Include it in
4
this array. For now, place the array in an anonymous
5
order to avoid the following errors when refactoring unrelated headers:
5
union with the existing structures. Adjust the order
6
6
of the existing structures to match the enum.
7
In file included from ../../hw/arm/smmuv3.c:34:
7
8
hw/arm/smmuv3-internal.h:36:28: error: expected identifier
8
Simplify fpstatus_ptr() using the new array.
9
REG32(IDR0, 0x0)
9
10
^
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
hw/arm/smmuv3-internal.h:37:5: error: expected function body after function declarator
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
FIELD(IDR0, S2P, 0 , 1)
12
Message-id: 20250129013857.135256-7-richard.henderson@linaro.org
13
^
14
15
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Message-id: 20240118200643.29037-4-philmd@linaro.org
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
---
14
---
20
hw/arm/smmuv3-internal.h | 1 +
15
target/arm/cpu.h | 119 +++++++++++++++++++++----------------
21
1 file changed, 1 insertion(+)
16
target/arm/tcg/translate.h | 64 +-------------------
22
17
2 files changed, 70 insertions(+), 113 deletions(-)
23
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
18
19
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
24
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/arm/smmuv3-internal.h
21
--- a/target/arm/cpu.h
26
+++ b/hw/arm/smmuv3-internal.h
22
+++ b/target/arm/cpu.h
27
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
28
#ifndef HW_ARM_SMMUV3_INTERNAL_H
24
29
#define HW_ARM_SMMUV3_INTERNAL_H
25
typedef struct NVICState NVICState;
30
26
31
+#include "hw/registerfields.h"
27
+/*
32
#include "hw/arm/smmu-common.h"
28
+ * Enum for indexing vfp.fp_status[].
33
29
+ *
34
typedef enum SMMUTranslationStatus {
30
+ * FPST_A32: is the "normal" fp status for AArch32 insns
31
+ * FPST_A64: is the "normal" fp status for AArch64 insns
32
+ * FPST_A32_F16: used for AArch32 half-precision calculations
33
+ * FPST_A64_F16: used for AArch64 half-precision calculations
34
+ * FPST_STD: the ARM "Standard FPSCR Value"
35
+ * FPST_STD_F16: used for half-precision
36
+ * calculations with the ARM "Standard FPSCR Value"
37
+ * FPST_AH: used for the A64 insns which change behaviour
38
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
39
+ * and the reciprocal and square root estimate/step insns)
40
+ * FPST_AH_F16: used for the A64 insns which change behaviour
41
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
42
+ * and the reciprocal and square root estimate/step insns);
43
+ * for half-precision
44
+ *
45
+ * Half-precision operations are governed by a separate
46
+ * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
47
+ * status structure to control this.
48
+ *
49
+ * The "Standard FPSCR", ie default-NaN, flush-to-zero,
50
+ * round-to-nearest and is used by any operations (generally
51
+ * Neon) which the architecture defines as controlled by the
52
+ * standard FPSCR value rather than the FPSCR.
53
+ *
54
+ * The "standard FPSCR but for fp16 ops" is needed because
55
+ * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
56
+ * using a fixed value for it.
57
+ *
58
+ * The ah_fp_status is needed because some insns have different
59
+ * behaviour when FPCR.AH == 1: they don't update cumulative
60
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
61
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
62
+ * which means we need an ah_fp_status_f16 as well.
63
+ *
64
+ * To avoid having to transfer exception bits around, we simply
65
+ * say that the FPSCR cumulative exception flags are the logical
66
+ * OR of the flags in the four fp statuses. This relies on the
67
+ * only thing which needs to read the exception flags being
68
+ * an explicit FPSCR read.
69
+ */
70
+typedef enum ARMFPStatusFlavour {
71
+ FPST_A32,
72
+ FPST_A64,
73
+ FPST_A32_F16,
74
+ FPST_A64_F16,
75
+ FPST_AH,
76
+ FPST_AH_F16,
77
+ FPST_STD,
78
+ FPST_STD_F16,
79
+} ARMFPStatusFlavour;
80
+#define FPST_COUNT 8
81
+
82
typedef struct CPUArchState {
83
/* Regs for current mode. */
84
uint32_t regs[16];
85
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
86
/* Scratch space for aa32 neon expansion. */
87
uint32_t scratch[8];
88
89
- /* There are a number of distinct float control structures:
90
- *
91
- * fp_status_a32: is the "normal" fp status for AArch32 insns
92
- * fp_status_a64: is the "normal" fp status for AArch64 insns
93
- * fp_status_fp16_a32: used for AArch32 half-precision calculations
94
- * fp_status_fp16_a64: used for AArch64 half-precision calculations
95
- * standard_fp_status : the ARM "Standard FPSCR Value"
96
- * standard_fp_status_fp16 : used for half-precision
97
- * calculations with the ARM "Standard FPSCR Value"
98
- * ah_fp_status: used for the A64 insns which change behaviour
99
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
100
- * and the reciprocal and square root estimate/step insns)
101
- * ah_fp_status_f16: used for the A64 insns which change behaviour
102
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
103
- * and the reciprocal and square root estimate/step insns);
104
- * for half-precision
105
- *
106
- * Half-precision operations are governed by a separate
107
- * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
108
- * status structure to control this.
109
- *
110
- * The "Standard FPSCR", ie default-NaN, flush-to-zero,
111
- * round-to-nearest and is used by any operations (generally
112
- * Neon) which the architecture defines as controlled by the
113
- * standard FPSCR value rather than the FPSCR.
114
- *
115
- * The "standard FPSCR but for fp16 ops" is needed because
116
- * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
117
- * using a fixed value for it.
118
- *
119
- * The ah_fp_status is needed because some insns have different
120
- * behaviour when FPCR.AH == 1: they don't update cumulative
121
- * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
122
- * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
123
- * which means we need an ah_fp_status_f16 as well.
124
- *
125
- * To avoid having to transfer exception bits around, we simply
126
- * say that the FPSCR cumulative exception flags are the logical
127
- * OR of the flags in the four fp statuses. This relies on the
128
- * only thing which needs to read the exception flags being
129
- * an explicit FPSCR read.
130
- */
131
- float_status fp_status_a32;
132
- float_status fp_status_a64;
133
- float_status fp_status_f16_a32;
134
- float_status fp_status_f16_a64;
135
- float_status standard_fp_status;
136
- float_status standard_fp_status_f16;
137
- float_status ah_fp_status;
138
- float_status ah_fp_status_f16;
139
+ /* There are a number of distinct float control structures. */
140
+ union {
141
+ float_status fp_status[FPST_COUNT];
142
+ struct {
143
+ float_status fp_status_a32;
144
+ float_status fp_status_a64;
145
+ float_status fp_status_f16_a32;
146
+ float_status fp_status_f16_a64;
147
+ float_status ah_fp_status;
148
+ float_status ah_fp_status_f16;
149
+ float_status standard_fp_status;
150
+ float_status standard_fp_status_f16;
151
+ };
152
+ };
153
154
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
155
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
156
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
157
index XXXXXXX..XXXXXXX 100644
158
--- a/target/arm/tcg/translate.h
159
+++ b/target/arm/tcg/translate.h
160
@@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
161
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
162
}
163
164
-/*
165
- * Enum for argument to fpstatus_ptr().
166
- */
167
-typedef enum ARMFPStatusFlavour {
168
- FPST_A32,
169
- FPST_A64,
170
- FPST_A32_F16,
171
- FPST_A64_F16,
172
- FPST_AH,
173
- FPST_AH_F16,
174
- FPST_STD,
175
- FPST_STD_F16,
176
-} ARMFPStatusFlavour;
177
-
178
/**
179
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
180
*
181
* We have multiple softfloat float_status fields in the Arm CPU state struct
182
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
183
* been set up to point to the requested field in the CPU state struct.
184
- * The options are:
185
- *
186
- * FPST_A32
187
- * for AArch32 non-FP16 operations controlled by the FPCR
188
- * FPST_A64
189
- * for AArch64 non-FP16 operations controlled by the FPCR
190
- * FPST_A32_F16
191
- * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
192
- * FPST_A64_F16
193
- * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
194
- * FPST_AH:
195
- * for AArch64 operations which change behaviour when AH=1 (specifically,
196
- * bfloat16 conversions and multiplies, and the reciprocal and square root
197
- * estimate/step insns)
198
- * FPST_AH_F16:
199
- * ditto, but for half-precision operations
200
- * FPST_STD
201
- * for A32/T32 Neon operations using the "standard FPSCR value"
202
- * FPST_STD_F16
203
- * as FPST_STD, but where FPCR.FZ16 is to be used
204
*/
205
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
206
{
207
TCGv_ptr statusptr = tcg_temp_new_ptr();
208
- int offset;
209
+ int offset = offsetof(CPUARMState, vfp.fp_status[flavour]);
210
211
- switch (flavour) {
212
- case FPST_A32:
213
- offset = offsetof(CPUARMState, vfp.fp_status_a32);
214
- break;
215
- case FPST_A64:
216
- offset = offsetof(CPUARMState, vfp.fp_status_a64);
217
- break;
218
- case FPST_A32_F16:
219
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
220
- break;
221
- case FPST_A64_F16:
222
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
223
- break;
224
- case FPST_AH:
225
- offset = offsetof(CPUARMState, vfp.ah_fp_status);
226
- break;
227
- case FPST_AH_F16:
228
- offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
229
- break;
230
- case FPST_STD:
231
- offset = offsetof(CPUARMState, vfp.standard_fp_status);
232
- break;
233
- case FPST_STD_F16:
234
- offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
235
- break;
236
- default:
237
- g_assert_not_reached();
238
- }
239
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
240
return statusptr;
241
}
35
--
242
--
36
2.34.1
243
2.34.1
37
244
38
245
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The ARMv7M QDev container accesses the QDev SysTickState
3
Replace with fp_status[FPST_STD_F16].
4
by its secure/non-secure bank index. In order to make
5
the "hw/intc/armv7m_nvic.h" header target-agnostic in
6
the next commit, first move the M-profile bank index
7
definitions to "target/arm/cpu-qom.h".
8
4
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Message-id: 20240118200643.29037-16-philmd@linaro.org
7
Message-id: 20250129013857.135256-8-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
9
---
14
target/arm/cpu-qom.h | 15 +++++++++++++++
10
target/arm/cpu.h | 1 -
15
target/arm/cpu.h | 15 ---------------
11
target/arm/cpu.c | 4 ++--
16
2 files changed, 15 insertions(+), 15 deletions(-)
12
target/arm/tcg/mve_helper.c | 24 ++++++++++++------------
13
target/arm/vfp_helper.c | 8 ++++----
14
4 files changed, 18 insertions(+), 19 deletions(-)
17
15
18
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/cpu-qom.h
21
+++ b/target/arm/cpu-qom.h
22
@@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
23
#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
24
#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
25
26
+/* For M profile, some registers are banked secure vs non-secure;
27
+ * these are represented as a 2-element array where the first element
28
+ * is the non-secure copy and the second is the secure copy.
29
+ * When the CPU does not implement the security extension then
30
+ * only the first element is used.
31
+ * This means that the copy for the current security state can be
32
+ * accessed via env->registerfield[env->v7m.secure] (whether the security
33
+ * extension is implemented or not).
34
+ */
35
+enum {
36
+ M_REG_NS = 0,
37
+ M_REG_S = 1,
38
+ M_REG_NUM_BANKS = 2,
39
+};
40
+
41
#endif
42
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
43
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/cpu.h
18
--- a/target/arm/cpu.h
45
+++ b/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
46
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
47
#define ARMV7M_EXCP_PENDSV 14
21
float_status ah_fp_status;
48
#define ARMV7M_EXCP_SYSTICK 15
22
float_status ah_fp_status_f16;
49
23
float_status standard_fp_status;
50
-/* For M profile, some registers are banked secure vs non-secure;
24
- float_status standard_fp_status_f16;
51
- * these are represented as a 2-element array where the first element
25
};
52
- * is the non-secure copy and the second is the secure copy.
26
};
53
- * When the CPU does not have implement the security extension then
27
54
- * only the first element is used.
28
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
55
- * This means that the copy for the current security state can be
29
index XXXXXXX..XXXXXXX 100644
56
- * accessed via env->registerfield[env->v7m.secure] (whether the security
30
--- a/target/arm/cpu.c
57
- * extension is implemented or not).
31
+++ b/target/arm/cpu.c
58
- */
32
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
59
-enum {
33
set_flush_to_zero(1, &env->vfp.standard_fp_status);
60
- M_REG_NS = 0,
34
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
61
- M_REG_S = 1,
35
set_default_nan_mode(1, &env->vfp.standard_fp_status);
62
- M_REG_NUM_BANKS = 2,
36
- set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
63
-};
37
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
64
-
38
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
65
/* ARM-specific interrupt pending bits. */
39
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
66
#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1
40
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
67
#define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_EXT_2
41
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
42
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
43
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
44
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
45
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
46
set_flush_to_zero(1, &env->vfp.ah_fp_status);
47
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
48
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/tcg/mve_helper.c
51
+++ b/target/arm/tcg/mve_helper.c
52
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
53
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
54
continue; \
55
} \
56
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
57
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
58
&env->vfp.standard_fp_status; \
59
if (!(mask & 1)) { \
60
/* We need the result but without updating flags */ \
61
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
62
r[e] = 0; \
63
continue; \
64
} \
65
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
66
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
67
&env->vfp.standard_fp_status; \
68
if (!(tm & 1)) { \
69
/* We need the result but without updating flags */ \
70
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
71
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
72
continue; \
73
} \
74
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
75
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
76
&env->vfp.standard_fp_status; \
77
if (!(mask & 1)) { \
78
/* We need the result but without updating flags */ \
79
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
80
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
81
continue; \
82
} \
83
- fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
84
+ fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
85
&env->vfp.standard_fp_status; \
86
fpst1 = fpst0; \
87
if (!(mask & 1)) { \
88
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
89
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
90
continue; \
91
} \
92
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
93
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
94
&env->vfp.standard_fp_status; \
95
if (!(mask & 1)) { \
96
/* We need the result but without updating flags */ \
97
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
98
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
99
continue; \
100
} \
101
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
102
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
103
&env->vfp.standard_fp_status; \
104
if (!(mask & 1)) { \
105
/* We need the result but without updating flags */ \
106
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
107
TYPE *m = vm; \
108
TYPE ra = (TYPE)ra_in; \
109
float_status *fpst = (ESIZE == 2) ? \
110
- &env->vfp.standard_fp_status_f16 : \
111
+ &env->vfp.fp_status[FPST_STD_F16] : \
112
&env->vfp.standard_fp_status; \
113
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
114
if (mask & 1) { \
115
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
116
if ((mask & emask) == 0) { \
117
continue; \
118
} \
119
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
120
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
121
&env->vfp.standard_fp_status; \
122
if (!(mask & (1 << (e * ESIZE)))) { \
123
/* We need the result but without updating flags */ \
124
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
125
if ((mask & emask) == 0) { \
126
continue; \
127
} \
128
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
129
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
130
&env->vfp.standard_fp_status; \
131
if (!(mask & (1 << (e * ESIZE)))) { \
132
/* We need the result but without updating flags */ \
133
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
134
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
135
continue; \
136
} \
137
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
138
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
139
&env->vfp.standard_fp_status; \
140
if (!(mask & 1)) { \
141
/* We need the result but without updating flags */ \
142
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
143
float_status *fpst; \
144
float_status scratch_fpst; \
145
float_status *base_fpst = (ESIZE == 2) ? \
146
- &env->vfp.standard_fp_status_f16 : \
147
+ &env->vfp.fp_status[FPST_STD_F16] : \
148
&env->vfp.standard_fp_status; \
149
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
150
set_float_rounding_mode(rmode, base_fpst); \
151
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
152
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
153
continue; \
154
} \
155
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
156
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
157
&env->vfp.standard_fp_status; \
158
if (!(mask & 1)) { \
159
/* We need the result but without updating flags */ \
160
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/arm/vfp_helper.c
163
+++ b/target/arm/vfp_helper.c
164
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
165
/* FZ16 does not generate an input denormal exception. */
166
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
167
& ~float_flag_input_denormal_flushed);
168
- a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
169
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
170
& ~float_flag_input_denormal_flushed);
171
172
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
173
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
174
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
175
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
176
set_float_exception_flags(0, &env->vfp.standard_fp_status);
177
- set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
178
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
179
set_float_exception_flags(0, &env->vfp.ah_fp_status);
180
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
181
}
182
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
183
bool ftz_enabled = val & FPCR_FZ16;
184
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
185
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
186
- set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
187
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
188
set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
189
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
190
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
191
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
192
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
193
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
194
}
195
if (changed & FPCR_FZ) {
68
--
196
--
69
2.34.1
197
2.34.1
70
198
71
199
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Allwinner R40 supports an AHCI compliant SATA controller.
3
Replace with fp_status[FPST_STD].
4
Add support for it.
4
5
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
7
Message-id: 20250129013857.135256-9-richard.henderson@linaro.org
8
Message-id: 20240115182757.1095012-3-linux@roeck-us.net
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
docs/system/arm/bananapi_m2u.rst | 1 +
10
target/arm/cpu.h | 1 -
12
include/hw/arm/allwinner-r40.h | 3 +++
11
target/arm/cpu.c | 8 ++++----
13
hw/arm/allwinner-r40.c | 12 +++++++++++-
12
target/arm/tcg/mve_helper.c | 28 ++++++++++++++--------------
14
hw/arm/Kconfig | 1 +
13
target/arm/tcg/vec_helper.c | 4 ++--
15
4 files changed, 16 insertions(+), 1 deletion(-)
14
target/arm/vfp_helper.c | 4 ++--
16
15
5 files changed, 22 insertions(+), 23 deletions(-)
17
diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst
16
18
index XXXXXXX..XXXXXXX 100644
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
19
--- a/docs/system/arm/bananapi_m2u.rst
18
index XXXXXXX..XXXXXXX 100644
20
+++ b/docs/system/arm/bananapi_m2u.rst
19
--- a/target/arm/cpu.h
21
@@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices:
20
+++ b/target/arm/cpu.h
22
* EMAC ethernet
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
23
* GMAC ethernet
22
float_status fp_status_f16_a64;
24
* Clock Control Unit
23
float_status ah_fp_status;
25
+ * SATA
24
float_status ah_fp_status_f16;
26
* TWI (I2C)
25
- float_status standard_fp_status;
27
* USB 2.0
26
};
28
27
};
29
diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h
28
30
index XXXXXXX..XXXXXXX 100644
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
31
--- a/include/hw/arm/allwinner-r40.h
30
index XXXXXXX..XXXXXXX 100644
32
+++ b/include/hw/arm/allwinner-r40.h
31
--- a/target/arm/cpu.c
33
@@ -XXX,XX +XXX,XX @@
32
+++ b/target/arm/cpu.c
34
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
35
#include "qom/object.h"
34
env->sau.ctrl = 0;
36
#include "hw/timer/allwinner-a10-pit.h"
37
+#include "hw/ide/ahci.h"
38
#include "hw/intc/arm_gic.h"
39
#include "hw/sd/allwinner-sdhost.h"
40
#include "hw/misc/allwinner-r40-ccu.h"
41
@@ -XXX,XX +XXX,XX @@ enum {
42
AW_R40_DEV_MMC1,
43
AW_R40_DEV_MMC2,
44
AW_R40_DEV_MMC3,
45
+ AW_R40_DEV_AHCI,
46
AW_R40_DEV_EHCI1,
47
AW_R40_DEV_OHCI1,
48
AW_R40_DEV_EHCI2,
49
@@ -XXX,XX +XXX,XX @@ struct AwR40State {
50
const hwaddr *memmap;
51
AwSRAMCState sramc;
52
AwA10PITState timer;
53
+ AllwinnerAHCIState sata;
54
AwSdHostState mmc[AW_R40_NUM_MMCS];
55
EHCISysBusState ehci[AW_R40_NUM_USB];
56
OHCISysBusState ohci[AW_R40_NUM_USB];
57
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/arm/allwinner-r40.c
60
+++ b/hw/arm/allwinner-r40.c
61
@@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = {
62
[AW_R40_DEV_MMC1] = 0x01c10000,
63
[AW_R40_DEV_MMC2] = 0x01c11000,
64
[AW_R40_DEV_MMC3] = 0x01c12000,
65
+ [AW_R40_DEV_AHCI] = 0x01c18000,
66
[AW_R40_DEV_EHCI1] = 0x01c19000,
67
[AW_R40_DEV_OHCI1] = 0x01c19400,
68
[AW_R40_DEV_EHCI2] = 0x01c1c000,
69
@@ -XXX,XX +XXX,XX @@ static struct AwR40Unimplemented r40_unimplemented[] = {
70
{ "usb0-host", 0x01c14000, 4 * KiB },
71
{ "crypto", 0x01c15000, 4 * KiB },
72
{ "spi2", 0x01c17000, 4 * KiB },
73
- { "sata", 0x01c18000, 4 * KiB },
74
{ "usb1-phy", 0x01c19800, 2 * KiB },
75
{ "sid", 0x01c1b000, 4 * KiB },
76
{ "usb2-phy", 0x01c1c800, 2 * KiB },
77
@@ -XXX,XX +XXX,XX @@ enum {
78
AW_R40_GIC_SPI_MMC2 = 34,
79
AW_R40_GIC_SPI_MMC3 = 35,
80
AW_R40_GIC_SPI_EMAC = 55,
81
+ AW_R40_GIC_SPI_AHCI = 56,
82
AW_R40_GIC_SPI_OHCI1 = 64,
83
AW_R40_GIC_SPI_OHCI2 = 65,
84
AW_R40_GIC_SPI_EHCI1 = 76,
85
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj)
86
TYPE_AW_SDHOST_SUN50I_A64);
87
}
35
}
88
36
89
+ object_initialize_child(obj, "sata", &s->sata, TYPE_ALLWINNER_AHCI);
37
- set_flush_to_zero(1, &env->vfp.standard_fp_status);
90
+
38
- set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
91
for (size_t i = 0; i < AW_R40_NUM_USB; i++) {
39
- set_default_nan_mode(1, &env->vfp.standard_fp_status);
92
object_initialize_child(obj, "ehci[*]", &s->ehci[i],
40
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]);
93
TYPE_PLATFORM_EHCI);
41
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
94
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp)
42
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
95
sysbus_realize(SYS_BUS_DEVICE(&s->ccu), &error_fatal);
43
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
96
sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccu), 0, s->memmap[AW_R40_DEV_CCU]);
44
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
97
45
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
98
+ /* SATA / AHCI */
46
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
99
+ sysbus_realize(SYS_BUS_DEVICE(&s->sata), &error_fatal);
47
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
100
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0,
48
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
101
+ allwinner_r40_memmap[AW_R40_DEV_AHCI]);
49
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
102
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0,
50
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
103
+ qdev_get_gpio_in(DEVICE(&s->gic), AW_R40_GIC_SPI_AHCI));
51
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
104
+
52
index XXXXXXX..XXXXXXX 100644
105
/* USB */
53
--- a/target/arm/tcg/mve_helper.c
106
for (size_t i = 0; i < AW_R40_NUM_USB; i++) {
54
+++ b/target/arm/tcg/mve_helper.c
107
g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i);
55
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
108
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
56
continue; \
109
index XXXXXXX..XXXXXXX 100644
57
} \
110
--- a/hw/arm/Kconfig
58
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
111
+++ b/hw/arm/Kconfig
59
- &env->vfp.standard_fp_status; \
112
@@ -XXX,XX +XXX,XX @@ config ALLWINNER_H3
60
+ &env->vfp.fp_status[FPST_STD]; \
113
config ALLWINNER_R40
61
if (!(mask & 1)) { \
114
bool
62
/* We need the result but without updating flags */ \
115
default y if TCG && ARM
63
scratch_fpst = *fpst; \
116
+ select AHCI
64
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
117
select ALLWINNER_SRAMC
65
continue; \
118
select ALLWINNER_A10_PIT
66
} \
119
select AXP2XX_PMU
67
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
68
- &env->vfp.standard_fp_status; \
69
+ &env->vfp.fp_status[FPST_STD]; \
70
if (!(tm & 1)) { \
71
/* We need the result but without updating flags */ \
72
scratch_fpst = *fpst; \
73
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
74
continue; \
75
} \
76
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
77
- &env->vfp.standard_fp_status; \
78
+ &env->vfp.fp_status[FPST_STD]; \
79
if (!(mask & 1)) { \
80
/* We need the result but without updating flags */ \
81
scratch_fpst = *fpst; \
82
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
83
continue; \
84
} \
85
fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
86
- &env->vfp.standard_fp_status; \
87
+ &env->vfp.fp_status[FPST_STD]; \
88
fpst1 = fpst0; \
89
if (!(mask & 1)) { \
90
scratch_fpst = *fpst0; \
91
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
92
continue; \
93
} \
94
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
95
- &env->vfp.standard_fp_status; \
96
+ &env->vfp.fp_status[FPST_STD]; \
97
if (!(mask & 1)) { \
98
/* We need the result but without updating flags */ \
99
scratch_fpst = *fpst; \
100
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
101
continue; \
102
} \
103
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
104
- &env->vfp.standard_fp_status; \
105
+ &env->vfp.fp_status[FPST_STD]; \
106
if (!(mask & 1)) { \
107
/* We need the result but without updating flags */ \
108
scratch_fpst = *fpst; \
109
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
110
TYPE ra = (TYPE)ra_in; \
111
float_status *fpst = (ESIZE == 2) ? \
112
&env->vfp.fp_status[FPST_STD_F16] : \
113
- &env->vfp.standard_fp_status; \
114
+ &env->vfp.fp_status[FPST_STD]; \
115
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
116
if (mask & 1) { \
117
TYPE v = m[H##ESIZE(e)]; \
118
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
119
continue; \
120
} \
121
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
122
- &env->vfp.standard_fp_status; \
123
+ &env->vfp.fp_status[FPST_STD]; \
124
if (!(mask & (1 << (e * ESIZE)))) { \
125
/* We need the result but without updating flags */ \
126
scratch_fpst = *fpst; \
127
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
128
continue; \
129
} \
130
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
131
- &env->vfp.standard_fp_status; \
132
+ &env->vfp.fp_status[FPST_STD]; \
133
if (!(mask & (1 << (e * ESIZE)))) { \
134
/* We need the result but without updating flags */ \
135
scratch_fpst = *fpst; \
136
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
137
continue; \
138
} \
139
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
140
- &env->vfp.standard_fp_status; \
141
+ &env->vfp.fp_status[FPST_STD]; \
142
if (!(mask & 1)) { \
143
/* We need the result but without updating flags */ \
144
scratch_fpst = *fpst; \
145
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
146
float_status scratch_fpst; \
147
float_status *base_fpst = (ESIZE == 2) ? \
148
&env->vfp.fp_status[FPST_STD_F16] : \
149
- &env->vfp.standard_fp_status; \
150
+ &env->vfp.fp_status[FPST_STD]; \
151
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
152
set_float_rounding_mode(rmode, base_fpst); \
153
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
154
@@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
155
unsigned e;
156
float_status *fpst;
157
float_status scratch_fpst;
158
- float_status *base_fpst = &env->vfp.standard_fp_status;
159
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
160
bool old_fz = get_flush_to_zero(base_fpst);
161
set_flush_to_zero(false, base_fpst);
162
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
163
@@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
164
unsigned e;
165
float_status *fpst;
166
float_status scratch_fpst;
167
- float_status *base_fpst = &env->vfp.standard_fp_status;
168
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
169
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
170
set_flush_inputs_to_zero(false, base_fpst);
171
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
172
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
173
continue; \
174
} \
175
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
176
- &env->vfp.standard_fp_status; \
177
+ &env->vfp.fp_status[FPST_STD]; \
178
if (!(mask & 1)) { \
179
/* We need the result but without updating flags */ \
180
scratch_fpst = *fpst; \
181
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/target/arm/tcg/vec_helper.c
184
+++ b/target/arm/tcg/vec_helper.c
185
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
186
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
187
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
188
189
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
190
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
191
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
192
}
193
194
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
195
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
196
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
197
198
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
199
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
200
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
201
}
202
203
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/target/arm/vfp_helper.c
206
+++ b/target/arm/vfp_helper.c
207
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
208
uint32_t a32_flags = 0, a64_flags = 0;
209
210
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
211
- a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
212
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
213
/* FZ16 does not generate an input denormal exception. */
214
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
215
& ~float_flag_input_denormal_flushed);
216
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
217
set_float_exception_flags(0, &env->vfp.fp_status_a64);
218
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
219
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
220
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
221
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
222
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
223
set_float_exception_flags(0, &env->vfp.ah_fp_status);
224
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
120
--
225
--
121
2.34.1
226
2.34.1
122
227
123
228
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Move Arm A-class Generic Timer definitions to the new
3
Replace with fp_status[FPST_AH_F16].
4
"target/arm/gtimer.h" header so units in hw/ which don't
5
need access to ARMCPU internals can use them without
6
having to include the huge "cpu.h".
7
4
8
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250129013857.135256-10-richard.henderson@linaro.org
11
Message-id: 20240118200643.29037-20-philmd@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
9
---
14
target/arm/cpu.h | 8 +-------
10
target/arm/cpu.h | 3 +--
15
target/arm/gtimer.h | 21 +++++++++++++++++++++
11
target/arm/cpu.c | 2 +-
16
hw/arm/allwinner-h3.c | 1 +
12
target/arm/vfp_helper.c | 10 +++++-----
17
hw/arm/allwinner-r40.c | 1 +
13
3 files changed, 7 insertions(+), 8 deletions(-)
18
hw/arm/bcm2836.c | 1 +
19
hw/arm/sbsa-ref.c | 1 +
20
hw/arm/virt.c | 1 +
21
hw/arm/xlnx-versal.c | 1 +
22
hw/arm/xlnx-zynqmp.c | 1 +
23
hw/cpu/a15mpcore.c | 1 +
24
target/arm/cpu.c | 1 +
25
target/arm/helper.c | 1 +
26
target/arm/hvf/hvf.c | 1 +
27
target/arm/kvm.c | 1 +
28
target/arm/machine.c | 1 +
29
15 files changed, 35 insertions(+), 7 deletions(-)
30
create mode 100644 target/arm/gtimer.h
31
14
32
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
33
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/cpu.h
17
--- a/target/arm/cpu.h
35
+++ b/target/arm/cpu.h
18
+++ b/target/arm/cpu.h
36
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
37
#include "exec/cpu-defs.h"
20
* behaviour when FPCR.AH == 1: they don't update cumulative
38
#include "qapi/qapi-types-common.h"
21
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
39
#include "target/arm/multiprocessing.h"
22
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
40
+#include "target/arm/gtimer.h"
23
- * which means we need an ah_fp_status_f16 as well.
41
24
+ * which means we need an FPST_AH_F16 as well.
42
/* ARM processors have a weak memory model */
25
*
43
#define TCG_GUEST_DEFAULT_MO (0)
26
* To avoid having to transfer exception bits around, we simply
44
@@ -XXX,XX +XXX,XX @@ typedef struct ARMGenericTimer {
27
* say that the FPSCR cumulative exception flags are the logical
45
uint64_t ctl; /* Timer Control register */
28
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
46
} ARMGenericTimer;
29
float_status fp_status_f16_a32;
47
30
float_status fp_status_f16_a64;
48
-#define GTIMER_PHYS 0
31
float_status ah_fp_status;
49
-#define GTIMER_VIRT 1
32
- float_status ah_fp_status_f16;
50
-#define GTIMER_HYP 2
33
};
51
-#define GTIMER_SEC 3
34
};
52
-#define GTIMER_HYPVIRT 4
35
53
-#define NUM_GTIMERS 5
54
-
55
#define VTCR_NSW (1u << 29)
56
#define VTCR_NSA (1u << 30)
57
#define VSTCR_SW VTCR_NSW
58
diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h
59
new file mode 100644
60
index XXXXXXX..XXXXXXX
61
--- /dev/null
62
+++ b/target/arm/gtimer.h
63
@@ -XXX,XX +XXX,XX @@
64
+/*
65
+ * ARM generic timer definitions for Arm A-class CPU
66
+ *
67
+ * Copyright (c) 2003 Fabrice Bellard
68
+ *
69
+ * SPDX-License-Identifier: LGPL-2.1-or-later
70
+ */
71
+
72
+#ifndef TARGET_ARM_GTIMER_H
73
+#define TARGET_ARM_GTIMER_H
74
+
75
+enum {
76
+ GTIMER_PHYS = 0,
77
+ GTIMER_VIRT = 1,
78
+ GTIMER_HYP = 2,
79
+ GTIMER_SEC = 3,
80
+ GTIMER_HYPVIRT = 4,
81
+#define NUM_GTIMERS 5
82
+};
83
+
84
+#endif
85
diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/hw/arm/allwinner-h3.c
88
+++ b/hw/arm/allwinner-h3.c
89
@@ -XXX,XX +XXX,XX @@
90
#include "sysemu/sysemu.h"
91
#include "hw/arm/allwinner-h3.h"
92
#include "target/arm/cpu-qom.h"
93
+#include "target/arm/gtimer.h"
94
95
/* Memory map */
96
const hwaddr allwinner_h3_memmap[] = {
97
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/hw/arm/allwinner-r40.c
100
+++ b/hw/arm/allwinner-r40.c
101
@@ -XXX,XX +XXX,XX @@
102
#include "hw/arm/allwinner-r40.h"
103
#include "hw/misc/allwinner-r40-dramc.h"
104
#include "target/arm/cpu-qom.h"
105
+#include "target/arm/gtimer.h"
106
107
/* Memory map */
108
const hwaddr allwinner_r40_memmap[] = {
109
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/hw/arm/bcm2836.c
112
+++ b/hw/arm/bcm2836.c
113
@@ -XXX,XX +XXX,XX @@
114
#include "hw/arm/raspi_platform.h"
115
#include "hw/sysbus.h"
116
#include "target/arm/cpu-qom.h"
117
+#include "target/arm/gtimer.h"
118
119
struct BCM283XClass {
120
/*< private >*/
121
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/hw/arm/sbsa-ref.c
124
+++ b/hw/arm/sbsa-ref.c
125
@@ -XXX,XX +XXX,XX @@
126
#include "qapi/qmp/qlist.h"
127
#include "qom/object.h"
128
#include "target/arm/cpu-qom.h"
129
+#include "target/arm/gtimer.h"
130
131
#define RAMLIMIT_GB 8192
132
#define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB)
133
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/hw/arm/virt.c
136
+++ b/hw/arm/virt.c
137
@@ -XXX,XX +XXX,XX @@
138
#include "target/arm/cpu-qom.h"
139
#include "target/arm/internals.h"
140
#include "target/arm/multiprocessing.h"
141
+#include "target/arm/gtimer.h"
142
#include "hw/mem/pc-dimm.h"
143
#include "hw/mem/nvdimm.h"
144
#include "hw/acpi/generic_event_device.h"
145
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/hw/arm/xlnx-versal.c
148
+++ b/hw/arm/xlnx-versal.c
149
@@ -XXX,XX +XXX,XX @@
150
#include "hw/arm/xlnx-versal.h"
151
#include "qemu/log.h"
152
#include "target/arm/cpu-qom.h"
153
+#include "target/arm/gtimer.h"
154
155
#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
156
#define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f")
157
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/hw/arm/xlnx-zynqmp.c
160
+++ b/hw/arm/xlnx-zynqmp.c
161
@@ -XXX,XX +XXX,XX @@
162
#include "sysemu/sysemu.h"
163
#include "kvm_arm.h"
164
#include "target/arm/cpu-qom.h"
165
+#include "target/arm/gtimer.h"
166
167
#define GIC_NUM_SPI_INTR 160
168
169
diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/hw/cpu/a15mpcore.c
172
+++ b/hw/cpu/a15mpcore.c
173
@@ -XXX,XX +XXX,XX @@
174
#include "hw/qdev-properties.h"
175
#include "sysemu/kvm.h"
176
#include "kvm_arm.h"
177
+#include "target/arm/gtimer.h"
178
179
static void a15mp_priv_set_irq(void *opaque, int irq, int level)
180
{
181
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
182
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
183
--- a/target/arm/cpu.c
38
--- a/target/arm/cpu.c
184
+++ b/target/arm/cpu.c
39
+++ b/target/arm/cpu.c
185
@@ -XXX,XX +XXX,XX @@
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
186
#include "fpu/softfloat.h"
41
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
187
#include "cpregs.h"
42
set_flush_to_zero(1, &env->vfp.ah_fp_status);
188
#include "target/arm/cpu-qom.h"
43
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
189
+#include "target/arm/gtimer.h"
44
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
190
45
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
191
static void arm_cpu_set_pc(CPUState *cs, vaddr value)
46
192
{
47
#ifndef CONFIG_USER_ONLY
193
diff --git a/target/arm/helper.c b/target/arm/helper.c
48
if (kvm_enabled()) {
49
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
194
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
195
--- a/target/arm/helper.c
51
--- a/target/arm/vfp_helper.c
196
+++ b/target/arm/helper.c
52
+++ b/target/arm/vfp_helper.c
197
@@ -XXX,XX +XXX,XX @@
53
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
198
#include "semihosting/common-semi.h"
54
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
199
#endif
55
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
200
#include "cpregs.h"
56
/*
201
+#include "target/arm/gtimer.h"
57
- * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
202
58
+ * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
203
#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
59
* they are used for insns that must not set the cumulative exception bits.
204
60
*/
205
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
61
206
index XXXXXXX..XXXXXXX 100644
62
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
207
--- a/target/arm/hvf/hvf.c
63
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
208
+++ b/target/arm/hvf/hvf.c
64
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
209
@@ -XXX,XX +XXX,XX @@
65
set_float_exception_flags(0, &env->vfp.ah_fp_status);
210
#include "target/arm/cpu.h"
66
- set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
211
#include "target/arm/internals.h"
67
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
212
#include "target/arm/multiprocessing.h"
68
}
213
+#include "target/arm/gtimer.h"
69
214
#include "trace/trace-target_arm_hvf.h"
70
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
215
#include "migration/vmstate.h"
71
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
216
72
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
217
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
73
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
218
index XXXXXXX..XXXXXXX 100644
74
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
219
--- a/target/arm/kvm.c
75
- set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
220
+++ b/target/arm/kvm.c
76
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
221
@@ -XXX,XX +XXX,XX @@
77
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
222
#include "qemu/log.h"
78
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
223
#include "hw/acpi/acpi.h"
79
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
224
#include "hw/acpi/ghes.h"
80
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
225
+#include "target/arm/gtimer.h"
81
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
226
82
}
227
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
83
if (changed & FPCR_FZ) {
228
KVM_CAP_LAST_INFO
84
bool ftz_enabled = val & FPCR_FZ;
229
diff --git a/target/arm/machine.c b/target/arm/machine.c
85
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
230
index XXXXXXX..XXXXXXX 100644
86
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
231
--- a/target/arm/machine.c
87
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
232
+++ b/target/arm/machine.c
88
set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
233
@@ -XXX,XX +XXX,XX @@
89
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
234
#include "internals.h"
90
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
235
#include "cpu-features.h"
91
}
236
#include "migration/cpu.h"
92
if (changed & FPCR_AH) {
237
+#include "target/arm/gtimer.h"
93
bool ah_enabled = val & FPCR_AH;
238
239
static bool vfp_needed(void *opaque)
240
{
241
--
94
--
242
2.34.1
95
2.34.1
243
96
244
97
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The ARM_CPU_IRQ/FIQ definitions are used to index the GPIO
3
Replace with fp_status[FPST_AH].
4
IRQ created calling qdev_init_gpio_in() in ARMCPU instance_init()
5
handler. To allow non-ARM code to raise interrupts on ARM cores,
6
move them to 'target/arm/cpu-qom.h' which is non-ARM specific and
7
can be included by any hw/ file.
8
4
9
File list to include the new header generated using:
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
$ git grep -wEl 'ARM_CPU_(\w*IRQ|FIQ)'
7
Message-id: 20250129013857.135256-11-richard.henderson@linaro.org
12
13
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20240118200643.29037-18-philmd@linaro.org
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
9
---
18
target/arm/cpu-qom.h | 6 ++++++
10
target/arm/cpu.h | 3 +--
19
target/arm/cpu.h | 6 ------
11
target/arm/cpu.c | 6 +++---
20
hw/arm/allwinner-a10.c | 1 +
12
target/arm/vfp_helper.c | 6 +++---
21
hw/arm/allwinner-h3.c | 1 +
13
3 files changed, 7 insertions(+), 8 deletions(-)
22
hw/arm/allwinner-r40.c | 1 +
23
hw/arm/armv7m.c | 1 +
24
hw/arm/aspeed_ast2400.c | 1 +
25
hw/arm/aspeed_ast2600.c | 1 +
26
hw/arm/bcm2836.c | 1 +
27
hw/arm/exynos4210.c | 1 +
28
hw/arm/fsl-imx25.c | 1 +
29
hw/arm/fsl-imx31.c | 1 +
30
hw/arm/fsl-imx6.c | 1 +
31
hw/arm/fsl-imx6ul.c | 1 +
32
hw/arm/fsl-imx7.c | 1 +
33
hw/arm/highbank.c | 1 +
34
hw/arm/integratorcp.c | 1 +
35
hw/arm/musicpal.c | 1 +
36
hw/arm/npcm7xx.c | 1 +
37
hw/arm/omap1.c | 1 +
38
hw/arm/omap2.c | 1 +
39
hw/arm/realview.c | 1 +
40
hw/arm/sbsa-ref.c | 1 +
41
hw/arm/strongarm.c | 1 +
42
hw/arm/versatilepb.c | 1 +
43
hw/arm/vexpress.c | 1 +
44
hw/arm/virt.c | 1 +
45
hw/arm/xilinx_zynq.c | 1 +
46
hw/arm/xlnx-versal.c | 1 +
47
hw/arm/xlnx-zynqmp.c | 1 +
48
target/arm/cpu.c | 1 +
49
31 files changed, 35 insertions(+), 6 deletions(-)
50
14
51
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/cpu-qom.h
54
+++ b/target/arm/cpu-qom.h
55
@@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
56
#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
57
#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
58
59
+/* Meanings of the ARMCPU object's four inbound GPIO lines */
60
+#define ARM_CPU_IRQ 0
61
+#define ARM_CPU_FIQ 1
62
+#define ARM_CPU_VIRQ 2
63
+#define ARM_CPU_VFIQ 3
64
+
65
/* For M profile, some registers are banked secure vs non-secure;
66
* these are represented as a 2-element array where the first element
67
* is the non-secure copy and the second is the secure copy.
68
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
69
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/cpu.h
17
--- a/target/arm/cpu.h
71
+++ b/target/arm/cpu.h
18
+++ b/target/arm/cpu.h
72
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
73
#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
20
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
74
#endif
21
* using a fixed value for it.
75
22
*
76
-/* Meanings of the ARMCPU object's four inbound GPIO lines */
23
- * The ah_fp_status is needed because some insns have different
77
-#define ARM_CPU_IRQ 0
24
+ * FPST_AH is needed because some insns have different
78
-#define ARM_CPU_FIQ 1
25
* behaviour when FPCR.AH == 1: they don't update cumulative
79
-#define ARM_CPU_VIRQ 2
26
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
80
-#define ARM_CPU_VFIQ 3
27
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
81
-
28
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
82
/* ARM-specific extra insn start words:
29
float_status fp_status_a64;
83
* 1: Conditional execution bits
30
float_status fp_status_f16_a32;
84
* 2: Partial exception syndrome for data aborts
31
float_status fp_status_f16_a64;
85
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
32
- float_status ah_fp_status;
86
index XXXXXXX..XXXXXXX 100644
33
};
87
--- a/hw/arm/allwinner-a10.c
34
};
88
+++ b/hw/arm/allwinner-a10.c
89
@@ -XXX,XX +XXX,XX @@
90
#include "hw/boards.h"
91
#include "hw/usb/hcd-ohci.h"
92
#include "hw/loader.h"
93
+#include "target/arm/cpu-qom.h"
94
95
#define AW_A10_SRAM_A_BASE 0x00000000
96
#define AW_A10_DRAMC_BASE 0x01c01000
97
diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/hw/arm/allwinner-h3.c
100
+++ b/hw/arm/allwinner-h3.c
101
@@ -XXX,XX +XXX,XX @@
102
#include "hw/loader.h"
103
#include "sysemu/sysemu.h"
104
#include "hw/arm/allwinner-h3.h"
105
+#include "target/arm/cpu-qom.h"
106
107
/* Memory map */
108
const hwaddr allwinner_h3_memmap[] = {
109
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/hw/arm/allwinner-r40.c
112
+++ b/hw/arm/allwinner-r40.c
113
@@ -XXX,XX +XXX,XX @@
114
#include "sysemu/sysemu.h"
115
#include "hw/arm/allwinner-r40.h"
116
#include "hw/misc/allwinner-r40-dramc.h"
117
+#include "target/arm/cpu-qom.h"
118
119
/* Memory map */
120
const hwaddr allwinner_r40_memmap[] = {
121
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/hw/arm/armv7m.c
124
+++ b/hw/arm/armv7m.c
125
@@ -XXX,XX +XXX,XX @@
126
#include "target/arm/idau.h"
127
#include "target/arm/cpu.h"
128
#include "target/arm/cpu-features.h"
129
+#include "target/arm/cpu-qom.h"
130
#include "migration/vmstate.h"
131
132
/* Bitbanded IO. Each word corresponds to a single bit. */
133
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/hw/arm/aspeed_ast2400.c
136
+++ b/hw/arm/aspeed_ast2400.c
137
@@ -XXX,XX +XXX,XX @@
138
#include "hw/i2c/aspeed_i2c.h"
139
#include "net/net.h"
140
#include "sysemu/sysemu.h"
141
+#include "target/arm/cpu-qom.h"
142
143
#define ASPEED_SOC_IOMEM_SIZE 0x00200000
144
145
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/hw/arm/aspeed_ast2600.c
148
+++ b/hw/arm/aspeed_ast2600.c
149
@@ -XXX,XX +XXX,XX @@
150
#include "hw/i2c/aspeed_i2c.h"
151
#include "net/net.h"
152
#include "sysemu/sysemu.h"
153
+#include "target/arm/cpu-qom.h"
154
155
#define ASPEED_SOC_IOMEM_SIZE 0x00200000
156
#define ASPEED_SOC_DPMCU_SIZE 0x00040000
157
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/hw/arm/bcm2836.c
160
+++ b/hw/arm/bcm2836.c
161
@@ -XXX,XX +XXX,XX @@
162
#include "hw/arm/bcm2836.h"
163
#include "hw/arm/raspi_platform.h"
164
#include "hw/sysbus.h"
165
+#include "target/arm/cpu-qom.h"
166
167
struct BCM283XClass {
168
/*< private >*/
169
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/hw/arm/exynos4210.c
172
+++ b/hw/arm/exynos4210.c
173
@@ -XXX,XX +XXX,XX @@
174
#include "hw/arm/exynos4210.h"
175
#include "hw/sd/sdhci.h"
176
#include "hw/usb/hcd-ehci.h"
177
+#include "target/arm/cpu-qom.h"
178
179
#define EXYNOS4210_CHIPID_ADDR 0x10000000
180
181
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/hw/arm/fsl-imx25.c
184
+++ b/hw/arm/fsl-imx25.c
185
@@ -XXX,XX +XXX,XX @@
186
#include "sysemu/sysemu.h"
187
#include "hw/qdev-properties.h"
188
#include "chardev/char.h"
189
+#include "target/arm/cpu-qom.h"
190
191
#define IMX25_ESDHC_CAPABILITIES 0x07e20000
192
193
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
194
index XXXXXXX..XXXXXXX 100644
195
--- a/hw/arm/fsl-imx31.c
196
+++ b/hw/arm/fsl-imx31.c
197
@@ -XXX,XX +XXX,XX @@
198
#include "exec/address-spaces.h"
199
#include "hw/qdev-properties.h"
200
#include "chardev/char.h"
201
+#include "target/arm/cpu-qom.h"
202
203
static void fsl_imx31_init(Object *obj)
204
{
205
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
206
index XXXXXXX..XXXXXXX 100644
207
--- a/hw/arm/fsl-imx6.c
208
+++ b/hw/arm/fsl-imx6.c
209
@@ -XXX,XX +XXX,XX @@
210
#include "chardev/char.h"
211
#include "qemu/error-report.h"
212
#include "qemu/module.h"
213
+#include "target/arm/cpu-qom.h"
214
215
#define IMX6_ESDHC_CAPABILITIES 0x057834b4
216
217
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/hw/arm/fsl-imx6ul.c
220
+++ b/hw/arm/fsl-imx6ul.c
221
@@ -XXX,XX +XXX,XX @@
222
#include "sysemu/sysemu.h"
223
#include "qemu/error-report.h"
224
#include "qemu/module.h"
225
+#include "target/arm/cpu-qom.h"
226
227
#define NAME_SIZE 20
228
229
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
230
index XXXXXXX..XXXXXXX 100644
231
--- a/hw/arm/fsl-imx7.c
232
+++ b/hw/arm/fsl-imx7.c
233
@@ -XXX,XX +XXX,XX @@
234
#include "sysemu/sysemu.h"
235
#include "qemu/error-report.h"
236
#include "qemu/module.h"
237
+#include "target/arm/cpu-qom.h"
238
239
#define NAME_SIZE 20
240
241
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
242
index XXXXXXX..XXXXXXX 100644
243
--- a/hw/arm/highbank.c
244
+++ b/hw/arm/highbank.c
245
@@ -XXX,XX +XXX,XX @@
246
#include "qemu/log.h"
247
#include "qom/object.h"
248
#include "cpu.h"
249
+#include "target/arm/cpu-qom.h"
250
251
#define SMP_BOOT_ADDR 0x100
252
#define SMP_BOOT_REG 0x40
253
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
254
index XXXXXXX..XXXXXXX 100644
255
--- a/hw/arm/integratorcp.c
256
+++ b/hw/arm/integratorcp.c
257
@@ -XXX,XX +XXX,XX @@
258
#include "hw/sd/sd.h"
259
#include "qom/object.h"
260
#include "audio/audio.h"
261
+#include "target/arm/cpu-qom.h"
262
263
#define TYPE_INTEGRATOR_CM "integrator_core"
264
OBJECT_DECLARE_SIMPLE_TYPE(IntegratorCMState, INTEGRATOR_CM)
265
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
266
index XXXXXXX..XXXXXXX 100644
267
--- a/hw/arm/musicpal.c
268
+++ b/hw/arm/musicpal.c
269
@@ -XXX,XX +XXX,XX @@
270
#include "hw/net/mv88w8618_eth.h"
271
#include "audio/audio.h"
272
#include "qemu/error-report.h"
273
+#include "target/arm/cpu-qom.h"
274
275
#define MP_MISC_BASE 0x80002000
276
#define MP_MISC_SIZE 0x00001000
277
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
278
index XXXXXXX..XXXXXXX 100644
279
--- a/hw/arm/npcm7xx.c
280
+++ b/hw/arm/npcm7xx.c
281
@@ -XXX,XX +XXX,XX @@
282
#include "qapi/error.h"
283
#include "qemu/units.h"
284
#include "sysemu/sysemu.h"
285
+#include "target/arm/cpu-qom.h"
286
287
/*
288
* This covers the whole MMIO space. We'll use this to catch any MMIO accesses
289
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
290
index XXXXXXX..XXXXXXX 100644
291
--- a/hw/arm/omap1.c
292
+++ b/hw/arm/omap1.c
293
@@ -XXX,XX +XXX,XX @@
294
#include "hw/sysbus.h"
295
#include "qemu/cutils.h"
296
#include "qemu/bcd.h"
297
+#include "target/arm/cpu-qom.h"
298
299
static inline void omap_log_badwidth(const char *funcname, hwaddr addr, int sz)
300
{
301
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
302
index XXXXXXX..XXXXXXX 100644
303
--- a/hw/arm/omap2.c
304
+++ b/hw/arm/omap2.c
305
@@ -XXX,XX +XXX,XX @@
306
#include "hw/sysbus.h"
307
#include "hw/boards.h"
308
#include "audio/audio.h"
309
+#include "target/arm/cpu-qom.h"
310
311
/* Enhanced Audio Controller (CODEC only) */
312
struct omap_eac_s {
313
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
314
index XXXXXXX..XXXXXXX 100644
315
--- a/hw/arm/realview.c
316
+++ b/hw/arm/realview.c
317
@@ -XXX,XX +XXX,XX @@
318
#include "hw/i2c/arm_sbcon_i2c.h"
319
#include "hw/sd/sd.h"
320
#include "audio/audio.h"
321
+#include "target/arm/cpu-qom.h"
322
323
#define SMP_BOOT_ADDR 0xe0000000
324
#define SMP_BOOTREG_ADDR 0x10000030
325
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
326
index XXXXXXX..XXXXXXX 100644
327
--- a/hw/arm/sbsa-ref.c
328
+++ b/hw/arm/sbsa-ref.c
329
@@ -XXX,XX +XXX,XX @@
330
#include "net/net.h"
331
#include "qapi/qmp/qlist.h"
332
#include "qom/object.h"
333
+#include "target/arm/cpu-qom.h"
334
335
#define RAMLIMIT_GB 8192
336
#define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB)
337
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
338
index XXXXXXX..XXXXXXX 100644
339
--- a/hw/arm/strongarm.c
340
+++ b/hw/arm/strongarm.c
341
@@ -XXX,XX +XXX,XX @@
342
#include "qemu/cutils.h"
343
#include "qemu/log.h"
344
#include "qom/object.h"
345
+#include "target/arm/cpu-qom.h"
346
347
//#define DEBUG
348
349
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
350
index XXXXXXX..XXXXXXX 100644
351
--- a/hw/arm/versatilepb.c
352
+++ b/hw/arm/versatilepb.c
353
@@ -XXX,XX +XXX,XX @@
354
#include "hw/sd/sd.h"
355
#include "qom/object.h"
356
#include "audio/audio.h"
357
+#include "target/arm/cpu-qom.h"
358
359
#define VERSATILE_FLASH_ADDR 0x34000000
360
#define VERSATILE_FLASH_SIZE (64 * 1024 * 1024)
361
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
362
index XXXXXXX..XXXXXXX 100644
363
--- a/hw/arm/vexpress.c
364
+++ b/hw/arm/vexpress.c
365
@@ -XXX,XX +XXX,XX @@
366
#include "qapi/qmp/qlist.h"
367
#include "qom/object.h"
368
#include "audio/audio.h"
369
+#include "target/arm/cpu-qom.h"
370
371
#define VEXPRESS_BOARD_ID 0x8e0
372
#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
373
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
374
index XXXXXXX..XXXXXXX 100644
375
--- a/hw/arm/virt.c
376
+++ b/hw/arm/virt.c
377
@@ -XXX,XX +XXX,XX @@
378
#include "standard-headers/linux/input.h"
379
#include "hw/arm/smmuv3.h"
380
#include "hw/acpi/acpi.h"
381
+#include "target/arm/cpu-qom.h"
382
#include "target/arm/internals.h"
383
#include "target/arm/multiprocessing.h"
384
#include "hw/mem/pc-dimm.h"
385
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
386
index XXXXXXX..XXXXXXX 100644
387
--- a/hw/arm/xilinx_zynq.c
388
+++ b/hw/arm/xilinx_zynq.c
389
@@ -XXX,XX +XXX,XX @@
390
#include "sysemu/reset.h"
391
#include "qom/object.h"
392
#include "exec/tswap.h"
393
+#include "target/arm/cpu-qom.h"
394
395
#define TYPE_ZYNQ_MACHINE MACHINE_TYPE_NAME("xilinx-zynq-a9")
396
OBJECT_DECLARE_SIMPLE_TYPE(ZynqMachineState, ZYNQ_MACHINE)
397
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
398
index XXXXXXX..XXXXXXX 100644
399
--- a/hw/arm/xlnx-versal.c
400
+++ b/hw/arm/xlnx-versal.c
401
@@ -XXX,XX +XXX,XX @@
402
#include "hw/misc/unimp.h"
403
#include "hw/arm/xlnx-versal.h"
404
#include "qemu/log.h"
405
+#include "target/arm/cpu-qom.h"
406
407
#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
408
#define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f")
409
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
410
index XXXXXXX..XXXXXXX 100644
411
--- a/hw/arm/xlnx-zynqmp.c
412
+++ b/hw/arm/xlnx-zynqmp.c
413
@@ -XXX,XX +XXX,XX @@
414
#include "sysemu/kvm.h"
415
#include "sysemu/sysemu.h"
416
#include "kvm_arm.h"
417
+#include "target/arm/cpu-qom.h"
418
419
#define GIC_NUM_SPI_INTR 160
420
35
421
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
422
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
423
--- a/target/arm/cpu.c
38
--- a/target/arm/cpu.c
424
+++ b/target/arm/cpu.c
39
+++ b/target/arm/cpu.c
425
@@ -XXX,XX +XXX,XX @@
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
426
#include "disas/capstone.h"
41
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
427
#include "fpu/softfloat.h"
42
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
428
#include "cpregs.h"
43
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
429
+#include "target/arm/cpu-qom.h"
44
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
430
45
- set_flush_to_zero(1, &env->vfp.ah_fp_status);
431
static void arm_cpu_set_pc(CPUState *cs, vaddr value)
46
- set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
432
{
47
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
48
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
49
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
50
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
51
52
#ifndef CONFIG_USER_ONLY
53
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/vfp_helper.c
56
+++ b/target/arm/vfp_helper.c
57
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
58
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
59
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
60
/*
61
- * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
62
+ * We do not merge in flags from FPST_AH or FPST_AH_F16, because
63
* they are used for insns that must not set the cumulative exception bits.
64
*/
65
66
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
67
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
68
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
69
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
70
- set_float_exception_flags(0, &env->vfp.ah_fp_status);
71
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
72
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
73
}
74
75
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
76
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
77
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
78
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
79
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
80
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
81
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
82
}
83
if (changed & FPCR_AH) {
433
--
84
--
434
2.34.1
85
2.34.1
435
86
436
87
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Rename to arm_build_mp_affinity. This frees up the name for
3
Replace with fp_status[FPST_A64_F16].
4
other usage, and emphasizes that the cpu object is not involved.
5
4
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250129013857.135256-12-richard.henderson@linaro.org
9
Message-id: 20240118200643.29037-9-philmd@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
9
---
12
target/arm/cpu.h | 2 +-
10
target/arm/cpu.h | 1 -
13
hw/arm/npcm7xx.c | 2 +-
11
target/arm/cpu.c | 2 +-
14
hw/arm/sbsa-ref.c | 2 +-
12
target/arm/tcg/sme_helper.c | 2 +-
15
hw/arm/virt.c | 2 +-
13
target/arm/tcg/vec_helper.c | 9 ++++-----
16
target/arm/cpu.c | 6 +++---
14
target/arm/vfp_helper.c | 16 ++++++++--------
17
5 files changed, 7 insertions(+), 7 deletions(-)
15
5 files changed, 14 insertions(+), 16 deletions(-)
18
16
19
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
20
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/cpu.h
19
--- a/target/arm/cpu.h
22
+++ b/target/arm/cpu.h
20
+++ b/target/arm/cpu.h
23
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj);
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
24
(ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK)
22
float_status fp_status_a32;
25
#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK)
23
float_status fp_status_a64;
26
24
float_status fp_status_f16_a32;
27
-uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz);
25
- float_status fp_status_f16_a64;
28
+uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
26
};
29
27
};
30
#ifndef CONFIG_USER_ONLY
28
31
extern const VMStateDescription vmstate_arm_cpu;
32
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/arm/npcm7xx.c
35
+++ b/hw/arm/npcm7xx.c
36
@@ -XXX,XX +XXX,XX @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
37
/* CPUs */
38
for (i = 0; i < nc->num_cpus; i++) {
39
object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity",
40
- arm_cpu_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS),
41
+ arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS),
42
&error_abort);
43
object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
44
NPCM7XX_GIC_CPU_IF_ADDR, &error_abort);
45
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/hw/arm/sbsa-ref.c
48
+++ b/hw/arm/sbsa-ref.c
49
@@ -XXX,XX +XXX,XX @@ static const int sbsa_ref_irqmap[] = {
50
static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx)
51
{
52
uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
53
- return arm_cpu_mp_affinity(idx, clustersz);
54
+ return arm_build_mp_affinity(idx, clustersz);
55
}
56
57
static void sbsa_fdt_add_gic_node(SBSAMachineState *sms)
58
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/hw/arm/virt.c
61
+++ b/hw/arm/virt.c
62
@@ -XXX,XX +XXX,XX @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
63
clustersz = GICV3_TARGETLIST_BITS;
64
}
65
}
66
- return arm_cpu_mp_affinity(idx, clustersz);
67
+ return arm_build_mp_affinity(idx, clustersz);
68
}
69
70
static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
71
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
72
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
73
--- a/target/arm/cpu.c
31
--- a/target/arm/cpu.c
74
+++ b/target/arm/cpu.c
32
+++ b/target/arm/cpu.c
75
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
34
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
35
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
36
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
37
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
38
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
40
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
41
set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
42
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/sme_helper.c
45
+++ b/target/arm/tcg/sme_helper.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
47
* produces default NaNs. We also need a second copy of fp_status with
48
* round-to-odd -- see above.
49
*/
50
- fpst_f16 = env->vfp.fp_status_f16_a64;
51
+ fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
52
fpst_std = env->vfp.fp_status_a64;
53
set_default_nan_mode(true, &fpst_std);
54
set_default_nan_mode(true, &fpst_f16);
55
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/tcg/vec_helper.c
58
+++ b/target/arm/tcg/vec_helper.c
59
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
60
}
76
}
61
}
62
do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
63
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
64
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
77
}
65
}
78
66
79
-uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz)
67
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
80
+uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
81
{
69
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
82
uint32_t Aff1 = idx / clustersz;
70
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
83
uint32_t Aff0 = idx % clustersz;
71
float_status *status = &env->vfp.fp_status_a64;
84
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
72
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
85
* so these bits always RAZ.
73
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
86
*/
74
int negx = 0, negf = 0;
87
if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
75
88
- cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index,
76
if (is_s) {
89
- ARM_DEFAULT_CPUS_PER_CLUSTER);
77
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
90
+ cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index,
78
}
91
+ ARM_DEFAULT_CPUS_PER_CLUSTER);
92
}
79
}
93
80
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
94
if (cpu->reset_hivecs) {
81
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
82
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
83
}
84
85
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
86
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
87
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
88
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
89
float_status *status = &env->vfp.fp_status_a64;
90
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
91
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
92
int negx = 0, negf = 0;
93
94
if (is_s) {
95
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
96
negx = 0x8000;
97
}
98
}
99
-
100
for (i = 0; i < oprsz; i += 16) {
101
float16 mm_16 = *(float16 *)(vm + i + idx);
102
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
103
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/vfp_helper.c
106
+++ b/target/arm/vfp_helper.c
107
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
108
& ~float_flag_input_denormal_flushed);
109
110
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
111
- a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
112
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
113
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
114
/*
115
* We do not merge in flags from FPST_AH or FPST_AH_F16, because
116
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
117
set_float_exception_flags(0, &env->vfp.fp_status_a32);
118
set_float_exception_flags(0, &env->vfp.fp_status_a64);
119
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
120
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
121
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
122
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
123
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
124
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
125
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
126
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
127
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
128
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
129
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
130
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
131
}
132
if (changed & FPCR_FZ16) {
133
bool ftz_enabled = val & FPCR_FZ16;
134
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
135
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
136
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
137
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
138
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
139
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
140
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
141
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
142
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
143
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
144
}
145
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
146
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
147
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
148
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
149
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
150
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
151
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
152
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
153
}
154
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
155
if (ah_enabled) {
156
/* Change behaviours for A64 FP operations */
157
arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
158
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
159
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
160
} else {
161
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
162
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
163
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
164
}
165
}
166
/*
95
--
167
--
96
2.34.1
168
2.34.1
97
169
98
170
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Declare arm_cpu_mp_affinity() prototype in the new
3
Replace with fp_status[FPST_A32_F16].
4
"target/arm/multiprocessing.h" header so units in
5
hw/arm/ can use it without having to include the huge
6
target-specific "cpu.h".
7
4
8
File list to include the new header generated using:
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
$ git grep -lw arm_cpu_mp_affinity
7
Message-id: 20250129013857.135256-13-richard.henderson@linaro.org
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20240118200643.29037-11-philmd@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
9
---
17
target/arm/cpu.h | 6 +-----
10
target/arm/cpu.h | 1 -
18
target/arm/multiprocessing.h | 16 ++++++++++++++++
11
target/arm/cpu.c | 2 +-
19
hw/arm/virt-acpi-build.c | 1 +
12
target/arm/tcg/vec_helper.c | 4 ++--
20
hw/arm/virt.c | 1 +
13
target/arm/vfp_helper.c | 14 +++++++-------
21
hw/arm/xlnx-versal-virt.c | 1 +
14
4 files changed, 10 insertions(+), 11 deletions(-)
22
hw/misc/xlnx-versal-crl.c | 1 +
23
target/arm/arm-powerctl.c | 1 +
24
target/arm/cpu.c | 5 +++++
25
target/arm/hvf/hvf.c | 1 +
26
target/arm/tcg/psci.c | 1 +
27
10 files changed, 29 insertions(+), 5 deletions(-)
28
create mode 100644 target/arm/multiprocessing.h
29
15
30
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
31
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/cpu.h
18
--- a/target/arm/cpu.h
33
+++ b/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
34
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
35
#include "cpu-qom.h"
21
struct {
36
#include "exec/cpu-defs.h"
22
float_status fp_status_a32;
37
#include "qapi/qapi-types-common.h"
23
float_status fp_status_a64;
38
+#include "target/arm/multiprocessing.h"
24
- float_status fp_status_f16_a32;
39
25
};
40
/* ARM processors have a weak memory model */
26
};
41
#define TCG_GUEST_DEFAULT_MO (0)
27
42
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj);
43
44
uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
45
46
-static inline uint64_t arm_cpu_mp_affinity(ARMCPU *cpu)
47
-{
48
- return cpu->mp_affinity;
49
-}
50
-
51
#ifndef CONFIG_USER_ONLY
52
extern const VMStateDescription vmstate_arm_cpu;
53
54
diff --git a/target/arm/multiprocessing.h b/target/arm/multiprocessing.h
55
new file mode 100644
56
index XXXXXXX..XXXXXXX
57
--- /dev/null
58
+++ b/target/arm/multiprocessing.h
59
@@ -XXX,XX +XXX,XX @@
60
+/*
61
+ * ARM multiprocessor CPU helpers
62
+ *
63
+ * Copyright (c) 2003 Fabrice Bellard
64
+ *
65
+ * SPDX-License-Identifier: LGPL-2.1-or-later
66
+ */
67
+
68
+#ifndef TARGET_ARM_MULTIPROCESSING_H
69
+#define TARGET_ARM_MULTIPROCESSING_H
70
+
71
+#include "target/arm/cpu-qom.h"
72
+
73
+uint64_t arm_cpu_mp_affinity(ARMCPU *cpu);
74
+
75
+#endif
76
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/arm/virt-acpi-build.c
79
+++ b/hw/arm/virt-acpi-build.c
80
@@ -XXX,XX +XXX,XX @@
81
#include "hw/acpi/ghes.h"
82
#include "hw/acpi/viot.h"
83
#include "hw/virtio/virtio-acpi.h"
84
+#include "target/arm/multiprocessing.h"
85
86
#define ARM_SPI_BASE 32
87
88
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/arm/virt.c
91
+++ b/hw/arm/virt.c
92
@@ -XXX,XX +XXX,XX @@
93
#include "hw/arm/smmuv3.h"
94
#include "hw/acpi/acpi.h"
95
#include "target/arm/internals.h"
96
+#include "target/arm/multiprocessing.h"
97
#include "hw/mem/pc-dimm.h"
98
#include "hw/mem/nvdimm.h"
99
#include "hw/acpi/generic_event_device.h"
100
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/hw/arm/xlnx-versal-virt.c
103
+++ b/hw/arm/xlnx-versal-virt.c
104
@@ -XXX,XX +XXX,XX @@
105
#include "hw/qdev-properties.h"
106
#include "hw/arm/xlnx-versal.h"
107
#include "hw/arm/boot.h"
108
+#include "target/arm/multiprocessing.h"
109
#include "qom/object.h"
110
111
#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt")
112
diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/hw/misc/xlnx-versal-crl.c
115
+++ b/hw/misc/xlnx-versal-crl.c
116
@@ -XXX,XX +XXX,XX @@
117
#include "hw/resettable.h"
118
119
#include "target/arm/arm-powerctl.h"
120
+#include "target/arm/multiprocessing.h"
121
#include "hw/misc/xlnx-versal-crl.h"
122
123
#ifndef XLNX_VERSAL_CRL_ERR_DEBUG
124
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/target/arm/arm-powerctl.c
127
+++ b/target/arm/arm-powerctl.c
128
@@ -XXX,XX +XXX,XX @@
129
#include "qemu/log.h"
130
#include "qemu/main-loop.h"
131
#include "sysemu/tcg.h"
132
+#include "target/arm/multiprocessing.h"
133
134
#ifndef DEBUG_ARM_POWERCTL
135
#define DEBUG_ARM_POWERCTL 0
136
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
28
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
137
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
138
--- a/target/arm/cpu.c
30
--- a/target/arm/cpu.c
139
+++ b/target/arm/cpu.c
31
+++ b/target/arm/cpu.c
140
@@ -XXX,XX +XXX,XX @@ uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
32
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
141
return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
33
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
34
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
35
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
36
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
37
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
38
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
40
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
41
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/tcg/vec_helper.c
44
+++ b/target/arm/tcg/vec_helper.c
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
46
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
47
48
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
49
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
50
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
142
}
51
}
143
52
144
+uint64_t arm_cpu_mp_affinity(ARMCPU *cpu)
53
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
145
+{
54
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
146
+ return cpu->mp_affinity;
55
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
147
+}
56
148
+
57
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
149
static void arm_cpu_initfn(Object *obj)
58
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
150
{
59
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
151
ARMCPU *cpu = ARM_CPU(obj);
60
}
152
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
61
62
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
63
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
153
index XXXXXXX..XXXXXXX 100644
64
index XXXXXXX..XXXXXXX 100644
154
--- a/target/arm/hvf/hvf.c
65
--- a/target/arm/vfp_helper.c
155
+++ b/target/arm/hvf/hvf.c
66
+++ b/target/arm/vfp_helper.c
156
@@ -XXX,XX +XXX,XX @@
67
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
157
#include "arm-powerctl.h"
68
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
158
#include "target/arm/cpu.h"
69
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
159
#include "target/arm/internals.h"
70
/* FZ16 does not generate an input denormal exception. */
160
+#include "target/arm/multiprocessing.h"
71
- a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
161
#include "trace/trace-target_arm_hvf.h"
72
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
162
#include "migration/vmstate.h"
73
& ~float_flag_input_denormal_flushed);
163
74
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
164
diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c
75
& ~float_flag_input_denormal_flushed);
165
index XXXXXXX..XXXXXXX 100644
76
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
166
--- a/target/arm/tcg/psci.c
77
*/
167
+++ b/target/arm/tcg/psci.c
78
set_float_exception_flags(0, &env->vfp.fp_status_a32);
168
@@ -XXX,XX +XXX,XX @@
79
set_float_exception_flags(0, &env->vfp.fp_status_a64);
169
#include "sysemu/runstate.h"
80
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
170
#include "internals.h"
81
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
171
#include "arm-powerctl.h"
82
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
172
+#include "target/arm/multiprocessing.h"
83
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
173
84
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
174
bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
85
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
175
{
86
}
87
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
88
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
89
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
90
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
91
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
92
}
93
if (changed & FPCR_FZ16) {
94
bool ftz_enabled = val & FPCR_FZ16;
95
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
96
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
97
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
98
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
99
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
100
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
101
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
102
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
103
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
104
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
105
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
106
bool dnan_enabled = val & FPCR_DN;
107
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
108
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
109
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
110
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
111
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
112
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
113
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
114
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
115
softfloat_to_vfp_compare(env, \
116
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
117
}
118
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
119
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
120
DO_VFP_cmp(s, float32, float32, fp_status_a32)
121
DO_VFP_cmp(d, float64, float64, fp_status_a32)
122
#undef DO_VFP_cmp
176
--
123
--
177
2.34.1
124
2.34.1
178
125
179
126
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Wrapper to return the mp affinity bits from the cpu.
3
Replace with fp_status[FPST_A64].
4
4
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20250129013857.135256-14-richard.henderson@linaro.org
8
Message-id: 20240118200643.29037-10-philmd@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/cpu.h | 5 +++++
10
target/arm/cpu.h | 1 -
12
hw/arm/virt-acpi-build.c | 2 +-
11
target/arm/cpu.c | 2 +-
13
hw/arm/virt.c | 6 +++---
12
target/arm/tcg/sme_helper.c | 2 +-
14
hw/arm/xlnx-versal-virt.c | 3 ++-
13
target/arm/tcg/vec_helper.c | 10 +++++-----
15
hw/misc/xlnx-versal-crl.c | 4 ++--
14
target/arm/vfp_helper.c | 16 ++++++++--------
16
target/arm/arm-powerctl.c | 2 +-
15
5 files changed, 15 insertions(+), 16 deletions(-)
17
target/arm/hvf/hvf.c | 4 ++--
18
target/arm/tcg/psci.c | 2 +-
19
8 files changed, 17 insertions(+), 11 deletions(-)
20
16
21
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
22
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/cpu.h
19
--- a/target/arm/cpu.h
24
+++ b/target/arm/cpu.h
20
+++ b/target/arm/cpu.h
25
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj);
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
26
22
float_status fp_status[FPST_COUNT];
27
uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
23
struct {
28
24
float_status fp_status_a32;
29
+static inline uint64_t arm_cpu_mp_affinity(ARMCPU *cpu)
25
- float_status fp_status_a64;
30
+{
26
};
31
+ return cpu->mp_affinity;
27
};
32
+}
28
33
+
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
34
#ifndef CONFIG_USER_ONLY
35
extern const VMStateDescription vmstate_arm_cpu;
36
37
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
38
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/arm/virt-acpi-build.c
31
--- a/target/arm/cpu.c
40
+++ b/hw/arm/virt-acpi-build.c
32
+++ b/target/arm/cpu.c
41
@@ -XXX,XX +XXX,XX @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
42
build_append_int_noprefix(table_data, vgic_interrupt, 4);
34
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
43
build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/
35
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
44
/* MPIDR */
36
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
45
- build_append_int_noprefix(table_data, armcpu->mp_affinity, 8);
37
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
46
+ build_append_int_noprefix(table_data, arm_cpu_mp_affinity(armcpu), 8);
38
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
47
/* Processor Power Efficiency Class */
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
48
build_append_int_noprefix(table_data, 0, 1);
40
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
49
/* Reserved */
41
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
50
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
42
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
51
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
52
--- a/hw/arm/virt.c
44
--- a/target/arm/tcg/sme_helper.c
53
+++ b/hw/arm/virt.c
45
+++ b/target/arm/tcg/sme_helper.c
54
@@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
46
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
55
for (cpu = 0; cpu < smp_cpus; cpu++) {
47
* round-to-odd -- see above.
56
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
48
*/
57
49
fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
58
- if (armcpu->mp_affinity & ARM_AFF3_MASK) {
50
- fpst_std = env->vfp.fp_status_a64;
59
+ if (arm_cpu_mp_affinity(armcpu) & ARM_AFF3_MASK) {
51
+ fpst_std = env->vfp.fp_status[FPST_A64];
60
addr_cells = 2;
52
set_default_nan_mode(true, &fpst_std);
53
set_default_nan_mode(true, &fpst_f16);
54
fpst_odd = fpst_std;
55
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/tcg/vec_helper.c
58
+++ b/target/arm/tcg/vec_helper.c
59
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
60
negx = 0x8000800080008000ull;
61
}
62
}
63
- do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
64
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
65
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
66
}
67
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
69
intptr_t i, oprsz = simd_oprsz(desc);
70
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
71
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
72
- float_status *status = &env->vfp.fp_status_a64;
73
+ float_status *status = &env->vfp.fp_status[FPST_A64];
74
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
75
int negx = 0, negf = 0;
76
77
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
78
negx = 0x8000800080008000ull;
79
}
80
}
81
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
82
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
83
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
84
}
85
86
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
87
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
88
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
89
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
90
- float_status *status = &env->vfp.fp_status_a64;
91
+ float_status *status = &env->vfp.fp_status[FPST_A64];
92
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
93
int negx = 0, negf = 0;
94
95
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
96
*/
97
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
98
99
- *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32;
100
+ *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32;
101
set_default_nan_mode(true, statusp);
102
103
if (ebf) {
104
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/vfp_helper.c
107
+++ b/target/arm/vfp_helper.c
108
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
109
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
110
& ~float_flag_input_denormal_flushed);
111
112
- a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
113
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
114
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
115
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
116
/*
117
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
118
* be the architecturally up-to-date exception flag information first.
119
*/
120
set_float_exception_flags(0, &env->vfp.fp_status_a32);
121
- set_float_exception_flags(0, &env->vfp.fp_status_a64);
122
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
123
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
124
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
125
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
126
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
61
break;
127
break;
62
}
128
}
63
@@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
129
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
64
130
- set_float_rounding_mode(i, &env->vfp.fp_status_a64);
65
if (addr_cells == 2) {
131
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
66
qemu_fdt_setprop_u64(ms->fdt, nodename, "reg",
132
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
67
- armcpu->mp_affinity);
133
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
68
+ arm_cpu_mp_affinity(armcpu));
134
}
135
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
136
if (changed & FPCR_FZ) {
137
bool ftz_enabled = val & FPCR_FZ;
138
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
139
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
140
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
141
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
142
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
143
}
144
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
145
*/
146
bool fitz_enabled = (val & FPCR_FIZ) ||
147
(val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
148
- set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
149
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
150
}
151
if (changed & FPCR_DN) {
152
bool dnan_enabled = val & FPCR_DN;
153
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
154
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
155
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
156
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
157
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
158
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
159
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
160
161
if (ah_enabled) {
162
/* Change behaviours for A64 FP operations */
163
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
164
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
165
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
69
} else {
166
} else {
70
qemu_fdt_setprop_cell(ms->fdt, nodename, "reg",
167
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
71
- armcpu->mp_affinity);
168
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
72
+ arm_cpu_mp_affinity(armcpu));
169
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
73
}
74
75
if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
76
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/arm/xlnx-versal-virt.c
79
+++ b/hw/arm/xlnx-versal-virt.c
80
@@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(VersalVirt *s, uint32_t psci_conduit)
81
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
82
83
qemu_fdt_add_subnode(s->fdt, name);
84
- qemu_fdt_setprop_cell(s->fdt, name, "reg", armcpu->mp_affinity);
85
+ qemu_fdt_setprop_cell(s->fdt, name, "reg",
86
+ arm_cpu_mp_affinity(armcpu));
87
if (psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
88
qemu_fdt_setprop_string(s->fdt, name, "enable-method", "psci");
89
}
90
diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/misc/xlnx-versal-crl.c
93
+++ b/hw/misc/xlnx-versal-crl.c
94
@@ -XXX,XX +XXX,XX @@ static void crl_reset_cpu(XlnxVersalCRL *s, ARMCPU *armcpu,
95
bool rst_old, bool rst_new)
96
{
97
if (rst_new) {
98
- arm_set_cpu_off(armcpu->mp_affinity);
99
+ arm_set_cpu_off(arm_cpu_mp_affinity(armcpu));
100
} else {
101
- arm_set_cpu_on_and_reset(armcpu->mp_affinity);
102
+ arm_set_cpu_on_and_reset(arm_cpu_mp_affinity(armcpu));
103
}
104
}
105
106
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/arm-powerctl.c
109
+++ b/target/arm/arm-powerctl.c
110
@@ -XXX,XX +XXX,XX @@ CPUState *arm_get_cpu_by_id(uint64_t id)
111
CPU_FOREACH(cpu) {
112
ARMCPU *armcpu = ARM_CPU(cpu);
113
114
- if (armcpu->mp_affinity == id) {
115
+ if (arm_cpu_mp_affinity(armcpu) == id) {
116
return cpu;
117
}
170
}
118
}
171
}
119
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/hvf/hvf.c
122
+++ b/target/arm/hvf/hvf.c
123
@@ -XXX,XX +XXX,XX @@ static void hvf_raise_exception(CPUState *cpu, uint32_t excp,
124
125
static void hvf_psci_cpu_off(ARMCPU *arm_cpu)
126
{
127
- int32_t ret = arm_set_cpu_off(arm_cpu->mp_affinity);
128
+ int32_t ret = arm_set_cpu_off(arm_cpu_mp_affinity(arm_cpu));
129
assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS);
130
}
131
132
@@ -XXX,XX +XXX,XX @@ static bool hvf_handle_psci_call(CPUState *cpu)
133
int32_t ret = 0;
134
135
trace_hvf_psci_call(param[0], param[1], param[2], param[3],
136
- arm_cpu->mp_affinity);
137
+ arm_cpu_mp_affinity(arm_cpu));
138
139
switch (param[0]) {
140
case QEMU_PSCI_0_2_FN_PSCI_VERSION:
141
diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c
142
index XXXXXXX..XXXXXXX 100644
143
--- a/target/arm/tcg/psci.c
144
+++ b/target/arm/tcg/psci.c
145
@@ -XXX,XX +XXX,XX @@ err:
146
return;
147
148
cpu_off:
149
- ret = arm_set_cpu_off(cpu->mp_affinity);
150
+ ret = arm_set_cpu_off(arm_cpu_mp_affinity(cpu));
151
/* notreached */
152
/* sanity check in case something failed */
153
assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS);
154
--
172
--
155
2.34.1
173
2.34.1
156
174
157
175
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Missed in commit 2d56be5a29 ("target: Declare
3
Replace with fp_status[FPST_A32]. As this was the last of the
4
FOO_CPU_TYPE_NAME/SUFFIX in 'cpu-qom.h'"). See
4
old structures, we can remove the anonymous union and struct.
5
it for more details.
6
5
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Message-id: 20240118200643.29037-12-philmd@linaro.org
8
Message-id: 20250129013857.135256-15-richard.henderson@linaro.org
9
[PMM: tweak to account for change to is_ebf()]
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
target/arm/cpu-qom.h | 3 +++
12
target/arm/cpu.h | 7 +------
13
target/arm/cpu.h | 2 --
13
target/arm/cpu.c | 2 +-
14
2 files changed, 3 insertions(+), 2 deletions(-)
14
target/arm/tcg/vec_helper.c | 2 +-
15
target/arm/vfp_helper.c | 18 +++++++++---------
16
4 files changed, 12 insertions(+), 17 deletions(-)
15
17
16
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu-qom.h
19
+++ b/target/arm/cpu-qom.h
20
@@ -XXX,XX +XXX,XX @@ typedef struct AArch64CPUClass AArch64CPUClass;
21
DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
22
TYPE_AARCH64_CPU)
23
24
+#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
25
+#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
26
+
27
#endif
28
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
18
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
29
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/cpu.h
20
--- a/target/arm/cpu.h
31
+++ b/target/arm/cpu.h
21
+++ b/target/arm/cpu.h
32
@@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
22
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
33
#define ARM_CPUID_TI915T 0x54029152
23
uint32_t scratch[8];
34
#define ARM_CPUID_TI925T 0x54029252
24
35
25
/* There are a number of distinct float control structures. */
36
-#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
26
- union {
37
-#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
27
- float_status fp_status[FPST_COUNT];
38
#define CPU_RESOLVING_TYPE TYPE_ARM_CPU
28
- struct {
39
29
- float_status fp_status_a32;
40
#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
30
- };
31
- };
32
+ float_status fp_status[FPST_COUNT];
33
34
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
35
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.c
39
+++ b/target/arm/cpu.c
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
41
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
42
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
43
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
44
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
45
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
46
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
47
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
48
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
49
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/tcg/vec_helper.c
52
+++ b/target/arm/tcg/vec_helper.c
53
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
54
*/
55
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
56
57
- *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32;
58
+ *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32];
59
set_default_nan_mode(true, statusp);
60
61
if (ebf) {
62
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/vfp_helper.c
65
+++ b/target/arm/vfp_helper.c
66
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
67
{
68
uint32_t a32_flags = 0, a64_flags = 0;
69
70
- a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
71
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
72
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
73
/* FZ16 does not generate an input denormal exception. */
74
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
75
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
76
* values. The caller should have arranged for env->vfp.fpsr to
77
* be the architecturally up-to-date exception flag information first.
78
*/
79
- set_float_exception_flags(0, &env->vfp.fp_status_a32);
80
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
81
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
82
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
83
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
84
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
85
i = float_round_to_zero;
86
break;
87
}
88
- set_float_rounding_mode(i, &env->vfp.fp_status_a32);
89
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
90
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
91
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
92
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
93
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
94
}
95
if (changed & FPCR_FZ) {
96
bool ftz_enabled = val & FPCR_FZ;
97
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
98
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
99
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
100
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
101
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
102
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
103
}
104
if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
105
/*
106
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
107
}
108
if (changed & FPCR_DN) {
109
bool dnan_enabled = val & FPCR_DN;
110
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
111
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
112
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
113
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
114
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
115
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
116
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
117
}
118
DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
119
-DO_VFP_cmp(s, float32, float32, fp_status_a32)
120
-DO_VFP_cmp(d, float64, float64, fp_status_a32)
121
+DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
122
+DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
123
#undef DO_VFP_cmp
124
125
/* Integer to float and float to integer conversions */
126
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
127
128
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
129
{
130
- uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32);
131
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
132
uint32_t result = pair;
133
uint32_t z = (pair >> 32) == 0;
134
41
--
135
--
42
2.34.1
136
2.34.1
43
137
44
138
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
hw/arm/xilinx_zynq.c calls tswap32() which is declared
3
Select on index instead of pointer.
4
in "exec/tswap.h". Include it in order to avoid this error when
4
No functional change.
5
refactoring unrelated headers:
6
5
7
hw/arm/xilinx_zynq.c:103:31: error: call to undeclared function 'tswap32';
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
board_setup_blob[n] = tswap32(board_setup_blob[n]);
8
Message-id: 20250129013857.135256-16-richard.henderson@linaro.org
10
^
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20240118200643.29037-3-philmd@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
10
---
17
hw/arm/xilinx_zynq.c | 1 +
11
target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------
18
1 file changed, 1 insertion(+)
12
1 file changed, 14 insertions(+), 26 deletions(-)
19
13
20
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
14
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
21
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/arm/xilinx_zynq.c
16
--- a/target/arm/tcg/mve_helper.c
23
+++ b/hw/arm/xilinx_zynq.c
17
+++ b/target/arm/tcg/mve_helper.c
24
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
25
#include "hw/qdev-clock.h"
19
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
26
#include "sysemu/reset.h"
20
continue; \
27
#include "qom/object.h"
21
} \
28
+#include "exec/tswap.h"
22
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
29
23
- &env->vfp.fp_status[FPST_STD]; \
30
#define TYPE_ZYNQ_MACHINE MACHINE_TYPE_NAME("xilinx-zynq-a9")
24
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
31
OBJECT_DECLARE_SIMPLE_TYPE(ZynqMachineState, ZYNQ_MACHINE)
25
if (!(mask & 1)) { \
26
/* We need the result but without updating flags */ \
27
scratch_fpst = *fpst; \
28
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
29
r[e] = 0; \
30
continue; \
31
} \
32
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
33
- &env->vfp.fp_status[FPST_STD]; \
34
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
35
if (!(tm & 1)) { \
36
/* We need the result but without updating flags */ \
37
scratch_fpst = *fpst; \
38
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
39
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
40
continue; \
41
} \
42
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
43
- &env->vfp.fp_status[FPST_STD]; \
44
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
45
if (!(mask & 1)) { \
46
/* We need the result but without updating flags */ \
47
scratch_fpst = *fpst; \
48
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
49
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
50
continue; \
51
} \
52
- fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
53
- &env->vfp.fp_status[FPST_STD]; \
54
+ fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
55
fpst1 = fpst0; \
56
if (!(mask & 1)) { \
57
scratch_fpst = *fpst0; \
58
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
59
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
60
continue; \
61
} \
62
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
63
- &env->vfp.fp_status[FPST_STD]; \
64
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
65
if (!(mask & 1)) { \
66
/* We need the result but without updating flags */ \
67
scratch_fpst = *fpst; \
68
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
69
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
70
continue; \
71
} \
72
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
73
- &env->vfp.fp_status[FPST_STD]; \
74
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
75
if (!(mask & 1)) { \
76
/* We need the result but without updating flags */ \
77
scratch_fpst = *fpst; \
78
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
79
unsigned e; \
80
TYPE *m = vm; \
81
TYPE ra = (TYPE)ra_in; \
82
- float_status *fpst = (ESIZE == 2) ? \
83
- &env->vfp.fp_status[FPST_STD_F16] : \
84
- &env->vfp.fp_status[FPST_STD]; \
85
+ float_status *fpst = \
86
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
87
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
88
if (mask & 1) { \
89
TYPE v = m[H##ESIZE(e)]; \
90
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
91
if ((mask & emask) == 0) { \
92
continue; \
93
} \
94
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
95
- &env->vfp.fp_status[FPST_STD]; \
96
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
97
if (!(mask & (1 << (e * ESIZE)))) { \
98
/* We need the result but without updating flags */ \
99
scratch_fpst = *fpst; \
100
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
101
if ((mask & emask) == 0) { \
102
continue; \
103
} \
104
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
105
- &env->vfp.fp_status[FPST_STD]; \
106
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
107
if (!(mask & (1 << (e * ESIZE)))) { \
108
/* We need the result but without updating flags */ \
109
scratch_fpst = *fpst; \
110
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
111
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
112
continue; \
113
} \
114
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
115
- &env->vfp.fp_status[FPST_STD]; \
116
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
117
if (!(mask & 1)) { \
118
/* We need the result but without updating flags */ \
119
scratch_fpst = *fpst; \
120
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
121
unsigned e; \
122
float_status *fpst; \
123
float_status scratch_fpst; \
124
- float_status *base_fpst = (ESIZE == 2) ? \
125
- &env->vfp.fp_status[FPST_STD_F16] : \
126
- &env->vfp.fp_status[FPST_STD]; \
127
+ float_status *base_fpst = \
128
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
129
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
130
set_float_rounding_mode(rmode, base_fpst); \
131
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
132
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
133
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
134
continue; \
135
} \
136
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
137
- &env->vfp.fp_status[FPST_STD]; \
138
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
139
if (!(mask & 1)) { \
140
/* We need the result but without updating flags */ \
141
scratch_fpst = *fpst; \
32
--
142
--
33
2.34.1
143
2.34.1
34
144
35
145
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
hw/arm/exynos4210.c calls tswap32() which is declared
3
Pass ARMFPStatusFlavour index instead of fp_status[FOO].
4
in "exec/tswap.h". Include it in order to avoid this error when
5
refactoring unrelated headers:
6
4
7
hw/arm/exynos4210.c:499:22: error: call to undeclared function 'tswap32';
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
smpboot[n] = tswap32(smpboot[n]);
7
Message-id: 20250129013857.135256-17-richard.henderson@linaro.org
10
^
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20240118200643.29037-2-philmd@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
9
---
17
hw/arm/exynos4210.c | 1 +
10
target/arm/vfp_helper.c | 10 +++++-----
18
1 file changed, 1 insertion(+)
11
1 file changed, 5 insertions(+), 5 deletions(-)
19
12
20
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
13
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
21
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/arm/exynos4210.c
15
--- a/target/arm/vfp_helper.c
23
+++ b/hw/arm/exynos4210.c
16
+++ b/target/arm/vfp_helper.c
24
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
25
18
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
26
#include "qemu/osdep.h"
19
{ \
27
#include "qapi/error.h"
20
softfloat_to_vfp_compare(env, \
28
+#include "exec/tswap.h"
21
- FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
29
#include "cpu.h"
22
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
30
#include "hw/cpu/a9mpcore.h"
23
} \
31
#include "hw/irq.h"
24
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
25
{ \
26
softfloat_to_vfp_compare(env, \
27
- FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
28
+ FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
29
}
30
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
31
-DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
32
-DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
33
+DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
34
+DO_VFP_cmp(s, float32, float32, FPST_A32)
35
+DO_VFP_cmp(d, float64, float64, FPST_A32)
36
#undef DO_VFP_cmp
37
38
/* Integer to float and float to integer conversions */
32
--
39
--
33
2.34.1
40
2.34.1
34
41
35
42
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add watchdog timer support to Allwinner-R40 and Bananapi.
3
Read the bit from the source, rather than from the proxy via
4
The watchdog timer is added as an overlay to the Timer
4
get_flush_inputs_to_zero. This makes it clear that it does
5
module memory map.
5
not matter which of the float_status structures is used.
6
6
7
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
8
Message-id: 20250129013857.135256-34-richard.henderson@linaro.org
9
Message-id: 20240115182757.1095012-4-linux@roeck-us.net
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
docs/system/arm/bananapi_m2u.rst | 2 +-
12
target/arm/tcg/vec_helper.c | 12 ++++++------
13
include/hw/arm/allwinner-r40.h | 3 +++
13
1 file changed, 6 insertions(+), 6 deletions(-)
14
hw/arm/allwinner-r40.c | 8 ++++++++
15
hw/arm/Kconfig | 1 +
16
4 files changed, 13 insertions(+), 1 deletion(-)
17
14
18
diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst
15
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/docs/system/arm/bananapi_m2u.rst
17
--- a/target/arm/tcg/vec_helper.c
21
+++ b/docs/system/arm/bananapi_m2u.rst
18
+++ b/target/arm/tcg/vec_helper.c
22
@@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices:
19
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
23
* SATA
20
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
24
* TWI (I2C)
21
25
* USB 2.0
22
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
26
+ * Hardware Watchdog
23
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
27
24
+ env->vfp.fpcr & FPCR_FZ16);
28
Limitations
25
}
29
"""""""""""
26
30
@@ -XXX,XX +XXX,XX @@ Currently, Banana Pi M2U does *not* support the following features:
27
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
31
28
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
32
- Graphical output via HDMI, GPU and/or the Display Engine
29
}
33
- Audio output
30
}
34
-- Hardware Watchdog
31
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
35
- Real Time Clock
32
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
36
33
+ env->vfp.fpcr & FPCR_FZ16);
37
Also see the 'unimplemented' array in the Allwinner R40 SoC module
34
}
38
diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h
35
39
index XXXXXXX..XXXXXXX 100644
36
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
40
--- a/include/hw/arm/allwinner-r40.h
37
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
41
+++ b/include/hw/arm/allwinner-r40.h
38
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
42
@@ -XXX,XX +XXX,XX @@
39
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
43
#include "hw/net/allwinner-sun8i-emac.h"
40
float_status *status = &env->vfp.fp_status[FPST_A64];
44
#include "hw/usb/hcd-ohci.h"
41
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
45
#include "hw/usb/hcd-ehci.h"
42
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
46
+#include "hw/watchdog/allwinner-wdt.h"
43
int negx = 0, negf = 0;
47
#include "target/arm/cpu.h"
44
48
#include "sysemu/block-backend.h"
45
if (is_s) {
49
46
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
50
@@ -XXX,XX +XXX,XX @@ enum {
47
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
51
AW_R40_DEV_OHCI2,
48
52
AW_R40_DEV_CCU,
49
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
53
AW_R40_DEV_PIT,
50
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
54
+ AW_R40_DEV_WDT,
51
+ env->vfp.fpcr & FPCR_FZ16);
55
AW_R40_DEV_UART0,
52
}
56
AW_R40_DEV_UART1,
53
57
AW_R40_DEV_UART2,
54
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
58
@@ -XXX,XX +XXX,XX @@ struct AwR40State {
55
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
59
const hwaddr *memmap;
56
}
60
AwSRAMCState sramc;
57
}
61
AwA10PITState timer;
58
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
62
+ AwWdtState wdt;
59
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
63
AllwinnerAHCIState sata;
60
+ env->vfp.fpcr & FPCR_FZ16);
64
AwSdHostState mmc[AW_R40_NUM_MMCS];
61
}
65
EHCISysBusState ehci[AW_R40_NUM_USB];
62
66
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
63
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
67
index XXXXXXX..XXXXXXX 100644
64
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
68
--- a/hw/arm/allwinner-r40.c
65
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
69
+++ b/hw/arm/allwinner-r40.c
66
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
70
@@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = {
67
float_status *status = &env->vfp.fp_status[FPST_A64];
71
[AW_R40_DEV_OHCI2] = 0x01c1c400,
68
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
72
[AW_R40_DEV_CCU] = 0x01c20000,
69
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
73
[AW_R40_DEV_PIT] = 0x01c20c00,
70
int negx = 0, negf = 0;
74
+ [AW_R40_DEV_WDT] = 0x01c20c90,
71
75
[AW_R40_DEV_UART0] = 0x01c28000,
72
if (is_s) {
76
[AW_R40_DEV_UART1] = 0x01c28400,
77
[AW_R40_DEV_UART2] = 0x01c28800,
78
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj)
79
object_property_add_alias(obj, "clk1-freq", OBJECT(&s->timer),
80
"clk1-freq");
81
82
+ object_initialize_child(obj, "wdt", &s->wdt, TYPE_AW_WDT_SUN4I);
83
+
84
object_initialize_child(obj, "ccu", &s->ccu, TYPE_AW_R40_CCU);
85
86
for (int i = 0; i < AW_R40_NUM_MMCS; i++) {
87
@@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp)
88
sysbus_connect_irq(SYS_BUS_DEVICE(&s->emac), 0,
89
qdev_get_gpio_in(DEVICE(&s->gic), AW_R40_GIC_SPI_EMAC));
90
91
+ /* WDT */
92
+ sysbus_realize(SYS_BUS_DEVICE(&s->wdt), &error_fatal);
93
+ sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->wdt), 0,
94
+ allwinner_r40_memmap[AW_R40_DEV_WDT], 1);
95
+
96
/* Unimplemented devices */
97
for (unsigned i = 0; i < ARRAY_SIZE(r40_unimplemented); i++) {
98
create_unimplemented_device(r40_unimplemented[i].device_name,
99
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
100
index XXXXXXX..XXXXXXX 100644
101
--- a/hw/arm/Kconfig
102
+++ b/hw/arm/Kconfig
103
@@ -XXX,XX +XXX,XX @@ config ALLWINNER_R40
104
select AHCI
105
select ALLWINNER_SRAMC
106
select ALLWINNER_A10_PIT
107
+ select ALLWINNER_WDT
108
select AXP2XX_PMU
109
select SERIAL
110
select ARM_TIMER
111
--
73
--
112
2.34.1
74
2.34.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The TUSB6010 USB controller is soldered on the N800 and N810
3
Sink common code from the callers into do_fmlal
4
tablets, thus is always present.
4
and do_fmlal_idx. Reorder the arguments to minimize
5
the re-sorting from the caller's arguments.
5
6
6
This is a migration compatibility break for the n800/n810
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
machines started with the '-usb none' option.
8
Message-id: 20250129013857.135256-35-richard.henderson@linaro.org
8
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-id: 20240119215106.45776-3-philmd@linaro.org
11
[PMM: fixed commit message typo]
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
11
---
15
hw/arm/nseries.c | 4 +---
12
target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------
16
1 file changed, 1 insertion(+), 3 deletions(-)
13
1 file changed, 16 insertions(+), 12 deletions(-)
17
14
18
diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c
15
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/arm/nseries.c
17
--- a/target/arm/tcg/vec_helper.c
21
+++ b/hw/arm/nseries.c
18
+++ b/target/arm/tcg/vec_helper.c
22
@@ -XXX,XX +XXX,XX @@ static void n8x0_init(MachineState *machine,
19
@@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
23
n8x0_spi_setup(s);
20
* as there is not yet SVE versions that might use blocking.
24
n8x0_dss_setup(s);
21
*/
25
n8x0_cbus_setup(s);
22
26
- if (machine_usb(machine)) {
23
-static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
27
- n8x0_usb_setup(s);
24
- uint64_t negx, int negf, uint32_t desc, bool fz16)
28
- }
25
+static void do_fmlal(float32 *d, void *vn, void *vm,
29
+ n8x0_usb_setup(s);
26
+ CPUARMState *env, uint32_t desc,
30
27
+ ARMFPStatusFlavour fpst_idx,
31
if (machine->kernel_filename) {
28
+ uint64_t negx, int negf)
32
/* Or at the linux loader. */
29
{
30
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
31
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
32
intptr_t i, oprsz = simd_oprsz(desc);
33
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
34
int is_q = oprsz == 16;
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
36
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
37
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
38
39
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
40
- env->vfp.fpcr & FPCR_FZ16);
41
+ do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0);
42
}
43
44
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
46
negx = 0x8000800080008000ull;
47
}
48
}
49
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
50
- env->vfp.fpcr & FPCR_FZ16);
51
+ do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf);
52
}
53
54
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
55
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
56
}
57
}
58
59
-static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
60
- uint64_t negx, int negf, uint32_t desc, bool fz16)
61
+static void do_fmlal_idx(float32 *d, void *vn, void *vm,
62
+ CPUARMState *env, uint32_t desc,
63
+ ARMFPStatusFlavour fpst_idx,
64
+ uint64_t negx, int negf)
65
{
66
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
67
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
68
intptr_t i, oprsz = simd_oprsz(desc);
69
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
70
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
71
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
72
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
73
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
74
75
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
76
- env->vfp.fpcr & FPCR_FZ16);
77
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0);
78
}
79
80
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
81
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
82
negx = 0x8000800080008000ull;
83
}
84
}
85
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
86
- env->vfp.fpcr & FPCR_FZ16);
87
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf);
88
}
89
90
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
33
--
91
--
34
2.34.1
92
2.34.1
35
36
diff view generated by jsdifflib