1 | The following changes since commit 5bab95dc74d43bbb28c6a96d24c810a664432057: | 1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches |
---|---|---|---|
2 | (plus a fix for a target/alpha latent bug that would otherwise | ||
3 | be revealed by the fpu changes), because 68 patches is already | ||
4 | longer than I prefer to send in at one time... | ||
2 | 5 | ||
3 | Merge tag 'pull-request-2024-01-24' of https://gitlab.com/thuth/qemu into staging (2024-01-25 12:33:42 +0000) | 6 | thanks |
7 | -- PMM | ||
8 | |||
9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: | ||
10 | |||
11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) | ||
4 | 12 | ||
5 | are available in the Git repository at: | 13 | are available in the Git repository at: |
6 | 14 | ||
7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240126 | 15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 |
8 | 16 | ||
9 | for you to fetch changes up to 5e6be95ed1578c7cfac2082b39384d99fd912508: | 17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: |
10 | 18 | ||
11 | hw/arm: add PCIe to Freescale i.MX6 (2024-01-26 12:23:04 +0000) | 19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) |
12 | 20 | ||
13 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
14 | target-arm queue: | 22 | target-arm queue: |
15 | * Fix VNCR fault detection logic | 23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags |
16 | * Fix A64 scalar SQSHRN and SQRSHRN | 24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES |
17 | * Fix incorrect aa64_tidcp1 feature check | ||
18 | * hw/arm/virt.c: Remove newline from error_report() string | ||
19 | * hw/arm/musicpal: Convert to qemu_add_kbd_event_handler() | ||
20 | * hw/arm/allwinner-a10: Unconditionally map the USB Host controllers | ||
21 | * hw/arm/nseries: Unconditionally map the TUSB6010 USB Host controller | ||
22 | * hw/arm: Add EHCI/OHCI controllers to Allwinner R40 and Bananapi board | ||
23 | * hw/arm: Add AHCI/SATA controller to Allwinner R40 and Bananapi board | ||
24 | * hw/arm: Add watchdog timer to Allwinner H40 and Bananapi board | ||
25 | * arm: various include header cleanups | ||
26 | * cleanups to allow some files to be built only once | ||
27 | * fsl-imx6ul: Add various missing unimplemented devices | ||
28 | * docs/system/arm/virt.rst: Add note on CPU features off by default | ||
29 | * hw/char/imx_serial: Implement receive FIFO and ageing timer | ||
30 | * target/xtensa: fix OOB TLB entry access | ||
31 | * bswap.h: Fix const_le64() macro | ||
32 | * hw/arm: add PCIe to Freescale i.MX6 | ||
33 | 25 | ||
34 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
35 | Guenter Roeck (4): | 27 | Peter Maydell (49): |
36 | hw/arm: Add EHCI/OHCI controllers to Allwinner R40 and Bananapi board | 28 | target/alpha: Don't corrupt error_code with unknown softfloat flags |
37 | hw/arm: Add AHCI/SATA controller to Allwinner R40 and Bananapi board | 29 | fpu: Add float_class_denormal |
38 | hw/arm: Add watchdog timer to Allwinner H40 and Bananapi board | 30 | fpu: Implement float_flag_input_denormal_used |
39 | fsl-imx6ul: Add various missing unimplemented devices | 31 | fpu: allow flushing of output denormals to be after rounding |
32 | target/arm: Define FPCR AH, FIZ, NEP bits | ||
33 | target/arm: Implement FPCR.FIZ handling | ||
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
40 | 77 | ||
41 | Gustavo Romero (1): | 78 | Richard Henderson (19): |
42 | docs/system/arm/virt.rst: Add note on CPU features off by default | 79 | target/arm: Handle FPCR.AH in vector FCMLA |
80 | target/arm: Handle FPCR.AH in FCMLA by index | ||
81 | target/arm: Handle FPCR.AH in SVE FCMLA | ||
82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) | ||
83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) | ||
84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) | ||
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
43 | 98 | ||
44 | Max Filippov (1): | 99 | docs/system/arm/emulation.rst | 2 + |
45 | target/xtensa: fix OOB TLB entry access | 100 | include/fpu/softfloat-helpers.h | 11 + |
46 | 101 | include/fpu/softfloat-types.h | 25 ++ | |
47 | Nikita Ostrenkov (1): | 102 | target/arm/cpu-features.h | 10 + |
48 | hw/arm: add PCIe to Freescale i.MX6 | 103 | target/arm/cpu.h | 97 +++-- |
49 | 104 | target/arm/helper.h | 26 ++ | |
50 | Peter Maydell (6): | 105 | target/arm/internals.h | 6 + |
51 | target/arm: Fix VNCR fault detection logic | 106 | target/arm/tcg/helper-a64.h | 13 + |
52 | hw/arm/virt.c: Remove newline from error_report() string | 107 | target/arm/tcg/helper-sve.h | 120 ++++++ |
53 | hw/arm/musicpal: Convert to qemu_add_kbd_event_handler() | 108 | target/arm/tcg/translate-a64.h | 13 + |
54 | target/arm: Fix A64 scalar SQSHRN and SQRSHRN | 109 | target/arm/tcg/translate.h | 54 +-- |
55 | bswap.h: Fix const_le64() macro | 110 | target/arm/tcg/vec_internal.h | 35 ++ |
56 | target/arm: Fix incorrect aa64_tidcp1 feature check | 111 | target/mips/fpu_helper.h | 6 + |
57 | 112 | fpu/softfloat.c | 66 +++- | |
58 | Philippe Mathieu-Daudé (20): | 113 | target/alpha/cpu.c | 7 + |
59 | hw/arm/allwinner-a10: Unconditionally map the USB Host controllers | 114 | target/alpha/fpu_helper.c | 2 + |
60 | hw/arm/nseries: Unconditionally map the TUSB6010 USB Host controller | 115 | target/arm/cpu.c | 46 +-- |
61 | hw/arm/exynos4210: Include missing 'exec/tswap.h' header | 116 | target/arm/helper.c | 2 +- |
62 | hw/arm/xilinx_zynq: Include missing 'exec/tswap.h' header | 117 | target/arm/tcg/cpu64.c | 2 + |
63 | hw/arm/smmuv3: Include missing 'hw/registerfields.h' header | 118 | target/arm/tcg/helper-a64.c | 151 ++++---- |
64 | hw/arm/xlnx-versal: Include missing 'cpu.h' header | 119 | target/arm/tcg/hflags.c | 13 + |
65 | target/arm/cpu-features: Include missing 'hw/registerfields.h' header | 120 | target/arm/tcg/mve_helper.c | 44 +-- |
66 | target/arm/cpregs: Include missing 'hw/registerfields.h' header | 121 | target/arm/tcg/sme_helper.c | 4 +- |
67 | target/arm/cpregs: Include missing 'kvm-consts.h' header | 122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- |
68 | target/arm: Expose arm_cpu_mp_affinity() in 'multiprocessing.h' header | 123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- |
69 | target/arm: Declare ARM_CPU_TYPE_NAME/SUFFIX in 'cpu-qom.h' | 124 | target/arm/tcg/translate-sve.c | 193 +++++++--- |
70 | hw/cpu/a9mpcore: Build it only once | 125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ |
71 | hw/misc/xlnx-versal-crl: Include generic 'cpu-qom.h' instead of 'cpu.h' | 126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- |
72 | hw/misc/xlnx-versal-crl: Build it only once | 127 | target/hppa/fpu_helper.c | 11 + |
73 | target/arm: Expose M-profile register bank index definitions | 128 | target/i386/tcg/fpu_helper.c | 8 + |
74 | hw/arm/armv7m: Make 'hw/intc/armv7m_nvic.h' a target agnostic header | 129 | target/mips/msa.c | 9 + |
75 | target/arm: Move ARM_CPU_IRQ/FIQ definitions to 'cpu-qom.h' header | 130 | target/ppc/cpu_init.c | 3 + |
76 | target/arm: Move e2h_access() helper around | 131 | target/rx/cpu.c | 8 + |
77 | target/arm: Move GTimer definitions to new 'gtimer.h' header | 132 | target/sh4/cpu.c | 8 + |
78 | hw/arm: Build various units only once | 133 | target/tricore/helper.c | 1 + |
79 | 134 | tests/fp/fp-bench.c | 1 + | |
80 | Rayhan Faizel (1): | 135 | fpu/softfloat-parts.c.inc | 127 +++++-- |
81 | hw/char/imx_serial: Implement receive FIFO and ageing timer | 136 | 37 files changed, 2325 insertions(+), 709 deletions(-) |
82 | |||
83 | Richard Henderson (2): | ||
84 | target/arm: Rename arm_cpu_mp_affinity | ||
85 | target/arm: Create arm_cpu_mp_affinity | ||
86 | |||
87 | docs/system/arm/bananapi_m2u.rst | 5 +- | ||
88 | docs/system/arm/virt.rst | 13 ++++ | ||
89 | hw/arm/smmuv3-internal.h | 1 + | ||
90 | include/hw/arm/allwinner-r40.h | 15 +++++ | ||
91 | include/hw/arm/fsl-imx6.h | 44 +++++++------ | ||
92 | include/hw/arm/fsl-imx6ul.h | 2 + | ||
93 | include/hw/arm/xlnx-versal.h | 1 + | ||
94 | include/hw/char/imx_serial.h | 20 +++++- | ||
95 | include/hw/intc/armv7m_nvic.h | 2 +- | ||
96 | include/hw/misc/xlnx-versal-crl.h | 2 +- | ||
97 | include/qemu/bswap.h | 16 ++--- | ||
98 | target/arm/cpregs.h | 3 + | ||
99 | target/arm/cpu-features.h | 4 +- | ||
100 | target/arm/cpu-qom.h | 24 +++++++ | ||
101 | target/arm/cpu.h | 34 +--------- | ||
102 | target/arm/gtimer.h | 21 ++++++ | ||
103 | target/arm/multiprocessing.h | 16 +++++ | ||
104 | hw/arm/allwinner-a10.c | 50 ++++++-------- | ||
105 | hw/arm/allwinner-h3.c | 2 + | ||
106 | hw/arm/allwinner-r40.c | 69 +++++++++++++++++++- | ||
107 | hw/arm/armv7m.c | 2 + | ||
108 | hw/arm/aspeed_ast2400.c | 1 + | ||
109 | hw/arm/aspeed_ast2600.c | 1 + | ||
110 | hw/arm/bcm2836.c | 2 + | ||
111 | hw/arm/collie.c | 1 - | ||
112 | hw/arm/exynos4210.c | 2 + | ||
113 | hw/arm/fsl-imx25.c | 1 + | ||
114 | hw/arm/fsl-imx31.c | 1 + | ||
115 | hw/arm/fsl-imx6.c | 26 ++++++++ | ||
116 | hw/arm/fsl-imx6ul.c | 31 +++++++++ | ||
117 | hw/arm/fsl-imx7.c | 1 + | ||
118 | hw/arm/gumstix.c | 1 - | ||
119 | hw/arm/highbank.c | 1 + | ||
120 | hw/arm/integratorcp.c | 2 +- | ||
121 | hw/arm/mainstone.c | 1 - | ||
122 | hw/arm/musicpal.c | 133 ++++++++++++++++++-------------------- | ||
123 | hw/arm/npcm7xx.c | 3 +- | ||
124 | hw/arm/nseries.c | 4 +- | ||
125 | hw/arm/omap1.c | 1 + | ||
126 | hw/arm/omap2.c | 2 +- | ||
127 | hw/arm/omap_sx1.c | 1 - | ||
128 | hw/arm/palm.c | 1 - | ||
129 | hw/arm/realview.c | 1 + | ||
130 | hw/arm/sbsa-ref.c | 4 +- | ||
131 | hw/arm/spitz.c | 1 - | ||
132 | hw/arm/strongarm.c | 2 +- | ||
133 | hw/arm/versatilepb.c | 2 +- | ||
134 | hw/arm/vexpress.c | 2 +- | ||
135 | hw/arm/virt-acpi-build.c | 4 +- | ||
136 | hw/arm/virt.c | 15 +++-- | ||
137 | hw/arm/xilinx_zynq.c | 3 +- | ||
138 | hw/arm/xlnx-versal-virt.c | 5 +- | ||
139 | hw/arm/xlnx-versal.c | 2 + | ||
140 | hw/arm/xlnx-zynqmp.c | 2 + | ||
141 | hw/arm/z2.c | 1 - | ||
142 | hw/char/imx_serial.c | 102 +++++++++++++++++++++++++---- | ||
143 | hw/cpu/a15mpcore.c | 1 + | ||
144 | hw/cpu/a9mpcore.c | 2 +- | ||
145 | hw/misc/xlnx-versal-crl.c | 5 +- | ||
146 | target/arm/arm-powerctl.c | 3 +- | ||
147 | target/arm/cpu.c | 13 +++- | ||
148 | target/arm/helper.c | 30 +++++---- | ||
149 | target/arm/hvf/hvf.c | 6 +- | ||
150 | target/arm/kvm.c | 1 + | ||
151 | target/arm/machine.c | 1 + | ||
152 | target/arm/tcg/psci.c | 3 +- | ||
153 | target/arm/tcg/tlb_helper.c | 2 +- | ||
154 | target/arm/tcg/translate-a64.c | 2 +- | ||
155 | target/xtensa/mmu_helper.c | 47 ++++++++++---- | ||
156 | hw/arm/Kconfig | 6 ++ | ||
157 | hw/arm/meson.build | 23 +++---- | ||
158 | hw/cpu/meson.build | 2 +- | ||
159 | hw/misc/meson.build | 2 +- | ||
160 | 73 files changed, 597 insertions(+), 261 deletions(-) | ||
161 | create mode 100644 target/arm/gtimer.h | ||
162 | create mode 100644 target/arm/multiprocessing.h | ||
163 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | In do_cvttq() we set env->error_code with what is supposed to be a |
---|---|---|---|
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
2 | 9 | ||
3 | hw/misc/xlnx-versal-crl.c doesn't require "cpu.h" | 10 | This is harmless now, but when we add new floating point exception |
4 | anymore. By removing it, the unit becomes target | 11 | flags to softfloat it will cause problems. Add an else clause to the |
5 | agnostic: we can build it once. Update meson. | 12 | if-ladder to make it ignore any float exception flags it doesn't care |
13 | about. | ||
6 | 14 | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha |
16 | when the commit adding float_flag_input_denormal_used lands. | ||
17 | |||
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20240118200643.29037-15-philmd@linaro.org | 22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | 23 | --- |
12 | hw/misc/xlnx-versal-crl.c | 1 - | 24 | target/alpha/fpu_helper.c | 2 ++ |
13 | hw/misc/meson.build | 2 +- | 25 | 1 file changed, 2 insertions(+) |
14 | 2 files changed, 1 insertion(+), 2 deletions(-) | ||
15 | 26 | ||
16 | diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c | 27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/misc/xlnx-versal-crl.c | 29 | --- a/target/alpha/fpu_helper.c |
19 | +++ b/hw/misc/xlnx-versal-crl.c | 30 | +++ b/target/alpha/fpu_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) |
21 | #include "hw/register.h" | 32 | exc = FPCR_INV; |
22 | #include "hw/resettable.h" | 33 | } else if (exc & float_flag_inexact) { |
23 | 34 | exc = FPCR_INE; | |
24 | -#include "target/arm/cpu.h" | 35 | + } else { |
25 | #include "target/arm/arm-powerctl.h" | 36 | + exc = 0; |
26 | #include "target/arm/multiprocessing.h" | 37 | } |
27 | #include "hw/misc/xlnx-versal-crl.h" | 38 | } |
28 | diff --git a/hw/misc/meson.build b/hw/misc/meson.build | 39 | env->error_code = exc; |
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/hw/misc/meson.build | ||
31 | +++ b/hw/misc/meson.build | ||
32 | @@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c')) | ||
33 | system_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c')) | ||
34 | system_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-crf.c')) | ||
35 | system_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-apu-ctrl.c')) | ||
36 | -specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-crl.c')) | ||
37 | system_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files( | ||
38 | + 'xlnx-versal-crl.c', | ||
39 | 'xlnx-versal-xramc.c', | ||
40 | 'xlnx-versal-pmc-iou-slcr.c', | ||
41 | 'xlnx-versal-cfu.c', | ||
42 | -- | 40 | -- |
43 | 2.34.1 | 41 | 2.34.1 |
44 | 42 | ||
45 | 43 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently in softfloat we canonicalize input denormals and so the | ||
2 | code that implements floating point operations does not need to care | ||
3 | whether the input value was originally normal or denormal. However, | ||
4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: | ||
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
1 | 9 | ||
10 | So we need to track that the input was a non-squashed denormal. To | ||
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
17 | |||
18 | There should be no behavioural change in this commit. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | --- | ||
23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- | ||
25 | 2 files changed, 54 insertions(+), 18 deletions(-) | ||
26 | |||
27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/fpu/softfloat.c | ||
30 | +++ b/fpu/softfloat.c | ||
31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, | ||
32 | /* | ||
33 | * Classify a floating point number. Everything above float_class_qnan | ||
34 | * is a NaN so cls >= float_class_qnan is any NaN. | ||
35 | + * | ||
36 | + * Note that we canonicalize denormals, so most code should treat | ||
37 | + * class_normal and class_denormal identically. | ||
38 | */ | ||
39 | |||
40 | typedef enum __attribute__ ((__packed__)) { | ||
41 | float_class_unclassified, | ||
42 | float_class_zero, | ||
43 | float_class_normal, | ||
44 | + float_class_denormal, /* input was a non-squashed denormal */ | ||
45 | float_class_inf, | ||
46 | float_class_qnan, /* all NaNs from here */ | ||
47 | float_class_snan, | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { | ||
49 | enum { | ||
50 | float_cmask_zero = float_cmask(float_class_zero), | ||
51 | float_cmask_normal = float_cmask(float_class_normal), | ||
52 | + float_cmask_denormal = float_cmask(float_class_denormal), | ||
53 | float_cmask_inf = float_cmask(float_class_inf), | ||
54 | float_cmask_qnan = float_cmask(float_class_qnan), | ||
55 | float_cmask_snan = float_cmask(float_class_snan), | ||
56 | |||
57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, | ||
58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, | ||
59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, | ||
60 | }; | ||
61 | |||
62 | /* Flags for parts_minmax. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) | ||
64 | return c == float_class_qnan; | ||
65 | } | ||
66 | |||
67 | +/* | ||
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
72 | +{ | ||
73 | + return !(cmask & ~float_cmask_anynorm); | ||
74 | +} | ||
75 | + | ||
76 | +static inline bool is_anynorm(FloatClass c) | ||
77 | +{ | ||
78 | + return float_cmask(c) & float_cmask_anynorm; | ||
79 | +} | ||
80 | + | ||
81 | /* | ||
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
169 | { | ||
170 | - if (likely(p->cls == float_class_normal)) { | ||
171 | + if (likely(is_anynorm(p->cls))) { | ||
172 | parts_uncanon_normal(p, s, fmt); | ||
173 | } else { | ||
174 | switch (p->cls) { | ||
175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
176 | |||
177 | if (a->sign != b_sign) { | ||
178 | /* Subtraction */ | ||
179 | - if (likely(ab_mask == float_cmask_normal)) { | ||
180 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
181 | if (parts_sub_normal(a, b)) { | ||
182 | return a; | ||
183 | } | ||
184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
185 | } | ||
186 | } else { | ||
187 | /* Addition */ | ||
188 | - if (likely(ab_mask == float_cmask_normal)) { | ||
189 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
190 | parts_add_normal(a, b); | ||
191 | return a; | ||
192 | } | ||
193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
194 | } | ||
195 | |||
196 | if (b->cls == float_class_zero) { | ||
197 | - g_assert(a->cls == float_class_normal); | ||
198 | + g_assert(is_anynorm(a->cls)); | ||
199 | return a; | ||
200 | } | ||
201 | |||
202 | g_assert(a->cls == float_class_zero); | ||
203 | - g_assert(b->cls == float_class_normal); | ||
204 | + g_assert(is_anynorm(b->cls)); | ||
205 | return_b: | ||
206 | b->sign = b_sign; | ||
207 | return b; | ||
208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
210 | bool sign = a->sign ^ b->sign; | ||
211 | |||
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
351 | -- | ||
352 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Nikita Ostrenkov <n.ostrenkov@gmail.com> | 1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to |
---|---|---|---|
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
2 | 5 | ||
3 | Signed-off-by: Nikita Ostrenkov <n.ostrenkov@gmail.com> | 6 | Note that we only set this when an input denormal is actually used by |
4 | Message-id: 20240108140325.1291-1-n.ostrenkov@gmail.com | 7 | the operation: if the operation results in Invalid Operation or |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Divide By Zero or the result is a NaN because some other input was a |
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | input_denormal_used. | ||
11 | |||
12 | We mostly do not need to adjust the hardfloat codepaths to deal with | ||
13 | this flag, because almost all hardfloat operations are already gated | ||
14 | on the input not being a denormal, and will fall back to softfloat | ||
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 21 | --- |
8 | include/hw/arm/fsl-imx6.h | 44 ++++++++++++++++++++------------------- | 22 | include/fpu/softfloat-types.h | 7 ++++ |
9 | hw/arm/fsl-imx6.c | 25 ++++++++++++++++++++++ | 23 | fpu/softfloat.c | 38 +++++++++++++++++--- |
10 | hw/arm/Kconfig | 2 ++ | 24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- |
11 | 3 files changed, 50 insertions(+), 21 deletions(-) | 25 | 3 files changed, 107 insertions(+), 6 deletions(-) |
12 | 26 | ||
13 | diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h | 27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h |
14 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/hw/arm/fsl-imx6.h | 29 | --- a/include/fpu/softfloat-types.h |
16 | +++ b/include/hw/arm/fsl-imx6.h | 30 | +++ b/include/fpu/softfloat-types.h |
17 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ enum { |
18 | #include "hw/net/imx_fec.h" | 32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ |
19 | #include "hw/usb/chipidea.h" | 33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ |
20 | #include "hw/usb/imx-usb-phy.h" | 34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ |
21 | +#include "hw/pci-host/designware.h" | 35 | + /* |
22 | #include "exec/memory.h" | 36 | + * An input was denormal and we used it (without flushing it to zero). |
23 | #include "cpu.h" | 37 | + * Not set if we do not actually use the denormal input (e.g. |
24 | #include "qom/object.h" | 38 | + * because some other input was a NaN, or because the operation |
25 | @@ -XXX,XX +XXX,XX @@ struct FslIMX6State { | 39 | + * wasn't actually carried out (divide-by-zero; invalid)) |
26 | DeviceState parent_obj; | 40 | + */ |
27 | 41 | + float_flag_input_denormal_used = 0x4000, | |
28 | /*< public >*/ | ||
29 | - ARMCPU cpu[FSL_IMX6_NUM_CPUS]; | ||
30 | - A9MPPrivState a9mpcore; | ||
31 | - IMX6CCMState ccm; | ||
32 | - IMX6SRCState src; | ||
33 | - IMX7SNVSState snvs; | ||
34 | - IMXSerialState uart[FSL_IMX6_NUM_UARTS]; | ||
35 | - IMXGPTState gpt; | ||
36 | - IMXEPITState epit[FSL_IMX6_NUM_EPITS]; | ||
37 | - IMXI2CState i2c[FSL_IMX6_NUM_I2CS]; | ||
38 | - IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS]; | ||
39 | - SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS]; | ||
40 | - IMXSPIState spi[FSL_IMX6_NUM_ECSPIS]; | ||
41 | - IMX2WdtState wdt[FSL_IMX6_NUM_WDTS]; | ||
42 | - IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS]; | ||
43 | - ChipideaState usb[FSL_IMX6_NUM_USBS]; | ||
44 | - IMXFECState eth; | ||
45 | - MemoryRegion rom; | ||
46 | - MemoryRegion caam; | ||
47 | - MemoryRegion ocram; | ||
48 | - MemoryRegion ocram_alias; | ||
49 | - uint32_t phy_num; | ||
50 | + ARMCPU cpu[FSL_IMX6_NUM_CPUS]; | ||
51 | + A9MPPrivState a9mpcore; | ||
52 | + IMX6CCMState ccm; | ||
53 | + IMX6SRCState src; | ||
54 | + IMX7SNVSState snvs; | ||
55 | + IMXSerialState uart[FSL_IMX6_NUM_UARTS]; | ||
56 | + IMXGPTState gpt; | ||
57 | + IMXEPITState epit[FSL_IMX6_NUM_EPITS]; | ||
58 | + IMXI2CState i2c[FSL_IMX6_NUM_I2CS]; | ||
59 | + IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS]; | ||
60 | + SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS]; | ||
61 | + IMXSPIState spi[FSL_IMX6_NUM_ECSPIS]; | ||
62 | + IMX2WdtState wdt[FSL_IMX6_NUM_WDTS]; | ||
63 | + IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS]; | ||
64 | + ChipideaState usb[FSL_IMX6_NUM_USBS]; | ||
65 | + IMXFECState eth; | ||
66 | + DesignwarePCIEHost pcie; | ||
67 | + MemoryRegion rom; | ||
68 | + MemoryRegion caam; | ||
69 | + MemoryRegion ocram; | ||
70 | + MemoryRegion ocram_alias; | ||
71 | + uint32_t phy_num; | ||
72 | }; | 42 | }; |
73 | 43 | ||
74 | 44 | /* | |
75 | diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c | 45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c |
76 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
77 | --- a/hw/arm/fsl-imx6.c | 47 | --- a/fpu/softfloat.c |
78 | +++ b/hw/arm/fsl-imx6.c | 48 | +++ b/fpu/softfloat.c |
79 | @@ -XXX,XX +XXX,XX @@ | 49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) |
80 | #include "qemu/osdep.h" | 50 | float16_params_ahp.frac_size + 1); |
81 | #include "qapi/error.h" | 51 | break; |
82 | #include "hw/arm/fsl-imx6.h" | 52 | |
83 | +#include "hw/misc/unimp.h" | 53 | - case float_class_normal: |
84 | #include "hw/usb/imx-usb-phy.h" | 54 | case float_class_denormal: |
85 | #include "hw/boards.h" | 55 | + float_raise(float_flag_input_denormal_used, s); |
86 | #include "hw/qdev-properties.h" | 56 | + break; |
87 | @@ -XXX,XX +XXX,XX @@ static void fsl_imx6_init(Object *obj) | 57 | + case float_class_normal: |
88 | 58 | case float_class_zero: | |
89 | 59 | break; | |
90 | object_initialize_child(obj, "eth", &s->eth, TYPE_IMX_ENET); | 60 | |
91 | + | 61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) |
92 | + object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST); | 62 | if (is_nan(a->cls)) { |
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
93 | } | 68 | } |
94 | 69 | ||
95 | static void fsl_imx6_realize(DeviceState *dev, Error **errp) | 70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) |
96 | @@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) | 71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) |
97 | MachineState *ms = MACHINE(qdev_get_machine()); | 72 | if (is_nan(a->cls)) { |
98 | FslIMX6State *s = FSL_IMX6(dev); | 73 | parts_return_nan(a, s); |
99 | uint16_t i; | 74 | } |
100 | + qemu_irq irq; | 75 | + if (a->cls == float_class_denormal) { |
101 | unsigned int smp_cpus = ms->smp.cpus; | 76 | + float_raise(float_flag_input_denormal_used, s); |
102 | 77 | + } | |
103 | if (smp_cpus > FSL_IMX6_NUM_CPUS) { | 78 | } |
104 | @@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) | 79 | |
105 | FSL_IMX6_WDOGn_IRQ[i])); | 80 | #define parts_float_to_float(P, S) \ |
106 | } | 81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, |
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/fpu/softfloat-parts.c.inc | ||
144 | +++ b/fpu/softfloat-parts.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
146 | bool b_sign = b->sign ^ subtract; | ||
147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
107 | 148 | ||
108 | + /* | 149 | + /* |
109 | + * PCIe | 150 | + * For addition and subtraction, we will consume an |
151 | + * input denormal unless the other input is a NaN. | ||
110 | + */ | 152 | + */ |
111 | + sysbus_realize(SYS_BUS_DEVICE(&s->pcie), &error_abort); | 153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == |
112 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0, FSL_IMX6_PCIe_REG_ADDR); | 154 | + float_cmask_denormal) { |
113 | + | 155 | + float_raise(float_flag_input_denormal_used, s); |
114 | + irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE1_IRQ); | 156 | + } |
115 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0, irq); | 157 | + |
116 | + irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE2_IRQ); | 158 | if (a->sign != b_sign) { |
117 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1, irq); | 159 | /* Subtraction */ |
118 | + irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE3_IRQ); | 160 | if (likely(cmask_is_only_normals(ab_mask))) { |
119 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2, irq); | 161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, |
120 | + irq = qdev_get_gpio_in(DEVICE(&s->a9mpcore), FSL_IMX6_PCIE4_IRQ); | 162 | if (likely(cmask_is_only_normals(ab_mask))) { |
121 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3, irq); | 163 | FloatPartsW tmp; |
164 | |||
165 | + if (ab_mask & float_cmask_denormal) { | ||
166 | + float_raise(float_flag_input_denormal_used, s); | ||
167 | + } | ||
168 | + | ||
169 | frac_mulw(&tmp, a, b); | ||
170 | frac_truncjam(a, &tmp); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
173 | } | ||
174 | |||
175 | /* Multiply by 0 or Inf */ | ||
176 | + if (ab_mask & float_cmask_denormal) { | ||
177 | + float_raise(float_flag_input_denormal_used, s); | ||
178 | + } | ||
179 | + | ||
180 | if (ab_mask & float_cmask_inf) { | ||
181 | a->cls = float_class_inf; | ||
182 | a->sign = sign; | ||
183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
184 | if (flags & float_muladd_negate_result) { | ||
185 | a->sign ^= 1; | ||
186 | } | ||
122 | + | 187 | + |
123 | + /* | 188 | + /* |
124 | + * PCIe PHY | 189 | + * All result types except for "return the default NaN |
190 | + * because this is an Invalid Operation" go through here; | ||
191 | + * this matches the set of cases where we consumed a | ||
192 | + * denormal input. | ||
125 | + */ | 193 | + */ |
126 | + create_unimplemented_device("pcie-phy", FSL_IMX6_PCIe_ADDR, | 194 | + if (abc_mask & float_cmask_denormal) { |
127 | + FSL_IMX6_PCIe_SIZE); | 195 | + float_raise(float_flag_input_denormal_used, s); |
128 | + | 196 | + } |
129 | /* ROM memory */ | 197 | return a; |
130 | if (!memory_region_init_rom(&s->rom, OBJECT(dev), "imx6.rom", | 198 | |
131 | FSL_IMX6_ROM_SIZE, errp)) { | 199 | return_sub_zero: |
132 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | 200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, |
133 | index XXXXXXX..XXXXXXX 100644 | 201 | bool sign = a->sign ^ b->sign; |
134 | --- a/hw/arm/Kconfig | 202 | |
135 | +++ b/hw/arm/Kconfig | 203 | if (likely(cmask_is_only_normals(ab_mask))) { |
136 | @@ -XXX,XX +XXX,XX @@ config FSL_IMX31 | 204 | + if (ab_mask & float_cmask_denormal) { |
137 | 205 | + float_raise(float_flag_input_denormal_used, s); | |
138 | config FSL_IMX6 | 206 | + } |
139 | bool | 207 | a->sign = sign; |
140 | + imply PCIE_DEVICES | 208 | a->exp -= b->exp + frac_div(a, b); |
141 | imply I2C_DEVICES | 209 | return a; |
142 | select A9MPCORE | 210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, |
143 | select IMX | 211 | return parts_pick_nan(a, b, s); |
144 | @@ -XXX,XX +XXX,XX @@ config FSL_IMX6 | 212 | } |
145 | select IMX_USBPHY | 213 | |
146 | select WDT_IMX2 | 214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { |
147 | select PL310 # cache controller | 215 | + float_raise(float_flag_input_denormal_used, s); |
148 | + select PCI_EXPRESS_DESIGNWARE | 216 | + } |
149 | select SDHCI | 217 | + |
150 | 218 | a->sign = sign; | |
151 | config ASPEED_SOC | 219 | |
220 | /* Inf / X */ | ||
221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
223 | |||
224 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
225 | + if (ab_mask & float_cmask_denormal) { | ||
226 | + float_raise(float_flag_input_denormal_used, s); | ||
227 | + } | ||
228 | frac_modrem(a, b, mod_quot); | ||
229 | return a; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
232 | return a; | ||
233 | } | ||
234 | |||
235 | + if (ab_mask & float_cmask_denormal) { | ||
236 | + float_raise(float_flag_input_denormal_used, s); | ||
237 | + } | ||
238 | + | ||
239 | /* N % Inf; 0 % N */ | ||
240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | ||
241 | return a; | ||
242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
243 | if (unlikely(a->cls != float_class_normal)) { | ||
244 | switch (a->cls) { | ||
245 | case float_class_denormal: | ||
246 | + if (!a->sign) { | ||
247 | + /* -ve denormal will be InvalidOperation */ | ||
248 | + float_raise(float_flag_input_denormal_used, status); | ||
249 | + } | ||
250 | break; | ||
251 | case float_class_snan: | ||
252 | case float_class_qnan: | ||
253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
254 | if ((flags & (minmax_isnum | minmax_isnumber)) | ||
255 | && !(ab_mask & float_cmask_snan) | ||
256 | && (ab_mask & ~float_cmask_qnan)) { | ||
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
259 | + } | ||
260 | return is_nan(a->cls) ? b : a; | ||
261 | } | ||
262 | |||
263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
264 | return parts_pick_nan(a, b, s); | ||
265 | } | ||
266 | |||
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
280 | + } | ||
281 | + | ||
282 | if (a->sign != b->sign) { | ||
283 | goto a_sign; | ||
284 | } | ||
285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
286 | return float_relation_unordered; | ||
287 | } | ||
288 | |||
289 | + if (ab_mask & float_cmask_denormal) { | ||
290 | + float_raise(float_flag_input_denormal_used, s); | ||
291 | + } | ||
292 | + | ||
293 | if (ab_mask & float_cmask_zero) { | ||
294 | if (ab_mask == float_cmask_zero) { | ||
295 | return float_relation_equal; | ||
296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
297 | case float_class_zero: | ||
298 | case float_class_inf: | ||
299 | break; | ||
300 | - case float_class_normal: | ||
301 | case float_class_denormal: | ||
302 | + float_raise(float_flag_input_denormal_used, s); | ||
303 | + /* fall through */ | ||
304 | + case float_class_normal: | ||
305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
306 | break; | ||
307 | default: | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
309 | if (unlikely(a->cls != float_class_normal)) { | ||
310 | switch (a->cls) { | ||
311 | case float_class_denormal: | ||
312 | + if (!a->sign) { | ||
313 | + /* -ve denormal will be InvalidOperation */ | ||
314 | + float_raise(float_flag_input_denormal_used, s); | ||
315 | + } | ||
316 | break; | ||
317 | case float_class_snan: | ||
318 | case float_class_qnan: | ||
152 | -- | 319 | -- |
153 | 2.34.1 | 320 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Currently we handle flushing of output denormals in uncanon_normal | |
2 | always before we deal with rounding. This works for architectures | ||
3 | that detect tininess before rounding, but is usually not the right | ||
4 | place when the architecture detects tininess after rounding. For | ||
5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes | ||
6 | outputs to be flushed to zero "when it detects a floating-point | ||
7 | underflow condition". This means that we mustn't flush to zero if | ||
8 | the input is such that after rounding it is no longer tiny. | ||
9 | |||
10 | At least one of our guest architectures does underflow detection | ||
11 | after rounding but flushing of denormals before rounding (MIPS MSA); | ||
12 | this means we need to have a config knob for this that is separate | ||
13 | from our existing tininess_before_rounding setting. | ||
14 | |||
15 | Add an ftz_detection flag. For consistency with | ||
16 | tininess_before_rounding, we make it default to "detect ftz after | ||
17 | rounding"; this means that we need to explicitly set the flag to | ||
18 | "detect ftz before rounding" on every existing architecture that sets | ||
19 | flush_to_zero, so that this commit has no behaviour change. | ||
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
37 | |||
38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
40 | --- | ||
41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ | ||
42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ | ||
43 | target/mips/fpu_helper.h | 6 ++++++ | ||
44 | target/alpha/cpu.c | 7 +++++++ | ||
45 | target/arm/cpu.c | 1 + | ||
46 | target/hppa/fpu_helper.c | 11 +++++++++++ | ||
47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ | ||
48 | target/mips/msa.c | 9 +++++++++ | ||
49 | target/ppc/cpu_init.c | 3 +++ | ||
50 | target/rx/cpu.c | 8 ++++++++ | ||
51 | target/sh4/cpu.c | 8 ++++++++ | ||
52 | target/tricore/helper.c | 1 + | ||
53 | tests/fp/fp-bench.c | 1 + | ||
54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ | ||
55 | 14 files changed, 107 insertions(+), 6 deletions(-) | ||
56 | |||
57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/fpu/softfloat-helpers.h | ||
60 | +++ b/include/fpu/softfloat-helpers.h | ||
61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) | ||
62 | status->flush_inputs_to_zero = val; | ||
63 | } | ||
64 | |||
65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, | ||
66 | + float_status *status) | ||
67 | +{ | ||
68 | + status->ftz_detection = d; | ||
69 | +} | ||
70 | + | ||
71 | static inline void set_default_nan_mode(bool val, float_status *status) | ||
72 | { | ||
73 | status->default_nan_mode = val; | ||
74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | ||
75 | return status->default_nan_mode; | ||
76 | } | ||
77 | |||
78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) | ||
79 | +{ | ||
80 | + return status->ftz_detection; | ||
81 | +} | ||
82 | + | ||
83 | #endif /* SOFTFLOAT_HELPERS_H */ | ||
84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/fpu/softfloat-types.h | ||
87 | +++ b/include/fpu/softfloat-types.h | ||
88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
89 | float_infzeronan_suppress_invalid = (1 << 7), | ||
90 | } FloatInfZeroNaNRule; | ||
91 | |||
92 | +/* | ||
93 | + * When flush_to_zero is set, should we detect denormal results to | ||
94 | + * be flushed before or after rounding? For most architectures this | ||
95 | + * should be set to match the tininess_before_rounding setting, | ||
96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before | ||
97 | + * rounding but tininess after rounding. | ||
98 | + * | ||
99 | + * This enum is arranged so that the default if the target doesn't | ||
100 | + * configure it matches the default for tininess_before_rounding | ||
101 | + * (i.e. "after rounding"). | ||
102 | + */ | ||
103 | +typedef enum __attribute__((__packed__)) { | ||
104 | + float_ftz_after_rounding = 0, | ||
105 | + float_ftz_before_rounding = 1, | ||
106 | +} FloatFTZDetection; | ||
107 | + | ||
108 | /* | ||
109 | * Floating Point Status. Individual architectures may maintain | ||
110 | * several versions of float_status for different functions. The | ||
111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | ||
112 | bool tininess_before_rounding; | ||
113 | /* should denormalised results go to zero and set output_denormal_flushed? */ | ||
114 | bool flush_to_zero; | ||
115 | + /* do we detect and flush denormal results before or after rounding? */ | ||
116 | + FloatFTZDetection ftz_detection; | ||
117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ | ||
118 | bool flush_inputs_to_zero; | ||
119 | bool default_nan_mode; | ||
120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/mips/fpu_helper.h | ||
123 | +++ b/target/mips/fpu_helper.h | ||
124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) | ||
125 | */ | ||
126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
127 | &env->active_fpu.fp_status); | ||
128 | + /* | ||
129 | + * TODO: the spec doesn't say clearly whether FTZ happens before | ||
130 | + * or after rounding for normal FPU operations. | ||
131 | + */ | ||
132 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
133 | + &env->active_fpu.fp_status); | ||
134 | } | ||
135 | |||
136 | /* MSA */ | ||
137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/alpha/cpu.c | ||
140 | +++ b/target/alpha/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
143 | /* Default NaN: sign bit clear, msb frac bit set */ | ||
144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
145 | + /* | ||
146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so | ||
148 | + * this should be float_ftz_after_rounding to match the | ||
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
161 | { | ||
162 | set_float_detect_tininess(float_tininess_before_rounding, s); | ||
163 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/hppa/fpu_helper.c | ||
170 | +++ b/target/hppa/fpu_helper.c | ||
171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); | ||
173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ | ||
174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); | ||
175 | + /* | ||
176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing | ||
177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" | ||
178 | + * for consistency with tininess detection. | ||
179 | + */ | ||
180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
181 | + /* | ||
182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should | ||
183 | + * detect tininess before rounding, but we don't set that here so we | ||
184 | + * get the default tininess after rounding. | ||
185 | + */ | ||
186 | } | ||
187 | |||
188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/i386/tcg/fpu_helper.c | ||
192 | +++ b/target/i386/tcg/fpu_helper.c | ||
193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) | ||
194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); | ||
195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); | ||
196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); | ||
197 | + /* | ||
198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM | ||
199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush | ||
200 | + * when we detect underflow, which x86 does after rounding). | ||
201 | + */ | ||
202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); | ||
204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); | ||
205 | } | ||
206 | |||
207 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
208 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/mips/msa.c | ||
211 | +++ b/target/mips/msa.c | ||
212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
213 | /* tininess detected after rounding.*/ | ||
214 | set_float_detect_tininess(float_tininess_after_rounding, | ||
215 | &env->active_tc.msa_fp_status); | ||
216 | + /* | ||
217 | + * MSACSR.FS detects tiny results to flush to zero before rounding | ||
218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD | ||
219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it | ||
220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 | ||
221 | + * table 3.3). | ||
222 | + */ | ||
223 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
224 | + &env->active_tc.msa_fp_status); | ||
225 | |||
226 | /* | ||
227 | * According to MIPS specifications, if one of the two operands is | ||
228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
229 | index XXXXXXX..XXXXXXX 100644 | ||
230 | --- a/target/ppc/cpu_init.c | ||
231 | +++ b/target/ppc/cpu_init.c | ||
232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
233 | /* tininess for underflow is detected before rounding */ | ||
234 | set_float_detect_tininess(float_tininess_before_rounding, | ||
235 | &env->fp_status); | ||
236 | + /* Similarly for flush-to-zero */ | ||
237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
238 | + | ||
239 | /* | ||
240 | * PowerPC propagation rules: | ||
241 | * 1. A if it sNaN or qNaN | ||
242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/rx/cpu.c | ||
245 | +++ b/target/rx/cpu.c | ||
246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
248 | /* Default NaN value: sign bit clear, set frac msb */ | ||
249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
250 | + /* | ||
251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear | ||
252 | + * on whether flush-to-zero should happen before or after rounding, but | ||
253 | + * section 1.3.2 says that it happens when underflow is detected, and | ||
254 | + * implies that underflow is detected after rounding. So this may not | ||
255 | + * be the correct setting. | ||
256 | + */ | ||
257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
258 | } | ||
259 | |||
260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/target/sh4/cpu.c | ||
264 | +++ b/target/sh4/cpu.c | ||
265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) | ||
266 | set_default_nan_mode(1, &env->fp_status); | ||
267 | /* sign bit clear, set all frac bits other than msb */ | ||
268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); | ||
269 | + /* | ||
270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether | ||
271 | + * it detects tininess before or after rounding. Section 6.4 is clear | ||
272 | + * that flush-to-zero happens when the result underflows, though, so | ||
273 | + * either this should be "detect ftz after rounding" or else we should | ||
274 | + * be setting "detect tininess before rounding". | ||
275 | + */ | ||
276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
277 | } | ||
278 | |||
279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) | ||
280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/tricore/helper.c | ||
283 | +++ b/target/tricore/helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) | ||
285 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
286 | set_flush_to_zero(1, &env->fp_status); | ||
287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | ||
288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
289 | set_default_nan_mode(1, &env->fp_status); | ||
290 | /* Default NaN pattern: sign bit clear, frac msb set */ | ||
291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/fp/fp-bench.c | ||
295 | +++ b/tests/fp/fp-bench.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); | ||
298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); | ||
299 | set_float_default_nan_pattern(0b01000000, &soft_status); | ||
300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); | ||
301 | |||
302 | f = bench_funcs[operation][precision]; | ||
303 | g_assert(f); | ||
304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
305 | index XXXXXXX..XXXXXXX 100644 | ||
306 | --- a/fpu/softfloat-parts.c.inc | ||
307 | +++ b/fpu/softfloat-parts.c.inc | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
309 | p->frac_lo &= ~round_mask; | ||
310 | } | ||
311 | frac_shr(p, frac_shift); | ||
312 | - } else if (s->flush_to_zero) { | ||
313 | + } else if (s->flush_to_zero && | ||
314 | + s->ftz_detection == float_ftz_before_rounding) { | ||
315 | flags |= float_flag_output_denormal_flushed; | ||
316 | p->cls = float_class_zero; | ||
317 | exp = 0; | ||
318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; | ||
320 | frac_shr(p, frac_shift); | ||
321 | |||
322 | - if (is_tiny && (flags & float_flag_inexact)) { | ||
323 | - flags |= float_flag_underflow; | ||
324 | - } | ||
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
340 | } | ||
341 | } | ||
342 | p->exp = exp; | ||
343 | -- | ||
344 | 2.34.1 | diff view generated by jsdifflib |
1 | A typo in the implementation of isar_feature_aa64_tidcp1() means we | 1 | The Armv8.7 FEAT_AFP feature defines three new control bits in |
---|---|---|---|
2 | were checking the field in the wrong ID register, so we might have | 2 | the FPCR: |
3 | provided the feature on CPUs that don't have it and not provided | 3 | * FPCR.AH: "alternate floating point mode"; this changes floating |
4 | it on CPUs that should have it. Correct this bug. | 4 | point behaviour in a variety of ways, including: |
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, denormals detected after rounding | ||
7 | with an unbounded exponent are flushed to zero | ||
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
5 | 15 | ||
6 | Cc: qemu-stable@nongnu.org | 16 | This commit defines the new bits in the FPCR, and allows them to be |
7 | Fixes: 9cd0c0dec97be9 "target/arm: Implement FEAT_TIDCP1" | 17 | read or written when FEAT_AFP is implemented. Actual behaviour |
8 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2120 | 18 | changes will be implemented in subsequent commits. |
19 | |||
20 | Note that these are the first FPCR bits which don't appear in the | ||
21 | AArch32 FPSCR view of the register, and which share bit positions | ||
22 | with FPSR bits. | ||
23 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | Message-id: 20240123160333.958841-1-peter.maydell@linaro.org | ||
12 | --- | 26 | --- |
13 | target/arm/cpu-features.h | 2 +- | 27 | target/arm/cpu-features.h | 5 +++++ |
14 | 1 file changed, 1 insertion(+), 1 deletion(-) | 28 | target/arm/cpu.h | 3 +++ |
29 | target/arm/vfp_helper.c | 11 ++++++++--- | ||
30 | 3 files changed, 16 insertions(+), 3 deletions(-) | ||
15 | 31 | ||
16 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | 32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
17 | index XXXXXXX..XXXXXXX 100644 | 33 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/cpu-features.h | 34 | --- a/target/arm/cpu-features.h |
19 | +++ b/target/arm/cpu-features.h | 35 | +++ b/target/arm/cpu-features.h |
20 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) | 36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) |
21 | 37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; | |
38 | } | ||
39 | |||
40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) | ||
41 | +{ | ||
42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; | ||
43 | +} | ||
44 | + | ||
22 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | 45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) |
23 | { | 46 | { |
24 | - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR1, TIDCP1) != 0; | 47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; |
25 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | 48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/cpu.h | ||
51 | +++ b/target/arm/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
53 | */ | ||
54 | |||
55 | /* FPCR bits */ | ||
56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ | ||
57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ | ||
58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ | ||
59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ | ||
60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ | ||
61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
67 | if (!cpu_isar_feature(any_fp16, cpu)) { | ||
68 | val &= ~FPCR_FZ16; | ||
69 | } | ||
70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { | ||
71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); | ||
72 | + } | ||
73 | |||
74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { | ||
75 | val &= ~FPCR_EBF; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
77 | * We don't implement trapped exception handling, so the | ||
78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) | ||
79 | * | ||
80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF | ||
81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits | ||
82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, | ||
83 | + * FIZ, AH, and NEP. | ||
84 | + * Len, Stride and LTPSIZE we just handled. Store those bits | ||
85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI | ||
86 | * bits. | ||
87 | */ | ||
88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; | ||
89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | | ||
90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; | ||
91 | env->vfp.fpcr &= ~mask; | ||
92 | env->vfp.fpcr |= val; | ||
26 | } | 93 | } |
27 | |||
28 | static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) | ||
29 | -- | 94 | -- |
30 | 2.34.1 | 95 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects | ||
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
1 | 5 | ||
6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal | ||
7 | only because of FPCR.FIZ then we should *not* set the cumulative | ||
8 | exception bit FPSR.IDC. | ||
9 | |||
10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only | ||
11 | applies when FPCR.AH is 0. | ||
12 | |||
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal status flag into FPSR reads when it is the | ||
16 | result only of FPCR.FIZ. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- | ||
22 | 1 file changed, 50 insertions(+), 10 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/vfp_helper.c | ||
27 | +++ b/target/arm/vfp_helper.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
29 | |||
30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
31 | { | ||
32 | - uint32_t i = 0; | ||
33 | + uint32_t a32_flags = 0, a64_flags = 0; | ||
34 | |||
35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
40 | /* FZ16 does not generate an input denormal exception. */ | ||
41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
65 | } | ||
66 | |||
67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
70 | } | ||
71 | |||
72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
73 | +{ | ||
74 | + /* | ||
75 | + * Synchronize any pending exception-flag information in the | ||
76 | + * float_status values into env->vfp.fpsr, and then clear out | ||
77 | + * the float_status data. | ||
78 | + */ | ||
79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); | ||
80 | + vfp_clear_float_status_exc_flags(env); | ||
81 | +} | ||
82 | + | ||
83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
84 | { | ||
85 | uint64_t changed = env->vfp.fpcr; | ||
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
87 | if (changed & FPCR_FZ) { | ||
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
104 | } | ||
105 | if (changed & FPCR_DN) { | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
119 | } | ||
120 | |||
121 | #else | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is set, various behaviours of AArch64 floating point | ||
2 | operations which are controlled by softfloat config settings change: | ||
3 | * tininess and ftz detection before/after rounding | ||
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
1 | 7 | ||
8 | When the guest changes the value of the AH bit, switch these config | ||
9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status | ||
10 | fields. | ||
11 | |||
12 | This requires us to make the arm_set_default_fp_behaviours() function | ||
13 | global, since we now need to call it from cpu.c and vfp_helper.c; we | ||
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
16 | |||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | --- | ||
20 | target/arm/internals.h | 4 +++ | ||
21 | target/arm/cpu.c | 23 ---------------- | ||
22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- | ||
23 | 3 files changed, 61 insertions(+), 24 deletions(-) | ||
24 | |||
25 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/internals.h | ||
28 | +++ b/target/arm/internals.h | ||
29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); | ||
30 | * all EL1" scope; this covers stage 1 and stage 2. | ||
31 | */ | ||
32 | int alle1_tlbmask(CPUARMState *env); | ||
33 | + | ||
34 | +/* Set the float_status behaviour to match the Arm defaults */ | ||
35 | +void arm_set_default_fp_behaviours(float_status *s); | ||
36 | + | ||
37 | #endif | ||
38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/cpu.c | ||
41 | +++ b/target/arm/cpu.c | ||
42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); | ||
44 | } | ||
45 | |||
46 | -/* | ||
47 | - * Set the float_status behaviour to match the Arm defaults: | ||
48 | - * * tininess-before-rounding | ||
49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
58 | - */ | ||
59 | -static void arm_set_default_fp_behaviours(float_status *s) | ||
60 | -{ | ||
61 | - set_float_detect_tininess(float_tininess_before_rounding, s); | ||
62 | - set_float_ftz_detection(float_ftz_before_rounding, s); | ||
63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
66 | - set_float_default_nan_pattern(0b01000000, s); | ||
67 | -} | ||
68 | - | ||
69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
70 | { | ||
71 | /* Reset a single ARMCPRegInfo register */ | ||
72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/arm/vfp_helper.c | ||
75 | +++ b/target/arm/vfp_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #include "exec/helper-proto.h" | ||
78 | #include "internals.h" | ||
79 | #include "cpu-features.h" | ||
80 | +#include "fpu/softfloat.h" | ||
81 | #ifdef CONFIG_TCG | ||
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
89 | |||
90 | +/* | ||
91 | + * Set the float_status behaviour to match the Arm defaults: | ||
92 | + * * tininess-before-rounding | ||
93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b). | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
102 | + */ | ||
103 | +void arm_set_default_fp_behaviours(float_status *s) | ||
104 | +{ | ||
105 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
106 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
110 | + set_float_default_nan_pattern(0b01000000, s); | ||
111 | +} | ||
112 | + | ||
113 | +/* | ||
114 | + * Set the float_status behaviour to match the FEAT_AFP | ||
115 | + * FPCR.AH=1 requirements: | ||
116 | + * * tininess-after-rounding | ||
117 | + * * 2-input NaN propagation prefers the first NaN | ||
118 | + * * 3-input NaN propagation prefers a over b over c | ||
119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't | ||
120 | + * set Invalid for a QNaN | ||
121 | + * * default NaN has sign bit set, msb frac bit set | ||
122 | + */ | ||
123 | +static void arm_set_ah_fp_behaviours(float_status *s) | ||
124 | +{ | ||
125 | + set_float_detect_tininess(float_tininess_after_rounding, s); | ||
126 | + set_float_ftz_detection(float_ftz_after_rounding, s); | ||
127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); | ||
128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); | ||
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
133 | + | ||
134 | #ifdef CONFIG_TCG | ||
135 | |||
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
140 | } | ||
141 | + if (changed & FPCR_AH) { | ||
142 | + bool ah_enabled = val & FPCR_AH; | ||
143 | + | ||
144 | + if (ah_enabled) { | ||
145 | + /* Change behaviours for A64 FP operations */ | ||
146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
148 | + } else { | ||
149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
151 | + } | ||
152 | + } | ||
153 | /* | ||
154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
155 | * we must sync the float_status flags into vfp.fpsr now (under the | ||
156 | -- | ||
157 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
1 | 5 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | We are going to need to generate different code in some cases when |
---|---|---|---|
2 | FPCR.AH is 1. For example: | ||
3 | * Floating point neg and abs must not flip the sign bit of NaNs | ||
4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various | ||
5 | BFCVT and BFM bfloat16 ops) need to use a different float_status | ||
6 | to the usual one | ||
2 | 7 | ||
3 | e2h_access() was added in commit bb5972e439 ("target/arm: | 8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at |
4 | Add VHE timer register redirection and aliasing") close to | 9 | translate time. |
5 | the generic_timer_cp_reginfo[] array, but isn't used until | ||
6 | vhe_reginfo[] definition. Move it closer to the other e2h | ||
7 | helpers. | ||
8 | 10 | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 11 | Because we now have a bit in FPCR that affects codegen, we can't mark |
12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes | ||
13 | to it will now end the TB and trigger a regeneration of hflags. | ||
14 | |||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | Message-id: 20240118200643.29037-19-philmd@linaro.org | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | 17 | --- |
14 | target/arm/helper.c | 29 +++++++++++++++-------------- | 18 | target/arm/cpu.h | 1 + |
15 | 1 file changed, 15 insertions(+), 14 deletions(-) | 19 | target/arm/tcg/translate.h | 2 ++ |
20 | target/arm/helper.c | 2 +- | ||
21 | target/arm/tcg/hflags.c | 4 ++++ | ||
22 | target/arm/tcg/translate-a64.c | 1 + | ||
23 | 5 files changed, 9 insertions(+), 1 deletion(-) | ||
16 | 24 | ||
25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/cpu.h | ||
28 | +++ b/target/arm/cpu.h | ||
29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) | ||
30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
34 | |||
35 | /* | ||
36 | * Helpers for using the above. Note that only the A64 accessors use | ||
37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate.h | ||
40 | +++ b/target/arm/tcg/translate.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
42 | bool nv2_mem_e20; | ||
43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ | ||
44 | bool nv2_mem_be; | ||
45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
46 | + bool fpcr_ah; | ||
47 | /* | ||
48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
49 | * < 0, set by the current instruction. | ||
17 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 50 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/helper.c | 52 | --- a/target/arm/helper.c |
20 | +++ b/target/arm/helper.c | 53 | +++ b/target/arm/helper.c |
21 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { | 54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { |
22 | }, | 55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, |
23 | }; | 56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, |
24 | 57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, | |
25 | -static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, | 58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, |
26 | - bool isread) | 59 | + .access = PL0_RW, .type = ARM_CP_FPU, |
27 | -{ | 60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, |
28 | - if (arm_current_el(env) == 1) { | 61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, |
29 | - /* This must be a FEAT_NV access */ | 62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, |
30 | - /* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */ | 63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c |
31 | - return CP_ACCESS_OK; | 64 | index XXXXXXX..XXXXXXX 100644 |
32 | - } | 65 | --- a/target/arm/tcg/hflags.c |
33 | - if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { | 66 | +++ b/target/arm/tcg/hflags.c |
34 | - return CP_ACCESS_TRAP; | 67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, |
35 | - } | 68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); |
36 | - return CP_ACCESS_OK; | 69 | } |
37 | -} | 70 | |
38 | - | 71 | + if (env->vfp.fpcr & FPCR_AH) { |
39 | #else | 72 | + DP_TBFLAG_A64(flags, AH, 1); |
40 | 73 | + } | |
41 | /* | ||
42 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo el3_cp_reginfo[] = { | ||
43 | }; | ||
44 | |||
45 | #ifndef CONFIG_USER_ONLY | ||
46 | + | 74 | + |
47 | +static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, | 75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); |
48 | + bool isread) | 76 | } |
49 | +{ | 77 | |
50 | + if (arm_current_el(env) == 1) { | 78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
51 | + /* This must be a FEAT_NV access */ | 79 | index XXXXXXX..XXXXXXX 100644 |
52 | + /* TODO: FEAT_ECV will need to check CNTHCTL_EL2 here */ | 80 | --- a/target/arm/tcg/translate-a64.c |
53 | + return CP_ACCESS_OK; | 81 | +++ b/target/arm/tcg/translate-a64.c |
54 | + } | 82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, |
55 | + if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { | 83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); |
56 | + return CP_ACCESS_TRAP; | 84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); |
57 | + } | 85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); |
58 | + return CP_ACCESS_OK; | 86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); |
59 | +} | 87 | dc->vec_len = 0; |
60 | + | 88 | dc->vec_stride = 0; |
61 | /* Test if system register redirection is to occur in the current state. */ | 89 | dc->cp_regs = arm_cpu->cp_regs; |
62 | static bool redirect_for_e2h(CPUARMState *env) | ||
63 | { | ||
64 | -- | 90 | -- |
65 | 2.34.1 | 91 | 2.34.1 |
66 | |||
67 | diff view generated by jsdifflib |
1 | From: Guenter Roeck <linux@roeck-us.net> | 1 | When FPCR.AH is 1, the behaviour of some instructions changes: |
---|---|---|---|
2 | 2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | |
3 | Add MMDC, OCOTP, QSPI, CAAM, and USBMISC as unimplemented devices. | 3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT |
4 | 4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which | |
5 | This allows operating systems such as Linux to run emulations such as | 5 | QEMU does not yet implement) |
6 | mcimx6ul-evk. | 6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS |
7 | 7 | ||
8 | Before commit 0cd4926b85 ("Refactor i.MX6UL processor code"), the affected | 8 | The behaviour change is: |
9 | memory ranges were covered by the unimplemented DAP device. The commit | 9 | * the instructions do not update the FPSR cumulative exception flags |
10 | reduced the DAP address range from 0x100000 to 4kB, and the emulation | 10 | * trapped floating point exceptions are disabled (a no-op for QEMU, |
11 | thus no longer covered the various unimplemented devices in the affected | 11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) |
12 | address range. | 12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode |
13 | 13 | * denormalized inputs and outputs are always flushed to zero, as if | |
14 | Fixes: 0cd4926b85 ("Refactor i.MX6UL processor code") | 14 | FPCR.{FZ,FIZ} is {1,1} |
15 | Cc: Jean-Christophe Dubois <jcd@tribudubois.net> | 15 | * FPCR.FZ16 is still honoured for half-precision inputs |
16 | Signed-off-by: Guenter Roeck <linux@roeck-us.net> | 16 | |
17 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 17 | (See the Arm ARM DDI0487L.a section A1.5.9.) |
18 | Message-id: 20240120005356.2599547-1-linux@roeck-us.net | 18 | |
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPCR or FPSR is updated. In subsequent commits | ||
28 | we will arrange to use these new fp_status fields for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | 33 | --- |
21 | include/hw/arm/fsl-imx6ul.h | 2 ++ | 34 | target/arm/cpu.h | 15 +++++++++++++++ |
22 | hw/arm/fsl-imx6ul.c | 30 ++++++++++++++++++++++++++++++ | 35 | target/arm/internals.h | 2 ++ |
23 | 2 files changed, 32 insertions(+) | 36 | target/arm/tcg/translate.h | 14 ++++++++++++++ |
24 | 37 | target/arm/cpu.c | 4 ++++ | |
25 | diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h | 38 | target/arm/vfp_helper.c | 13 ++++++++++++- |
26 | index XXXXXXX..XXXXXXX 100644 | 39 | 5 files changed, 47 insertions(+), 1 deletion(-) |
27 | --- a/include/hw/arm/fsl-imx6ul.h | 40 | |
28 | +++ b/include/hw/arm/fsl-imx6ul.h | 41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
29 | @@ -XXX,XX +XXX,XX @@ enum FslIMX6ULMemoryMap { | 42 | index XXXXXXX..XXXXXXX 100644 |
30 | FSL_IMX6UL_ENET1_ADDR = 0x02188000, | 43 | --- a/target/arm/cpu.h |
31 | 44 | +++ b/target/arm/cpu.h | |
32 | FSL_IMX6UL_USBO2_USBMISC_ADDR = 0x02184800, | 45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
33 | + FSL_IMX6UL_USBO2_USBMISC_SIZE = 0x200, | 46 | * standard_fp_status : the ARM "Standard FPSCR Value" |
34 | + | 47 | * standard_fp_status_fp16 : used for half-precision |
35 | FSL_IMX6UL_USBO2_USB1_ADDR = 0x02184000, | 48 | * calculations with the ARM "Standard FPSCR Value" |
36 | FSL_IMX6UL_USBO2_USB2_ADDR = 0x02184200, | 49 | + * ah_fp_status: used for the A64 insns which change behaviour |
37 | 50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | |
38 | diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c | 51 | + * and the reciprocal and square root estimate/step insns) |
39 | index XXXXXXX..XXXXXXX 100644 | 52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour |
40 | --- a/hw/arm/fsl-imx6ul.c | 53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, |
41 | +++ b/hw/arm/fsl-imx6ul.c | 54 | + * and the reciprocal and square root estimate/step insns); |
42 | @@ -XXX,XX +XXX,XX @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) | 55 | + * for half-precision |
43 | create_unimplemented_device("a7mpcore-dap", FSL_IMX6UL_A7MPCORE_DAP_ADDR, | 56 | * |
44 | FSL_IMX6UL_A7MPCORE_DAP_SIZE); | 57 | * Half-precision operations are governed by a separate |
45 | 58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | |
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
81 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/arm/internals.h | ||
84 | +++ b/target/arm/internals.h | ||
85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); | ||
86 | |||
87 | /* Set the float_status behaviour to match the Arm defaults */ | ||
88 | void arm_set_default_fp_behaviours(float_status *s); | ||
89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ | ||
90 | +void arm_set_ah_fp_behaviours(float_status *s); | ||
91 | |||
92 | #endif | ||
93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/tcg/translate.h | ||
96 | +++ b/target/arm/tcg/translate.h | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
98 | FPST_A64, | ||
99 | FPST_A32_F16, | ||
100 | FPST_A64_F16, | ||
101 | + FPST_AH, | ||
102 | + FPST_AH_F16, | ||
103 | FPST_STD, | ||
104 | FPST_STD_F16, | ||
105 | } ARMFPStatusFlavour; | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
108 | * FPST_A64_F16 | ||
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
154 | */ | ||
155 | -static void arm_set_ah_fp_behaviours(float_status *s) | ||
156 | +void arm_set_ah_fp_behaviours(float_status *s) | ||
157 | { | ||
158 | set_float_detect_tininess(float_tininess_after_rounding, s); | ||
159 | set_float_ftz_detection(float_ftz_after_rounding, s); | ||
160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
46 | + /* | 164 | + /* |
47 | + * MMDC | 165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because |
166 | + * they are used for insns that must not set the cumulative exception bits. | ||
48 | + */ | 167 | + */ |
49 | + create_unimplemented_device("a7mpcore-mmdc", FSL_IMX6UL_MMDC_CFG_ADDR, | ||
50 | + FSL_IMX6UL_MMDC_CFG_SIZE); | ||
51 | + | ||
52 | + /* | ||
53 | + * OCOTP | ||
54 | + */ | ||
55 | + create_unimplemented_device("a7mpcore-ocotp", FSL_IMX6UL_OCOTP_CTRL_ADDR, | ||
56 | + FSL_IMX6UL_OCOTP_CTRL_SIZE); | ||
57 | + | ||
58 | + /* | ||
59 | + * QSPI | ||
60 | + */ | ||
61 | + create_unimplemented_device("a7mpcore-qspi", FSL_IMX6UL_QSPI_ADDR, | ||
62 | + FSL_IMX6UL_QSPI_SIZE); | ||
63 | + | ||
64 | + /* | ||
65 | + * CAAM | ||
66 | + */ | ||
67 | + create_unimplemented_device("a7mpcore-qspi", FSL_IMX6UL_CAAM_ADDR, | ||
68 | + FSL_IMX6UL_CAAM_SIZE); | ||
69 | + | ||
70 | + /* | ||
71 | + * USBMISC | ||
72 | + */ | ||
73 | + create_unimplemented_device("a7mpcore-usbmisc", FSL_IMX6UL_USBO2_USBMISC_ADDR, | ||
74 | + FSL_IMX6UL_USBO2_USBMISC_SIZE); | ||
75 | + | 168 | + |
76 | /* | 169 | /* |
77 | * GPTs | 170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does |
78 | */ | 171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes |
172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
178 | } | ||
179 | |||
180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
190 | } | ||
191 | if (changed & FPCR_FZ) { | ||
192 | bool ftz_enabled = val & FPCR_FZ; | ||
193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
199 | } | ||
200 | if (changed & FPCR_AH) { | ||
201 | bool ah_enabled = val & FPCR_AH; | ||
79 | -- | 202 | -- |
80 | 2.34.1 | 203 | 2.34.1 |
81 | |||
82 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use |
---|---|---|---|
2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get | ||
3 | the required behaviour changes. | ||
2 | 4 | ||
3 | The USB Controllers are part of the chipset, thus are | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | always present and mapped in memory. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | ||
8 | target/arm/tcg/translate-a64.h | 13 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- | ||
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
5 | 12 | ||
6 | This is a migration compatibility break for the cubieboard | 13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h |
7 | machine started with the '-usb none' option. | ||
8 | |||
9 | Reported-by: Guenter Roeck <linux@roeck-us.net> | ||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Guenter Roeck <linux@roeck-us.net> | ||
12 | Tested-by: Guenter Roeck <linux@roeck-us.net> | ||
13 | Message-id: 20240119215106.45776-2-philmd@linaro.org | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | hw/arm/allwinner-a10.c | 49 +++++++++++++++++------------------------- | ||
18 | 1 file changed, 20 insertions(+), 29 deletions(-) | ||
19 | |||
20 | diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/arm/allwinner-a10.c | 15 | --- a/target/arm/tcg/translate-a64.h |
23 | +++ b/hw/arm/allwinner-a10.c | 16 | +++ b/target/arm/tcg/translate-a64.h |
24 | @@ -XXX,XX +XXX,XX @@ static void aw_a10_init(Object *obj) | 17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) |
25 | 18 | return ret; | |
26 | object_initialize_child(obj, "i2c0", &s->i2c0, TYPE_AW_I2C); | 19 | } |
27 | 20 | ||
28 | - if (machine_usb(current_machine)) { | 21 | +/* |
29 | - int i; | 22 | + * Return the ARMFPStatusFlavour to use based on element size and |
30 | - | 23 | + * whether FPCR.AH is set. |
31 | - for (i = 0; i < AW_A10_NUM_USB; i++) { | 24 | + */ |
32 | - object_initialize_child(obj, "ehci[*]", &s->ehci[i], | 25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) |
33 | - TYPE_PLATFORM_EHCI); | 26 | +{ |
34 | - object_initialize_child(obj, "ohci[*]", &s->ohci[i], | 27 | + if (s->fpcr_ah) { |
35 | - TYPE_SYSBUS_OHCI); | 28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; |
36 | - } | 29 | + } else { |
37 | + for (size_t i = 0; i < AW_A10_NUM_USB; i++) { | 30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; |
38 | + object_initialize_child(obj, "ehci[*]", &s->ehci[i], | 31 | + } |
39 | + TYPE_PLATFORM_EHCI); | 32 | +} |
40 | + object_initialize_child(obj, "ohci[*]", &s->ohci[i], TYPE_SYSBUS_OHCI); | 33 | + |
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
43 | */ | ||
44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
45 | - int rm, bool is_fp16, int data, | ||
46 | + int rm, ARMFPStatusFlavour fpsttype, int data, | ||
47 | gen_helper_gvec_3_ptr *fn) | ||
48 | { | ||
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
71 | } | ||
72 | break; | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
74 | if (fp_access_check(s)) { | ||
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
79 | write_fp_sreg(s, a->rd, t0); | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
83 | if (fp_access_check(s)) { | ||
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
41 | } | 142 | } |
42 | 143 | ||
43 | object_initialize_child(obj, "mmc0", &s->mmc0, TYPE_AW_SDHOST_SUN4I); | 144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
44 | @@ -XXX,XX +XXX,XX @@ static void aw_a10_realize(DeviceState *dev, Error **errp) | 145 | - esz == MO_16, data, fns[esz - 1]); |
45 | qdev_get_gpio_in(dev, 1), | 146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, |
46 | 115200, serial_hd(0), DEVICE_NATIVE_ENDIAN); | 147 | + data, fns[esz - 1]); |
47 | 148 | return true; | |
48 | - if (machine_usb(current_machine)) { | 149 | } |
49 | - int i; | 150 | |
50 | + for (size_t i = 0; i < AW_A10_NUM_USB; i++) { | 151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
51 | + g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i); | 152 | + gen_helper_gvec_3_ptr * const fns[3]) |
52 | 153 | +{ | |
53 | - for (i = 0; i < AW_A10_NUM_USB; i++) { | 154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, |
54 | - g_autofree char *bus = g_strdup_printf("usb-bus.%d", i); | 155 | + a->esz == MO_16 ? |
55 | + object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable", | 156 | + FPST_A64_F16 : FPST_A64); |
56 | + true, &error_fatal); | 157 | +} |
57 | + sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal); | 158 | + |
58 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0, | 159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, |
59 | + AW_A10_EHCI_BASE + i * 0x8000); | 160 | + gen_helper_gvec_3_ptr * const f[3]) |
60 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0, | 161 | +{ |
61 | + qdev_get_gpio_in(dev, 39 + i)); | 162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, |
62 | 163 | + select_ah_fpst(s, a->esz)); | |
63 | - object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable", | 164 | +} |
64 | - true, &error_fatal); | 165 | + |
65 | - sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal); | 166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { |
66 | - sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0, | 167 | gen_helper_gvec_fadd_h, |
67 | - AW_A10_EHCI_BASE + i * 0x8000); | 168 | gen_helper_gvec_fadd_s, |
68 | - sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0, | 169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { |
69 | - qdev_get_gpio_in(dev, 39 + i)); | 170 | gen_helper_gvec_recps_s, |
70 | - | 171 | gen_helper_gvec_recps_d, |
71 | - object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus, | 172 | }; |
72 | - &error_fatal); | 173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) |
73 | - sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal); | 174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) |
74 | - sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0, | 175 | |
75 | - AW_A10_OHCI_BASE + i * 0x8000); | 176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { |
76 | - sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0, | 177 | gen_helper_gvec_rsqrts_h, |
77 | - qdev_get_gpio_in(dev, 64 + i)); | 178 | gen_helper_gvec_rsqrts_s, |
78 | - } | 179 | gen_helper_gvec_rsqrts_d, |
79 | + object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus, | 180 | }; |
80 | + &error_fatal); | 181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) |
81 | + sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal); | 182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) |
82 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0, | 183 | |
83 | + AW_A10_OHCI_BASE + i * 0x8000); | 184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { |
84 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0, | 185 | gen_helper_gvec_faddp_h, |
85 | + qdev_get_gpio_in(dev, 64 + i)); | 186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, |
86 | } | 187 | } |
87 | 188 | ||
88 | /* SD/MMC */ | 189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
190 | - esz == MO_16, a->idx, fns[esz - 1]); | ||
191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
192 | + a->idx, fns[esz - 1]); | ||
193 | return true; | ||
194 | } | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { | ||
197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); | ||
198 | } FPScalar1; | ||
199 | |||
200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
201 | - const FPScalar1 *f, int rmode) | ||
202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
203 | + const FPScalar1 *f, int rmode, | ||
204 | + ARMFPStatusFlavour fpsttype) | ||
205 | { | ||
206 | TCGv_i32 tcg_rmode = NULL; | ||
207 | TCGv_ptr fpst; | ||
208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
209 | return check == 0; | ||
210 | } | ||
211 | |||
212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
89 | -- | 406 | -- |
90 | 2.34.1 | 407 | 2.34.1 |
91 | |||
92 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFMLALB, BFMLALT | ||
3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour | ||
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | ||
10 | the boolean. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- | ||
16 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
17 | 2 files changed, 17 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/translate-a64.c | ||
22 | +++ b/target/arm/tcg/translate-a64.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, | ||
24 | * an out-of-line helper. | ||
25 | */ | ||
26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
27 | - int rm, int ra, bool is_fp16, int data, | ||
28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, | ||
29 | + int data, | ||
30 | gen_helper_gvec_4_ptr *fn) | ||
31 | { | ||
32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), | ||
35 | vec_full_reg_offset(s, rn), | ||
36 | vec_full_reg_offset(s, rm), | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | ||
38 | } | ||
39 | if (fp_access_check(s)) { | ||
40 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, | ||
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
58 | } | ||
59 | |||
60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
61 | - esz == MO_16, (a->idx << 1) | neg, | ||
62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
63 | + (a->idx << 1) | neg, | ||
64 | fns[esz - 1]); | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/translate-sve.c | ||
90 | +++ b/target/arm/tcg/translate-sve.c | ||
91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, | ||
92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
93 | { | ||
94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | ||
95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); | ||
96 | + a->rd, a->rn, a->rm, a->ra, sel, | ||
97 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
98 | } | ||
99 | |||
100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | ||
101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | ||
102 | { | ||
103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | ||
104 | a->rd, a->rn, a->rm, a->ra, | ||
105 | - (a->index << 1) | sel, FPST_A64); | ||
106 | + (a->index << 1) | sel, | ||
107 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
108 | } | ||
109 | |||
110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so | ||
2 | that instead of zeroing the high elements of a vector register when | ||
3 | we write the output of a scalar operation to it, we instead merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
1 | 6 | ||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/tcg/translate.h | 2 ++ | ||
16 | target/arm/tcg/hflags.c | 9 +++++++++ | ||
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
19 | |||
20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/cpu.h | ||
23 | +++ b/target/arm/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
29 | |||
30 | /* | ||
31 | * Helpers for using the above. Note that only the A64 accessors use | ||
32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/tcg/translate.h | ||
35 | +++ b/target/arm/tcg/translate.h | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
37 | bool nv2_mem_be; | ||
38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
39 | bool fpcr_ah; | ||
40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ | ||
41 | + bool fpcr_nep; | ||
42 | /* | ||
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
64 | } | ||
65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/arm/tcg/translate-a64.c | ||
68 | +++ b/target/arm/tcg/translate-a64.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); | ||
74 | dc->vec_len = 0; | ||
75 | dc->vec_stride = 0; | ||
76 | dc->cp_regs = arm_cpu->cp_regs; | ||
77 | -- | ||
78 | 2.34.1 | diff view generated by jsdifflib |
1 | Convert the musicpal key input device to use | 1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the |
---|---|---|---|
2 | qemu_add_kbd_event_handler(). This lets us simplify it because we no | 2 | behaviour of the writeback of the result for most SIMD scalar |
3 | longer need to track whether we're in the middle of a PS/2 multibyte | 3 | operations, so that instead of zeroing the upper part of the result |
4 | key sequence. | 4 | register it merges the upper elements from one of the input |
5 | 5 | registers. | |
6 | In the conversion we move the keyboard handler registration from init | 6 | |
7 | to realize, because devices shouldn't disturb the state of the | 7 | Provide new functions write_fp_*reg_merging() which can be used |
8 | simulation by doing things like registering input handlers until | 8 | instead of the existing write_fp_*reg() functions when we want this |
9 | they're realized, so that device objects can be introspected | 9 | "merge the result with one of the input registers if FPCR.NEP is |
10 | safely. | 10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). |
11 | 11 | ||
12 | The behaviour where key-repeat is permitted for the arrow-keys only | 12 | Note that (as documented in the description of the FPCR.NEP bit) |
13 | is intentional (added in commit 7c6ce4baedfcd0c), so we retain it, | 13 | which input register to use as the merge source varies by |
14 | and add a comment to that effect. | 14 | instruction: for these 2-input scalar operations, the comparison |
15 | 15 | instructions take from Rm, not Rn. | |
16 | This is a migration compatibility break for musicpal. | 16 | |
17 | We'll extend this to also provide the merging behaviour for | ||
18 | the remaining scalar insns in subsequent commits. | ||
17 | 19 | ||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | Tested-by: Alex Bennée <alex.bennee@linaro.org> | 21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
20 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
21 | Message-id: 20231103182750.855577-1-peter.maydell@linaro.org | ||
22 | --- | 22 | --- |
23 | hw/arm/musicpal.c | 131 +++++++++++++++++++++------------------------- | 23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- |
24 | 1 file changed, 61 insertions(+), 70 deletions(-) | 24 | 1 file changed, 91 insertions(+), 26 deletions(-) |
25 | 25 | ||
26 | diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c | 26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
27 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/hw/arm/musicpal.c | 28 | --- a/target/arm/tcg/translate-a64.c |
29 | +++ b/hw/arm/musicpal.c | 29 | +++ b/target/arm/tcg/translate-a64.c |
30 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo musicpal_gpio_info = { | 30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) |
31 | }; | 31 | write_fp_dreg(s, reg, tmp); |
32 | 32 | } | |
33 | /* Keyboard codes & masks */ | 33 | |
34 | -#define KEY_RELEASED 0x80 | 34 | +/* |
35 | -#define KEY_CODE 0x7f | 35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: |
36 | - | 36 | + * - if FPCR.NEP == 0, clear the high elements of reg |
37 | -#define KEYCODE_TAB 0x0f | 37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg |
38 | -#define KEYCODE_ENTER 0x1c | 38 | + * (i.e. merge the result with those high elements) |
39 | -#define KEYCODE_F 0x21 | 39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). |
40 | -#define KEYCODE_M 0x32 | 40 | + */ |
41 | - | 41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, |
42 | -#define KEYCODE_EXTENDED 0xe0 | 42 | + TCGv_i64 v) |
43 | -#define KEYCODE_UP 0x48 | 43 | +{ |
44 | -#define KEYCODE_DOWN 0x50 | 44 | + if (!s->fpcr_nep) { |
45 | -#define KEYCODE_LEFT 0x4b | 45 | + write_fp_dreg(s, reg, v); |
46 | -#define KEYCODE_RIGHT 0x4d | 46 | + return; |
47 | - | 47 | + } |
48 | #define MP_KEY_WHEEL_VOL (1 << 0) | 48 | + |
49 | #define MP_KEY_WHEEL_VOL_INV (1 << 1) | ||
50 | #define MP_KEY_WHEEL_NAV (1 << 2) | ||
51 | @@ -XXX,XX +XXX,XX @@ struct musicpal_key_state { | ||
52 | SysBusDevice parent_obj; | ||
53 | /*< public >*/ | ||
54 | |||
55 | - uint32_t kbd_extended; | ||
56 | uint32_t pressed_keys; | ||
57 | qemu_irq out[8]; | ||
58 | }; | ||
59 | |||
60 | -static void musicpal_key_event(void *opaque, int keycode) | ||
61 | +static void musicpal_key_event(DeviceState *dev, QemuConsole *src, | ||
62 | + InputEvent *evt) | ||
63 | { | ||
64 | - musicpal_key_state *s = opaque; | ||
65 | + musicpal_key_state *s = MUSICPAL_KEY(dev); | ||
66 | + InputKeyEvent *key = evt->u.key.data; | ||
67 | + int qcode = qemu_input_key_value_to_qcode(key->key); | ||
68 | uint32_t event = 0; | ||
69 | int i; | ||
70 | |||
71 | - if (keycode == KEYCODE_EXTENDED) { | ||
72 | - s->kbd_extended = 1; | ||
73 | - return; | ||
74 | + switch (qcode) { | ||
75 | + case Q_KEY_CODE_UP: | ||
76 | + event = MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_NAV_INV; | ||
77 | + break; | ||
78 | + | ||
79 | + case Q_KEY_CODE_DOWN: | ||
80 | + event = MP_KEY_WHEEL_NAV; | ||
81 | + break; | ||
82 | + | ||
83 | + case Q_KEY_CODE_LEFT: | ||
84 | + event = MP_KEY_WHEEL_VOL | MP_KEY_WHEEL_VOL_INV; | ||
85 | + break; | ||
86 | + | ||
87 | + case Q_KEY_CODE_RIGHT: | ||
88 | + event = MP_KEY_WHEEL_VOL; | ||
89 | + break; | ||
90 | + | ||
91 | + case Q_KEY_CODE_F: | ||
92 | + event = MP_KEY_BTN_FAVORITS; | ||
93 | + break; | ||
94 | + | ||
95 | + case Q_KEY_CODE_TAB: | ||
96 | + event = MP_KEY_BTN_VOLUME; | ||
97 | + break; | ||
98 | + | ||
99 | + case Q_KEY_CODE_RET: | ||
100 | + event = MP_KEY_BTN_NAVIGATION; | ||
101 | + break; | ||
102 | + | ||
103 | + case Q_KEY_CODE_M: | ||
104 | + event = MP_KEY_BTN_MENU; | ||
105 | + break; | ||
106 | } | ||
107 | |||
108 | - if (s->kbd_extended) { | ||
109 | - switch (keycode & KEY_CODE) { | ||
110 | - case KEYCODE_UP: | ||
111 | - event = MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_NAV_INV; | ||
112 | - break; | ||
113 | - | ||
114 | - case KEYCODE_DOWN: | ||
115 | - event = MP_KEY_WHEEL_NAV; | ||
116 | - break; | ||
117 | - | ||
118 | - case KEYCODE_LEFT: | ||
119 | - event = MP_KEY_WHEEL_VOL | MP_KEY_WHEEL_VOL_INV; | ||
120 | - break; | ||
121 | - | ||
122 | - case KEYCODE_RIGHT: | ||
123 | - event = MP_KEY_WHEEL_VOL; | ||
124 | - break; | ||
125 | - } | ||
126 | - } else { | ||
127 | - switch (keycode & KEY_CODE) { | ||
128 | - case KEYCODE_F: | ||
129 | - event = MP_KEY_BTN_FAVORITS; | ||
130 | - break; | ||
131 | - | ||
132 | - case KEYCODE_TAB: | ||
133 | - event = MP_KEY_BTN_VOLUME; | ||
134 | - break; | ||
135 | - | ||
136 | - case KEYCODE_ENTER: | ||
137 | - event = MP_KEY_BTN_NAVIGATION; | ||
138 | - break; | ||
139 | - | ||
140 | - case KEYCODE_M: | ||
141 | - event = MP_KEY_BTN_MENU; | ||
142 | - break; | ||
143 | - } | ||
144 | - /* Do not repeat already pressed buttons */ | ||
145 | - if (!(keycode & KEY_RELEASED) && (s->pressed_keys & event)) { | ||
146 | + /* | 49 | + /* |
147 | + * We allow repeated wheel-events when the arrow keys are held down, | 50 | + * Move from mergereg to reg; this sets the high elements and |
148 | + * but do not repeat already-pressed buttons for the other key inputs. | 51 | + * clears the bits above 128 as a side effect. |
149 | + */ | 52 | + */ |
150 | + if (!(event & (MP_KEY_WHEEL_NAV | MP_KEY_WHEEL_VOL))) { | 53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
151 | + if (key->down && (s->pressed_keys & event)) { | 54 | + vec_full_reg_offset(s, mergereg), |
152 | event = 0; | 55 | + 16, vec_full_reg_size(s)); |
56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); | ||
57 | +} | ||
58 | + | ||
59 | +/* | ||
60 | + * Write a single-prec result, but only clear the higher elements | ||
61 | + * of the destination register if FPCR.NEP is 0; otherwise preserve them. | ||
62 | + */ | ||
63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, | ||
64 | + TCGv_i32 v) | ||
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
68 | + return; | ||
69 | + } | ||
70 | + | ||
71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
72 | + vec_full_reg_offset(s, mergereg), | ||
73 | + 16, vec_full_reg_size(s)); | ||
74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); | ||
75 | +} | ||
76 | + | ||
77 | +/* | ||
78 | + * Write a half-prec result: if FPCR.NEP is 0, clear the higher | ||
79 | + * elements of reg; otherwise set them from mergereg. | ||
80 | + * The caller must ensure that the top 16 bits of v are zero. | ||
81 | + */ | ||
82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, | ||
83 | + TCGv_i32 v) | ||
84 | +{ | ||
85 | + if (!s->fpcr_nep) { | ||
86 | + write_fp_sreg(s, reg, v); | ||
87 | + return; | ||
88 | + } | ||
89 | + | ||
90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
91 | + vec_full_reg_offset(s, mergereg), | ||
92 | + 16, vec_full_reg_size(s)); | ||
93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); | ||
94 | +} | ||
95 | + | ||
96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ | ||
97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, | ||
98 | GVecGen2Fn *gvec_fn, int vece) | ||
99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
100 | } FPScalar; | ||
101 | |||
102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
103 | - const FPScalar *f, | ||
104 | + const FPScalar *f, int mergereg, | ||
105 | ARMFPStatusFlavour fpsttype) | ||
106 | { | ||
107 | switch (a->esz) { | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
112 | - write_fp_dreg(s, a->rd, t0); | ||
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
153 | } | 114 | } |
154 | } | 115 | break; |
155 | 116 | case MO_32: | |
156 | if (event) { | 117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
157 | /* Raise GPIO pin first if repeating a key */ | 118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); |
158 | - if (!(keycode & KEY_RELEASED) && (s->pressed_keys & event)) { | 119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); |
159 | + if (key->down && (s->pressed_keys & event)) { | 120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); |
160 | for (i = 0; i <= 7; i++) { | 121 | - write_fp_sreg(s, a->rd, t0); |
161 | if (event & (1 << i)) { | 122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); |
162 | qemu_set_irq(s->out[i], 1); | ||
163 | @@ -XXX,XX +XXX,XX @@ static void musicpal_key_event(void *opaque, int keycode) | ||
164 | } | 123 | } |
165 | for (i = 0; i <= 7; i++) { | 124 | break; |
166 | if (event & (1 << i)) { | 125 | case MO_16: |
167 | - qemu_set_irq(s->out[i], !!(keycode & KEY_RELEASED)); | 126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
168 | + qemu_set_irq(s->out[i], !key->down); | 127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); |
169 | } | 128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); |
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
170 | } | 132 | } |
171 | - if (keycode & KEY_RELEASED) { | 133 | break; |
172 | - s->pressed_keys &= ~event; | 134 | default: |
173 | - } else { | 135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
174 | + if (key->down) { | 136 | return true; |
175 | s->pressed_keys |= event; | ||
176 | + } else { | ||
177 | + s->pressed_keys &= ~event; | ||
178 | } | ||
179 | } | ||
180 | - | ||
181 | - s->kbd_extended = 0; | ||
182 | } | 137 | } |
183 | 138 | ||
184 | static void musicpal_key_init(Object *obj) | 139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
185 | @@ -XXX,XX +XXX,XX @@ static void musicpal_key_init(Object *obj) | 140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
186 | DeviceState *dev = DEVICE(sbd); | 141 | + int mergereg) |
187 | musicpal_key_state *s = MUSICPAL_KEY(dev); | 142 | { |
188 | 143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | |
189 | - s->kbd_extended = 0; | 144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, |
190 | s->pressed_keys = 0; | 145 | a->esz == MO_16 ? |
191 | 146 | FPST_A64_F16 : FPST_A64); | |
192 | qdev_init_gpio_out(dev, s->out, ARRAY_SIZE(s->out)); | ||
193 | +} | ||
194 | |||
195 | - qemu_add_kbd_event_handler(musicpal_key_event, s); | ||
196 | +static const QemuInputHandler musicpal_key_handler = { | ||
197 | + .name = "musicpal_key", | ||
198 | + .mask = INPUT_EVENT_MASK_KEY, | ||
199 | + .event = musicpal_key_event, | ||
200 | +}; | ||
201 | + | ||
202 | +static void musicpal_key_realize(DeviceState *dev, Error **errp) | ||
203 | +{ | ||
204 | + qemu_input_handler_register(dev, &musicpal_key_handler); | ||
205 | } | 147 | } |
206 | 148 | ||
207 | static const VMStateDescription musicpal_key_vmsd = { | 149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
208 | .name = "musicpal_key", | 150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
209 | - .version_id = 1, | 151 | + int mergereg) |
210 | - .minimum_version_id = 1, | 152 | { |
211 | + .version_id = 2, | 153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); |
212 | + .minimum_version_id = 2, | 154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, |
213 | .fields = (const VMStateField[]) { | 155 | + select_ah_fpst(s, a->esz)); |
214 | - VMSTATE_UINT32(kbd_extended, musicpal_key_state), | ||
215 | VMSTATE_UINT32(pressed_keys, musicpal_key_state), | ||
216 | VMSTATE_END_OF_LIST() | ||
217 | } | ||
218 | @@ -XXX,XX +XXX,XX @@ static void musicpal_key_class_init(ObjectClass *klass, void *data) | ||
219 | DeviceClass *dc = DEVICE_CLASS(klass); | ||
220 | |||
221 | dc->vmsd = &musicpal_key_vmsd; | ||
222 | + dc->realize = musicpal_key_realize; | ||
223 | } | 156 | } |
224 | 157 | ||
225 | static const TypeInfo musicpal_key_info = { | 158 | static const FPScalar f_scalar_fadd = { |
159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { | ||
160 | gen_helper_vfp_adds, | ||
161 | gen_helper_vfp_addd, | ||
162 | }; | ||
163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) | ||
164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) | ||
165 | |||
166 | static const FPScalar f_scalar_fsub = { | ||
167 | gen_helper_vfp_subh, | ||
168 | gen_helper_vfp_subs, | ||
169 | gen_helper_vfp_subd, | ||
170 | }; | ||
171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) | ||
172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) | ||
173 | |||
174 | static const FPScalar f_scalar_fdiv = { | ||
175 | gen_helper_vfp_divh, | ||
176 | gen_helper_vfp_divs, | ||
177 | gen_helper_vfp_divd, | ||
178 | }; | ||
179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) | ||
180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) | ||
181 | |||
182 | static const FPScalar f_scalar_fmul = { | ||
183 | gen_helper_vfp_mulh, | ||
184 | gen_helper_vfp_muls, | ||
185 | gen_helper_vfp_muld, | ||
186 | }; | ||
187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) | ||
188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) | ||
189 | |||
190 | static const FPScalar f_scalar_fmax = { | ||
191 | gen_helper_vfp_maxh, | ||
192 | gen_helper_vfp_maxs, | ||
193 | gen_helper_vfp_maxd, | ||
194 | }; | ||
195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) | ||
196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
197 | |||
198 | static const FPScalar f_scalar_fmin = { | ||
199 | gen_helper_vfp_minh, | ||
200 | gen_helper_vfp_mins, | ||
201 | gen_helper_vfp_mind, | ||
202 | }; | ||
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
226 | -- | 306 | -- |
227 | 2.34.1 | 307 | 2.34.1 |
228 | |||
229 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Guenter Roeck <linux@roeck-us.net> | 1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works |
---|---|---|---|
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
2 | 8 | ||
3 | Allwinner R40 supports two USB host ports shared between a USB 2.0 EHCI | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | host controller and a USB 1.1 OHCI host controller. Add support for both | 10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | of them. | 11 | --- |
12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
13 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
6 | 14 | ||
7 | If machine USB support is not enabled, create unimplemented devices | 15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
8 | for the USB memory ranges to avoid crashes when booting Linux. | ||
9 | |||
10 | Signed-off-by: Guenter Roeck <linux@roeck-us.net> | ||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-id: 20240115182757.1095012-2-linux@roeck-us.net | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | docs/system/arm/bananapi_m2u.rst | 2 +- | ||
16 | include/hw/arm/allwinner-r40.h | 9 ++++++ | ||
17 | hw/arm/allwinner-r40.c | 47 ++++++++++++++++++++++++++++++-- | ||
18 | hw/arm/Kconfig | 2 ++ | ||
19 | 4 files changed, 57 insertions(+), 3 deletions(-) | ||
20 | |||
21 | diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst | ||
22 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/docs/system/arm/bananapi_m2u.rst | 17 | --- a/target/arm/tcg/translate-a64.c |
24 | +++ b/docs/system/arm/bananapi_m2u.rst | 18 | +++ b/target/arm/tcg/translate-a64.c |
25 | @@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices: | 19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { |
26 | * GMAC ethernet | ||
27 | * Clock Control Unit | ||
28 | * TWI (I2C) | ||
29 | + * USB 2.0 | ||
30 | |||
31 | Limitations | ||
32 | """"""""""" | ||
33 | @@ -XXX,XX +XXX,XX @@ Currently, Banana Pi M2U does *not* support the following features: | ||
34 | - Audio output | ||
35 | - Hardware Watchdog | ||
36 | - Real Time Clock | ||
37 | -- USB 2.0 interfaces | ||
38 | |||
39 | Also see the 'unimplemented' array in the Allwinner R40 SoC module | ||
40 | for a complete list of unimplemented I/O devices: ``./hw/arm/allwinner-r40.c`` | ||
41 | diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/include/hw/arm/allwinner-r40.h | ||
44 | +++ b/include/hw/arm/allwinner-r40.h | ||
45 | @@ -XXX,XX +XXX,XX @@ | ||
46 | #include "hw/i2c/allwinner-i2c.h" | ||
47 | #include "hw/net/allwinner_emac.h" | ||
48 | #include "hw/net/allwinner-sun8i-emac.h" | ||
49 | +#include "hw/usb/hcd-ohci.h" | ||
50 | +#include "hw/usb/hcd-ehci.h" | ||
51 | #include "target/arm/cpu.h" | ||
52 | #include "sysemu/block-backend.h" | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ enum { | ||
55 | AW_R40_DEV_MMC1, | ||
56 | AW_R40_DEV_MMC2, | ||
57 | AW_R40_DEV_MMC3, | ||
58 | + AW_R40_DEV_EHCI1, | ||
59 | + AW_R40_DEV_OHCI1, | ||
60 | + AW_R40_DEV_EHCI2, | ||
61 | + AW_R40_DEV_OHCI2, | ||
62 | AW_R40_DEV_CCU, | ||
63 | AW_R40_DEV_PIT, | ||
64 | AW_R40_DEV_UART0, | ||
65 | @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(AwR40State, AW_R40) | ||
66 | * which are currently emulated by the R40 SoC code. | ||
67 | */ | ||
68 | #define AW_R40_NUM_MMCS 4 | ||
69 | +#define AW_R40_NUM_USB 2 | ||
70 | #define AW_R40_NUM_UARTS 8 | ||
71 | |||
72 | struct AwR40State { | ||
73 | @@ -XXX,XX +XXX,XX @@ struct AwR40State { | ||
74 | AwSRAMCState sramc; | ||
75 | AwA10PITState timer; | ||
76 | AwSdHostState mmc[AW_R40_NUM_MMCS]; | ||
77 | + EHCISysBusState ehci[AW_R40_NUM_USB]; | ||
78 | + OHCISysBusState ohci[AW_R40_NUM_USB]; | ||
79 | AwR40ClockCtlState ccu; | ||
80 | AwR40DramCtlState dramc; | ||
81 | AWI2CState i2c0; | ||
82 | diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c | ||
83 | index XXXXXXX..XXXXXXX 100644 | ||
84 | --- a/hw/arm/allwinner-r40.c | ||
85 | +++ b/hw/arm/allwinner-r40.c | ||
86 | @@ -XXX,XX +XXX,XX @@ | ||
87 | #include "qemu/bswap.h" | ||
88 | #include "qemu/module.h" | ||
89 | #include "qemu/units.h" | ||
90 | +#include "hw/boards.h" | ||
91 | #include "hw/qdev-core.h" | ||
92 | #include "hw/sysbus.h" | ||
93 | #include "hw/char/serial.h" | ||
94 | @@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = { | ||
95 | [AW_R40_DEV_MMC1] = 0x01c10000, | ||
96 | [AW_R40_DEV_MMC2] = 0x01c11000, | ||
97 | [AW_R40_DEV_MMC3] = 0x01c12000, | ||
98 | + [AW_R40_DEV_EHCI1] = 0x01c19000, | ||
99 | + [AW_R40_DEV_OHCI1] = 0x01c19400, | ||
100 | + [AW_R40_DEV_EHCI2] = 0x01c1c000, | ||
101 | + [AW_R40_DEV_OHCI2] = 0x01c1c400, | ||
102 | [AW_R40_DEV_CCU] = 0x01c20000, | ||
103 | [AW_R40_DEV_PIT] = 0x01c20c00, | ||
104 | [AW_R40_DEV_UART0] = 0x01c28000, | ||
105 | @@ -XXX,XX +XXX,XX @@ static struct AwR40Unimplemented r40_unimplemented[] = { | ||
106 | { "crypto", 0x01c15000, 4 * KiB }, | ||
107 | { "spi2", 0x01c17000, 4 * KiB }, | ||
108 | { "sata", 0x01c18000, 4 * KiB }, | ||
109 | - { "usb1-host", 0x01c19000, 4 * KiB }, | ||
110 | + { "usb1-phy", 0x01c19800, 2 * KiB }, | ||
111 | { "sid", 0x01c1b000, 4 * KiB }, | ||
112 | - { "usb2-host", 0x01c1c000, 4 * KiB }, | ||
113 | + { "usb2-phy", 0x01c1c800, 2 * KiB }, | ||
114 | { "cs1", 0x01c1d000, 4 * KiB }, | ||
115 | { "spi3", 0x01c1f000, 4 * KiB }, | ||
116 | { "rtc", 0x01c20400, 1 * KiB }, | ||
117 | @@ -XXX,XX +XXX,XX @@ enum { | ||
118 | AW_R40_GIC_SPI_MMC2 = 34, | ||
119 | AW_R40_GIC_SPI_MMC3 = 35, | ||
120 | AW_R40_GIC_SPI_EMAC = 55, | ||
121 | + AW_R40_GIC_SPI_OHCI1 = 64, | ||
122 | + AW_R40_GIC_SPI_OHCI2 = 65, | ||
123 | + AW_R40_GIC_SPI_EHCI1 = 76, | ||
124 | + AW_R40_GIC_SPI_EHCI2 = 78, | ||
125 | AW_R40_GIC_SPI_GMAC = 85, | ||
126 | }; | 20 | }; |
127 | 21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | |
128 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj) | 22 | |
129 | TYPE_AW_SDHOST_SUN50I_A64); | 23 | -static const FPScalar1 f_scalar_bfcvt = { |
130 | } | 24 | - .gen_s = gen_helper_bfcvt, |
131 | 25 | -}; | |
132 | + for (size_t i = 0; i < AW_R40_NUM_USB; i++) { | 26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) |
133 | + object_initialize_child(obj, "ehci[*]", &s->ehci[i], | 27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) |
134 | + TYPE_PLATFORM_EHCI); | 28 | +{ |
135 | + object_initialize_child(obj, "ohci[*]", &s->ohci[i], | 29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; |
136 | + TYPE_SYSBUS_OHCI); | 30 | + TCGv_i32 t32; |
31 | + int check; | ||
32 | + | ||
33 | + if (!dc_isar_feature(aa64_bf16, s)) { | ||
34 | + return false; | ||
137 | + } | 35 | + } |
138 | + | 36 | + |
139 | object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C_SUN6I); | 37 | + check = fp_access_check_scalar_hsd(s, a->esz); |
140 | |||
141 | object_initialize_child(obj, "emac", &s->emac, TYPE_AW_EMAC); | ||
142 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp) | ||
143 | sysbus_realize(SYS_BUS_DEVICE(&s->ccu), &error_fatal); | ||
144 | sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccu), 0, s->memmap[AW_R40_DEV_CCU]); | ||
145 | |||
146 | + /* USB */ | ||
147 | + for (size_t i = 0; i < AW_R40_NUM_USB; i++) { | ||
148 | + g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i); | ||
149 | + | 38 | + |
150 | + object_property_set_bool(OBJECT(&s->ehci[i]), "companion-enable", true, | 39 | + if (check <= 0) { |
151 | + &error_fatal); | 40 | + return check == 0; |
152 | + sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), &error_fatal); | ||
153 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0, | ||
154 | + allwinner_r40_memmap[i ? AW_R40_DEV_EHCI2 | ||
155 | + : AW_R40_DEV_EHCI1]); | ||
156 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0, | ||
157 | + qdev_get_gpio_in(DEVICE(&s->gic), | ||
158 | + i ? AW_R40_GIC_SPI_EHCI2 | ||
159 | + : AW_R40_GIC_SPI_EHCI1)); | ||
160 | + | ||
161 | + object_property_set_str(OBJECT(&s->ohci[i]), "masterbus", bus, | ||
162 | + &error_fatal); | ||
163 | + sysbus_realize(SYS_BUS_DEVICE(&s->ohci[i]), &error_fatal); | ||
164 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0, | ||
165 | + allwinner_r40_memmap[i ? AW_R40_DEV_OHCI2 | ||
166 | + : AW_R40_DEV_OHCI1]); | ||
167 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0, | ||
168 | + qdev_get_gpio_in(DEVICE(&s->gic), | ||
169 | + i ? AW_R40_GIC_SPI_OHCI2 | ||
170 | + : AW_R40_GIC_SPI_OHCI1)); | ||
171 | + } | 41 | + } |
172 | + | 42 | + |
173 | /* SD/MMC */ | 43 | + t32 = read_fp_sreg(s, a->rn); |
174 | for (int i = 0; i < AW_R40_NUM_MMCS; i++) { | 44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); |
175 | qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->gic), | 45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); |
176 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | 46 | + return true; |
177 | index XXXXXXX..XXXXXXX 100644 | 47 | +} |
178 | --- a/hw/arm/Kconfig | 48 | |
179 | +++ b/hw/arm/Kconfig | 49 | static const FPScalar1 f_scalar_frint32 = { |
180 | @@ -XXX,XX +XXX,XX @@ config ALLWINNER_R40 | 50 | NULL, |
181 | select ARM_TIMER | ||
182 | select ARM_GIC | ||
183 | select UNIMP | ||
184 | + select USB_OHCI | ||
185 | + select USB_EHCI_SYSBUS | ||
186 | select SD | ||
187 | |||
188 | config RASPI | ||
189 | -- | 51 | -- |
190 | 2.34.1 | 52 | 2.34.1 |
191 | |||
192 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 1-input scalar operations. | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ | ||
7 | 1 file changed, 14 insertions(+), 12 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
14 | case MO_64: | ||
15 | t64 = read_fp_dreg(s, a->rn); | ||
16 | f->gen_d(t64, t64, fpst); | ||
17 | - write_fp_dreg(s, a->rd, t64); | ||
18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); | ||
19 | break; | ||
20 | case MO_32: | ||
21 | t32 = read_fp_sreg(s, a->rn); | ||
22 | f->gen_s(t32, t32, fpst); | ||
23 | - write_fp_sreg(s, a->rd, t32); | ||
24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); | ||
25 | break; | ||
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
99 | + } | ||
100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); | ||
101 | return true; | ||
102 | } | ||
103 | -- | ||
104 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
7 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
14 | } else { | ||
15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); | ||
16 | } | ||
17 | - write_fp_dreg(s, rd, tcg_double); | ||
18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); | ||
19 | break; | ||
20 | |||
21 | case MO_32: | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
40 | -- | ||
41 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) | ||
2 | is always double-to-single and must honour FPCR.NEP. Implement this | ||
3 | directly in a trans function rather than using | ||
4 | do_2misc_narrow_scalar(). | ||
1 | 5 | ||
6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for | ||
7 | the FCVTXN (vector) insn, so we move those down in the file to | ||
8 | where they are used. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ | ||
14 | 1 file changed, 28 insertions(+), 15 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/tcg/translate-a64.c | ||
19 | +++ b/target/arm/tcg/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { | ||
21 | }; | ||
22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) | ||
23 | |||
24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | ||
26 | { | ||
27 | - /* | ||
28 | - * 64 bit to 32 bit float conversion | ||
29 | - * with von Neumann rounding (round to odd) | ||
30 | - */ | ||
31 | - TCGv_i32 tmp = tcg_temp_new_i32(); | ||
32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
33 | - tcg_gen_extu_i32_i64(d, tmp); | ||
34 | + if (fp_access_check(s)) { | ||
35 | + /* | ||
36 | + * 64 bit to 32 bit float conversion | ||
37 | + * with von Neumann rounding (round to odd) | ||
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
45 | } | ||
46 | |||
47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
48 | - NULL, | ||
49 | - NULL, | ||
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
53 | - | ||
54 | #undef WRAP_ENV | ||
55 | |||
56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) | ||
57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | ||
58 | tcg_gen_extu_i32_i64(d, tmp); | ||
59 | } | ||
60 | |||
61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
62 | +{ | ||
63 | + /* | ||
64 | + * 64 bit to 32 bit float conversion | ||
65 | + * with von Neumann rounding (round to odd) | ||
66 | + */ | ||
67 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
69 | + tcg_gen_extu_i32_i64(d, tmp); | ||
70 | +} | ||
71 | + | ||
72 | static ArithOneOp * const f_vector_fcvtn[] = { | ||
73 | NULL, | ||
74 | gen_fcvtn_hs, | ||
75 | gen_fcvtn_sd, | ||
76 | }; | ||
77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
78 | + NULL, | ||
79 | + NULL, | ||
80 | + gen_fcvtxn_sd, | ||
81 | +}; | ||
82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) | ||
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
84 | |||
85 | -- | ||
86 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by | ||
2 | element instructions; these both need to merge the result with the Rn | ||
3 | register when FPCR.NEP is set. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
16 | |||
17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | ||
18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
25 | |||
26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); | ||
27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
34 | |||
35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | ||
36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | -- | ||
43 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special | ||
2 | cases: | ||
1 | 3 | ||
4 | * comparing two zeroes (even of different sign) or comparing a NaN | ||
5 | with anything always returns the second argument (possibly | ||
6 | squashed to zero) | ||
7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 | ||
8 | |||
9 | Implement these semantics in new helper functions, and for the scalar | ||
10 | FMAX and FMIN insns select them at translate time if FPCR.AH is 1. | ||
11 | (We will convert the other FMAX and FMIN insns in subsequent | ||
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
15 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | target/arm/tcg/helper-a64.h | 7 +++++++ | ||
20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ | ||
21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- | ||
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/tcg/helper-a64.h | ||
27 | +++ b/target/arm/tcg/helper-a64.h | ||
28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) | ||
29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) | ||
30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) | ||
31 | |||
32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) | ||
33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) | ||
34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) | ||
35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) | ||
36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) | ||
37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | ||
38 | + | ||
39 | DEF_HELPER_2(exception_return, void, env, i64) | ||
40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) | ||
41 | |||
42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/helper-a64.c | ||
45 | +++ b/target/arm/tcg/helper-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) | ||
47 | return r; | ||
48 | } | ||
49 | |||
50 | +/* | ||
51 | + * AH=1 min/max have some odd special cases: | ||
52 | + * comparing two zeroes (regardless of sign), (NaN, anything), | ||
53 | + * or (anything, NaN) should return the second argument (possibly | ||
54 | + * squashed to zero). | ||
55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. | ||
56 | + */ | ||
57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ | ||
58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
59 | + { \ | ||
60 | + bool save; \ | ||
61 | + CTYPE r; \ | ||
62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ | ||
65 | + return b; \ | ||
66 | + } \ | ||
67 | + if (FLOATTYPE ## _is_any_nan(a) || \ | ||
68 | + FLOATTYPE ## _is_any_nan(b)) { \ | ||
69 | + float_raise(float_flag_invalid, fpst); \ | ||
70 | + return b; \ | ||
71 | + } \ | ||
72 | + save = get_flush_to_zero(fpst); \ | ||
73 | + set_flush_to_zero(false, fpst); \ | ||
74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ | ||
75 | + set_flush_to_zero(save, fpst); \ | ||
76 | + return r; \ | ||
77 | + } | ||
78 | + | ||
79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) | ||
80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) | ||
81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) | ||
82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) | ||
83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) | ||
84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) | ||
85 | + | ||
86 | /* 64-bit versions of the CRC helpers. Note that although the operation | ||
87 | * (and the prototypes of crc32c() and crc32() mean that only the bottom | ||
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
94 | select_ah_fpst(s, a->esz)); | ||
95 | } | ||
96 | |||
97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, | ||
99 | + const FPScalar *fnormal, | ||
100 | + const FPScalar *fah, | ||
101 | + int mergereg) | ||
102 | +{ | ||
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
104 | +} | ||
105 | + | ||
106 | static const FPScalar f_scalar_fadd = { | ||
107 | gen_helper_vfp_addh, | ||
108 | gen_helper_vfp_adds, | ||
109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { | ||
110 | gen_helper_vfp_maxs, | ||
111 | gen_helper_vfp_maxd, | ||
112 | }; | ||
113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
114 | +static const FPScalar f_scalar_fmax_ah = { | ||
115 | + gen_helper_vfp_ah_maxh, | ||
116 | + gen_helper_vfp_ah_maxs, | ||
117 | + gen_helper_vfp_ah_maxd, | ||
118 | +}; | ||
119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) | ||
120 | |||
121 | static const FPScalar f_scalar_fmin = { | ||
122 | gen_helper_vfp_minh, | ||
123 | gen_helper_vfp_mins, | ||
124 | gen_helper_vfp_mind, | ||
125 | }; | ||
126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
127 | +static const FPScalar f_scalar_fmin_ah = { | ||
128 | + gen_helper_vfp_ah_minh, | ||
129 | + gen_helper_vfp_ah_mins, | ||
130 | + gen_helper_vfp_ah_mind, | ||
131 | +}; | ||
132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fmaxnm = { | ||
135 | gen_helper_vfp_maxnumh, | ||
136 | -- | ||
137 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by | ||
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar vfp_ah_min* and vfp_ah_max* helpers on each element. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- | ||
10 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
11 | 3 files changed, 41 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/helper-sve.h | ||
16 | +++ b/target/arm/tcg/helper-sve.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
19 | void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
22 | + void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
24 | + void, ptr, ptr, ptr, fpst, i32) | ||
25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, fpst, i32) | ||
27 | + | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, | ||
29 | + void, ptr, ptr, ptr, fpst, i32) | ||
30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
31 | + void, ptr, ptr, ptr, fpst, i32) | ||
32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, fpst, i32) | ||
34 | + | ||
35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
36 | i64, ptr, ptr, fpst, i32) | ||
37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/tcg/translate-a64.c | ||
41 | +++ b/target/arm/tcg/translate-a64.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
43 | FPST_A64_F16 : FPST_A64); | ||
44 | } | ||
45 | |||
46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
47 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
48 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
49 | +{ | ||
50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
51 | +} | ||
52 | + | ||
53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
54 | gen_helper_gvec_3_ptr * const f[3]) | ||
55 | { | ||
56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { | ||
57 | gen_helper_gvec_fmax_s, | ||
58 | gen_helper_gvec_fmax_d, | ||
59 | }; | ||
60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) | ||
61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { | ||
62 | + gen_helper_gvec_ah_fmax_h, | ||
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
101 | |||
102 | -- | ||
103 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the AdvSIMD FMAXV and FMINV insns. | ||
2 | These are the "recursively reduce all lanes of a vector to a scalar | ||
3 | result" insns; we just need to use the _ah_ helper for the reduction | ||
4 | step when FPCR.AH == 1. | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- | ||
10 | 1 file changed, 18 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/translate-a64.c | ||
15 | +++ b/target/arm/tcg/translate-a64.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, | ||
17 | } | ||
18 | |||
19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | ||
20 | - NeonGenTwoSingleOpFn *fn) | ||
21 | + NeonGenTwoSingleOpFn *fnormal, | ||
22 | + NeonGenTwoSingleOpFn *fah) | ||
23 | { | ||
24 | if (fp_access_check(s)) { | ||
25 | MemOp esz = a->esz; | ||
26 | int elts = (a->q ? 16 : 8) >> esz; | ||
27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); | ||
29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | ||
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) | ||
37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) | ||
38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) | ||
39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) | ||
40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, | ||
41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) | ||
42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, | ||
43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) | ||
44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, | ||
45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) | ||
46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, | ||
47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) | ||
48 | |||
49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) | ||
50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) | ||
51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) | ||
52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) | ||
53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, | ||
54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) | ||
55 | +TRANS(FMINNMV_s, do_fp_reduction, a, | ||
56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) | ||
57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) | ||
58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) | ||
59 | |||
60 | /* | ||
61 | * Floating-point Immediate | ||
62 | -- | ||
63 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the pairwise floating | ||
2 | point minimum/maximum insns FMINP and FMAXP. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ | ||
10 | 3 files changed, 45 insertions(+), 4 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { | ||
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, | ||
73 | + const FPScalar *fnormal, | ||
74 | + const FPScalar *fah) | ||
75 | +{ | ||
76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); | ||
77 | +} | ||
78 | + | ||
79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) | ||
80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) | ||
81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) | ||
82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) | ||
83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) | ||
84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) | ||
85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) | ||
86 | |||
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
99 | + | ||
100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) | ||
101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) | ||
102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) | ||
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
108 | -- | ||
109 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV | ||
2 | vector-reduction-to-scalar max/min operations. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, | ||
18 | i64, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, | ||
21 | + i64, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, | ||
23 | + i64, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, | ||
25 | + i64, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, | ||
28 | + i64, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | ||
30 | + i64, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, | ||
32 | + i64, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ | ||
42 | uintptr_t half = n / 2; \ | ||
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
52 | } | ||
53 | |||
54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) | ||
58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) | ||
59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) | ||
60 | |||
61 | /* Identity is floatN_default_nan, without the function call. */ | ||
62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) | ||
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
133 | -- | ||
134 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Rayhan Faizel <rayhan.faizel@gmail.com> | 1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip |
---|---|---|---|
2 | 2 | its sign bit. This means we can no longer use gen_vfp_neg*() | |
3 | This patch implements a 32 half word FIFO as per imx serial device | 3 | everywhere but must instead generate slightly more complex code when |
4 | specifications. If a non empty FIFO is below the trigger level, an | 4 | FPCR.AH is set. |
5 | ageing timer will tick for a duration of 8 characters. On expiry, | 5 | |
6 | AGTIM will be set triggering an interrupt. AGTIM timer resets when | 6 | Make this change for the scalar FNEG and for those places in |
7 | there is activity in the receive FIFO. | 7 | translate-a64.c which were previously directly calling |
8 | 8 | gen_vfp_neg*(). | |
9 | Otherwise, RRDY is set when trigger level is exceeded. The receive | 9 | |
10 | trigger level is 8 in newer kernel versions and 1 in older ones. | 10 | This change in semantics also affects any other instruction whose |
11 | 11 | pseudocode calls FPNeg(); in following commits we extend this | |
12 | This change will break migration compatibility for the imx boards. | 12 | change to the other affected instructions. |
13 | 13 | ||
14 | Signed-off-by: Rayhan Faizel <rayhan.faizel@gmail.com> | ||
15 | Message-id: 20240125151931.83494-1-rayhan.faizel@gmail.com | ||
16 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | [PMM: commit message tidyups] | ||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | --- | 16 | --- |
20 | include/hw/char/imx_serial.h | 20 ++++++- | 17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- |
21 | hw/char/imx_serial.c | 102 ++++++++++++++++++++++++++++++----- | 18 | 1 file changed, 114 insertions(+), 11 deletions(-) |
22 | 2 files changed, 108 insertions(+), 14 deletions(-) | 19 | |
23 | 20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | |
24 | diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/include/hw/char/imx_serial.h | 22 | --- a/target/arm/tcg/translate-a64.c |
27 | +++ b/include/hw/char/imx_serial.h | 23 | +++ b/target/arm/tcg/translate-a64.c |
28 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, |
29 | #include "hw/sysbus.h" | 25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); |
30 | #include "chardev/char-fe.h" | ||
31 | #include "qom/object.h" | ||
32 | +#include "qemu/fifo32.h" | ||
33 | |||
34 | #define TYPE_IMX_SERIAL "imx.serial" | ||
35 | OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) | ||
36 | |||
37 | +#define FIFO_SIZE 32 | ||
38 | + | ||
39 | #define URXD_CHARRDY (1<<15) /* character read is valid */ | ||
40 | #define URXD_ERR (1<<14) /* Character has error */ | ||
41 | +#define URXD_OVRRUN (1<<13) /* 32nd character in RX FIFO */ | ||
42 | #define URXD_FRMERR (1<<12) /* Character has frame error */ | ||
43 | #define URXD_BRK (1<<11) /* Break received */ | ||
44 | |||
45 | @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) | ||
46 | #define UCR1_TXMPTYEN (1<<6) /* Tx Empty Interrupt Enable */ | ||
47 | #define UCR1_UARTEN (1<<0) /* UART Enable */ | ||
48 | |||
49 | +#define UCR2_ATEN (1<<3) /* Ageing Timer Enable */ | ||
50 | #define UCR2_TXEN (1<<2) /* Transmitter enable */ | ||
51 | #define UCR2_RXEN (1<<1) /* Receiver enable */ | ||
52 | #define UCR2_SRST (1<<0) /* Reset complete */ | ||
53 | |||
54 | #define UCR4_DREN BIT(0) /* Receive Data Ready interrupt enable */ | ||
55 | +#define UCR4_OREN BIT(1) /* Overrun interrupt enable */ | ||
56 | #define UCR4_TCEN BIT(3) /* TX complete interrupt enable */ | ||
57 | #define UCR4_WKEN BIT(7) /* WAKE interrupt enable */ | ||
58 | |||
59 | @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) | ||
60 | #define UTS1_TXFULL (1<<4) | ||
61 | #define UTS1_RXFULL (1<<3) | ||
62 | |||
63 | +#define TL_MASK 0x3f | ||
64 | + | ||
65 | + /* Bit time in nanoseconds assuming maximum baud rate of 115200 */ | ||
66 | +#define BIT_TIME_NS 8681 | ||
67 | + | ||
68 | +/* Assume 8 bits per character */ | ||
69 | +#define NUM_BITS 8 | ||
70 | + | ||
71 | +/* Ageing timer triggers after 8 characters */ | ||
72 | +#define AGE_DURATION_NS (8 * NUM_BITS * BIT_TIME_NS) | ||
73 | + | ||
74 | struct IMXSerialState { | ||
75 | /*< private >*/ | ||
76 | SysBusDevice parent_obj; | ||
77 | |||
78 | /*< public >*/ | ||
79 | MemoryRegion iomem; | ||
80 | - int32_t readbuff; | ||
81 | + QEMUTimer ageing_timer; | ||
82 | + Fifo32 rx_fifo; | ||
83 | |||
84 | uint32_t usr1; | ||
85 | uint32_t usr2; | ||
86 | diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/hw/char/imx_serial.c | ||
89 | +++ b/hw/char/imx_serial.c | ||
90 | @@ -XXX,XX +XXX,XX @@ | ||
91 | #include "migration/vmstate.h" | ||
92 | #include "qemu/log.h" | ||
93 | #include "qemu/module.h" | ||
94 | +#include "qemu/fifo32.h" | ||
95 | |||
96 | #ifndef DEBUG_IMX_UART | ||
97 | #define DEBUG_IMX_UART 0 | ||
98 | @@ -XXX,XX +XXX,XX @@ | ||
99 | |||
100 | static const VMStateDescription vmstate_imx_serial = { | ||
101 | .name = TYPE_IMX_SERIAL, | ||
102 | - .version_id = 2, | ||
103 | - .minimum_version_id = 2, | ||
104 | + .version_id = 3, | ||
105 | + .minimum_version_id = 3, | ||
106 | .fields = (const VMStateField[]) { | ||
107 | - VMSTATE_INT32(readbuff, IMXSerialState), | ||
108 | + VMSTATE_FIFO32(rx_fifo, IMXSerialState), | ||
109 | + VMSTATE_TIMER(ageing_timer, IMXSerialState), | ||
110 | VMSTATE_UINT32(usr1, IMXSerialState), | ||
111 | VMSTATE_UINT32(usr2, IMXSerialState), | ||
112 | VMSTATE_UINT32(ucr1, IMXSerialState), | ||
113 | @@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s) | ||
114 | * following: | ||
115 | */ | ||
116 | usr1 = s->usr1 & s->ucr1 & (USR1_TRDY | USR1_RRDY); | ||
117 | + /* | ||
118 | + * Interrupt if AGTIM is set (ageing timer interrupt in RxFIFO) | ||
119 | + */ | ||
120 | + usr1 |= (s->ucr2 & UCR2_ATEN) ? (s->usr1 & USR1_AGTIM) : 0; | ||
121 | /* | ||
122 | * Bits that we want in USR2 are not as conveniently laid out, | ||
123 | * unfortunately. | ||
124 | @@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s) | ||
125 | mask = (s->ucr1 & UCR1_TXMPTYEN) ? USR2_TXFE : 0; | ||
126 | /* | ||
127 | * TCEN and TXDC are both bit 3 | ||
128 | + * ORE and OREN are both bit 1 | ||
129 | * RDR and DREN are both bit 0 | ||
130 | */ | ||
131 | - mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN); | ||
132 | + mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN | UCR4_OREN); | ||
133 | |||
134 | usr2 = s->usr2 & mask; | ||
135 | |||
136 | qemu_set_irq(s->irq, usr1 || usr2); | ||
137 | } | 26 | } |
138 | 27 | ||
139 | +static void imx_serial_rx_fifo_push(IMXSerialState *s, uint32_t value) | 28 | +/* |
140 | +{ | 29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. |
141 | + uint32_t pushed_value = value; | 30 | + * These functions implement |
142 | + if (fifo32_is_full(&s->rx_fifo)) { | 31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) |
143 | + /* Set ORE if FIFO is already full */ | 32 | + * which for float32 is |
144 | + s->usr2 |= USR2_ORE; | 33 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
34 | + * and similarly for the other float sizes. | ||
35 | + */ | ||
36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) | ||
37 | +{ | ||
38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
39 | + | ||
40 | + gen_vfp_negh(chs_s, s); | ||
41 | + gen_vfp_absh(abs_s, s); | ||
42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
43 | + abs_s, tcg_constant_i32(0x7c00), | ||
44 | + s, chs_s); | ||
45 | +} | ||
46 | + | ||
47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) | ||
48 | +{ | ||
49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
50 | + | ||
51 | + gen_vfp_negs(chs_s, s); | ||
52 | + gen_vfp_abss(abs_s, s); | ||
53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
54 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
55 | + s, chs_s); | ||
56 | +} | ||
57 | + | ||
58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
59 | +{ | ||
60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); | ||
61 | + | ||
62 | + gen_vfp_negd(chs_s, s); | ||
63 | + gen_vfp_absd(abs_s, s); | ||
64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
66 | + s, chs_s); | ||
67 | +} | ||
68 | + | ||
69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
70 | +{ | ||
71 | + if (dc->fpcr_ah) { | ||
72 | + gen_vfp_ah_negh(d, s); | ||
145 | + } else { | 73 | + } else { |
146 | + if (fifo32_num_used(&s->rx_fifo) == FIFO_SIZE - 1) { | 74 | + gen_vfp_negh(d, s); |
147 | + /* Set OVRRUN on 32nd character in FIFO */ | ||
148 | + pushed_value |= URXD_ERR | URXD_OVRRUN; | ||
149 | + } | ||
150 | + fifo32_push(&s->rx_fifo, pushed_value); | ||
151 | + } | 75 | + } |
152 | +} | 76 | +} |
153 | + | 77 | + |
154 | +static uint32_t imx_serial_rx_fifo_pop(IMXSerialState *s) | 78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
155 | +{ | 79 | +{ |
156 | + if (fifo32_is_empty(&s->rx_fifo)) { | 80 | + if (dc->fpcr_ah) { |
157 | + return 0; | 81 | + gen_vfp_ah_negs(d, s); |
82 | + } else { | ||
83 | + gen_vfp_negs(d, s); | ||
158 | + } | 84 | + } |
159 | + return fifo32_pop(&s->rx_fifo); | 85 | +} |
160 | +} | 86 | + |
161 | + | 87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) |
162 | +static void imx_serial_rx_fifo_ageing_timer_int(void *opaque) | 88 | +{ |
163 | +{ | 89 | + if (dc->fpcr_ah) { |
164 | + IMXSerialState *s = (IMXSerialState *) opaque; | 90 | + gen_vfp_ah_negd(d, s); |
165 | + s->usr1 |= USR1_AGTIM; | ||
166 | + imx_update(s); | ||
167 | +} | ||
168 | + | ||
169 | +static void imx_serial_rx_fifo_ageing_timer_restart(void *opaque) | ||
170 | +{ | ||
171 | + /* | ||
172 | + * Ageing timer starts ticking when | ||
173 | + * RX FIFO is non empty and below trigger level. | ||
174 | + * Timer is reset if new character is received or | ||
175 | + * a FIFO read occurs. | ||
176 | + * Timer triggers an interrupt when duration of | ||
177 | + * 8 characters has passed (assuming 115200 baudrate). | ||
178 | + */ | ||
179 | + IMXSerialState *s = (IMXSerialState *) opaque; | ||
180 | + | ||
181 | + if (!(s->usr1 & USR1_RRDY) && !(s->uts1 & UTS1_RXEMPTY)) { | ||
182 | + timer_mod_ns(&s->ageing_timer, | ||
183 | + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + AGE_DURATION_NS); | ||
184 | + } else { | 91 | + } else { |
185 | + timer_del(&s->ageing_timer); | 92 | + gen_vfp_negd(d, s); |
186 | + } | 93 | + } |
187 | +} | 94 | +} |
188 | + | 95 | + |
189 | static void imx_serial_reset(IMXSerialState *s) | 96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier |
190 | { | 97 | * than the 32 bit equivalent. |
191 | 98 | */ | |
192 | @@ -XXX,XX +XXX,XX @@ static void imx_serial_reset(IMXSerialState *s) | 99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
193 | s->ucr3 = 0x700; | 100 | gen_vfp_negd(d, d); |
194 | s->ubmr = 0; | ||
195 | s->ubrc = 4; | ||
196 | - s->readbuff = URXD_ERR; | ||
197 | + | ||
198 | + fifo32_reset(&s->rx_fifo); | ||
199 | + timer_del(&s->ageing_timer); | ||
200 | } | 101 | } |
201 | 102 | ||
202 | static void imx_serial_reset_at_boot(DeviceState *dev) | 103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
203 | @@ -XXX,XX +XXX,XX @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset, | 104 | +{ |
204 | unsigned size) | 105 | + gen_helper_vfp_mulh(d, n, m, s); |
205 | { | 106 | + gen_vfp_ah_negh(d, d); |
206 | IMXSerialState *s = (IMXSerialState *)opaque; | 107 | +} |
207 | - uint32_t c; | 108 | + |
208 | + uint32_t c, rx_used; | 109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
209 | + uint8_t rxtl = s->ufcr & TL_MASK; | 110 | +{ |
210 | 111 | + gen_helper_vfp_muls(d, n, m, s); | |
211 | DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset); | 112 | + gen_vfp_ah_negs(d, d); |
212 | 113 | +} | |
213 | switch (offset >> 2) { | 114 | + |
214 | case 0x0: /* URXD */ | 115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
215 | - c = s->readbuff; | 116 | +{ |
216 | + c = imx_serial_rx_fifo_pop(s); | 117 | + gen_helper_vfp_muld(d, n, m, s); |
217 | if (!(s->uts1 & UTS1_RXEMPTY)) { | 118 | + gen_vfp_ah_negd(d, d); |
218 | /* Character is valid */ | 119 | +} |
219 | c |= URXD_CHARRDY; | 120 | + |
220 | - s->usr1 &= ~USR1_RRDY; | 121 | static const FPScalar f_scalar_fnmul = { |
221 | - s->usr2 &= ~USR2_RDR; | 122 | gen_fnmul_h, |
222 | - s->uts1 |= UTS1_RXEMPTY; | 123 | gen_fnmul_s, |
223 | + rx_used = fifo32_num_used(&s->rx_fifo); | 124 | gen_fnmul_d, |
224 | + /* Clear RRDY if below threshold */ | 125 | }; |
225 | + if (rx_used < rxtl) { | 126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) |
226 | + s->usr1 &= ~USR1_RRDY; | 127 | +static const FPScalar f_scalar_ah_fnmul = { |
227 | + } | 128 | + gen_fnmul_ah_h, |
228 | + if (rx_used == 0) { | 129 | + gen_fnmul_ah_s, |
229 | + s->usr2 &= ~USR2_RDR; | 130 | + gen_fnmul_ah_d, |
230 | + s->uts1 |= UTS1_RXEMPTY; | 131 | +}; |
231 | + } | 132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) |
232 | imx_update(s); | 133 | |
233 | + imx_serial_rx_fifo_ageing_timer_restart(s); | 134 | static const FPScalar f_scalar_fcmeq = { |
234 | qemu_chr_fe_accept_input(&s->chr); | 135 | gen_helper_advsimd_ceq_f16, |
235 | } | 136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
236 | return c; | 137 | |
237 | @@ -XXX,XX +XXX,XX @@ static void imx_serial_write(void *opaque, hwaddr offset, | 138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); |
238 | static int imx_can_receive(void *opaque) | 139 | if (neg) { |
239 | { | 140 | - gen_vfp_negd(t1, t1); |
240 | IMXSerialState *s = (IMXSerialState *)opaque; | 141 | + gen_vfp_maybe_ah_negd(s, t1, t1); |
241 | - return !(s->usr1 & USR1_RRDY); | 142 | } |
242 | + return s->ucr2 & UCR2_RXEN && fifo32_num_used(&s->rx_fifo) < FIFO_SIZE; | 143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); |
144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
146 | |||
147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); | ||
148 | if (neg) { | ||
149 | - gen_vfp_negs(t1, t1); | ||
150 | + gen_vfp_maybe_ah_negs(s, t1, t1); | ||
151 | } | ||
152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
155 | |||
156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); | ||
157 | if (neg) { | ||
158 | - gen_vfp_negh(t1, t1); | ||
159 | + gen_vfp_maybe_ah_negh(s, t1, t1); | ||
160 | } | ||
161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
162 | fpstatus_ptr(FPST_A64_F16)); | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); | ||
165 | |||
166 | if (neg_a) { | ||
167 | - gen_vfp_negd(ta, ta); | ||
168 | + gen_vfp_maybe_ah_negd(s, ta, ta); | ||
169 | } | ||
170 | if (neg_n) { | ||
171 | - gen_vfp_negd(tn, tn); | ||
172 | + gen_vfp_maybe_ah_negd(s, tn, tn); | ||
173 | } | ||
174 | fpst = fpstatus_ptr(FPST_A64); | ||
175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); | ||
178 | |||
179 | if (neg_a) { | ||
180 | - gen_vfp_negs(ta, ta); | ||
181 | + gen_vfp_maybe_ah_negs(s, ta, ta); | ||
182 | } | ||
183 | if (neg_n) { | ||
184 | - gen_vfp_negs(tn, tn); | ||
185 | + gen_vfp_maybe_ah_negs(s, tn, tn); | ||
186 | } | ||
187 | fpst = fpstatus_ptr(FPST_A64); | ||
188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); | ||
191 | |||
192 | if (neg_a) { | ||
193 | - gen_vfp_negh(ta, ta); | ||
194 | + gen_vfp_maybe_ah_negh(s, ta, ta); | ||
195 | } | ||
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
203 | return true; | ||
243 | } | 204 | } |
244 | 205 | ||
245 | static void imx_put_data(void *opaque, uint32_t value) | 206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, |
246 | { | 207 | + const FPScalar1Int *fnormal, |
247 | IMXSerialState *s = (IMXSerialState *)opaque; | 208 | + const FPScalar1Int *fah) |
248 | + uint8_t rxtl = s->ufcr & TL_MASK; | 209 | +{ |
249 | 210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); | |
250 | DPRINTF("received char\n"); | 211 | +} |
251 | + imx_serial_rx_fifo_push(s, value); | 212 | + |
252 | + if (fifo32_num_used(&s->rx_fifo) >= rxtl) { | 213 | static const FPScalar1Int f_scalar_fmov = { |
253 | + s->usr1 |= USR1_RRDY; | 214 | tcg_gen_mov_i32, |
254 | + } | 215 | tcg_gen_mov_i32, |
255 | + | 216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { |
256 | + imx_serial_rx_fifo_ageing_timer_restart(s); | 217 | gen_vfp_negs, |
257 | 218 | gen_vfp_negd, | |
258 | - s->usr1 |= USR1_RRDY; | 219 | }; |
259 | s->usr2 |= USR2_RDR; | 220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) |
260 | s->uts1 &= ~UTS1_RXEMPTY; | 221 | +static const FPScalar1Int f_scalar_ah_fneg = { |
261 | - s->readbuff = value; | 222 | + gen_vfp_ah_negh, |
262 | if (value & URXD_BRK) { | 223 | + gen_vfp_ah_negs, |
263 | s->usr2 |= USR2_BRCD; | 224 | + gen_vfp_ah_negd, |
264 | } | 225 | +}; |
265 | @@ -XXX,XX +XXX,XX @@ static void imx_serial_realize(DeviceState *dev, Error **errp) | 226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) |
266 | { | 227 | |
267 | IMXSerialState *s = IMX_SERIAL(dev); | 228 | typedef struct FPScalar1 { |
268 | 229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | |
269 | + fifo32_create(&s->rx_fifo, FIFO_SIZE); | ||
270 | + timer_init_ns(&s->ageing_timer, QEMU_CLOCK_VIRTUAL, | ||
271 | + imx_serial_rx_fifo_ageing_timer_int, s); | ||
272 | + | ||
273 | DPRINTF("char dev for uart: %p\n", qemu_chr_fe_get_driver(&s->chr)); | ||
274 | |||
275 | qemu_chr_fe_set_handlers(&s->chr, imx_can_receive, imx_receive, | ||
276 | -- | 230 | -- |
277 | 2.34.1 | 231 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should | ||
2 | not change its sign bit. This means we can no longer use | ||
3 | gen_vfp_abs*() everywhere but must instead generate slightly more | ||
4 | complex code when FPCR.AH is set. | ||
1 | 5 | ||
6 | Implement these semantics for scalar FABS and FABD. This change also | ||
7 | affects all other instructions whose pseudocode calls FPAbs(); we | ||
8 | will extend the change to those instructions in following commits. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- | ||
14 | 1 file changed, 67 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/tcg/translate-a64.c | ||
19 | +++ b/target/arm/tcg/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
21 | s, chs_s); | ||
22 | } | ||
23 | |||
24 | +/* | ||
25 | + * These functions implement | ||
26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) | ||
27 | + * which for float32 is | ||
28 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) | ||
29 | + * and similarly for the other float sizes. | ||
30 | + */ | ||
31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) | ||
32 | +{ | ||
33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
34 | + | ||
35 | + gen_vfp_absh(abs_s, s); | ||
36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
37 | + abs_s, tcg_constant_i32(0x7c00), | ||
38 | + s, abs_s); | ||
39 | +} | ||
40 | + | ||
41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) | ||
42 | +{ | ||
43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
44 | + | ||
45 | + gen_vfp_abss(abs_s, s); | ||
46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
47 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
48 | + s, abs_s); | ||
49 | +} | ||
50 | + | ||
51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) | ||
52 | +{ | ||
53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); | ||
54 | + | ||
55 | + gen_vfp_absd(abs_s, s); | ||
56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
58 | + s, abs_s); | ||
59 | +} | ||
60 | + | ||
61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
62 | { | ||
63 | if (dc->fpcr_ah) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
65 | gen_vfp_absd(d, d); | ||
66 | } | ||
67 | |||
68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
69 | +{ | ||
70 | + gen_helper_vfp_subh(d, n, m, s); | ||
71 | + gen_vfp_ah_absh(d, d); | ||
72 | +} | ||
73 | + | ||
74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
75 | +{ | ||
76 | + gen_helper_vfp_subs(d, n, m, s); | ||
77 | + gen_vfp_ah_abss(d, d); | ||
78 | +} | ||
79 | + | ||
80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
81 | +{ | ||
82 | + gen_helper_vfp_subd(d, n, m, s); | ||
83 | + gen_vfp_ah_absd(d, d); | ||
84 | +} | ||
85 | + | ||
86 | static const FPScalar f_scalar_fabd = { | ||
87 | gen_fabd_h, | ||
88 | gen_fabd_s, | ||
89 | gen_fabd_d, | ||
90 | }; | ||
91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
92 | +static const FPScalar f_scalar_ah_fabd = { | ||
93 | + gen_fabd_ah_h, | ||
94 | + gen_fabd_ah_s, | ||
95 | + gen_fabd_ah_d, | ||
96 | +}; | ||
97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) | ||
98 | |||
99 | static const FPScalar f_scalar_frecps = { | ||
100 | gen_helper_recpsf_f16, | ||
101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { | ||
102 | gen_vfp_abss, | ||
103 | gen_vfp_absd, | ||
104 | }; | ||
105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
106 | +static const FPScalar1Int f_scalar_ah_fabs = { | ||
107 | + gen_vfp_ah_absh, | ||
108 | + gen_vfp_ah_abss, | ||
109 | + gen_vfp_ah_absd, | ||
110 | +}; | ||
111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) | ||
112 | |||
113 | static const FPScalar1Int f_scalar_fneg = { | ||
114 | gen_vfp_negh, | ||
115 | -- | ||
116 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Split the handling of vector FABD so that it calls a different set | ||
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 4 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ | ||
11 | 3 files changed, 33 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/helper.h | ||
16 | +++ b/target/arm/helper.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
52 | } | ||
53 | |||
54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) | ||
56 | +{ | ||
57 | + float16 r = float16_sub(op1, op2, stat); | ||
58 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
59 | +} | ||
60 | + | ||
61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) | ||
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
66 | + | ||
67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) | ||
68 | +{ | ||
69 | + float64 r = float64_sub(op1, op2, stat); | ||
70 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
71 | +} | ||
72 | + | ||
73 | /* | ||
74 | * Reciprocal step. These are the AArch32 version which uses a | ||
75 | * non-fused multiply-and-subtract. | ||
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign | ||
2 | of a NaN" semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 7 +++++++ | ||
8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 2 +- | ||
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
36 | } | ||
37 | |||
38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) | ||
40 | +{ | ||
41 | + float16 r = float16_sub(op1, op2, stat); | ||
42 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
43 | +} | ||
44 | + | ||
45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) | ||
46 | +{ | ||
47 | + float32 r = float32_sub(op1, op2, stat); | ||
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
49 | +} | ||
50 | + | ||
51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) | ||
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
65 | { | ||
66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
79 | -- | ||
80 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
1 | 6 | ||
7 | The construction of neg_imag and neg_real was done to make it easy | |
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
13 | |||
14 | Note that there was a naming issue with neg_imag and neg_real. | ||
15 | They were named backward, with neg_imag being non-zero for rot=1, | ||
16 | and vice versa. This was combined with reversed usage within the | ||
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ | ||
23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- | ||
24 | target/arm/tcg/translate-sve.c | 2 +- | ||
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
26 | |||
27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/vec_internal.h | ||
30 | +++ b/target/arm/tcg/vec_internal.h | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
41 | */ | ||
42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
43 | |||
44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
45 | +{ | ||
46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
47 | +} | ||
48 | + | ||
49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | ||
50 | +{ | ||
51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | ||
52 | +} | ||
53 | + | ||
54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | ||
55 | +{ | ||
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
58 | + | ||
59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | ||
60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/sve_helper.c | ||
63 | +++ b/target/arm/tcg/sve_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
65 | { | ||
66 | intptr_t j, i = simd_oprsz(desc); | ||
67 | uint64_t *g = vg; | ||
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
164 | -- | ||
165 | 2.34.1 | diff view generated by jsdifflib |
1 | In commit 1b7bc9b5c8bf374dd we changed handle_vec_simd_sqshrn() so | 1 | The negation steps in FCADD must honour FPCR.AH's "don't change the |
---|---|---|---|
2 | that instead of starting with a 0 value and depositing in each new | 2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into |
3 | element from the narrowing operation, it instead started with the raw | 3 | the SIMD data field passed to the helper and using that to decide |
4 | result of the narrowing operation of the first element. | 4 | whether to negate the values. |
5 | 5 | ||
6 | This is fine in the vector case, because the deposit operations for | 6 | The construction of neg_imag and neg_real were done to make it easy |
7 | the second and subsequent elements will always overwrite any higher | 7 | to apply both in parallel with two simple logical operations. This |
8 | bits that might have been in the first element's result value in | 8 | changed with FPCR.AH, which is more complex than that. Switch to |
9 | tcg_rd. However in the scalar case we only go through this loop | 9 | an approach closer to the pseudocode, where we extract the rot |
10 | once. The effect is that for a signed narrowing operation, if the | 10 | parameter from the SIMD data word and negate the appropriate |
11 | result is negative then we will now return a value where the bits | 11 | input value. |
12 | above the first element are incorrectly 1 (because the narrowfn | ||
13 | returns a sign-extended result, not one that is truncated to the | ||
14 | element size). | ||
15 | 12 | ||
16 | Fix this by using an extract operation to get exactly the correct | ||
17 | bits of the output of the narrowfn for element 1, instead of a | ||
18 | plain move. | ||
19 | |||
20 | Cc: qemu-stable@nongnu.org | ||
21 | Fixes: 1b7bc9b5c8bf374dd3 ("target/arm: Avoid tcg_const_ptr in handle_vec_simd_sqshrn") | ||
22 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2089 | ||
23 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
24 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
25 | Message-id: 20240123153416.877308-1-peter.maydell@linaro.org | ||
26 | --- | 15 | --- |
27 | target/arm/tcg/translate-a64.c | 2 +- | 16 | target/arm/tcg/translate-a64.c | 10 +++++-- |
28 | 1 file changed, 1 insertion(+), 1 deletion(-) | 17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- |
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
29 | 19 | ||
30 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | 20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
31 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/target/arm/tcg/translate-a64.c | 22 | --- a/target/arm/tcg/translate-a64.c |
33 | +++ b/target/arm/tcg/translate-a64.c | 23 | +++ b/target/arm/tcg/translate-a64.c |
34 | @@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, | 24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { |
35 | narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); | 25 | gen_helper_gvec_fcadds, |
36 | tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); | 26 | gen_helper_gvec_fcaddd, |
37 | if (i == 0) { | 27 | }; |
38 | - tcg_gen_mov_i64(tcg_final, tcg_rd); | 28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) |
39 | + tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize); | 29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) |
40 | } else { | 30 | +/* |
41 | tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); | 31 | + * Encode FPCR.AH into the data so the helper knows whether the |
42 | } | 32 | + * negations it does should avoid flipping the sign bit on a NaN |
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
43 | -- | 135 | -- |
44 | 2.34.1 | 136 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH semantics that we do not change the sign of an | ||
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
1 | 5 | ||
6 | Since the extra helper functions would be very repetitive if written | ||
7 | out longhand, we condense them and the existing non-AH helpers into | ||
8 | being emitted via macros. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/helper-a64.h | 6 ++ | ||
14 | target/arm/tcg/vec_internal.h | 18 ++++++ | ||
15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- | ||
16 | target/arm/tcg/translate-a64.c | 25 +++++-- | ||
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/helper-a64.h | ||
22 | +++ b/target/arm/tcg/helper-a64.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) | ||
24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/vec_internal.h | ||
42 | +++ b/target/arm/tcg/vec_internal.h | ||
43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
44 | */ | ||
45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
46 | |||
47 | +/* | ||
48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. | ||
49 | + */ | ||
50 | +static inline float16 float16_ah_chs(float16 a) | ||
51 | +{ | ||
52 | + return float16_is_any_nan(a) ? a : float16_chs(a); | ||
53 | +} | ||
54 | + | ||
55 | +static inline float32 float32_ah_chs(float32 a) | ||
56 | +{ | ||
57 | + return float32_is_any_nan(a) ? a : float32_chs(a); | ||
58 | +} | ||
59 | + | ||
60 | +static inline float64 float64_ah_chs(float64 a) | ||
61 | +{ | ||
62 | + return float64_is_any_nan(a) ? a : float64_chs(a); | ||
63 | +} | ||
64 | + | ||
65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
66 | { | ||
67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/helper-a64.c | ||
71 | +++ b/target/arm/tcg/helper-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #ifdef CONFIG_USER_ONLY | ||
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
82 | } | ||
83 | |||
84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
85 | +/* | ||
86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
87 | * versions, these do a fully fused multiply-add or | ||
88 | * multiply-add-and-halve. | ||
89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. | ||
90 | */ | ||
91 | - | ||
92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
93 | -{ | ||
94 | - a = float16_squash_input_denormal(a, fpst); | ||
95 | - b = float16_squash_input_denormal(b, fpst); | ||
96 | - | ||
97 | - a = float16_chs(a); | ||
98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
112 | } | ||
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
232 | }; | ||
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
258 | -- | ||
259 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in the vector versions of FRECPS and FRSQRTS, by implementing | ||
3 | new vector wrappers that call the _ah_ scalar helpers. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- | ||
10 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
11 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/helper-sve.h | ||
17 | +++ b/target/arm/tcg/helper-sve.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
20 | void, ptr, ptr, ptr, fpst, i32) | ||
21 | |||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, fpst, i32) | ||
28 | + | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, | ||
34 | + void, ptr, ptr, ptr, fpst, i32) | ||
35 | + | ||
36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
37 | void, ptr, ptr, ptr, fpst, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
45 | } | ||
46 | |||
47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
48 | - gen_helper_gvec_3_ptr * const f[3]) | ||
49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
50 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
52 | { | ||
53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, | ||
55 | select_ah_fpst(s, a->esz)); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
59 | gen_helper_gvec_recps_s, | ||
60 | gen_helper_gvec_recps_d, | ||
61 | }; | ||
62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { | ||
64 | + gen_helper_gvec_ah_recps_h, | ||
65 | + gen_helper_gvec_ah_recps_s, | ||
66 | + gen_helper_gvec_ah_recps_d, | ||
67 | +}; | ||
68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) | ||
69 | |||
70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
71 | gen_helper_gvec_rsqrts_h, | ||
72 | gen_helper_gvec_rsqrts_s, | ||
73 | gen_helper_gvec_rsqrts_d, | ||
74 | }; | ||
75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { | ||
77 | + gen_helper_gvec_ah_rsqrts_h, | ||
78 | + gen_helper_gvec_ah_rsqrts_s, | ||
79 | + gen_helper_gvec_ah_rsqrts_d, | ||
80 | +}; | ||
81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) | ||
82 | |||
83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
84 | gen_helper_gvec_faddp_h, | ||
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/tcg/translate-sve.c | ||
88 | +++ b/target/arm/tcg/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
90 | NULL, gen_helper_gvec_##name##_h, \ | ||
91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
92 | }; \ | ||
93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ | ||
95 | + NULL, gen_helper_gvec_ah_##name##_h, \ | ||
96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ | ||
97 | + }; \ | ||
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
114 | + | ||
115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) | ||
116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) | ||
117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) | ||
118 | + | ||
119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS | ||
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] | ||
8 | fixed] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/helper.h | ||
20 | +++ b/target/arm/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
23 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | + | ||
32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | + | ||
39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
40 | void, ptr, ptr, ptr, ptr, i32) | ||
41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/translate-a64.c | ||
45 | +++ b/target/arm/tcg/translate-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
47 | |||
48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
49 | { | ||
50 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
51 | - gen_helper_gvec_fmla_idx_h, | ||
52 | - gen_helper_gvec_fmla_idx_s, | ||
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/arm/tcg/translate-sve.c | ||
80 | +++ b/target/arm/tcg/translate-sve.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
82 | *** SVE Floating Point Multiply-Add Indexed Group | ||
83 | */ | ||
84 | |||
85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
86 | -{ | ||
87 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
88 | - NULL, | ||
89 | - gen_helper_gvec_fmla_idx_h, | ||
90 | - gen_helper_gvec_fmla_idx_s, | ||
91 | - gen_helper_gvec_fmla_idx_d, | ||
92 | - }; | ||
93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
94 | - (a->index << 1) | sub, | ||
95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
96 | -} | ||
97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
98 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
100 | +}; | ||
101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
104 | |||
105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
108 | + { NULL, NULL }, | ||
109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
112 | +}; | ||
113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
114 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
115 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | *** SVE Floating Point Multiply Indexed Group | ||
120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/arm/tcg/vec_helper.c | ||
123 | +++ b/target/arm/tcg/vec_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
125 | |||
126 | #undef DO_FMUL_IDX | ||
127 | |||
128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
131 | float_status *stat, uint32_t desc) \ | ||
132 | { \ | ||
133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ | ||
137 | + intptr_t idx = simd_data(desc); \ | ||
138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
141 | TYPE mm = m[H(i + idx)]; \ | ||
142 | for (j = 0; j < segment; j++) { \ | ||
143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ | ||
144 | - mm, a[i + j], 0, stat); \ | ||
145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
150 | } | ||
151 | |||
152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
158 | + | ||
159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
162 | + | ||
163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
166 | |||
167 | #undef DO_FMLA_IDX | ||
168 | |||
169 | -- | ||
170 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in FMLS (vector), by implementing a new set of helpers for | ||
3 | the AH=1 case. | ||
1 | 4 | ||
5 | The float_muladd_negate_product flag produces the same result | ||
6 | as negating either of the multiplication operands, assuming | ||
7 | neither of the operands is a NaN. But since FEAT_AFP does not | ||
8 | negate NaNs, this behaviour is exactly what we need. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/helper.h | 4 ++++ | ||
14 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ | ||
16 | 3 files changed, 32 insertions(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/helper.h | ||
21 | +++ b/target/arm/helper.h | ||
22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
25 | |||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
29 | + | ||
30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, | ||
31 | void, ptr, ptr, ptr, fpst, i32) | ||
32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, | ||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/translate-a64.c | ||
36 | +++ b/target/arm/tcg/translate-a64.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { | ||
38 | gen_helper_gvec_vfms_s, | ||
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
57 | } | ||
58 | |||
59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, | ||
60 | + float_status *stat) | ||
61 | +{ | ||
62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
63 | +} | ||
64 | + | ||
65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, | ||
66 | + float_status *stat) | ||
67 | +{ | ||
68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
69 | +} | ||
70 | + | ||
71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
72 | + float_status *stat) | ||
73 | +{ | ||
74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
75 | +} | ||
76 | + | ||
77 | #define DO_MULADD(NAME, FUNC, TYPE) \ | ||
78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ | ||
79 | float_status *stat, uint32_t desc) \ | ||
80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) | ||
81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) | ||
82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) | ||
83 | |||
84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) | ||
85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) | ||
86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) | ||
87 | + | ||
88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. | ||
89 | * For AdvSIMD, there is of course only one such vector segment. | ||
90 | */ | ||
91 | -- | ||
92 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the | ||
2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case | ||
3 | which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions | ||
4 | that do the work. | ||
1 | 5 | ||
6 | The float*_muladd functions have a flags argument that can | ||
7 | perform optional negation of various operands. We don't use | ||
8 | that for "normal" arm fmla, because the muladd flags are not | ||
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
11 | |||
12 | The non-AH helpers pass in a zero flags argument and control the | ||
13 | negation via the neg1 and neg3 arguments; the AH helpers always pass | ||
14 | in neg1 and neg3 as zero and control the negation via the flags | ||
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/tcg/helper-sve.h | 21 ++++++++ | ||
22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- | ||
23 | target/arm/tcg/translate-sve.c | 18 ++++--- | ||
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
25 | |||
26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/tcg/helper-sve.h | ||
29 | +++ b/target/arm/tcg/helper-sve.h | ||
30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | |||
34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, | ||
46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
47 | + | ||
48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/tcg/sve_helper.c | ||
61 | +++ b/target/arm/tcg/sve_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
63 | |||
64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
65 | float_status *status, uint32_t desc, | ||
66 | - uint16_t neg1, uint16_t neg3) | ||
67 | + uint16_t neg1, uint16_t neg3, int flags) | ||
68 | { | ||
69 | intptr_t i = simd_oprsz(desc); | ||
70 | uint64_t *g = vg; | ||
71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | ||
73 | e2 = *(uint16_t *)(vm + H1_2(i)); | ||
74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | ||
75 | - r = float16_muladd(e1, e2, e3, 0, status); | ||
76 | + r = float16_muladd(e1, e2, e3, flags, status); | ||
77 | *(uint16_t *)(vd + H1_2(i)) = r; | ||
78 | } | ||
79 | } while (i & 63); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
82 | void *vg, float_status *status, uint32_t desc) | ||
83 | { | ||
84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
86 | } | ||
87 | |||
88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
89 | void *vg, float_status *status, uint32_t desc) | ||
90 | { | ||
91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); | ||
92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | ||
93 | } | ||
94 | |||
95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
96 | void *vg, float_status *status, uint32_t desc) | ||
97 | { | ||
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); | ||
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | ||
100 | } | ||
101 | |||
102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
103 | void *vg, float_status *status, uint32_t desc) | ||
104 | { | ||
105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); | ||
106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | ||
107 | +} | ||
108 | + | ||
109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
110 | + void *vg, float_status *status, uint32_t desc) | ||
111 | +{ | ||
112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
113 | + float_muladd_negate_product); | ||
114 | +} | ||
115 | + | ||
116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
117 | + void *vg, float_status *status, uint32_t desc) | ||
118 | +{ | ||
119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
120 | + float_muladd_negate_product | float_muladd_negate_c); | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
124 | + void *vg, float_status *status, uint32_t desc) | ||
125 | +{ | ||
126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
298 | |||
299 | -- | ||
300 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when |
---|---|---|---|
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field | ||
3 | and use that to determine whether to do the negation. | ||
2 | 4 | ||
3 | target/arm/cpregs.h uses the CP_REG_ARCH_* definitions | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | from "target/arm/kvm-consts.h". Include it in order to | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | avoid the following error when refactoring unrelated headers: | 7 | --- |
8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- | ||
9 | target/arm/tcg/translate-sve.c | 4 ++-- | ||
10 | 2 files changed, 17 insertions(+), 5 deletions(-) | ||
6 | 11 | ||
7 | target/arm/cpregs.h:191:18: error: use of undeclared identifier 'CP_REG_ARCH_MASK' | 12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
8 | if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) { | ||
9 | ^ | ||
10 | |||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-id: 20240118200643.29037-8-philmd@linaro.org | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | --- | ||
16 | target/arm/cpregs.h | 1 + | ||
17 | 1 file changed, 1 insertion(+) | ||
18 | |||
19 | diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/cpregs.h | 14 | --- a/target/arm/tcg/sve_helper.c |
22 | +++ b/target/arm/cpregs.h | 15 | +++ b/target/arm/tcg/sve_helper.c |
23 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) |
24 | #define TARGET_ARM_CPREGS_H | 17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
25 | 18 | { | |
26 | #include "hw/registerfields.h" | 19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; |
27 | +#include "target/arm/kvm-consts.h" | 20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
21 | uint16_t *d = vd, *n = vn, *m = vm; | ||
22 | for (i = 0; i < opr_sz; i += 1) { | ||
23 | uint16_t nn = n[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
25 | if (mm & 1) { | ||
26 | nn = float16_one; | ||
27 | } | ||
28 | - d[i] = nn ^ (mm & 2) << 14; | ||
29 | + if (mm & 2) { | ||
30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | ||
31 | + } | ||
32 | + d[i] = nn; | ||
33 | } | ||
34 | } | ||
35 | |||
36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
37 | { | ||
38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; | ||
39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
40 | uint32_t *d = vd, *n = vn, *m = vm; | ||
41 | for (i = 0; i < opr_sz; i += 1) { | ||
42 | uint32_t nn = n[i]; | ||
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
44 | if (mm & 1) { | ||
45 | nn = float32_one; | ||
46 | } | ||
47 | - d[i] = nn ^ (mm & 2) << 30; | ||
48 | + if (mm & 2) { | ||
49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | ||
50 | + } | ||
51 | + d[i] = nn; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
56 | { | ||
57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; | ||
58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
59 | uint64_t *d = vd, *n = vn, *m = vm; | ||
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
28 | 92 | ||
29 | /* | 93 | /* |
30 | * ARMCPRegInfo type field bits: | 94 | *** SVE Predicate Logical Operations Group |
31 | -- | 95 | -- |
32 | 2.34.1 | 96 | 2.34.1 |
33 | |||
34 | diff view generated by jsdifflib |
1 | From: Max Filippov <jcmvbkbc@gmail.com> | 1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when |
---|---|---|---|
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, | ||
3 | so we can select the correct behaviour. | ||
2 | 4 | ||
3 | r[id]tlb[01], [iw][id]tlb opcodes use TLB way index passed in a register | 5 | Because the operand is known to be negative, negating the operand |
4 | by the guest. The host uses 3 bits of the index for ITLB indexing and 4 | 6 | is the same as taking the absolute value. Defer this to the muladd |
5 | bits for DTLB, but there are only 7 entries in the ITLB array and 10 in | 7 | operation via flags, so that it happens after NaN detection, which |
6 | the DTLB array, so a malicious guest may trigger out-of-bound access to | 8 | is correct for FPCR.AH. |
7 | these arrays. | ||
8 | 9 | ||
9 | Change split_tlb_entry_spec return type to bool to indicate whether TLB | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | way passed to it is valid. Change get_tlb_entry to return NULL in case | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | invalid TLB way is requested. Add assertion to xtensa_tlb_get_entry that | 12 | --- |
12 | requested TLB way and entry indices are valid. Add checks to the | 13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- |
13 | [rwi]tlb helpers that requested TLB way is valid and return 0 or do | 14 | target/arm/tcg/translate-sve.c | 3 ++- |
14 | nothing when it's not. | 15 | 2 files changed, 35 insertions(+), 10 deletions(-) |
15 | 16 | ||
16 | Cc: qemu-stable@nongnu.org | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
17 | Fixes: b67ea0cd7441 ("target-xtensa: implement memory protection options") | ||
18 | Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> | ||
19 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Message-id: 20231215120307.545381-1-jcmvbkbc@gmail.com | ||
21 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
22 | --- | ||
23 | target/xtensa/mmu_helper.c | 47 ++++++++++++++++++++++++++++---------- | ||
24 | 1 file changed, 35 insertions(+), 12 deletions(-) | ||
25 | |||
26 | diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/xtensa/mmu_helper.c | 19 | --- a/target/arm/tcg/sve_helper.c |
29 | +++ b/target/xtensa/mmu_helper.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
30 | @@ -XXX,XX +XXX,XX @@ static void split_tlb_entry_spec_way(const CPUXtensaState *env, uint32_t v, | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, |
31 | * Split TLB address into TLB way, entry index and VPN (with index). | 22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
32 | * See ISA, 4.6.5.5 - 4.6.5.8 for the TLB addressing format | 23 | }; |
33 | */ | 24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); |
34 | -static void split_tlb_entry_spec(CPUXtensaState *env, uint32_t v, bool dtlb, | 25 | - intptr_t x = simd_data(desc); |
35 | - uint32_t *vpn, uint32_t *wi, uint32_t *ei) | 26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
36 | +static bool split_tlb_entry_spec(CPUXtensaState *env, uint32_t v, bool dtlb, | 27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
37 | + uint32_t *vpn, uint32_t *wi, uint32_t *ei) | 28 | float16 *d = vd, *n = vn, *m = vm; |
38 | { | 29 | + |
39 | if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) { | 30 | for (i = 0; i < opr_sz; i++) { |
40 | *wi = v & (dtlb ? 0xf : 0x7); | 31 | float16 mm = m[i]; |
41 | - split_tlb_entry_spec_way(env, v, dtlb, vpn, *wi, ei); | 32 | intptr_t xx = x; |
42 | + if (*wi < (dtlb ? env->config->dtlb.nways : env->config->itlb.nways)) { | 33 | + int flags = 0; |
43 | + split_tlb_entry_spec_way(env, v, dtlb, vpn, *wi, ei); | 34 | + |
44 | + return true; | 35 | if (float16_is_neg(mm)) { |
45 | + } else { | 36 | - mm = float16_abs(mm); |
46 | + return false; | 37 | + if (fpcr_ah) { |
47 | + } | 38 | + flags = float_muladd_negate_product; |
48 | } else { | 39 | + } else { |
49 | *vpn = v & REGION_PAGE_MASK; | 40 | + mm = float16_abs(mm); |
50 | *wi = 0; | 41 | + } |
51 | *ei = (v >> 29) & 0x7; | 42 | xx += 8; |
52 | + return true; | 43 | } |
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
53 | } | 46 | } |
54 | } | 47 | } |
55 | 48 | ||
56 | static xtensa_tlb_entry *xtensa_tlb_get_entry(CPUXtensaState *env, bool dtlb, | 49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, |
57 | unsigned wi, unsigned ei) | 50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, |
58 | { | 51 | }; |
59 | + const xtensa_tlb *tlb = dtlb ? &env->config->dtlb : &env->config->itlb; | 52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); |
53 | - intptr_t x = simd_data(desc); | ||
54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
56 | float32 *d = vd, *n = vn, *m = vm; | ||
60 | + | 57 | + |
61 | + assert(wi < tlb->nways && ei < tlb->way_size[wi]); | 58 | for (i = 0; i < opr_sz; i++) { |
62 | return dtlb ? | 59 | float32 mm = m[i]; |
63 | env->dtlb[wi] + ei : | 60 | intptr_t xx = x; |
64 | env->itlb[wi] + ei; | 61 | + int flags = 0; |
65 | @@ -XXX,XX +XXX,XX @@ static xtensa_tlb_entry *get_tlb_entry(CPUXtensaState *env, | 62 | + |
66 | uint32_t wi; | 63 | if (float32_is_neg(mm)) { |
67 | uint32_t ei; | 64 | - mm = float32_abs(mm); |
68 | 65 | + if (fpcr_ah) { | |
69 | - split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei); | 66 | + flags = float_muladd_negate_product; |
70 | - if (pwi) { | 67 | + } else { |
71 | - *pwi = wi; | 68 | + mm = float32_abs(mm); |
72 | + if (split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei)) { | 69 | + } |
73 | + if (pwi) { | 70 | xx += 8; |
74 | + *pwi = wi; | 71 | } |
75 | + } | 72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); |
76 | + return xtensa_tlb_get_entry(env, dtlb, wi, ei); | 73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); |
77 | + } else { | ||
78 | + return NULL; | ||
79 | } | 74 | } |
80 | - return xtensa_tlb_get_entry(env, dtlb, wi, ei); | ||
81 | } | 75 | } |
82 | 76 | ||
83 | static void xtensa_tlb_set_entry_mmu(const CPUXtensaState *env, | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, |
84 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rtlb0)(CPUXtensaState *env, uint32_t v, uint32_t dtlb) | 78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, |
85 | if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) { | 79 | }; |
86 | uint32_t wi; | 80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); |
87 | const xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, &wi); | 81 | - intptr_t x = simd_data(desc); |
88 | - return (entry->vaddr & get_vpn_mask(env, dtlb, wi)) | entry->asid; | 82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
84 | float64 *d = vd, *n = vn, *m = vm; | ||
89 | + | 85 | + |
90 | + if (entry) { | 86 | for (i = 0; i < opr_sz; i++) { |
91 | + return (entry->vaddr & get_vpn_mask(env, dtlb, wi)) | entry->asid; | 87 | float64 mm = m[i]; |
92 | + } else { | 88 | intptr_t xx = x; |
93 | + return 0; | 89 | + int flags = 0; |
94 | + } | 90 | + |
95 | } else { | 91 | if (float64_is_neg(mm)) { |
96 | return v & REGION_PAGE_MASK; | 92 | - mm = float64_abs(mm); |
93 | + if (fpcr_ah) { | ||
94 | + flags = float_muladd_negate_product; | ||
95 | + } else { | ||
96 | + mm = float64_abs(mm); | ||
97 | + } | ||
98 | xx += 8; | ||
99 | } | ||
100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | ||
101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); | ||
97 | } | 102 | } |
98 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rtlb0)(CPUXtensaState *env, uint32_t v, uint32_t dtlb) | ||
99 | uint32_t HELPER(rtlb1)(CPUXtensaState *env, uint32_t v, uint32_t dtlb) | ||
100 | { | ||
101 | const xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, NULL); | ||
102 | - return entry->paddr | entry->attr; | ||
103 | + | ||
104 | + if (entry) { | ||
105 | + return entry->paddr | entry->attr; | ||
106 | + } else { | ||
107 | + return 0; | ||
108 | + } | ||
109 | } | 103 | } |
110 | 104 | ||
111 | void HELPER(itlb)(CPUXtensaState *env, uint32_t v, uint32_t dtlb) | 105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
112 | @@ -XXX,XX +XXX,XX @@ void HELPER(itlb)(CPUXtensaState *env, uint32_t v, uint32_t dtlb) | 106 | index XXXXXXX..XXXXXXX 100644 |
113 | if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) { | 107 | --- a/target/arm/tcg/translate-sve.c |
114 | uint32_t wi; | 108 | +++ b/target/arm/tcg/translate-sve.c |
115 | xtensa_tlb_entry *entry = get_tlb_entry(env, v, dtlb, &wi); | 109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { |
116 | - if (entry->variable && entry->asid) { | 110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, |
117 | + if (entry && entry->variable && entry->asid) { | 111 | }; |
118 | tlb_flush_page(env_cpu(env), entry->vaddr); | 112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, |
119 | entry->asid = 0; | 113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, |
120 | } | 114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, |
121 | @@ -XXX,XX +XXX,XX @@ void HELPER(wtlb)(CPUXtensaState *env, uint32_t p, uint32_t v, uint32_t dtlb) | 115 | + a->imm | (s->fpcr_ah << 3), |
122 | uint32_t vpn; | 116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
123 | uint32_t wi; | 117 | |
124 | uint32_t ei; | 118 | /* |
125 | - split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei); | ||
126 | - xtensa_tlb_set_entry(env, dtlb, wi, ei, vpn, p); | ||
127 | + if (split_tlb_entry_spec(env, v, dtlb, &vpn, &wi, &ei)) { | ||
128 | + xtensa_tlb_set_entry(env, dtlb, wi, ei, vpn, p); | ||
129 | + } | ||
130 | } | ||
131 | |||
132 | /*! | ||
133 | -- | 119 | -- |
134 | 2.34.1 | 120 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | "target/arm/cpu.h" is target specific, any file including it | 3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH |
4 | becomes target specific too, thus this is the same for any file | 4 | is set. Handle this by passing FPCR.AH to the helper via the |
5 | including "hw/misc/xlnx-versal-crl.h". | 5 | SIMD data field, and use this to select whether to do the |
6 | negation via XOR or via the muladd negate_product flag. | ||
6 | 7 | ||
7 | "hw/misc/xlnx-versal-crl.h" doesn't require any target specific | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | definition however, only the target-agnostic QOM definitions | 9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org |
9 | from "target/arm/cpu-qom.h". Include the latter header to avoid | 10 | [PMM: Expanded commit message] |
10 | tainting unnecessary objects as target-specific. | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20240118200643.29037-14-philmd@linaro.org | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 13 | --- |
17 | include/hw/misc/xlnx-versal-crl.h | 2 +- | 14 | target/arm/tcg/translate-a64.c | 2 +- |
18 | hw/misc/xlnx-versal-crl.c | 1 + | 15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- |
19 | 2 files changed, 2 insertions(+), 1 deletion(-) | 16 | 2 files changed, 40 insertions(+), 28 deletions(-) |
20 | 17 | ||
21 | diff --git a/include/hw/misc/xlnx-versal-crl.h b/include/hw/misc/xlnx-versal-crl.h | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
22 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/include/hw/misc/xlnx-versal-crl.h | 20 | --- a/target/arm/tcg/translate-a64.c |
24 | +++ b/include/hw/misc/xlnx-versal-crl.h | 21 | +++ b/target/arm/tcg/translate-a64.c |
25 | @@ -XXX,XX +XXX,XX @@ | 22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) |
26 | 23 | ||
27 | #include "hw/sysbus.h" | 24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
28 | #include "hw/register.h" | 25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
29 | -#include "target/arm/cpu.h" | 26 | - a->rot, fn[a->esz]); |
30 | +#include "target/arm/cpu-qom.h" | 27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); |
31 | 28 | return true; | |
32 | #define TYPE_XLNX_VERSAL_CRL "xlnx-versal-crl" | 29 | } |
33 | OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCRL, XLNX_VERSAL_CRL) | 30 | |
34 | diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c | 31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
35 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/hw/misc/xlnx-versal-crl.c | 33 | --- a/target/arm/tcg/vec_helper.c |
37 | +++ b/hw/misc/xlnx-versal-crl.c | 34 | +++ b/target/arm/tcg/vec_helper.c |
38 | @@ -XXX,XX +XXX,XX @@ | 35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, |
39 | #include "hw/register.h" | 36 | uintptr_t opr_sz = simd_oprsz(desc); |
40 | #include "hw/resettable.h" | 37 | float16 *d = vd, *n = vn, *m = vm, *a = va; |
41 | 38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | |
42 | +#include "target/arm/cpu.h" | 39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
43 | #include "target/arm/arm-powerctl.h" | 40 | - uint32_t neg_real = flip ^ neg_imag; |
44 | #include "target/arm/multiprocessing.h" | 41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); |
45 | #include "hw/misc/xlnx-versal-crl.h" | 42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
43 | + uint32_t negf_real = flip ^ negf_imag; | ||
44 | + float16 negx_imag, negx_real; | ||
45 | uintptr_t i; | ||
46 | |||
47 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
48 | - neg_real <<= 15; | ||
49 | - neg_imag <<= 15; | ||
50 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
51 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
55 | |||
56 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
57 | float16 e2 = n[H2(i + flip)]; | ||
58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
60 | float16 e4 = e2; | ||
61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
63 | |||
64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
68 | } | ||
69 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
72 | uintptr_t opr_sz = simd_oprsz(desc); | ||
73 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
76 | - uint32_t neg_real = flip ^ neg_imag; | ||
77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
79 | + uint32_t negf_real = flip ^ negf_imag; | ||
80 | + float32 negx_imag, negx_real; | ||
81 | uintptr_t i; | ||
82 | |||
83 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
84 | - neg_real <<= 31; | ||
85 | - neg_imag <<= 31; | ||
86 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
87 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
91 | |||
92 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
93 | float32 e2 = n[H4(i + flip)]; | ||
94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
96 | float32 e4 = e2; | ||
97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
99 | |||
100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
104 | } | ||
105 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
108 | uintptr_t opr_sz = simd_oprsz(desc); | ||
109 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
112 | - uint64_t neg_real = flip ^ neg_imag; | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_real, negx_imag; | ||
117 | uintptr_t i; | ||
118 | |||
119 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
120 | - neg_real <<= 63; | ||
121 | - neg_imag <<= 63; | ||
122 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
127 | |||
128 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
129 | float64 e2 = n[i + flip]; | ||
130 | - float64 e1 = m[i + flip] ^ neg_real; | ||
131 | + float64 e1 = m[i + flip] ^ negx_real; | ||
132 | float64 e4 = e2; | ||
133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
135 | |||
136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
140 | } | ||
141 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
142 | } | ||
46 | -- | 143 | -- |
47 | 2.34.1 | 144 | 2.34.1 |
48 | |||
49 | diff view generated by jsdifflib |
1 | From: Gustavo Romero <gustavo.romero@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add a note on CPU features that are off by default in `virt` machines. | 3 | The negation step in FCMLA by index mustn't negate a NaN when |
4 | Some CPU features will remain off even if a CPU-capable CPU (e.g., | 4 | FPCR.AH is set. Use the same approach as vector FCMLA of |
5 | `-cpu max`) is selected because they require support in both the CPU | 5 | passing in FPCR.AH and using it to select whether to negate |
6 | itself and in the wider system. Therefore, the user, besides selecting a | 6 | by XOR or by the muladd negate_product flag. |
7 | CPU that supports such features, must also turn on the feature using a | ||
8 | machine option. | ||
9 | 7 | ||
10 | Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | Message-id: 20240122211215.95073-1-gustavo.romero@linaro.org | 9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org |
10 | [PMM: Expanded commit message] | ||
12 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 13 | --- |
15 | docs/system/arm/virt.rst | 13 +++++++++++++ | 14 | target/arm/tcg/translate-a64.c | 2 +- |
16 | 1 file changed, 13 insertions(+) | 15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- |
16 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
17 | 17 | ||
18 | diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
19 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/docs/system/arm/virt.rst | 20 | --- a/target/arm/tcg/translate-a64.c |
21 | +++ b/docs/system/arm/virt.rst | 21 | +++ b/target/arm/tcg/translate-a64.c |
22 | @@ -XXX,XX +XXX,XX @@ Supported guest CPU types: | 22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) |
23 | Note that the default is ``cortex-a15``, so for an AArch64 guest you must | 23 | if (fp_access_check(s)) { |
24 | specify a CPU type. | 24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
25 | 25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | |
26 | +Also, please note that passing ``max`` CPU (i.e. ``-cpu max``) won't | 26 | - (a->idx << 2) | a->rot, fn); |
27 | +enable all the CPU features for a given ``virt`` machine. Where a CPU | 27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); |
28 | +architectural feature requires support in both the CPU itself and in the | 28 | } |
29 | +wider system (e.g. the MTE feature), it may not be enabled by default, | 29 | return true; |
30 | +but instead requires a machine option to enable it. | 30 | } |
31 | + | 31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
32 | +For example, MTE support must be enabled with ``-machine virt,mte=on``, | 32 | index XXXXXXX..XXXXXXX 100644 |
33 | +as well as by selecting an MTE-capable CPU (e.g., ``max``) with the | 33 | --- a/target/arm/tcg/vec_helper.c |
34 | +``-cpu`` option. | 34 | +++ b/target/arm/tcg/vec_helper.c |
35 | + | 35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, |
36 | +See the machine-specific options below, or check them for a given machine | 36 | uintptr_t opr_sz = simd_oprsz(desc); |
37 | +by passing the ``help`` suboption, like: ``-machine virt-9.0,help``. | 37 | float16 *d = vd, *n = vn, *m = vm, *a = va; |
38 | + | 38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
39 | Graphics output is available, but unlike the x86 PC machine types | 39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
40 | there is no default display device enabled: you should select one from | 40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
41 | the Display devices section of "-device help". The recommended option | 41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); |
42 | - uint32_t neg_real = flip ^ neg_imag; | ||
43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
44 | + uint32_t negf_real = flip ^ negf_imag; | ||
45 | intptr_t elements = opr_sz / sizeof(float16); | ||
46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
47 | + float16 negx_imag, negx_real; | ||
48 | intptr_t i, j; | ||
49 | |||
50 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
51 | - neg_real <<= 15; | ||
52 | - neg_imag <<= 15; | ||
53 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
54 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
58 | |||
59 | for (i = 0; i < elements; i += eltspersegment) { | ||
60 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
61 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
62 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
64 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
66 | |||
67 | for (j = i; j < i + eltspersegment; j += 2) { | ||
68 | float16 e2 = n[H2(j + flip)]; | ||
69 | float16 e4 = e2; | ||
70 | |||
71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
75 | } | ||
76 | } | ||
77 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
79 | uintptr_t opr_sz = simd_oprsz(desc); | ||
80 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
85 | - uint32_t neg_real = flip ^ neg_imag; | ||
86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
87 | + uint32_t negf_real = flip ^ negf_imag; | ||
88 | intptr_t elements = opr_sz / sizeof(float32); | ||
89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
90 | + float32 negx_imag, negx_real; | ||
91 | intptr_t i, j; | ||
92 | |||
93 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
94 | - neg_real <<= 31; | ||
95 | - neg_imag <<= 31; | ||
96 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
97 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
101 | |||
102 | for (i = 0; i < elements; i += eltspersegment) { | ||
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
42 | -- | 121 | -- |
43 | 2.34.1 | 122 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | hw/cpu/a9mpcore.c doesn't require "cpu.h" anymore. | 3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is |
4 | By removing it, the unit becomes target agnostic: | 4 | set. Use the same approach as we did for A64 FCMLA of passing in |
5 | we can build it once. Update meson. | 5 | FPCR.AH and using it to select whether to negate by XOR or by the |
6 | muladd negate_product flag. | ||
6 | 7 | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org |
9 | Message-id: 20240118200643.29037-13-philmd@linaro.org | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 12 | --- |
12 | hw/cpu/a9mpcore.c | 2 +- | 13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- |
13 | hw/cpu/meson.build | 2 +- | 14 | target/arm/tcg/translate-sve.c | 2 +- |
14 | 2 files changed, 2 insertions(+), 2 deletions(-) | 15 | 2 files changed, 43 insertions(+), 28 deletions(-) |
15 | 16 | ||
16 | diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/cpu/a9mpcore.c | 19 | --- a/target/arm/tcg/sve_helper.c |
19 | +++ b/hw/cpu/a9mpcore.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
21 | #include "hw/irq.h" | 22 | void *vg, float_status *status, uint32_t desc) |
22 | #include "hw/qdev-properties.h" | 23 | { |
23 | #include "hw/core/cpu.h" | 24 | intptr_t j, i = simd_oprsz(desc); |
24 | -#include "cpu.h" | 25 | - unsigned rot = simd_data(desc); |
25 | +#include "target/arm/cpu-qom.h" | 26 | - bool flip = rot & 1; |
26 | 27 | - float16 neg_imag, neg_real; | |
27 | #define A9_GIC_NUM_PRIORITY_BITS 5 | 28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
28 | 29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | |
29 | diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build | 30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | 163 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/hw/cpu/meson.build | 164 | --- a/target/arm/tcg/translate-sve.c |
32 | +++ b/hw/cpu/meson.build | 165 | +++ b/target/arm/tcg/translate-sve.c |
33 | @@ -XXX,XX +XXX,XX @@ system_ss.add(files('core.c', 'cluster.c')) | 166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { |
34 | 167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | |
35 | system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) | 168 | }; |
36 | system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) | 169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], |
37 | -specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) | 170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, |
38 | +system_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) | 171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), |
39 | specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) | 172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
173 | |||
174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
40 | -- | 175 | -- |
41 | 2.34.1 | 176 | 2.34.1 |
42 | |||
43 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Various files in hw/arm/ don't require "cpu.h" anymore. | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN |
4 | Except virt-acpi-build.c, all of them don't require any | 4 | in FMLSL by element and vector, using the usual trick of |
5 | ARM specific knowledge anymore and can be built once as | 5 | negating by XOR when AH=0 and by muladd flags when AH=1. |
6 | target agnostic units. Update meson accordingly. | ||
7 | 6 | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
10 | Message-id: 20240118200643.29037-21-philmd@linaro.org | 9 | FPCR.AH value via the SIMD data word. |
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 16 | --- |
13 | hw/arm/collie.c | 1 - | 17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- |
14 | hw/arm/gumstix.c | 1 - | 18 | 1 file changed, 46 insertions(+), 25 deletions(-) |
15 | hw/arm/integratorcp.c | 1 - | ||
16 | hw/arm/mainstone.c | 1 - | ||
17 | hw/arm/musicpal.c | 1 - | ||
18 | hw/arm/omap2.c | 1 - | ||
19 | hw/arm/omap_sx1.c | 1 - | ||
20 | hw/arm/palm.c | 1 - | ||
21 | hw/arm/spitz.c | 1 - | ||
22 | hw/arm/strongarm.c | 1 - | ||
23 | hw/arm/versatilepb.c | 1 - | ||
24 | hw/arm/vexpress.c | 1 - | ||
25 | hw/arm/virt-acpi-build.c | 1 - | ||
26 | hw/arm/xilinx_zynq.c | 1 - | ||
27 | hw/arm/xlnx-versal-virt.c | 1 - | ||
28 | hw/arm/z2.c | 1 - | ||
29 | hw/arm/meson.build | 23 ++++++++++++----------- | ||
30 | 17 files changed, 12 insertions(+), 27 deletions(-) | ||
31 | 19 | ||
32 | diff --git a/hw/arm/collie.c b/hw/arm/collie.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
33 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/hw/arm/collie.c | 22 | --- a/target/arm/tcg/vec_helper.c |
35 | +++ b/hw/arm/collie.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
36 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
37 | #include "hw/arm/boot.h" | ||
38 | #include "hw/block/flash.h" | ||
39 | #include "exec/address-spaces.h" | ||
40 | -#include "cpu.h" | ||
41 | #include "qom/object.h" | ||
42 | #include "qemu/error-report.h" | ||
43 | |||
44 | diff --git a/hw/arm/gumstix.c b/hw/arm/gumstix.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/hw/arm/gumstix.c | ||
47 | +++ b/hw/arm/gumstix.c | ||
48 | @@ -XXX,XX +XXX,XX @@ | ||
49 | #include "hw/boards.h" | ||
50 | #include "exec/address-spaces.h" | ||
51 | #include "sysemu/qtest.h" | ||
52 | -#include "cpu.h" | ||
53 | |||
54 | #define CONNEX_FLASH_SIZE (16 * MiB) | ||
55 | #define CONNEX_RAM_SIZE (64 * MiB) | ||
56 | diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/hw/arm/integratorcp.c | ||
59 | +++ b/hw/arm/integratorcp.c | ||
60 | @@ -XXX,XX +XXX,XX @@ | ||
61 | |||
62 | #include "qemu/osdep.h" | ||
63 | #include "qapi/error.h" | ||
64 | -#include "cpu.h" | ||
65 | #include "hw/sysbus.h" | ||
66 | #include "migration/vmstate.h" | ||
67 | #include "hw/boards.h" | ||
68 | diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/hw/arm/mainstone.c | ||
71 | +++ b/hw/arm/mainstone.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #include "hw/block/flash.h" | ||
74 | #include "hw/sysbus.h" | ||
75 | #include "exec/address-spaces.h" | ||
76 | -#include "cpu.h" | ||
77 | |||
78 | /* Device addresses */ | ||
79 | #define MST_FPGA_PHYS 0x08000000 | ||
80 | diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/hw/arm/musicpal.c | ||
83 | +++ b/hw/arm/musicpal.c | ||
84 | @@ -XXX,XX +XXX,XX @@ | ||
85 | #include "qemu/osdep.h" | ||
86 | #include "qemu/units.h" | ||
87 | #include "qapi/error.h" | ||
88 | -#include "cpu.h" | ||
89 | #include "hw/sysbus.h" | ||
90 | #include "migration/vmstate.h" | ||
91 | #include "hw/arm/boot.h" | ||
92 | diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/arm/omap2.c | ||
95 | +++ b/hw/arm/omap2.c | ||
96 | @@ -XXX,XX +XXX,XX @@ | ||
97 | #include "qemu/osdep.h" | ||
98 | #include "qemu/error-report.h" | ||
99 | #include "qapi/error.h" | ||
100 | -#include "cpu.h" | ||
101 | #include "exec/address-spaces.h" | ||
102 | #include "sysemu/blockdev.h" | ||
103 | #include "sysemu/qtest.h" | ||
104 | diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/hw/arm/omap_sx1.c | ||
107 | +++ b/hw/arm/omap_sx1.c | ||
108 | @@ -XXX,XX +XXX,XX @@ | ||
109 | #include "hw/block/flash.h" | ||
110 | #include "sysemu/qtest.h" | ||
111 | #include "exec/address-spaces.h" | ||
112 | -#include "cpu.h" | ||
113 | #include "qemu/cutils.h" | ||
114 | #include "qemu/error-report.h" | ||
115 | |||
116 | diff --git a/hw/arm/palm.c b/hw/arm/palm.c | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/hw/arm/palm.c | ||
119 | +++ b/hw/arm/palm.c | ||
120 | @@ -XXX,XX +XXX,XX @@ | ||
121 | #include "hw/input/tsc2xxx.h" | ||
122 | #include "hw/irq.h" | ||
123 | #include "hw/loader.h" | ||
124 | -#include "cpu.h" | ||
125 | #include "qemu/cutils.h" | ||
126 | #include "qom/object.h" | ||
127 | #include "qemu/error-report.h" | ||
128 | diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/hw/arm/spitz.c | ||
131 | +++ b/hw/arm/spitz.c | ||
132 | @@ -XXX,XX +XXX,XX @@ | ||
133 | #include "hw/adc/max111x.h" | ||
134 | #include "migration/vmstate.h" | ||
135 | #include "exec/address-spaces.h" | ||
136 | -#include "cpu.h" | ||
137 | #include "qom/object.h" | ||
138 | #include "audio/audio.h" | ||
139 | |||
140 | diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c | ||
141 | index XXXXXXX..XXXXXXX 100644 | ||
142 | --- a/hw/arm/strongarm.c | ||
143 | +++ b/hw/arm/strongarm.c | ||
144 | @@ -XXX,XX +XXX,XX @@ | ||
145 | */ | 25 | */ |
146 | 26 | ||
147 | #include "qemu/osdep.h" | 27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
148 | -#include "cpu.h" | 28 | - uint32_t desc, bool fz16) |
149 | #include "hw/irq.h" | 29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) |
150 | #include "hw/qdev-properties.h" | 30 | { |
151 | #include "hw/qdev-properties-system.h" | 31 | intptr_t i, oprsz = simd_oprsz(desc); |
152 | diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c | 32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
153 | index XXXXXXX..XXXXXXX 100644 | 33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
154 | --- a/hw/arm/versatilepb.c | 34 | int is_q = oprsz == 16; |
155 | +++ b/hw/arm/versatilepb.c | 35 | uint64_t n_4, m_4; |
156 | @@ -XXX,XX +XXX,XX @@ | 36 | |
157 | 37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | |
158 | #include "qemu/osdep.h" | 38 | - n_4 = load4_f16(vn, is_q, is_2); |
159 | #include "qapi/error.h" | 39 | + /* |
160 | -#include "cpu.h" | 40 | + * Pre-load all of the f16 data, avoiding overlap issues. |
161 | #include "hw/sysbus.h" | 41 | + * Negate all inputs for AH=0 FMLSL at once. |
162 | #include "migration/vmstate.h" | 42 | + */ |
163 | #include "hw/arm/boot.h" | 43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
164 | diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c | 44 | m_4 = load4_f16(vm, is_q, is_2); |
165 | index XXXXXXX..XXXXXXX 100644 | 45 | |
166 | --- a/hw/arm/vexpress.c | 46 | - /* Negate all inputs for FMLSL at once. */ |
167 | +++ b/hw/arm/vexpress.c | 47 | - if (is_s) { |
168 | @@ -XXX,XX +XXX,XX @@ | 48 | - n_4 ^= 0x8000800080008000ull; |
169 | #include "qemu/osdep.h" | 49 | - } |
170 | #include "qapi/error.h" | 50 | - |
171 | #include "qemu/datadir.h" | 51 | for (i = 0; i < oprsz / 4; i++) { |
172 | -#include "cpu.h" | 52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); |
173 | #include "hw/sysbus.h" | 53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); |
174 | #include "hw/arm/boot.h" | 54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); |
175 | #include "hw/arm/primecell.h" | 55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); |
176 | diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c | 56 | } |
177 | index XXXXXXX..XXXXXXX 100644 | 57 | clear_tail(d, oprsz, simd_maxsz(desc)); |
178 | --- a/hw/arm/virt-acpi-build.c | 58 | } |
179 | +++ b/hw/arm/virt-acpi-build.c | 59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
180 | @@ -XXX,XX +XXX,XX @@ | 60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
181 | #include "qemu/error-report.h" | 61 | CPUARMState *env, uint32_t desc) |
182 | #include "trace.h" | 62 | { |
183 | #include "hw/core/cpu.h" | 63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, |
184 | -#include "target/arm/cpu.h" | 64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
185 | #include "hw/acpi/acpi-defs.h" | 65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
186 | #include "hw/acpi/acpi.h" | 66 | + |
187 | #include "hw/nvram/fw_cfg_acpi.h" | 67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
188 | diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c | 68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
189 | index XXXXXXX..XXXXXXX 100644 | 69 | } |
190 | --- a/hw/arm/xilinx_zynq.c | 70 | |
191 | +++ b/hw/arm/xilinx_zynq.c | 71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
192 | @@ -XXX,XX +XXX,XX @@ | 72 | CPUARMState *env, uint32_t desc) |
193 | #include "qemu/osdep.h" | 73 | { |
194 | #include "qemu/units.h" | 74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, |
195 | #include "qapi/error.h" | 75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
196 | -#include "cpu.h" | 76 | + uint64_t negx = 0; |
197 | #include "hw/sysbus.h" | 77 | + int negf = 0; |
198 | #include "hw/arm/boot.h" | 78 | + |
199 | #include "net/net.h" | 79 | + if (is_s) { |
200 | diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c | 80 | + if (env->vfp.fpcr & FPCR_AH) { |
201 | index XXXXXXX..XXXXXXX 100644 | 81 | + negf = float_muladd_negate_product; |
202 | --- a/hw/arm/xlnx-versal-virt.c | 82 | + } else { |
203 | +++ b/hw/arm/xlnx-versal-virt.c | 83 | + negx = 0x8000800080008000ull; |
204 | @@ -XXX,XX +XXX,XX @@ | 84 | + } |
205 | #include "hw/boards.h" | 85 | + } |
206 | #include "hw/sysbus.h" | 86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
207 | #include "hw/arm/fdt.h" | 87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
208 | -#include "cpu.h" | 88 | } |
209 | #include "hw/qdev-properties.h" | 89 | |
210 | #include "hw/arm/xlnx-versal.h" | 90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
211 | #include "hw/arm/boot.h" | 91 | } |
212 | diff --git a/hw/arm/z2.c b/hw/arm/z2.c | 92 | |
213 | index XXXXXXX..XXXXXXX 100644 | 93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
214 | --- a/hw/arm/z2.c | 94 | - uint32_t desc, bool fz16) |
215 | +++ b/hw/arm/z2.c | 95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) |
216 | @@ -XXX,XX +XXX,XX @@ | 96 | { |
217 | #include "hw/audio/wm8750.h" | 97 | intptr_t i, oprsz = simd_oprsz(desc); |
218 | #include "audio/audio.h" | 98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
219 | #include "exec/address-spaces.h" | 99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
220 | -#include "cpu.h" | 100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); |
221 | #include "qom/object.h" | 101 | int is_q = oprsz == 16; |
222 | #include "qapi/error.h" | 102 | uint64_t n_4; |
223 | 103 | float32 m_1; | |
224 | diff --git a/hw/arm/meson.build b/hw/arm/meson.build | 104 | |
225 | index XXXXXXX..XXXXXXX 100644 | 105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ |
226 | --- a/hw/arm/meson.build | 106 | - n_4 = load4_f16(vn, is_q, is_2); |
227 | +++ b/hw/arm/meson.build | 107 | - |
228 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c')) | 108 | - /* Negate all inputs for FMLSL at once. */ |
229 | arm_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mainstone.c')) | 109 | - if (is_s) { |
230 | arm_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c')) | 110 | - n_4 ^= 0x8000800080008000ull; |
231 | arm_ss.add(when: 'CONFIG_MUSICPAL', if_true: files('musicpal.c')) | 111 | - } |
232 | -arm_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c')) | 112 | - |
233 | arm_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c')) | 113 | + /* |
234 | arm_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c')) | 114 | + * Pre-load all of the f16 data, avoiding overlap issues. |
235 | arm_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c')) | 115 | + * Negate all inputs for AH=0 FMLSL at once. |
236 | arm_ss.add(when: 'CONFIG_NSERIES', if_true: files('nseries.c')) | 116 | + */ |
237 | -arm_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) | 117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
238 | -arm_ss.add(when: 'CONFIG_CHEETAH', if_true: files('palm.c')) | 118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); |
239 | -arm_ss.add(when: 'CONFIG_GUMSTIX', if_true: files('gumstix.c')) | 119 | |
240 | -arm_ss.add(when: 'CONFIG_SPITZ', if_true: files('spitz.c')) | 120 | for (i = 0; i < oprsz / 4; i++) { |
241 | -arm_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c')) | 121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); |
242 | arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c')) | 122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); |
243 | arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c')) | 123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); |
244 | arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c')) | 124 | } |
245 | arm_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: files('stm32vldiscovery.c')) | 125 | clear_tail(d, oprsz, simd_maxsz(desc)); |
246 | -arm_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) | 126 | } |
247 | -arm_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) | 127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
248 | -arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) | 128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
249 | arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) | 129 | CPUARMState *env, uint32_t desc) |
250 | arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) | 130 | { |
251 | 131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | |
252 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) | 132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
253 | arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) | 133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
254 | arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) | 134 | + |
255 | arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) | 135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
256 | -arm_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c', 'omap2.c')) | 136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
257 | -arm_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c')) | 137 | } |
258 | +arm_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c')) | 138 | |
259 | arm_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c')) | 139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
260 | arm_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c')) | 140 | CPUARMState *env, uint32_t desc) |
261 | arm_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 'bananapi_m2u.c')) | 141 | { |
262 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c')) | 142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, |
263 | arm_ss.add(when: 'CONFIG_XEN', if_true: files('xen_arm.c')) | 143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
264 | 144 | + uint64_t negx = 0; | |
265 | system_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c')) | 145 | + int negf = 0; |
266 | +system_ss.add(when: 'CONFIG_CHEETAH', if_true: files('palm.c')) | 146 | + |
267 | +system_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) | 147 | + if (is_s) { |
268 | system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c')) | 148 | + if (env->vfp.fpcr & FPCR_AH) { |
269 | +system_ss.add(when: 'CONFIG_GUMSTIX', if_true: files('gumstix.c')) | 149 | + negf = float_muladd_negate_product; |
270 | +system_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c')) | 150 | + } else { |
271 | +system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap2.c')) | 151 | + negx = 0x8000800080008000ull; |
272 | system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c')) | 152 | + } |
273 | +system_ss.add(when: 'CONFIG_SPITZ', if_true: files('spitz.c')) | 153 | + } |
274 | +system_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c')) | 154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
275 | +system_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) | 155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
276 | system_ss.add(when: 'CONFIG_TOSA', if_true: files('tosa.c')) | 156 | } |
277 | +system_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) | 157 | |
278 | +system_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) | ||
279 | +system_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c')) | ||
280 | |||
281 | hw_arch += {'arm': arm_ss} | ||
282 | -- | 158 | -- |
283 | 2.34.1 | 159 | 2.34.1 |
284 | |||
285 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | include/hw/arm/xlnx-versal.h uses the ARMCPU structure which | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | is defined in the "target/arm/cpu.h" header. Include it in | 4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 |
5 | order to avoid this error when refactoring unrelated headers: | 5 | and by muladd flags when AH=1. |
6 | 6 | ||
7 | In file included from hw/arm/xlnx-versal-virt.c:20: | 7 | Since we have the CPUARMState* in the helper anyway, we can |
8 | include/hw/arm/xlnx-versal.h:62:23: error: array has incomplete element type 'ARMCPU' (aka 'struct ArchCPU') | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
9 | ARMCPU cpu[XLNX_VERSAL_NR_ACPUS]; | 9 | FPCR.AH value via the SIMD data word. |
10 | ^ | ||
11 | 10 | ||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org |
14 | Message-id: 20240118200643.29037-5-philmd@linaro.org | 13 | [PMM: commit message tweaked] |
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 16 | --- |
17 | include/hw/arm/xlnx-versal.h | 1 + | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
18 | 1 file changed, 1 insertion(+) | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
19 | 19 | ||
20 | diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/hw/arm/xlnx-versal.h | 22 | --- a/target/arm/tcg/vec_helper.c |
23 | +++ b/include/hw/arm/xlnx-versal.h | 23 | +++ b/target/arm/tcg/vec_helper.c |
24 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
25 | #include "hw/net/xlnx-versal-canfd.h" | 25 | CPUARMState *env, uint32_t desc) |
26 | #include "hw/misc/xlnx-versal-cfu.h" | 26 | { |
27 | #include "hw/misc/xlnx-versal-cframe-reg.h" | 27 | intptr_t i, j, oprsz = simd_oprsz(desc); |
28 | +#include "target/arm/cpu.h" | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
29 | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | |
30 | #define TYPE_XLNX_VERSAL "xlnx-versal" | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
31 | OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) | 31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); |
32 | float_status *status = &env->vfp.fp_status_a64; | ||
33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
34 | + int negx = 0, negf = 0; | ||
35 | + | ||
36 | + if (is_s) { | ||
37 | + if (env->vfp.fpcr & FPCR_AH) { | ||
38 | + negf = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + negx = 0x8000; | ||
41 | + } | ||
42 | + } | ||
43 | |||
44 | for (i = 0; i < oprsz; i += 16) { | ||
45 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
47 | |||
48 | for (j = 0; j < 16; j += sizeof(float32)) { | ||
49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; | ||
50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; | ||
51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
52 | float32 aa = *(float32 *)(va + H1_4(i + j)); | ||
53 | |||
54 | *(float32 *)(vd + H1_4(i + j)) = | ||
55 | - float32_muladd(nn, mm, aa, 0, status); | ||
56 | + float32_muladd(nn, mm, aa, negf, status); | ||
57 | } | ||
58 | } | ||
59 | } | ||
32 | -- | 60 | -- |
33 | 2.34.1 | 61 | 2.34.1 |
34 | |||
35 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Now that we can access the M-profile bank index | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | definitions from the target-agnostic "cpu-qom.h" | 4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 |
5 | header, we don't need the huge "cpu.h" anymore | 5 | and by muladd flags when AH=1. |
6 | (except in hw/arm/armv7m.c). Reduce its inclusion | ||
7 | to the source unit. | ||
8 | 6 | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
11 | Message-id: 20240118200643.29037-17-philmd@linaro.org | 9 | FPCR.AH value via the SIMD data word. |
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org | ||
13 | [PMM: tweaked commit message] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 16 | --- |
14 | include/hw/intc/armv7m_nvic.h | 2 +- | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
15 | hw/arm/armv7m.c | 1 + | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
16 | 2 files changed, 2 insertions(+), 1 deletion(-) | ||
17 | 19 | ||
18 | diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/hw/intc/armv7m_nvic.h | 22 | --- a/target/arm/tcg/vec_helper.c |
21 | +++ b/include/hw/intc/armv7m_nvic.h | 23 | +++ b/target/arm/tcg/vec_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
23 | #ifndef HW_ARM_ARMV7M_NVIC_H | 25 | CPUARMState *env, uint32_t desc) |
24 | #define HW_ARM_ARMV7M_NVIC_H | 26 | { |
25 | 27 | intptr_t i, oprsz = simd_oprsz(desc); | |
26 | -#include "target/arm/cpu.h" | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
27 | +#include "target/arm/cpu-qom.h" | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
28 | #include "hw/sysbus.h" | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
29 | #include "hw/timer/armv7m_systick.h" | 31 | float_status *status = &env->vfp.fp_status_a64; |
30 | #include "qom/object.h" | 32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
31 | diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c | 33 | + int negx = 0, negf = 0; |
32 | index XXXXXXX..XXXXXXX 100644 | 34 | + |
33 | --- a/hw/arm/armv7m.c | 35 | + if (is_s) { |
34 | +++ b/hw/arm/armv7m.c | 36 | + if (env->vfp.fpcr & FPCR_AH) { |
35 | @@ -XXX,XX +XXX,XX @@ | 37 | + negf = float_muladd_negate_product; |
36 | #include "qemu/module.h" | 38 | + } else { |
37 | #include "qemu/log.h" | 39 | + negx = 0x8000; |
38 | #include "target/arm/idau.h" | 40 | + } |
39 | +#include "target/arm/cpu.h" | 41 | + } |
40 | #include "target/arm/cpu-features.h" | 42 | |
41 | #include "migration/vmstate.h" | 43 | for (i = 0; i < oprsz; i += sizeof(float32)) { |
44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; | ||
45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
49 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
50 | |||
51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
53 | } | ||
54 | } | ||
42 | 55 | ||
43 | -- | 56 | -- |
44 | 2.34.1 | 57 | 2.34.1 |
45 | |||
46 | diff view generated by jsdifflib |
1 | The const_le64() macro introduced in commit 845d80a8c7b187 turns out | 1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we |
---|---|---|---|
2 | to have a bug which means that on big-endian systems the compiler | 2 | can enable FEAT_AFP for '-cpu max', and document that we support it. |
3 | complains if the argument isn't already a 64-bit type. This hasn't | ||
4 | caused a problem yet, because there are no in-tree uses, but it | ||
5 | means it's not possible for anybody to add one without it failing CI. | ||
6 | 3 | ||
7 | This example is from an attempted use of it with the argument '0', | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | from the s390 CI runner's gcc: | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | ||
7 | docs/system/arm/emulation.rst | 1 + | ||
8 | target/arm/tcg/cpu64.c | 1 + | ||
9 | 2 files changed, 2 insertions(+) | ||
9 | 10 | ||
10 | ../block/blklogwrites.c: In function ‘blk_log_writes_co_do_log’: | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
11 | ../include/qemu/bswap.h:148:36: error: left shift count >= width of | ||
12 | type [-Werror=shift-count-overflow] | ||
13 | 148 | ((((_x) & 0x00000000000000ffU) << 56) | \ | ||
14 | | ^~ | ||
15 | ../block/blklogwrites.c:409:27: note: in expansion of macro ‘const_le64’ | ||
16 | 409 | .nr_entries = const_le64(0), | ||
17 | | ^~~~~~~~~~ | ||
18 | ../include/qemu/bswap.h:149:36: error: left shift count >= width of | ||
19 | type [-Werror=shift-count-overflow] | ||
20 | 149 | (((_x) & 0x000000000000ff00U) << 40) | \ | ||
21 | | ^~ | ||
22 | ../block/blklogwrites.c:409:27: note: in expansion of macro ‘const_le64’ | ||
23 | 409 | .nr_entries = const_le64(0), | ||
24 | | ^~~~~~~~~~ | ||
25 | cc1: all warnings being treated as errors | ||
26 | |||
27 | Fix this by making all the constants in the macro have the ULL | ||
28 | suffix. This will cause them all to be 64-bit integers, which means | ||
29 | the result of the logical & will also be an unsigned 64-bit type, | ||
30 | even if the input to the macro is a smaller type, and so the shifts | ||
31 | will be in range. | ||
32 | |||
33 | Fixes: 845d80a8c7b187 ("qemu/bswap: Add const_le64()") | ||
34 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
35 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
36 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
37 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
38 | Reviewed-by: Ira Weiny <ira.weiny@intel.com> | ||
39 | Message-id: 20240122173735.472951-1-peter.maydell@linaro.org | ||
40 | --- | ||
41 | include/qemu/bswap.h | 16 ++++++++-------- | ||
42 | 1 file changed, 8 insertions(+), 8 deletions(-) | ||
43 | |||
44 | diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h | ||
45 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/include/qemu/bswap.h | 13 | --- a/docs/system/arm/emulation.rst |
47 | +++ b/include/qemu/bswap.h | 14 | +++ b/docs/system/arm/emulation.rst |
48 | @@ -XXX,XX +XXX,XX @@ CPU_CONVERT(le, 64, uint64_t) | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
49 | */ | 16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) |
50 | #if HOST_BIG_ENDIAN | 17 | - FEAT_AdvSIMD (Advanced SIMD Extension) |
51 | # define const_le64(_x) \ | 18 | - FEAT_AES (AESD and AESE instructions) |
52 | - ((((_x) & 0x00000000000000ffU) << 56) | \ | 19 | +- FEAT_AFP (Alternate floating-point behavior) |
53 | - (((_x) & 0x000000000000ff00U) << 40) | \ | 20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) |
54 | - (((_x) & 0x0000000000ff0000U) << 24) | \ | 21 | - FEAT_ASID16 (16 bit ASID) |
55 | - (((_x) & 0x00000000ff000000U) << 8) | \ | 22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) |
56 | - (((_x) & 0x000000ff00000000U) >> 8) | \ | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
57 | - (((_x) & 0x0000ff0000000000U) >> 24) | \ | 24 | index XXXXXXX..XXXXXXX 100644 |
58 | - (((_x) & 0x00ff000000000000U) >> 40) | \ | 25 | --- a/target/arm/tcg/cpu64.c |
59 | - (((_x) & 0xff00000000000000U) >> 56)) | 26 | +++ b/target/arm/tcg/cpu64.c |
60 | + ((((_x) & 0x00000000000000ffULL) << 56) | \ | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
61 | + (((_x) & 0x000000000000ff00ULL) << 40) | \ | 28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ |
62 | + (((_x) & 0x0000000000ff0000ULL) << 24) | \ | 29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ |
63 | + (((_x) & 0x00000000ff000000ULL) << 8) | \ | 30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ |
64 | + (((_x) & 0x000000ff00000000ULL) >> 8) | \ | 31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ |
65 | + (((_x) & 0x0000ff0000000000ULL) >> 24) | \ | 32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ |
66 | + (((_x) & 0x00ff000000000000ULL) >> 40) | \ | 33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ |
67 | + (((_x) & 0xff00000000000000ULL) >> 56)) | 34 | cpu->isar.id_aa64mmfr1 = t; |
68 | # define const_le32(_x) \ | ||
69 | ((((_x) & 0x000000ffU) << 24) | \ | ||
70 | (((_x) & 0x0000ff00U) << 8) | \ | ||
71 | -- | 35 | -- |
72 | 2.34.1 | 36 | 2.34.1 |
73 | |||
74 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | FEAT_RPRES implements an "increased precision" variant of the single |
---|---|---|---|
2 | precision FRECPE and FRSQRTE instructions from an 8 bit to a 12 | ||
3 | bit mantissa. This applies only when FPCR.AH == 1. Note that the | ||
4 | halfprec and double versions of these insns retain the 8 bit | ||
5 | precision regardless. | ||
2 | 6 | ||
3 | target/arm/cpu-features.h uses the FIELD_EX32() macro | 7 | In this commit we add all the plumbing to make these instructions |
4 | defined in "hw/registerfields.h". Include it in order | 8 | call a new helper function when the increased-precision is in |
5 | to avoid this error when refactoring unrelated headers: | 9 | effect. In the following commit we will provide the actual change |
10 | in behaviour in the helpers. | ||
6 | 11 | ||
7 | target/arm/cpu-features.h:44:12: error: call to undeclared function 'FIELD_EX32'; | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] | ||
9 | return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; | ||
10 | ^ | ||
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
14 | Message-id: 20240118200643.29037-6-philmd@linaro.org | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | 14 | --- |
17 | target/arm/cpu-features.h | 2 ++ | 15 | target/arm/cpu-features.h | 5 +++++ |
18 | 1 file changed, 2 insertions(+) | 16 | target/arm/helper.h | 4 ++++ |
17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- | ||
18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- | ||
19 | target/arm/tcg/vec_helper.c | 2 ++ | ||
20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- | ||
21 | 6 files changed, 85 insertions(+), 8 deletions(-) | ||
19 | 22 | ||
20 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | 23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
21 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/arm/cpu-features.h | 25 | --- a/target/arm/cpu-features.h |
23 | +++ b/target/arm/cpu-features.h | 26 | +++ b/target/arm/cpu-features.h |
24 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) |
25 | #ifndef TARGET_ARM_FEATURES_H | 28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); |
26 | #define TARGET_ARM_FEATURES_H | 29 | } |
27 | 30 | ||
28 | +#include "hw/registerfields.h" | 31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) |
29 | + | 32 | +{ |
33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); | ||
34 | +} | ||
35 | + | ||
36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) | ||
37 | { | ||
38 | /* We always set the AdvSIMD and FP fields identically. */ | ||
39 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/helper.h | ||
42 | +++ b/target/arm/helper.h | ||
43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) | ||
44 | |||
45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) | ||
54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) | ||
55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
56 | |||
57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
61 | |||
62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
66 | |||
67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/translate-a64.c | ||
71 | +++ b/target/arm/tcg/translate-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
73 | gen_helper_recpe_f32, | ||
74 | gen_helper_recpe_f64, | ||
75 | }; | ||
76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
77 | +static const FPScalar1 f_scalar_frecpe_rpres = { | ||
78 | + gen_helper_recpe_f16, | ||
79 | + gen_helper_recpe_rpres_f32, | ||
80 | + gen_helper_recpe_f64, | ||
81 | +}; | ||
82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, | ||
83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) | ||
85 | |||
86 | static const FPScalar1 f_scalar_frecpx = { | ||
87 | gen_helper_frecpx_f16, | ||
88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { | ||
89 | gen_helper_rsqrte_f32, | ||
90 | gen_helper_rsqrte_f64, | ||
91 | }; | ||
92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { | ||
94 | + gen_helper_rsqrte_f16, | ||
95 | + gen_helper_rsqrte_rpres_f32, | ||
96 | + gen_helper_rsqrte_f64, | ||
97 | +}; | ||
98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, | ||
99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) | ||
101 | |||
102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
103 | { | ||
104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
105 | gen_helper_gvec_frecpe_s, | ||
106 | gen_helper_gvec_frecpe_d, | ||
107 | }; | ||
108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { | ||
110 | + gen_helper_gvec_frecpe_h, | ||
111 | + gen_helper_gvec_frecpe_rpres_s, | ||
112 | + gen_helper_gvec_frecpe_d, | ||
113 | +}; | ||
114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) | ||
116 | |||
117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
118 | gen_helper_gvec_frsqrte_h, | ||
119 | gen_helper_gvec_frsqrte_s, | ||
120 | gen_helper_gvec_frsqrte_d, | ||
121 | }; | ||
122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { | ||
124 | + gen_helper_gvec_frsqrte_h, | ||
125 | + gen_helper_gvec_frsqrte_rpres_s, | ||
126 | + gen_helper_gvec_frsqrte_d, | ||
127 | +}; | ||
128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) | ||
130 | |||
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
162 | |||
30 | /* | 163 | /* |
31 | * Naming convention for isar_feature functions: | 164 | *** SVE Floating Point Compare with Zero Group |
32 | * Functions which test 32-bit ID registers should have _aa32_ in | 165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/arm/tcg/vec_helper.c | ||
168 | +++ b/target/arm/tcg/vec_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ | ||
170 | |||
171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) | ||
172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) | ||
173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) | ||
174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | ||
175 | |||
176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) | ||
177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) | ||
178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) | ||
179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) | ||
180 | |||
181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) | ||
182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/vfp_helper.c | ||
185 | +++ b/target/arm/vfp_helper.c | ||
186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
187 | return make_float16(f16_val); | ||
188 | } | ||
189 | |||
190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
191 | +/* | ||
192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant | ||
193 | + * which is used when FPCR.AH == 1. | ||
194 | + */ | ||
195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
196 | { | ||
197 | float32 f32 = float32_squash_input_denormal(input, fpst); | ||
198 | uint32_t f32_val = float32_val(f32); | ||
199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
200 | return make_float32(f32_val); | ||
201 | } | ||
202 | |||
203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
204 | +{ | ||
205 | + return do_recpe_f32(input, fpst, false); | ||
206 | +} | ||
207 | + | ||
208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) | ||
209 | +{ | ||
210 | + return do_recpe_f32(input, fpst, true); | ||
211 | +} | ||
212 | + | ||
213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
214 | { | ||
215 | float64 f64 = float64_squash_input_denormal(input, fpst); | ||
216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
217 | return make_float16(val); | ||
218 | } | ||
219 | |||
220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
221 | +/* | ||
222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant | ||
223 | + * which is used when FPCR.AH == 1. | ||
224 | + */ | ||
225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
226 | { | ||
227 | float32 f32 = float32_squash_input_denormal(input, s); | ||
228 | uint32_t val = float32_val(f32); | ||
229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
230 | return make_float32(val); | ||
231 | } | ||
232 | |||
233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
234 | +{ | ||
235 | + return do_rsqrte_f32(input, s, false); | ||
236 | +} | ||
237 | + | ||
238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | ||
239 | +{ | ||
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
33 | -- | 246 | -- |
34 | 2.34.1 | 247 | 2.34.1 |
35 | |||
36 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | Implement the increased precision variation of FRECPE. In the |
---|---|---|---|
2 | pseudocode this corresponds to the handling of the | ||
3 | "increasedprecision" boolean in the FPRecipEstimate() and | ||
4 | RecipEstimate() functions. | ||
2 | 5 | ||
3 | target/arm/cpregs.h uses the FIELD() macro defined in | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | "hw/registerfields.h". Include it in order to avoid errors when | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | refactoring unrelated headers: | 8 | --- |
9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ | ||
10 | 1 file changed, 46 insertions(+), 8 deletions(-) | ||
6 | 11 | ||
7 | target/arm/cpregs.h:347:30: error: expected identifier | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
8 | FIELD(HFGRTR_EL2, AFSR0_EL1, 0, 1) | ||
9 | ^ | ||
10 | |||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-id: 20240118200643.29037-7-philmd@linaro.org | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | --- | ||
16 | target/arm/cpregs.h | 2 ++ | ||
17 | 1 file changed, 2 insertions(+) | ||
18 | |||
19 | diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/cpregs.h | 14 | --- a/target/arm/vfp_helper.c |
22 | +++ b/target/arm/cpregs.h | 15 | +++ b/target/arm/vfp_helper.c |
23 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
24 | #ifndef TARGET_ARM_CPREGS_H | 17 | return r; |
25 | #define TARGET_ARM_CPREGS_H | 18 | } |
26 | 19 | ||
27 | +#include "hw/registerfields.h" | 20 | +/* |
21 | + * Increased precision version: | ||
22 | + * input is a 13 bit fixed point number | ||
23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. | ||
24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 | ||
25 | + */ | ||
26 | +static int recip_estimate_incprec(int input) | ||
27 | +{ | ||
28 | + int a, b, r; | ||
29 | + assert(2048 <= input && input < 4096); | ||
30 | + a = (input * 2) + 1; | ||
31 | + /* | ||
32 | + * The pseudocode expresses this as an operation on infinite | ||
33 | + * precision reals where it calculates 2^25 / a and then looks | ||
34 | + * at the error between that and the rounded-down-to-integer | ||
35 | + * value to see if it should instead round up. We instead | ||
36 | + * follow the same approach as the pseudocode for the 8-bit | ||
37 | + * precision version, and calculate (2 * (2^25 / a)) as an | ||
38 | + * integer so we can do the "add one and halve" to round it. | ||
39 | + * So the 1 << 26 here is correct. | ||
40 | + */ | ||
41 | + b = (1 << 26) / a; | ||
42 | + r = (b + 1) >> 1; | ||
43 | + assert(4096 <= r && r < 8192); | ||
44 | + return r; | ||
45 | +} | ||
28 | + | 46 | + |
29 | /* | 47 | /* |
30 | * ARMCPRegInfo type field bits: | 48 | * Common wrapper to call recip_estimate |
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
51 | * callee. | ||
31 | */ | 52 | */ |
53 | |||
54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, | ||
56 | + bool increasedprecision) | ||
57 | { | ||
58 | uint32_t scaled, estimate; | ||
59 | uint64_t result_frac; | ||
60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
61 | } | ||
62 | } | ||
63 | |||
64 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
66 | - estimate = recip_estimate(scaled); | ||
67 | + if (increasedprecision) { | ||
68 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
70 | + estimate = recip_estimate_incprec(scaled); | ||
71 | + } else { | ||
72 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
74 | + estimate = recip_estimate(scaled); | ||
75 | + } | ||
76 | |||
77 | result_exp = exp_off - *exp; | ||
78 | - result_frac = deposit64(0, 44, 8, estimate); | ||
79 | + if (increasedprecision) { | ||
80 | + result_frac = deposit64(0, 40, 12, estimate); | ||
81 | + } else { | ||
82 | + result_frac = deposit64(0, 44, 8, estimate); | ||
83 | + } | ||
84 | if (result_exp == 0) { | ||
85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); | ||
86 | } else if (result_exp == -1) { | ||
87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
88 | } | ||
89 | |||
90 | f64_frac = call_recip_estimate(&f16_exp, 29, | ||
91 | - ((uint64_t) f16_frac) << (52 - 10)); | ||
92 | + ((uint64_t) f16_frac) << (52 - 10), false); | ||
93 | |||
94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ | ||
95 | f16_val = deposit32(0, 15, 1, f16_sign); | ||
96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
97 | } | ||
98 | |||
99 | f64_frac = call_recip_estimate(&f32_exp, 253, | ||
100 | - ((uint64_t) f32_frac) << (52 - 23)); | ||
101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); | ||
102 | |||
103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ | ||
104 | f32_val = deposit32(0, 31, 1, f32_sign); | ||
105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); | ||
107 | } | ||
108 | |||
109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); | ||
110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); | ||
111 | |||
112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ | ||
113 | f64_val = deposit64(0, 63, 1, f64_sign); | ||
32 | -- | 114 | -- |
33 | 2.34.1 | 115 | 2.34.1 |
34 | |||
35 | diff view generated by jsdifflib |
1 | error_report() strings should not include trailing newlines; remove | 1 | Implement the increased precision variation of FRSQRTE. In the |
---|---|---|---|
2 | the newline from the error we print when devices won't fit into the | 2 | pseudocode this corresponds to the handling of the |
3 | address space of the CPU. | 3 | "increasedprecision" boolean in the FPRSqrtEstimate() and |
4 | 4 | RecipSqrtEstimate() functions. | |
5 | This commit also fixes the accidental hardcoded tabs that were in | ||
6 | this line, since we have to touch the line anyway. | ||
7 | 5 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20240118131649.2726375-1-peter.maydell@linaro.org | ||
11 | --- | 8 | --- |
12 | hw/arm/virt.c | 4 ++-- | 9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- |
13 | 1 file changed, 2 insertions(+), 2 deletions(-) | 10 | 1 file changed, 64 insertions(+), 13 deletions(-) |
14 | 11 | ||
15 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/arm/virt.c | 14 | --- a/target/arm/vfp_helper.c |
18 | +++ b/hw/arm/virt.c | 15 | +++ b/target/arm/vfp_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) | 16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) |
20 | /* Base address of the high IO region */ | 17 | return estimate; |
21 | memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB); | 18 | } |
22 | if (memtop > BIT_ULL(pa_bits)) { | 19 | |
23 | - error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes\n", | 20 | +static int do_recip_sqrt_estimate_incprec(int a) |
24 | - pa_bits, memtop - BIT_ULL(pa_bits)); | 21 | +{ |
25 | + error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes", | 22 | + /* |
26 | + pa_bits, memtop - BIT_ULL(pa_bits)); | 23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate |
27 | exit(EXIT_FAILURE); | 24 | + * in terms of an infinite-precision floating point calculation of a |
25 | + * square root. We implement this using the same kind of pure integer | ||
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
27 | + */ | ||
28 | + int64_t b, estimate; | ||
29 | |||
30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
31 | + assert(1024 <= a && a < 4096); | ||
32 | + if (a < 2048) { | ||
33 | + a = a * 2 + 1; | ||
34 | + } else { | ||
35 | + a = (a >> 1) << 1; | ||
36 | + a = (a + 1) * 2; | ||
37 | + } | ||
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
43 | + | ||
44 | + assert(4096 <= estimate && estimate < 8192); | ||
45 | + | ||
46 | + return estimate; | ||
47 | +} | ||
48 | + | ||
49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, | ||
50 | + bool increasedprecision) | ||
51 | { | ||
52 | int estimate; | ||
53 | uint32_t scaled; | ||
54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
55 | frac = extract64(frac, 0, 51) << 1; | ||
28 | } | 56 | } |
29 | if (base < device_memory_base) { | 57 | |
58 | - if (*exp & 1) { | ||
59 | - /* scaled = UInt('01':fraction<51:45>) */ | ||
60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
61 | + if (increasedprecision) { | ||
62 | + if (*exp & 1) { | ||
63 | + /* scaled = UInt('01':fraction<51:42>) */ | ||
64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); | ||
65 | + } else { | ||
66 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
90 | + } | ||
91 | } | ||
92 | |||
93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
95 | |||
96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); | ||
97 | |||
98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); | ||
99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); | ||
100 | |||
101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ | ||
102 | val = deposit32(0, 15, 1, f16_sign); | ||
103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
104 | |||
105 | f64_frac = ((uint64_t) f32_frac) << 29; | ||
106 | |||
107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); | ||
108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); | ||
109 | |||
110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ | ||
111 | + /* | ||
112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) | ||
113 | + * or for increased precision | ||
114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) | ||
115 | + */ | ||
116 | val = deposit32(0, 31, 1, f32_sign); | ||
117 | val = deposit32(val, 23, 8, f32_exp); | ||
118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
119 | + if (rpres) { | ||
120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); | ||
121 | + } else { | ||
122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
123 | + } | ||
124 | return make_float32(val); | ||
125 | } | ||
126 | |||
127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
128 | return float64_zero; | ||
129 | } | ||
130 | |||
131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); | ||
132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | ||
133 | |||
134 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ | ||
135 | val = deposit64(0, 61, 1, f64_sign); | ||
30 | -- | 136 | -- |
31 | 2.34.1 | 137 | 2.34.1 |
32 | |||
33 | diff view generated by jsdifflib |
1 | In arm_deliver_fault() we check for whether the fault is caused | 1 | Now the emulation is complete, we can enable FEAT_RPRES for the 'max' |
---|---|---|---|
2 | by a data abort due to an access to a FEAT_NV2 sysreg in the | 2 | CPU type. |
3 | memory pointed to by the VNCR. Unfortunately part of the | ||
4 | condition checks the wrong argument to the function, meaning | ||
5 | that it would spuriously trigger, resulting in some instruction | ||
6 | aborts being taken to the wrong EL and reported incorrectly. | ||
7 | 3 | ||
8 | Use the right variable in the condition. | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | docs/system/arm/emulation.rst | 1 + | ||
8 | target/arm/tcg/cpu64.c | 1 + | ||
9 | 2 files changed, 2 insertions(+) | ||
9 | 10 | ||
10 | Fixes: 674e5345275d425 ("target/arm: Report VNCR_EL2 based faults correctly") | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
11 | Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> | ||
14 | Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> | ||
15 | Message-id: 20240116165605.2523055-1-peter.maydell@linaro.org | ||
16 | --- | ||
17 | target/arm/tcg/tlb_helper.c | 2 +- | ||
18 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/arm/tcg/tlb_helper.c | 13 | --- a/docs/system/arm/emulation.rst |
23 | +++ b/target/arm/tcg/tlb_helper.c | 14 | +++ b/docs/system/arm/emulation.rst |
24 | @@ -XXX,XX +XXX,XX @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
25 | * (and indeed syndrome does not have the EC field in it, | 16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) |
26 | * because we masked that out in disas_set_insn_syndrome()) | 17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) |
27 | */ | 18 | - FEAT_RNG (Random number generator) |
28 | - bool is_vncr = (mmu_idx != MMU_INST_FETCH) && | 19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) |
29 | + bool is_vncr = (access_type != MMU_INST_FETCH) && | 20 | - FEAT_S2FWB (Stage 2 forced Write-Back) |
30 | (env->exception.syndrome & ARM_EL_VNCR); | 21 | - FEAT_SB (Speculation Barrier) |
31 | 22 | - FEAT_SEL2 (Secure EL2) | |
32 | if (is_vncr) { | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/tcg/cpu64.c | ||
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | cpu->isar.id_aa64isar1 = t; | ||
29 | |||
30 | t = cpu->isar.id_aa64isar2; | ||
31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ | ||
32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ | ||
33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ | ||
34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ | ||
33 | -- | 35 | -- |
34 | 2.34.1 | 36 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | hw/arm/smmuv3-internal.h uses the REG32() and FIELD() | 3 | Move ARMFPStatusFlavour to cpu.h so that it can be used to index
4 | macros defined in "hw/registerfields.h". Include it in | 4 | the new vfp.fp_status[] array. For now, place the array in an anonymous
5 | order to avoid errors when refactoring unrelated headers: | 5 | union with the existing structures. Adjust the order
6 | 6 | of the existing structures to match the enum. | |
7 | In file included from ../../hw/arm/smmuv3.c:34: | 7 | |
8 | hw/arm/smmuv3-internal.h:36:28: error: expected identifier | 8 | Simplify fpstatus_ptr() using the new array. |
9 | REG32(IDR0, 0x0) | 9 | |
10 | ^ | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | hw/arm/smmuv3-internal.h:37:5: error: expected function body after function declarator | 11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
12 | FIELD(IDR0, S2P, 0 , 1) | 12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org |
13 | ^ | ||
14 | |||
15 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | Message-id: 20240118200643.29037-4-philmd@linaro.org | ||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | --- | 14 | --- |
20 | hw/arm/smmuv3-internal.h | 1 + | 15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- |
21 | 1 file changed, 1 insertion(+) | 16 | target/arm/tcg/translate.h | 64 +------------------- |
22 | 17 | 2 files changed, 70 insertions(+), 113 deletions(-) | |
23 | diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h | 18 | |
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/hw/arm/smmuv3-internal.h | 21 | --- a/target/arm/cpu.h |
26 | +++ b/hw/arm/smmuv3-internal.h | 22 | +++ b/target/arm/cpu.h |
27 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; |
28 | #ifndef HW_ARM_SMMUV3_INTERNAL_H | 24 | |
29 | #define HW_ARM_SMMUV3_INTERNAL_H | 25 | typedef struct NVICState NVICState; |
30 | 26 | ||
31 | +#include "hw/registerfields.h" | 27 | +/* |
32 | #include "hw/arm/smmu-common.h" | 28 | + * Enum for indexing vfp.fp_status[]. |
33 | 29 | + * | |
34 | typedef enum SMMUTranslationStatus { | 30 | + * FPST_A32: is the "normal" fp status for AArch32 insns |
31 | + * FPST_A64: is the "normal" fp status for AArch64 insns | ||
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest, is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/tcg/translate.h | ||
159 | +++ b/target/arm/tcg/translate.h | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | ||
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
162 | } | ||
163 | |||
164 | -/* | ||
165 | - * Enum for argument to fpstatus_ptr(). | ||
166 | - */ | ||
167 | -typedef enum ARMFPStatusFlavour { | ||
168 | - FPST_A32, | ||
169 | - FPST_A64, | ||
170 | - FPST_A32_F16, | ||
171 | - FPST_A64_F16, | ||
172 | - FPST_AH, | ||
173 | - FPST_AH_F16, | ||
174 | - FPST_STD, | ||
175 | - FPST_STD_F16, | ||
176 | -} ARMFPStatusFlavour; | ||
177 | - | ||
178 | /** | ||
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
180 | * | ||
181 | * We have multiple softfloat float_status fields in the Arm CPU state struct | ||
182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | ||
183 | * been set up to point to the requested field in the CPU state struct. | ||
184 | - * The options are: | ||
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
204 | */ | ||
205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
206 | { | ||
207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | ||
208 | - int offset; | ||
209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | ||
210 | |||
211 | - switch (flavour) { | ||
212 | - case FPST_A32: | ||
213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | ||
214 | - break; | ||
215 | - case FPST_A64: | ||
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
236 | - default: | ||
237 | - g_assert_not_reached(); | ||
238 | - } | ||
239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | ||
240 | return statusptr; | ||
241 | } | ||
35 | -- | 242 | -- |
36 | 2.34.1 | 243 | 2.34.1 |
37 | 244 | ||
38 | 245 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The ARMv7M QDev container accesses the QDev SysTickState | 3 | Replace with fp_status[FPST_STD_F16]. |
4 | by its secure/non-secure bank index. In order to make | ||
5 | the "hw/intc/armv7m_nvic.h" header target-agnostic in | ||
6 | the next commit, first move the M-profile bank index | ||
7 | definitions to "target/arm/cpu-qom.h". | ||
8 | 4 | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
11 | Message-id: 20240118200643.29037-16-philmd@linaro.org | 7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org |
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 9 | --- |
14 | target/arm/cpu-qom.h | 15 +++++++++++++++ | 10 | target/arm/cpu.h | 1 - |
15 | target/arm/cpu.h | 15 --------------- | 11 | target/arm/cpu.c | 4 ++-- |
16 | 2 files changed, 15 insertions(+), 15 deletions(-) | 12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ |
13 | target/arm/vfp_helper.c | 8 ++++---- | ||
14 | 4 files changed, 18 insertions(+), 19 deletions(-) | ||
17 | 15 | ||
18 | diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/cpu-qom.h | ||
21 | +++ b/target/arm/cpu-qom.h | ||
22 | @@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU, | ||
23 | #define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU | ||
24 | #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) | ||
25 | |||
26 | +/* For M profile, some registers are banked secure vs non-secure; | ||
27 | + * these are represented as a 2-element array where the first element | ||
28 | + * is the non-secure copy and the second is the secure copy. | ||
29 | + * When the CPU does not implement the security extension then | ||
30 | + * only the first element is used. | ||
31 | + * This means that the copy for the current security state can be | ||
32 | + * accessed via env->registerfield[env->v7m.secure] (whether the security | ||
33 | + * extension is implemented or not). | ||
34 | + */ | ||
35 | +enum { | ||
36 | + M_REG_NS = 0, | ||
37 | + M_REG_S = 1, | ||
38 | + M_REG_NUM_BANKS = 2, | ||
39 | +}; | ||
40 | + | ||
41 | #endif | ||
42 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
43 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/target/arm/cpu.h | 18 | --- a/target/arm/cpu.h |
45 | +++ b/target/arm/cpu.h | 19 | +++ b/target/arm/cpu.h |
46 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
47 | #define ARMV7M_EXCP_PENDSV 14 | 21 | float_status ah_fp_status; |
48 | #define ARMV7M_EXCP_SYSTICK 15 | 22 | float_status ah_fp_status_f16; |
49 | 23 | float_status standard_fp_status; | |
50 | -/* For M profile, some registers are banked secure vs non-secure; | 24 | - float_status standard_fp_status_f16; |
51 | - * these are represented as a 2-element array where the first element | 25 | }; |
52 | - * is the non-secure copy and the second is the secure copy. | 26 | }; |
53 | - * When the CPU does not implement the security extension then | 27 | |
54 | - * only the first element is used. | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
55 | - * This means that the copy for the current security state can be | 29 | index XXXXXXX..XXXXXXX 100644 |
56 | - * accessed via env->registerfield[env->v7m.secure] (whether the security | 30 | --- a/target/arm/cpu.c |
57 | - * extension is implemented or not). | 31 | +++ b/target/arm/cpu.c |
58 | - */ | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
59 | -enum { | 33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); |
60 | - M_REG_NS = 0, | 34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
61 | - M_REG_S = 1, | 35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); |
62 | - M_REG_NUM_BANKS = 2, | 36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); |
63 | -}; | 37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
64 | - | 38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
65 | /* ARM-specific interrupt pending bits. */ | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
66 | #define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1 | 40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
67 | #define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_EXT_2 | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/tcg/mve_helper.c | ||
51 | +++ b/target/arm/tcg/mve_helper.c | ||
52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
54 | continue; \ | ||
55 | } \ | ||
56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
58 | &env->vfp.standard_fp_status; \ | ||
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
194 | } | ||
195 | if (changed & FPCR_FZ) { | ||
68 | -- | 196 | -- |
69 | 2.34.1 | 197 | 2.34.1 |
70 | 198 | ||
71 | 199 | diff view generated by jsdifflib |
1 | From: Guenter Roeck <linux@roeck-us.net> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Allwinner R40 supports an AHCI compliant SATA controller. | 3 | Replace with fp_status[FPST_STD]. |
4 | Add support for it. | 4 | |
5 | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | |
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Signed-off-by: Guenter Roeck <linux@roeck-us.net> | 7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org |
8 | Message-id: 20240115182757.1095012-3-linux@roeck-us.net | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | docs/system/arm/bananapi_m2u.rst | 1 + | 10 | target/arm/cpu.h | 1 - |
12 | include/hw/arm/allwinner-r40.h | 3 +++ | 11 | target/arm/cpu.c | 8 ++++---- |
13 | hw/arm/allwinner-r40.c | 12 +++++++++++- | 12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- |
14 | hw/arm/Kconfig | 1 + | 13 | target/arm/tcg/vec_helper.c | 4 ++-- |
15 | 4 files changed, 16 insertions(+), 1 deletion(-) | 14 | target/arm/vfp_helper.c | 4 ++-- |
16 | 15 | 5 files changed, 22 insertions(+), 23 deletions(-) | |
17 | diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst | 16 | |
18 | index XXXXXXX..XXXXXXX 100644 | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
19 | --- a/docs/system/arm/bananapi_m2u.rst | 18 | index XXXXXXX..XXXXXXX 100644 |
20 | +++ b/docs/system/arm/bananapi_m2u.rst | 19 | --- a/target/arm/cpu.h |
21 | @@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices: | 20 | +++ b/target/arm/cpu.h |
22 | * EMAC ethernet | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
23 | * GMAC ethernet | 22 | float_status fp_status_f16_a64; |
24 | * Clock Control Unit | 23 | float_status ah_fp_status; |
25 | + * SATA | 24 | float_status ah_fp_status_f16; |
26 | * TWI (I2C) | 25 | - float_status standard_fp_status; |
27 | * USB 2.0 | 26 | }; |
28 | 27 | }; | |
29 | diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h | 28 | |
30 | index XXXXXXX..XXXXXXX 100644 | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
31 | --- a/include/hw/arm/allwinner-r40.h | 30 | index XXXXXXX..XXXXXXX 100644 |
32 | +++ b/include/hw/arm/allwinner-r40.h | 31 | --- a/target/arm/cpu.c |
33 | @@ -XXX,XX +XXX,XX @@ | 32 | +++ b/target/arm/cpu.c |
34 | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | |
35 | #include "qom/object.h" | 34 | env->sau.ctrl = 0; |
36 | #include "hw/timer/allwinner-a10-pit.h" | ||
37 | +#include "hw/ide/ahci.h" | ||
38 | #include "hw/intc/arm_gic.h" | ||
39 | #include "hw/sd/allwinner-sdhost.h" | ||
40 | #include "hw/misc/allwinner-r40-ccu.h" | ||
41 | @@ -XXX,XX +XXX,XX @@ enum { | ||
42 | AW_R40_DEV_MMC1, | ||
43 | AW_R40_DEV_MMC2, | ||
44 | AW_R40_DEV_MMC3, | ||
45 | + AW_R40_DEV_AHCI, | ||
46 | AW_R40_DEV_EHCI1, | ||
47 | AW_R40_DEV_OHCI1, | ||
48 | AW_R40_DEV_EHCI2, | ||
49 | @@ -XXX,XX +XXX,XX @@ struct AwR40State { | ||
50 | const hwaddr *memmap; | ||
51 | AwSRAMCState sramc; | ||
52 | AwA10PITState timer; | ||
53 | + AllwinnerAHCIState sata; | ||
54 | AwSdHostState mmc[AW_R40_NUM_MMCS]; | ||
55 | EHCISysBusState ehci[AW_R40_NUM_USB]; | ||
56 | OHCISysBusState ohci[AW_R40_NUM_USB]; | ||
57 | diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/hw/arm/allwinner-r40.c | ||
60 | +++ b/hw/arm/allwinner-r40.c | ||
61 | @@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = { | ||
62 | [AW_R40_DEV_MMC1] = 0x01c10000, | ||
63 | [AW_R40_DEV_MMC2] = 0x01c11000, | ||
64 | [AW_R40_DEV_MMC3] = 0x01c12000, | ||
65 | + [AW_R40_DEV_AHCI] = 0x01c18000, | ||
66 | [AW_R40_DEV_EHCI1] = 0x01c19000, | ||
67 | [AW_R40_DEV_OHCI1] = 0x01c19400, | ||
68 | [AW_R40_DEV_EHCI2] = 0x01c1c000, | ||
69 | @@ -XXX,XX +XXX,XX @@ static struct AwR40Unimplemented r40_unimplemented[] = { | ||
70 | { "usb0-host", 0x01c14000, 4 * KiB }, | ||
71 | { "crypto", 0x01c15000, 4 * KiB }, | ||
72 | { "spi2", 0x01c17000, 4 * KiB }, | ||
73 | - { "sata", 0x01c18000, 4 * KiB }, | ||
74 | { "usb1-phy", 0x01c19800, 2 * KiB }, | ||
75 | { "sid", 0x01c1b000, 4 * KiB }, | ||
76 | { "usb2-phy", 0x01c1c800, 2 * KiB }, | ||
77 | @@ -XXX,XX +XXX,XX @@ enum { | ||
78 | AW_R40_GIC_SPI_MMC2 = 34, | ||
79 | AW_R40_GIC_SPI_MMC3 = 35, | ||
80 | AW_R40_GIC_SPI_EMAC = 55, | ||
81 | + AW_R40_GIC_SPI_AHCI = 56, | ||
82 | AW_R40_GIC_SPI_OHCI1 = 64, | ||
83 | AW_R40_GIC_SPI_OHCI2 = 65, | ||
84 | AW_R40_GIC_SPI_EHCI1 = 76, | ||
85 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj) | ||
86 | TYPE_AW_SDHOST_SUN50I_A64); | ||
87 | } | 35 | } |
88 | 36 | ||
89 | + object_initialize_child(obj, "sata", &s->sata, TYPE_ALLWINNER_AHCI); | 37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); |
90 | + | 38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
91 | for (size_t i = 0; i < AW_R40_NUM_USB; i++) { | 39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
92 | object_initialize_child(obj, "ehci[*]", &s->ehci[i], | 40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
93 | TYPE_PLATFORM_EHCI); | 41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
94 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp) | 42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
95 | sysbus_realize(SYS_BUS_DEVICE(&s->ccu), &error_fatal); | 43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
96 | sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccu), 0, s->memmap[AW_R40_DEV_CCU]); | 44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
97 | 45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | |
98 | + /* SATA / AHCI */ | 46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
99 | + sysbus_realize(SYS_BUS_DEVICE(&s->sata), &error_fatal); | 47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
100 | + sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, | 48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
101 | + allwinner_r40_memmap[AW_R40_DEV_AHCI]); | 49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
102 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, | 50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
103 | + qdev_get_gpio_in(DEVICE(&s->gic), AW_R40_GIC_SPI_AHCI)); | 51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
104 | + | 52 | index XXXXXXX..XXXXXXX 100644 |
105 | /* USB */ | 53 | --- a/target/arm/tcg/mve_helper.c |
106 | for (size_t i = 0; i < AW_R40_NUM_USB; i++) { | 54 | +++ b/target/arm/tcg/mve_helper.c |
107 | g_autofree char *bus = g_strdup_printf("usb-bus.%zu", i); | 55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
108 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | 56 | continue; \ |
109 | index XXXXXXX..XXXXXXX 100644 | 57 | } \ |
110 | --- a/hw/arm/Kconfig | 58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
111 | +++ b/hw/arm/Kconfig | 59 | - &env->vfp.standard_fp_status; \ |
112 | @@ -XXX,XX +XXX,XX @@ config ALLWINNER_H3 | 60 | + &env->vfp.fp_status[FPST_STD]; \ |
113 | config ALLWINNER_R40 | 61 | if (!(mask & 1)) { \ |
114 | bool | 62 | /* We need the result but without updating flags */ \ |
115 | default y if TCG && ARM | 63 | scratch_fpst = *fpst; \ |
116 | + select AHCI | 64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
117 | select ALLWINNER_SRAMC | 65 | continue; \ |
118 | select ALLWINNER_A10_PIT | 66 | } \ |
119 | select AXP2XX_PMU | 67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
192 | } | ||
193 | |||
194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
197 | |||
198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
201 | } | ||
202 | |||
203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/target/arm/vfp_helper.c | ||
206 | +++ b/target/arm/vfp_helper.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
208 | uint32_t a32_flags = 0, a64_flags = 0; | ||
209 | |||
210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
213 | /* FZ16 does not generate an input denormal exception. */ | ||
214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
215 | & ~float_flag_input_denormal_flushed); | ||
216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
120 | -- | 225 | -- |
121 | 2.34.1 | 226 | 2.34.1 |
122 | 227 | ||
123 | 228 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Move Arm A-class Generic Timer definitions to the new | 3 | Replace with fp_status[FPST_AH_F16]. |
4 | "target/arm/gtimer.h" header so units in hw/ which don't | ||
5 | need access to ARMCPU internals can use them without | ||
6 | having to include the huge "cpu.h". | ||
7 | 4 | ||
8 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org |
11 | Message-id: 20240118200643.29037-20-philmd@linaro.org | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 9 | --- |
14 | target/arm/cpu.h | 8 +------- | 10 | target/arm/cpu.h | 3 +-- |
15 | target/arm/gtimer.h | 21 +++++++++++++++++++++ | 11 | target/arm/cpu.c | 2 +- |
16 | hw/arm/allwinner-h3.c | 1 + | 12 | target/arm/vfp_helper.c | 10 +++++----- |
17 | hw/arm/allwinner-r40.c | 1 + | 13 | 3 files changed, 7 insertions(+), 8 deletions(-) |
18 | hw/arm/bcm2836.c | 1 + | ||
19 | hw/arm/sbsa-ref.c | 1 + | ||
20 | hw/arm/virt.c | 1 + | ||
21 | hw/arm/xlnx-versal.c | 1 + | ||
22 | hw/arm/xlnx-zynqmp.c | 1 + | ||
23 | hw/cpu/a15mpcore.c | 1 + | ||
24 | target/arm/cpu.c | 1 + | ||
25 | target/arm/helper.c | 1 + | ||
26 | target/arm/hvf/hvf.c | 1 + | ||
27 | target/arm/kvm.c | 1 + | ||
28 | target/arm/machine.c | 1 + | ||
29 | 15 files changed, 35 insertions(+), 7 deletions(-) | ||
30 | create mode 100644 target/arm/gtimer.h | ||
31 | 14 | ||
32 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
33 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/target/arm/cpu.h | 17 | --- a/target/arm/cpu.h |
35 | +++ b/target/arm/cpu.h | 18 | +++ b/target/arm/cpu.h |
36 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
37 | #include "exec/cpu-defs.h" | 20 | * behaviour when FPCR.AH == 1: they don't update cumulative |
38 | #include "qapi/qapi-types-common.h" | 21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
39 | #include "target/arm/multiprocessing.h" | 22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
40 | +#include "target/arm/gtimer.h" | 23 | - * which means we need an ah_fp_status_f16 as well. |
41 | 24 | + * which means we need an FPST_AH_F16 as well. | |
42 | /* ARM processors have a weak memory model */ | 25 | * |
43 | #define TCG_GUEST_DEFAULT_MO (0) | 26 | * To avoid having to transfer exception bits around, we simply |
44 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMGenericTimer { | 27 | * say that the FPSCR cumulative exception flags are the logical |
45 | uint64_t ctl; /* Timer Control register */ | 28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
46 | } ARMGenericTimer; | 29 | float_status fp_status_f16_a32; |
47 | 30 | float_status fp_status_f16_a64; | |
48 | -#define GTIMER_PHYS 0 | 31 | float_status ah_fp_status; |
49 | -#define GTIMER_VIRT 1 | 32 | - float_status ah_fp_status_f16; |
50 | -#define GTIMER_HYP 2 | 33 | }; |
51 | -#define GTIMER_SEC 3 | 34 | }; |
52 | -#define GTIMER_HYPVIRT 4 | 35 | |
53 | -#define NUM_GTIMERS 5 | ||
54 | - | ||
55 | #define VTCR_NSW (1u << 29) | ||
56 | #define VTCR_NSA (1u << 30) | ||
57 | #define VSTCR_SW VTCR_NSW | ||
58 | diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h | ||
59 | new file mode 100644 | ||
60 | index XXXXXXX..XXXXXXX | ||
61 | --- /dev/null | ||
62 | +++ b/target/arm/gtimer.h | ||
63 | @@ -XXX,XX +XXX,XX @@ | ||
64 | +/* | ||
65 | + * ARM generic timer definitions for Arm A-class CPU | ||
66 | + * | ||
67 | + * Copyright (c) 2003 Fabrice Bellard | ||
68 | + * | ||
69 | + * SPDX-License-Identifier: LGPL-2.1-or-later | ||
70 | + */ | ||
71 | + | ||
72 | +#ifndef TARGET_ARM_GTIMER_H | ||
73 | +#define TARGET_ARM_GTIMER_H | ||
74 | + | ||
75 | +enum { | ||
76 | + GTIMER_PHYS = 0, | ||
77 | + GTIMER_VIRT = 1, | ||
78 | + GTIMER_HYP = 2, | ||
79 | + GTIMER_SEC = 3, | ||
80 | + GTIMER_HYPVIRT = 4, | ||
81 | +#define NUM_GTIMERS 5 | ||
82 | +}; | ||
83 | + | ||
84 | +#endif | ||
85 | diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/hw/arm/allwinner-h3.c | ||
88 | +++ b/hw/arm/allwinner-h3.c | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | #include "sysemu/sysemu.h" | ||
91 | #include "hw/arm/allwinner-h3.h" | ||
92 | #include "target/arm/cpu-qom.h" | ||
93 | +#include "target/arm/gtimer.h" | ||
94 | |||
95 | /* Memory map */ | ||
96 | const hwaddr allwinner_h3_memmap[] = { | ||
97 | diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/hw/arm/allwinner-r40.c | ||
100 | +++ b/hw/arm/allwinner-r40.c | ||
101 | @@ -XXX,XX +XXX,XX @@ | ||
102 | #include "hw/arm/allwinner-r40.h" | ||
103 | #include "hw/misc/allwinner-r40-dramc.h" | ||
104 | #include "target/arm/cpu-qom.h" | ||
105 | +#include "target/arm/gtimer.h" | ||
106 | |||
107 | /* Memory map */ | ||
108 | const hwaddr allwinner_r40_memmap[] = { | ||
109 | diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c | ||
110 | index XXXXXXX..XXXXXXX 100644 | ||
111 | --- a/hw/arm/bcm2836.c | ||
112 | +++ b/hw/arm/bcm2836.c | ||
113 | @@ -XXX,XX +XXX,XX @@ | ||
114 | #include "hw/arm/raspi_platform.h" | ||
115 | #include "hw/sysbus.h" | ||
116 | #include "target/arm/cpu-qom.h" | ||
117 | +#include "target/arm/gtimer.h" | ||
118 | |||
119 | struct BCM283XClass { | ||
120 | /*< private >*/ | ||
121 | diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/hw/arm/sbsa-ref.c | ||
124 | +++ b/hw/arm/sbsa-ref.c | ||
125 | @@ -XXX,XX +XXX,XX @@ | ||
126 | #include "qapi/qmp/qlist.h" | ||
127 | #include "qom/object.h" | ||
128 | #include "target/arm/cpu-qom.h" | ||
129 | +#include "target/arm/gtimer.h" | ||
130 | |||
131 | #define RAMLIMIT_GB 8192 | ||
132 | #define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB) | ||
133 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/hw/arm/virt.c | ||
136 | +++ b/hw/arm/virt.c | ||
137 | @@ -XXX,XX +XXX,XX @@ | ||
138 | #include "target/arm/cpu-qom.h" | ||
139 | #include "target/arm/internals.h" | ||
140 | #include "target/arm/multiprocessing.h" | ||
141 | +#include "target/arm/gtimer.h" | ||
142 | #include "hw/mem/pc-dimm.h" | ||
143 | #include "hw/mem/nvdimm.h" | ||
144 | #include "hw/acpi/generic_event_device.h" | ||
145 | diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/hw/arm/xlnx-versal.c | ||
148 | +++ b/hw/arm/xlnx-versal.c | ||
149 | @@ -XXX,XX +XXX,XX @@ | ||
150 | #include "hw/arm/xlnx-versal.h" | ||
151 | #include "qemu/log.h" | ||
152 | #include "target/arm/cpu-qom.h" | ||
153 | +#include "target/arm/gtimer.h" | ||
154 | |||
155 | #define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72") | ||
156 | #define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f") | ||
157 | diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c | ||
158 | index XXXXXXX..XXXXXXX 100644 | ||
159 | --- a/hw/arm/xlnx-zynqmp.c | ||
160 | +++ b/hw/arm/xlnx-zynqmp.c | ||
161 | @@ -XXX,XX +XXX,XX @@ | ||
162 | #include "sysemu/sysemu.h" | ||
163 | #include "kvm_arm.h" | ||
164 | #include "target/arm/cpu-qom.h" | ||
165 | +#include "target/arm/gtimer.h" | ||
166 | |||
167 | #define GIC_NUM_SPI_INTR 160 | ||
168 | |||
169 | diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c | ||
170 | index XXXXXXX..XXXXXXX 100644 | ||
171 | --- a/hw/cpu/a15mpcore.c | ||
172 | +++ b/hw/cpu/a15mpcore.c | ||
173 | @@ -XXX,XX +XXX,XX @@ | ||
174 | #include "hw/qdev-properties.h" | ||
175 | #include "sysemu/kvm.h" | ||
176 | #include "kvm_arm.h" | ||
177 | +#include "target/arm/gtimer.h" | ||
178 | |||
179 | static void a15mp_priv_set_irq(void *opaque, int irq, int level) | ||
180 | { | ||
181 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
182 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
183 | --- a/target/arm/cpu.c | 38 | --- a/target/arm/cpu.c |
184 | +++ b/target/arm/cpu.c | 39 | +++ b/target/arm/cpu.c |
185 | @@ -XXX,XX +XXX,XX @@ | 40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
186 | #include "fpu/softfloat.h" | 41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); |
187 | #include "cpregs.h" | 42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); |
188 | #include "target/arm/cpu-qom.h" | 43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); |
189 | +#include "target/arm/gtimer.h" | 44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); |
190 | 45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | |
191 | static void arm_cpu_set_pc(CPUState *cs, vaddr value) | 46 | |
192 | { | 47 | #ifndef CONFIG_USER_ONLY |
193 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 48 | if (kvm_enabled()) { |
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
194 | index XXXXXXX..XXXXXXX 100644 | 50 | index XXXXXXX..XXXXXXX 100644 |
195 | --- a/target/arm/helper.c | 51 | --- a/target/arm/vfp_helper.c |
196 | +++ b/target/arm/helper.c | 52 | +++ b/target/arm/vfp_helper.c |
197 | @@ -XXX,XX +XXX,XX @@ | 53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
198 | #include "semihosting/common-semi.h" | 54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) |
199 | #endif | 55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); |
200 | #include "cpregs.h" | 56 | /* |
201 | +#include "target/arm/gtimer.h" | 57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because |
202 | 58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | |
203 | #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ | 59 | * they are used for insns that must not set the cumulative exception bits. |
204 | 60 | */ | |
205 | diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c | 61 | |
206 | index XXXXXXX..XXXXXXX 100644 | 62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
207 | --- a/target/arm/hvf/hvf.c | 63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); |
208 | +++ b/target/arm/hvf/hvf.c | 64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); |
209 | @@ -XXX,XX +XXX,XX @@ | 65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); |
210 | #include "target/arm/cpu.h" | 66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); |
211 | #include "target/arm/internals.h" | 67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); |
212 | #include "target/arm/multiprocessing.h" | 68 | } |
213 | +#include "target/arm/gtimer.h" | 69 | |
214 | #include "trace/trace-target_arm_hvf.h" | 70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
215 | #include "migration/vmstate.h" | 71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
216 | 72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | |
217 | diff --git a/target/arm/kvm.c b/target/arm/kvm.c | 73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
218 | index XXXXXXX..XXXXXXX 100644 | 74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
219 | --- a/target/arm/kvm.c | 75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
220 | +++ b/target/arm/kvm.c | 76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
221 | @@ -XXX,XX +XXX,XX @@ | 77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
222 | #include "qemu/log.h" | 78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
223 | #include "hw/acpi/acpi.h" | 79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
224 | #include "hw/acpi/ghes.h" | 80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
225 | +#include "target/arm/gtimer.h" | 81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
226 | 82 | } | |
227 | const KVMCapabilityInfo kvm_arch_required_capabilities[] = { | 83 | if (changed & FPCR_FZ) { |
228 | KVM_CAP_LAST_INFO | 84 | bool ftz_enabled = val & FPCR_FZ; |
229 | diff --git a/target/arm/machine.c b/target/arm/machine.c | 85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
230 | index XXXXXXX..XXXXXXX 100644 | 86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
231 | --- a/target/arm/machine.c | 87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
232 | +++ b/target/arm/machine.c | 88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); |
233 | @@ -XXX,XX +XXX,XX @@ | 89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); |
234 | #include "internals.h" | 90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
235 | #include "cpu-features.h" | 91 | } |
236 | #include "migration/cpu.h" | 92 | if (changed & FPCR_AH) { |
237 | +#include "target/arm/gtimer.h" | 93 | bool ah_enabled = val & FPCR_AH; |
238 | |||
239 | static bool vfp_needed(void *opaque) | ||
240 | { | ||
241 | -- | 94 | -- |
242 | 2.34.1 | 95 | 2.34.1 |
243 | 96 | ||
244 | 97 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The ARM_CPU_IRQ/FIQ definitions are used to index the GPIO | 3 | Replace with fp_status[FPST_AH]. |
4 | IRQ created by calling qdev_init_gpio_in() in ARMCPU instance_init() | ||
5 | handler. To allow non-ARM code to raise interrupt on ARM cores, | ||
6 | move them to 'target/arm/cpu-qom.h' which is non-ARM specific and | ||
7 | can be included by any hw/ file. | ||
8 | 4 | ||
9 | File list to include the new header generated using: | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | |
11 | $ git grep -wEl 'ARM_CPU_(\w*IRQ|FIQ)' | 7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org |
12 | |||
13 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Message-id: 20240118200643.29037-18-philmd@linaro.org | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | --- | 9 | --- |
18 | target/arm/cpu-qom.h | 6 ++++++ | 10 | target/arm/cpu.h | 3 +-- |
19 | target/arm/cpu.h | 6 ------ | 11 | target/arm/cpu.c | 6 +++--- |
20 | hw/arm/allwinner-a10.c | 1 + | 12 | target/arm/vfp_helper.c | 6 +++--- |
21 | hw/arm/allwinner-h3.c | 1 + | 13 | 3 files changed, 7 insertions(+), 8 deletions(-) |
22 | hw/arm/allwinner-r40.c | 1 + | ||
23 | hw/arm/armv7m.c | 1 + | ||
24 | hw/arm/aspeed_ast2400.c | 1 + | ||
25 | hw/arm/aspeed_ast2600.c | 1 + | ||
26 | hw/arm/bcm2836.c | 1 + | ||
27 | hw/arm/exynos4210.c | 1 + | ||
28 | hw/arm/fsl-imx25.c | 1 + | ||
29 | hw/arm/fsl-imx31.c | 1 + | ||
30 | hw/arm/fsl-imx6.c | 1 + | ||
31 | hw/arm/fsl-imx6ul.c | 1 + | ||
32 | hw/arm/fsl-imx7.c | 1 + | ||
33 | hw/arm/highbank.c | 1 + | ||
34 | hw/arm/integratorcp.c | 1 + | ||
35 | hw/arm/musicpal.c | 1 + | ||
36 | hw/arm/npcm7xx.c | 1 + | ||
37 | hw/arm/omap1.c | 1 + | ||
38 | hw/arm/omap2.c | 1 + | ||
39 | hw/arm/realview.c | 1 + | ||
40 | hw/arm/sbsa-ref.c | 1 + | ||
41 | hw/arm/strongarm.c | 1 + | ||
42 | hw/arm/versatilepb.c | 1 + | ||
43 | hw/arm/vexpress.c | 1 + | ||
44 | hw/arm/virt.c | 1 + | ||
45 | hw/arm/xilinx_zynq.c | 1 + | ||
46 | hw/arm/xlnx-versal.c | 1 + | ||
47 | hw/arm/xlnx-zynqmp.c | 1 + | ||
48 | target/arm/cpu.c | 1 + | ||
49 | 31 files changed, 35 insertions(+), 6 deletions(-) | ||
50 | 14 | ||
51 | diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/cpu-qom.h | ||
54 | +++ b/target/arm/cpu-qom.h | ||
55 | @@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU, | ||
56 | #define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU | ||
57 | #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) | ||
58 | |||
59 | +/* Meanings of the ARMCPU object's four inbound GPIO lines */ | ||
60 | +#define ARM_CPU_IRQ 0 | ||
61 | +#define ARM_CPU_FIQ 1 | ||
62 | +#define ARM_CPU_VIRQ 2 | ||
63 | +#define ARM_CPU_VFIQ 3 | ||
64 | + | ||
65 | /* For M profile, some registers are banked secure vs non-secure; | ||
66 | * these are represented as a 2-element array where the first element | ||
67 | * is the non-secure copy and the second is the secure copy. | ||
68 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
69 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
70 | --- a/target/arm/cpu.h | 17 | --- a/target/arm/cpu.h |
71 | +++ b/target/arm/cpu.h | 18 | +++ b/target/arm/cpu.h |
72 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
73 | #define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) | 20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
74 | #endif | 21 | * using a fixed value for it. |
75 | 22 | * | |
76 | -/* Meanings of the ARMCPU object's four inbound GPIO lines */ | 23 | - * The ah_fp_status is needed because some insns have different |
77 | -#define ARM_CPU_IRQ 0 | 24 | + * FPST_AH is needed because some insns have different |
78 | -#define ARM_CPU_FIQ 1 | 25 | * behaviour when FPCR.AH == 1: they don't update cumulative |
79 | -#define ARM_CPU_VIRQ 2 | 26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
80 | -#define ARM_CPU_VFIQ 3 | 27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
81 | - | 28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
82 | /* ARM-specific extra insn start words: | 29 | float_status fp_status_a64; |
83 | * 1: Conditional execution bits | 30 | float_status fp_status_f16_a32; |
84 | * 2: Partial exception syndrome for data aborts | 31 | float_status fp_status_f16_a64; |
85 | diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c | 32 | - float_status ah_fp_status; |
86 | index XXXXXXX..XXXXXXX 100644 | 33 | }; |
87 | --- a/hw/arm/allwinner-a10.c | 34 | }; |
88 | +++ b/hw/arm/allwinner-a10.c | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | #include "hw/boards.h" | ||
91 | #include "hw/usb/hcd-ohci.h" | ||
92 | #include "hw/loader.h" | ||
93 | +#include "target/arm/cpu-qom.h" | ||
94 | |||
95 | #define AW_A10_SRAM_A_BASE 0x00000000 | ||
96 | #define AW_A10_DRAMC_BASE 0x01c01000 | ||
97 | diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/hw/arm/allwinner-h3.c | ||
100 | +++ b/hw/arm/allwinner-h3.c | ||
101 | @@ -XXX,XX +XXX,XX @@ | ||
102 | #include "hw/loader.h" | ||
103 | #include "sysemu/sysemu.h" | ||
104 | #include "hw/arm/allwinner-h3.h" | ||
105 | +#include "target/arm/cpu-qom.h" | ||
106 | |||
107 | /* Memory map */ | ||
108 | const hwaddr allwinner_h3_memmap[] = { | ||
109 | diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c | ||
110 | index XXXXXXX..XXXXXXX 100644 | ||
111 | --- a/hw/arm/allwinner-r40.c | ||
112 | +++ b/hw/arm/allwinner-r40.c | ||
113 | @@ -XXX,XX +XXX,XX @@ | ||
114 | #include "sysemu/sysemu.h" | ||
115 | #include "hw/arm/allwinner-r40.h" | ||
116 | #include "hw/misc/allwinner-r40-dramc.h" | ||
117 | +#include "target/arm/cpu-qom.h" | ||
118 | |||
119 | /* Memory map */ | ||
120 | const hwaddr allwinner_r40_memmap[] = { | ||
121 | diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/hw/arm/armv7m.c | ||
124 | +++ b/hw/arm/armv7m.c | ||
125 | @@ -XXX,XX +XXX,XX @@ | ||
126 | #include "target/arm/idau.h" | ||
127 | #include "target/arm/cpu.h" | ||
128 | #include "target/arm/cpu-features.h" | ||
129 | +#include "target/arm/cpu-qom.h" | ||
130 | #include "migration/vmstate.h" | ||
131 | |||
132 | /* Bitbanded IO. Each word corresponds to a single bit. */ | ||
133 | diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/hw/arm/aspeed_ast2400.c | ||
136 | +++ b/hw/arm/aspeed_ast2400.c | ||
137 | @@ -XXX,XX +XXX,XX @@ | ||
138 | #include "hw/i2c/aspeed_i2c.h" | ||
139 | #include "net/net.h" | ||
140 | #include "sysemu/sysemu.h" | ||
141 | +#include "target/arm/cpu-qom.h" | ||
142 | |||
143 | #define ASPEED_SOC_IOMEM_SIZE 0x00200000 | ||
144 | |||
145 | diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/hw/arm/aspeed_ast2600.c | ||
148 | +++ b/hw/arm/aspeed_ast2600.c | ||
149 | @@ -XXX,XX +XXX,XX @@ | ||
150 | #include "hw/i2c/aspeed_i2c.h" | ||
151 | #include "net/net.h" | ||
152 | #include "sysemu/sysemu.h" | ||
153 | +#include "target/arm/cpu-qom.h" | ||
154 | |||
155 | #define ASPEED_SOC_IOMEM_SIZE 0x00200000 | ||
156 | #define ASPEED_SOC_DPMCU_SIZE 0x00040000 | ||
157 | diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c | ||
158 | index XXXXXXX..XXXXXXX 100644 | ||
159 | --- a/hw/arm/bcm2836.c | ||
160 | +++ b/hw/arm/bcm2836.c | ||
161 | @@ -XXX,XX +XXX,XX @@ | ||
162 | #include "hw/arm/bcm2836.h" | ||
163 | #include "hw/arm/raspi_platform.h" | ||
164 | #include "hw/sysbus.h" | ||
165 | +#include "target/arm/cpu-qom.h" | ||
166 | |||
167 | struct BCM283XClass { | ||
168 | /*< private >*/ | ||
169 | diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c | ||
170 | index XXXXXXX..XXXXXXX 100644 | ||
171 | --- a/hw/arm/exynos4210.c | ||
172 | +++ b/hw/arm/exynos4210.c | ||
173 | @@ -XXX,XX +XXX,XX @@ | ||
174 | #include "hw/arm/exynos4210.h" | ||
175 | #include "hw/sd/sdhci.h" | ||
176 | #include "hw/usb/hcd-ehci.h" | ||
177 | +#include "target/arm/cpu-qom.h" | ||
178 | |||
179 | #define EXYNOS4210_CHIPID_ADDR 0x10000000 | ||
180 | |||
181 | diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/hw/arm/fsl-imx25.c | ||
184 | +++ b/hw/arm/fsl-imx25.c | ||
185 | @@ -XXX,XX +XXX,XX @@ | ||
186 | #include "sysemu/sysemu.h" | ||
187 | #include "hw/qdev-properties.h" | ||
188 | #include "chardev/char.h" | ||
189 | +#include "target/arm/cpu-qom.h" | ||
190 | |||
191 | #define IMX25_ESDHC_CAPABILITIES 0x07e20000 | ||
192 | |||
193 | diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c | ||
194 | index XXXXXXX..XXXXXXX 100644 | ||
195 | --- a/hw/arm/fsl-imx31.c | ||
196 | +++ b/hw/arm/fsl-imx31.c | ||
197 | @@ -XXX,XX +XXX,XX @@ | ||
198 | #include "exec/address-spaces.h" | ||
199 | #include "hw/qdev-properties.h" | ||
200 | #include "chardev/char.h" | ||
201 | +#include "target/arm/cpu-qom.h" | ||
202 | |||
203 | static void fsl_imx31_init(Object *obj) | ||
204 | { | ||
205 | diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c | ||
206 | index XXXXXXX..XXXXXXX 100644 | ||
207 | --- a/hw/arm/fsl-imx6.c | ||
208 | +++ b/hw/arm/fsl-imx6.c | ||
209 | @@ -XXX,XX +XXX,XX @@ | ||
210 | #include "chardev/char.h" | ||
211 | #include "qemu/error-report.h" | ||
212 | #include "qemu/module.h" | ||
213 | +#include "target/arm/cpu-qom.h" | ||
214 | |||
215 | #define IMX6_ESDHC_CAPABILITIES 0x057834b4 | ||
216 | |||
217 | diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c | ||
218 | index XXXXXXX..XXXXXXX 100644 | ||
219 | --- a/hw/arm/fsl-imx6ul.c | ||
220 | +++ b/hw/arm/fsl-imx6ul.c | ||
221 | @@ -XXX,XX +XXX,XX @@ | ||
222 | #include "sysemu/sysemu.h" | ||
223 | #include "qemu/error-report.h" | ||
224 | #include "qemu/module.h" | ||
225 | +#include "target/arm/cpu-qom.h" | ||
226 | |||
227 | #define NAME_SIZE 20 | ||
228 | |||
229 | diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c | ||
230 | index XXXXXXX..XXXXXXX 100644 | ||
231 | --- a/hw/arm/fsl-imx7.c | ||
232 | +++ b/hw/arm/fsl-imx7.c | ||
233 | @@ -XXX,XX +XXX,XX @@ | ||
234 | #include "sysemu/sysemu.h" | ||
235 | #include "qemu/error-report.h" | ||
236 | #include "qemu/module.h" | ||
237 | +#include "target/arm/cpu-qom.h" | ||
238 | |||
239 | #define NAME_SIZE 20 | ||
240 | |||
241 | diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c | ||
242 | index XXXXXXX..XXXXXXX 100644 | ||
243 | --- a/hw/arm/highbank.c | ||
244 | +++ b/hw/arm/highbank.c | ||
245 | @@ -XXX,XX +XXX,XX @@ | ||
246 | #include "qemu/log.h" | ||
247 | #include "qom/object.h" | ||
248 | #include "cpu.h" | ||
249 | +#include "target/arm/cpu-qom.h" | ||
250 | |||
251 | #define SMP_BOOT_ADDR 0x100 | ||
252 | #define SMP_BOOT_REG 0x40 | ||
253 | diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c | ||
254 | index XXXXXXX..XXXXXXX 100644 | ||
255 | --- a/hw/arm/integratorcp.c | ||
256 | +++ b/hw/arm/integratorcp.c | ||
257 | @@ -XXX,XX +XXX,XX @@ | ||
258 | #include "hw/sd/sd.h" | ||
259 | #include "qom/object.h" | ||
260 | #include "audio/audio.h" | ||
261 | +#include "target/arm/cpu-qom.h" | ||
262 | |||
263 | #define TYPE_INTEGRATOR_CM "integrator_core" | ||
264 | OBJECT_DECLARE_SIMPLE_TYPE(IntegratorCMState, INTEGRATOR_CM) | ||
265 | diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c | ||
266 | index XXXXXXX..XXXXXXX 100644 | ||
267 | --- a/hw/arm/musicpal.c | ||
268 | +++ b/hw/arm/musicpal.c | ||
269 | @@ -XXX,XX +XXX,XX @@ | ||
270 | #include "hw/net/mv88w8618_eth.h" | ||
271 | #include "audio/audio.h" | ||
272 | #include "qemu/error-report.h" | ||
273 | +#include "target/arm/cpu-qom.h" | ||
274 | |||
275 | #define MP_MISC_BASE 0x80002000 | ||
276 | #define MP_MISC_SIZE 0x00001000 | ||
277 | diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c | ||
278 | index XXXXXXX..XXXXXXX 100644 | ||
279 | --- a/hw/arm/npcm7xx.c | ||
280 | +++ b/hw/arm/npcm7xx.c | ||
281 | @@ -XXX,XX +XXX,XX @@ | ||
282 | #include "qapi/error.h" | ||
283 | #include "qemu/units.h" | ||
284 | #include "sysemu/sysemu.h" | ||
285 | +#include "target/arm/cpu-qom.h" | ||
286 | |||
287 | /* | ||
288 | * This covers the whole MMIO space. We'll use this to catch any MMIO accesses | ||
289 | diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c | ||
290 | index XXXXXXX..XXXXXXX 100644 | ||
291 | --- a/hw/arm/omap1.c | ||
292 | +++ b/hw/arm/omap1.c | ||
293 | @@ -XXX,XX +XXX,XX @@ | ||
294 | #include "hw/sysbus.h" | ||
295 | #include "qemu/cutils.h" | ||
296 | #include "qemu/bcd.h" | ||
297 | +#include "target/arm/cpu-qom.h" | ||
298 | |||
299 | static inline void omap_log_badwidth(const char *funcname, hwaddr addr, int sz) | ||
300 | { | ||
301 | diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c | ||
302 | index XXXXXXX..XXXXXXX 100644 | ||
303 | --- a/hw/arm/omap2.c | ||
304 | +++ b/hw/arm/omap2.c | ||
305 | @@ -XXX,XX +XXX,XX @@ | ||
306 | #include "hw/sysbus.h" | ||
307 | #include "hw/boards.h" | ||
308 | #include "audio/audio.h" | ||
309 | +#include "target/arm/cpu-qom.h" | ||
310 | |||
311 | /* Enhanced Audio Controller (CODEC only) */ | ||
312 | struct omap_eac_s { | ||
313 | diff --git a/hw/arm/realview.c b/hw/arm/realview.c | ||
314 | index XXXXXXX..XXXXXXX 100644 | ||
315 | --- a/hw/arm/realview.c | ||
316 | +++ b/hw/arm/realview.c | ||
317 | @@ -XXX,XX +XXX,XX @@ | ||
318 | #include "hw/i2c/arm_sbcon_i2c.h" | ||
319 | #include "hw/sd/sd.h" | ||
320 | #include "audio/audio.h" | ||
321 | +#include "target/arm/cpu-qom.h" | ||
322 | |||
323 | #define SMP_BOOT_ADDR 0xe0000000 | ||
324 | #define SMP_BOOTREG_ADDR 0x10000030 | ||
325 | diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c | ||
326 | index XXXXXXX..XXXXXXX 100644 | ||
327 | --- a/hw/arm/sbsa-ref.c | ||
328 | +++ b/hw/arm/sbsa-ref.c | ||
329 | @@ -XXX,XX +XXX,XX @@ | ||
330 | #include "net/net.h" | ||
331 | #include "qapi/qmp/qlist.h" | ||
332 | #include "qom/object.h" | ||
333 | +#include "target/arm/cpu-qom.h" | ||
334 | |||
335 | #define RAMLIMIT_GB 8192 | ||
336 | #define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB) | ||
337 | diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c | ||
338 | index XXXXXXX..XXXXXXX 100644 | ||
339 | --- a/hw/arm/strongarm.c | ||
340 | +++ b/hw/arm/strongarm.c | ||
341 | @@ -XXX,XX +XXX,XX @@ | ||
342 | #include "qemu/cutils.h" | ||
343 | #include "qemu/log.h" | ||
344 | #include "qom/object.h" | ||
345 | +#include "target/arm/cpu-qom.h" | ||
346 | |||
347 | //#define DEBUG | ||
348 | |||
349 | diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c | ||
350 | index XXXXXXX..XXXXXXX 100644 | ||
351 | --- a/hw/arm/versatilepb.c | ||
352 | +++ b/hw/arm/versatilepb.c | ||
353 | @@ -XXX,XX +XXX,XX @@ | ||
354 | #include "hw/sd/sd.h" | ||
355 | #include "qom/object.h" | ||
356 | #include "audio/audio.h" | ||
357 | +#include "target/arm/cpu-qom.h" | ||
358 | |||
359 | #define VERSATILE_FLASH_ADDR 0x34000000 | ||
360 | #define VERSATILE_FLASH_SIZE (64 * 1024 * 1024) | ||
361 | diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c | ||
362 | index XXXXXXX..XXXXXXX 100644 | ||
363 | --- a/hw/arm/vexpress.c | ||
364 | +++ b/hw/arm/vexpress.c | ||
365 | @@ -XXX,XX +XXX,XX @@ | ||
366 | #include "qapi/qmp/qlist.h" | ||
367 | #include "qom/object.h" | ||
368 | #include "audio/audio.h" | ||
369 | +#include "target/arm/cpu-qom.h" | ||
370 | |||
371 | #define VEXPRESS_BOARD_ID 0x8e0 | ||
372 | #define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024) | ||
373 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
374 | index XXXXXXX..XXXXXXX 100644 | ||
375 | --- a/hw/arm/virt.c | ||
376 | +++ b/hw/arm/virt.c | ||
377 | @@ -XXX,XX +XXX,XX @@ | ||
378 | #include "standard-headers/linux/input.h" | ||
379 | #include "hw/arm/smmuv3.h" | ||
380 | #include "hw/acpi/acpi.h" | ||
381 | +#include "target/arm/cpu-qom.h" | ||
382 | #include "target/arm/internals.h" | ||
383 | #include "target/arm/multiprocessing.h" | ||
384 | #include "hw/mem/pc-dimm.h" | ||
385 | diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c | ||
386 | index XXXXXXX..XXXXXXX 100644 | ||
387 | --- a/hw/arm/xilinx_zynq.c | ||
388 | +++ b/hw/arm/xilinx_zynq.c | ||
389 | @@ -XXX,XX +XXX,XX @@ | ||
390 | #include "sysemu/reset.h" | ||
391 | #include "qom/object.h" | ||
392 | #include "exec/tswap.h" | ||
393 | +#include "target/arm/cpu-qom.h" | ||
394 | |||
395 | #define TYPE_ZYNQ_MACHINE MACHINE_TYPE_NAME("xilinx-zynq-a9") | ||
396 | OBJECT_DECLARE_SIMPLE_TYPE(ZynqMachineState, ZYNQ_MACHINE) | ||
397 | diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c | ||
398 | index XXXXXXX..XXXXXXX 100644 | ||
399 | --- a/hw/arm/xlnx-versal.c | ||
400 | +++ b/hw/arm/xlnx-versal.c | ||
401 | @@ -XXX,XX +XXX,XX @@ | ||
402 | #include "hw/misc/unimp.h" | ||
403 | #include "hw/arm/xlnx-versal.h" | ||
404 | #include "qemu/log.h" | ||
405 | +#include "target/arm/cpu-qom.h" | ||
406 | |||
407 | #define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72") | ||
408 | #define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f") | ||
409 | diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c | ||
410 | index XXXXXXX..XXXXXXX 100644 | ||
411 | --- a/hw/arm/xlnx-zynqmp.c | ||
412 | +++ b/hw/arm/xlnx-zynqmp.c | ||
413 | @@ -XXX,XX +XXX,XX @@ | ||
414 | #include "sysemu/kvm.h" | ||
415 | #include "sysemu/sysemu.h" | ||
416 | #include "kvm_arm.h" | ||
417 | +#include "target/arm/cpu-qom.h" | ||
418 | |||
419 | #define GIC_NUM_SPI_INTR 160 | ||
420 | 35 | ||
421 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
422 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
423 | --- a/target/arm/cpu.c | 38 | --- a/target/arm/cpu.c |
424 | +++ b/target/arm/cpu.c | 39 | +++ b/target/arm/cpu.c |
425 | @@ -XXX,XX +XXX,XX @@ | 40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
426 | #include "disas/capstone.h" | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
427 | #include "fpu/softfloat.h" | 42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
428 | #include "cpregs.h" | 43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
429 | +#include "target/arm/cpu-qom.h" | 44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); |
430 | 45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | |
431 | static void arm_cpu_set_pc(CPUState *cs, vaddr value) | 46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); |
432 | { | 47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); |
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
73 | } | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_AH) { | ||
433 | -- | 84 | -- |
434 | 2.34.1 | 85 | 2.34.1 |
435 | 86 | ||
436 | 87 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Rename to arm_build_mp_affinity. This frees up the name for | 3 | Replace with fp_status[FPST_A64_F16]. |
4 | other usage, and emphasizes that the cpu object is not involved. | ||
5 | 4 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org |
9 | Message-id: 20240118200643.29037-9-philmd@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 9 | --- |
12 | target/arm/cpu.h | 2 +- | 10 | target/arm/cpu.h | 1 - |
13 | hw/arm/npcm7xx.c | 2 +- | 11 | target/arm/cpu.c | 2 +- |
14 | hw/arm/sbsa-ref.c | 2 +- | 12 | target/arm/tcg/sme_helper.c | 2 +- |
15 | hw/arm/virt.c | 2 +- | 13 | target/arm/tcg/vec_helper.c | 9 ++++----- |
16 | target/arm/cpu.c | 6 +++--- | 14 | target/arm/vfp_helper.c | 16 ++++++++-------- |
17 | 5 files changed, 7 insertions(+), 7 deletions(-) | 15 | 5 files changed, 14 insertions(+), 16 deletions(-) |
18 | 16 | ||
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
20 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/cpu.h | 19 | --- a/target/arm/cpu.h |
22 | +++ b/target/arm/cpu.h | 20 | +++ b/target/arm/cpu.h |
23 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj); | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
24 | (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK) | 22 | float_status fp_status_a32; |
25 | #define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK) | 23 | float_status fp_status_a64; |
26 | 24 | float_status fp_status_f16_a32; | |
27 | -uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz); | 25 | - float_status fp_status_f16_a64; |
28 | +uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz); | 26 | }; |
29 | 27 | }; | |
30 | #ifndef CONFIG_USER_ONLY | 28 | |
31 | extern const VMStateDescription vmstate_arm_cpu; | ||
32 | diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/hw/arm/npcm7xx.c | ||
35 | +++ b/hw/arm/npcm7xx.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) | ||
37 | /* CPUs */ | ||
38 | for (i = 0; i < nc->num_cpus; i++) { | ||
39 | object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity", | ||
40 | - arm_cpu_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS), | ||
41 | + arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS), | ||
42 | &error_abort); | ||
43 | object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar", | ||
44 | NPCM7XX_GIC_CPU_IF_ADDR, &error_abort); | ||
45 | diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/hw/arm/sbsa-ref.c | ||
48 | +++ b/hw/arm/sbsa-ref.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static const int sbsa_ref_irqmap[] = { | ||
50 | static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx) | ||
51 | { | ||
52 | uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER; | ||
53 | - return arm_cpu_mp_affinity(idx, clustersz); | ||
54 | + return arm_build_mp_affinity(idx, clustersz); | ||
55 | } | ||
56 | |||
57 | static void sbsa_fdt_add_gic_node(SBSAMachineState *sms) | ||
58 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/hw/arm/virt.c | ||
61 | +++ b/hw/arm/virt.c | ||
62 | @@ -XXX,XX +XXX,XX @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) | ||
63 | clustersz = GICV3_TARGETLIST_BITS; | ||
64 | } | ||
65 | } | ||
66 | - return arm_cpu_mp_affinity(idx, clustersz); | ||
67 | + return arm_build_mp_affinity(idx, clustersz); | ||
68 | } | ||
69 | |||
70 | static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, | ||
71 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
72 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
73 | --- a/target/arm/cpu.c | 31 | --- a/target/arm/cpu.c |
74 | +++ b/target/arm/cpu.c | 32 | +++ b/target/arm/cpu.c |
75 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | } | ||
76 | } | 61 | } |
62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
77 | } | 65 | } |
78 | 66 | ||
79 | -uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) | 67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
80 | +uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz) | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
81 | { | 69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
82 | uint32_t Aff1 = idx / clustersz; | 70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
83 | uint32_t Aff0 = idx % clustersz; | 71 | float_status *status = &env->vfp.fp_status_a64; |
84 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) | 72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
85 | * so these bits always RAZ. | 73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
86 | */ | 74 | int negx = 0, negf = 0; |
87 | if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) { | 75 | |
88 | - cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index, | 76 | if (is_s) { |
89 | - ARM_DEFAULT_CPUS_PER_CLUSTER); | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
90 | + cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index, | 78 | } |
91 | + ARM_DEFAULT_CPUS_PER_CLUSTER); | ||
92 | } | 79 | } |
93 | 80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | |
94 | if (cpu->reset_hivecs) { | 81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
99 | - | ||
100 | for (i = 0; i < oprsz; i += 16) { | ||
101 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/vfp_helper.c | ||
106 | +++ b/target/arm/vfp_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
108 | & ~float_flag_input_denormal_flushed); | ||
109 | |||
110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
131 | } | ||
132 | if (changed & FPCR_FZ16) { | ||
133 | bool ftz_enabled = val & FPCR_FZ16; | ||
134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
144 | } | ||
145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
165 | } | ||
166 | /* | ||
95 | -- | 167 | -- |
96 | 2.34.1 | 168 | 2.34.1 |
97 | 169 | ||
98 | 170 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Declare arm_cpu_mp_affinity() prototype in the new | 3 | Replace with fp_status[FPST_A32_F16]. |
4 | "target/arm/multiprocessing.h" header so units in | ||
5 | hw/arm/ can use it without having to include the huge | ||
6 | target-specific "cpu.h". | ||
7 | 4 | ||
8 | File list to include the new header generated using: | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | |
10 | $ git grep -lw arm_cpu_mp_affinity | 7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org |
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20240118200643.29037-11-philmd@linaro.org | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 9 | --- |
17 | target/arm/cpu.h | 6 +----- | 10 | target/arm/cpu.h | 1 - |
18 | target/arm/multiprocessing.h | 16 ++++++++++++++++ | 11 | target/arm/cpu.c | 2 +- |
19 | hw/arm/virt-acpi-build.c | 1 + | 12 | target/arm/tcg/vec_helper.c | 4 ++-- |
20 | hw/arm/virt.c | 1 + | 13 | target/arm/vfp_helper.c | 14 +++++++------- |
21 | hw/arm/xlnx-versal-virt.c | 1 + | 14 | 4 files changed, 10 insertions(+), 11 deletions(-) |
22 | hw/misc/xlnx-versal-crl.c | 1 + | ||
23 | target/arm/arm-powerctl.c | 1 + | ||
24 | target/arm/cpu.c | 5 +++++ | ||
25 | target/arm/hvf/hvf.c | 1 + | ||
26 | target/arm/tcg/psci.c | 1 + | ||
27 | 10 files changed, 29 insertions(+), 5 deletions(-) | ||
28 | create mode 100644 target/arm/multiprocessing.h | ||
29 | 15 | ||
30 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
31 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/target/arm/cpu.h | 18 | --- a/target/arm/cpu.h |
33 | +++ b/target/arm/cpu.h | 19 | +++ b/target/arm/cpu.h |
34 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
35 | #include "cpu-qom.h" | 21 | struct { |
36 | #include "exec/cpu-defs.h" | 22 | float_status fp_status_a32; |
37 | #include "qapi/qapi-types-common.h" | 23 | float_status fp_status_a64; |
38 | +#include "target/arm/multiprocessing.h" | 24 | - float_status fp_status_f16_a32; |
39 | 25 | }; | |
40 | /* ARM processors have a weak memory model */ | 26 | }; |
41 | #define TCG_GUEST_DEFAULT_MO (0) | 27 | |
42 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj); | ||
43 | |||
44 | uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz); | ||
45 | |||
46 | -static inline uint64_t arm_cpu_mp_affinity(ARMCPU *cpu) | ||
47 | -{ | ||
48 | - return cpu->mp_affinity; | ||
49 | -} | ||
50 | - | ||
51 | #ifndef CONFIG_USER_ONLY | ||
52 | extern const VMStateDescription vmstate_arm_cpu; | ||
53 | |||
54 | diff --git a/target/arm/multiprocessing.h b/target/arm/multiprocessing.h | ||
55 | new file mode 100644 | ||
56 | index XXXXXXX..XXXXXXX | ||
57 | --- /dev/null | ||
58 | +++ b/target/arm/multiprocessing.h | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | +/* | ||
61 | + * ARM multiprocessor CPU helpers | ||
62 | + * | ||
63 | + * Copyright (c) 2003 Fabrice Bellard | ||
64 | + * | ||
65 | + * SPDX-License-Identifier: LGPL-2.1-or-later | ||
66 | + */ | ||
67 | + | ||
68 | +#ifndef TARGET_ARM_MULTIPROCESSING_H | ||
69 | +#define TARGET_ARM_MULTIPROCESSING_H | ||
70 | + | ||
71 | +#include "target/arm/cpu-qom.h" | ||
72 | + | ||
73 | +uint64_t arm_cpu_mp_affinity(ARMCPU *cpu); | ||
74 | + | ||
75 | +#endif | ||
76 | diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/hw/arm/virt-acpi-build.c | ||
79 | +++ b/hw/arm/virt-acpi-build.c | ||
80 | @@ -XXX,XX +XXX,XX @@ | ||
81 | #include "hw/acpi/ghes.h" | ||
82 | #include "hw/acpi/viot.h" | ||
83 | #include "hw/virtio/virtio-acpi.h" | ||
84 | +#include "target/arm/multiprocessing.h" | ||
85 | |||
86 | #define ARM_SPI_BASE 32 | ||
87 | |||
88 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/hw/arm/virt.c | ||
91 | +++ b/hw/arm/virt.c | ||
92 | @@ -XXX,XX +XXX,XX @@ | ||
93 | #include "hw/arm/smmuv3.h" | ||
94 | #include "hw/acpi/acpi.h" | ||
95 | #include "target/arm/internals.h" | ||
96 | +#include "target/arm/multiprocessing.h" | ||
97 | #include "hw/mem/pc-dimm.h" | ||
98 | #include "hw/mem/nvdimm.h" | ||
99 | #include "hw/acpi/generic_event_device.h" | ||
100 | diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/hw/arm/xlnx-versal-virt.c | ||
103 | +++ b/hw/arm/xlnx-versal-virt.c | ||
104 | @@ -XXX,XX +XXX,XX @@ | ||
105 | #include "hw/qdev-properties.h" | ||
106 | #include "hw/arm/xlnx-versal.h" | ||
107 | #include "hw/arm/boot.h" | ||
108 | +#include "target/arm/multiprocessing.h" | ||
109 | #include "qom/object.h" | ||
110 | |||
111 | #define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt") | ||
112 | diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c | ||
113 | index XXXXXXX..XXXXXXX 100644 | ||
114 | --- a/hw/misc/xlnx-versal-crl.c | ||
115 | +++ b/hw/misc/xlnx-versal-crl.c | ||
116 | @@ -XXX,XX +XXX,XX @@ | ||
117 | #include "hw/resettable.h" | ||
118 | |||
119 | #include "target/arm/arm-powerctl.h" | ||
120 | +#include "target/arm/multiprocessing.h" | ||
121 | #include "hw/misc/xlnx-versal-crl.h" | ||
122 | |||
123 | #ifndef XLNX_VERSAL_CRL_ERR_DEBUG | ||
124 | diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/target/arm/arm-powerctl.c | ||
127 | +++ b/target/arm/arm-powerctl.c | ||
128 | @@ -XXX,XX +XXX,XX @@ | ||
129 | #include "qemu/log.h" | ||
130 | #include "qemu/main-loop.h" | ||
131 | #include "sysemu/tcg.h" | ||
132 | +#include "target/arm/multiprocessing.h" | ||
133 | |||
134 | #ifndef DEBUG_ARM_POWERCTL | ||
135 | #define DEBUG_ARM_POWERCTL 0 | ||
136 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
137 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
138 | --- a/target/arm/cpu.c | 30 | --- a/target/arm/cpu.c |
139 | +++ b/target/arm/cpu.c | 31 | +++ b/target/arm/cpu.c |
140 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz) | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
141 | return (Aff1 << ARM_AFF1_SHIFT) | Aff0; | 33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
142 | } | 51 | } |
143 | 52 | ||
144 | +uint64_t arm_cpu_mp_affinity(ARMCPU *cpu) | 53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
145 | +{ | 54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
146 | + return cpu->mp_affinity; | 55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
147 | +} | 56 | |
148 | + | 57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
149 | static void arm_cpu_initfn(Object *obj) | 58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
150 | { | 59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
151 | ARMCPU *cpu = ARM_CPU(obj); | 60 | } |
152 | diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c | 61 | |
62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
153 | index XXXXXXX..XXXXXXX 100644 | 64 | index XXXXXXX..XXXXXXX 100644 |
154 | --- a/target/arm/hvf/hvf.c | 65 | --- a/target/arm/vfp_helper.c |
155 | +++ b/target/arm/hvf/hvf.c | 66 | +++ b/target/arm/vfp_helper.c |
156 | @@ -XXX,XX +XXX,XX @@ | 67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
157 | #include "arm-powerctl.h" | 68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
158 | #include "target/arm/cpu.h" | 69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
159 | #include "target/arm/internals.h" | 70 | /* FZ16 does not generate an input denormal exception. */ |
160 | +#include "target/arm/multiprocessing.h" | 71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
161 | #include "trace/trace-target_arm_hvf.h" | 72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) |
162 | #include "migration/vmstate.h" | 73 | & ~float_flag_input_denormal_flushed); |
163 | 74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | |
164 | diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c | 75 | & ~float_flag_input_denormal_flushed); |
165 | index XXXXXXX..XXXXXXX 100644 | 76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
166 | --- a/target/arm/tcg/psci.c | 77 | */ |
167 | +++ b/target/arm/tcg/psci.c | 78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); |
168 | @@ -XXX,XX +XXX,XX @@ | 79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); |
169 | #include "sysemu/runstate.h" | 80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
170 | #include "internals.h" | 81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); |
171 | #include "arm-powerctl.h" | 82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); |
172 | +#include "target/arm/multiprocessing.h" | 83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); |
173 | 84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | |
174 | bool arm_is_psci_call(ARMCPU *cpu, int excp_type) | 85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
175 | { | 86 | } |
87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
92 | } | ||
93 | if (changed & FPCR_FZ16) { | ||
94 | bool ftz_enabled = val & FPCR_FZ16; | ||
95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
115 | softfloat_to_vfp_compare(env, \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) | ||
119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
122 | #undef DO_VFP_cmp | ||
176 | -- | 123 | -- |
177 | 2.34.1 | 124 | 2.34.1 |
178 | 125 | ||
179 | 126 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Wrapper to return the mp affinity bits from the cpu. | 3 | Replace with fp_status[FPST_A64]. |
4 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org |
8 | Message-id: 20240118200643.29037-10-philmd@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | target/arm/cpu.h | 5 +++++ | 10 | target/arm/cpu.h | 1 - |
12 | hw/arm/virt-acpi-build.c | 2 +- | 11 | target/arm/cpu.c | 2 +- |
13 | hw/arm/virt.c | 6 +++--- | 12 | target/arm/tcg/sme_helper.c | 2 +- |
14 | hw/arm/xlnx-versal-virt.c | 3 ++- | 13 | target/arm/tcg/vec_helper.c | 10 +++++----- |
15 | hw/misc/xlnx-versal-crl.c | 4 ++-- | 14 | target/arm/vfp_helper.c | 16 ++++++++-------- |
16 | target/arm/arm-powerctl.c | 2 +- | 15 | 5 files changed, 15 insertions(+), 16 deletions(-) |
17 | target/arm/hvf/hvf.c | 4 ++-- | ||
18 | target/arm/tcg/psci.c | 2 +- | ||
19 | 8 files changed, 17 insertions(+), 11 deletions(-) | ||
20 | 16 | ||
21 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
22 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/target/arm/cpu.h | 19 | --- a/target/arm/cpu.h |
24 | +++ b/target/arm/cpu.h | 20 | +++ b/target/arm/cpu.h |
25 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj); | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
26 | 22 | float_status fp_status[FPST_COUNT]; | |
27 | uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz); | 23 | struct { |
28 | 24 | float_status fp_status_a32; | |
29 | +static inline uint64_t arm_cpu_mp_affinity(ARMCPU *cpu) | 25 | - float_status fp_status_a64; |
30 | +{ | 26 | }; |
31 | + return cpu->mp_affinity; | 27 | }; |
32 | +} | 28 | |
33 | + | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
34 | #ifndef CONFIG_USER_ONLY | ||
35 | extern const VMStateDescription vmstate_arm_cpu; | ||
36 | |||
37 | diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/hw/arm/virt-acpi-build.c | 31 | --- a/target/arm/cpu.c |
40 | +++ b/hw/arm/virt-acpi-build.c | 32 | +++ b/target/arm/cpu.c |
41 | @@ -XXX,XX +XXX,XX @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
42 | build_append_int_noprefix(table_data, vgic_interrupt, 4); | 34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
43 | build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ | 35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
44 | /* MPIDR */ | 36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
45 | - build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); | 37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
46 | + build_append_int_noprefix(table_data, arm_cpu_mp_affinity(armcpu), 8); | 38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
47 | /* Processor Power Efficiency Class */ | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
48 | build_append_int_noprefix(table_data, 0, 1); | 40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); |
49 | /* Reserved */ | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); |
50 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c |
51 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/hw/arm/virt.c | 44 | --- a/target/arm/tcg/sme_helper.c |
53 | +++ b/hw/arm/virt.c | 45 | +++ b/target/arm/tcg/sme_helper.c |
54 | @@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, |
55 | for (cpu = 0; cpu < smp_cpus; cpu++) { | 47 | * round-to-odd -- see above. |
56 | ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); | 48 | */ |
57 | 49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | |
58 | - if (armcpu->mp_affinity & ARM_AFF3_MASK) { | 50 | - fpst_std = env->vfp.fp_status_a64; |
59 | + if (arm_cpu_mp_affinity(armcpu) & ARM_AFF3_MASK) { | 51 | + fpst_std = env->vfp.fp_status[FPST_A64]; |
60 | addr_cells = 2; | 52 | set_default_nan_mode(true, &fpst_std); |
53 | set_default_nan_mode(true, &fpst_f16); | ||
54 | fpst_odd = fpst_std; | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
62 | } | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
66 | } | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | intptr_t i, oprsz = simd_oprsz(desc); | ||
70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
72 | - float_status *status = &env->vfp.fp_status_a64; | ||
73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
75 | int negx = 0, negf = 0; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | negx = 0x8000800080008000ull; | ||
79 | } | ||
80 | } | ||
81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
96 | */ | ||
97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
98 | |||
99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; | ||
100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
101 | set_default_nan_mode(true, statusp); | ||
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/arm/vfp_helper.c | ||
107 | +++ b/target/arm/vfp_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
110 | & ~float_flag_input_denormal_flushed); | ||
111 | |||
112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); | ||
114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
116 | /* | ||
117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
118 | * be the architecturally up-to-date exception flag information first. | ||
119 | */ | ||
120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
61 | break; | 127 | break; |
62 | } | 128 | } |
63 | @@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) | 129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); |
64 | 130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | |
65 | if (addr_cells == 2) { | 131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); |
66 | qemu_fdt_setprop_u64(ms->fdt, nodename, "reg", | 132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); |
67 | - armcpu->mp_affinity); | 133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); |
68 | + arm_cpu_mp_affinity(armcpu)); | 134 | } |
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
69 | } else { | 166 | } else { |
70 | qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", | 167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
71 | - armcpu->mp_affinity); | 168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
72 | + arm_cpu_mp_affinity(armcpu)); | 169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); |
73 | } | ||
74 | |||
75 | if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) { | ||
76 | diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/hw/arm/xlnx-versal-virt.c | ||
79 | +++ b/hw/arm/xlnx-versal-virt.c | ||
80 | @@ -XXX,XX +XXX,XX @@ static void fdt_add_cpu_nodes(VersalVirt *s, uint32_t psci_conduit) | ||
81 | ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); | ||
82 | |||
83 | qemu_fdt_add_subnode(s->fdt, name); | ||
84 | - qemu_fdt_setprop_cell(s->fdt, name, "reg", armcpu->mp_affinity); | ||
85 | + qemu_fdt_setprop_cell(s->fdt, name, "reg", | ||
86 | + arm_cpu_mp_affinity(armcpu)); | ||
87 | if (psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { | ||
88 | qemu_fdt_setprop_string(s->fdt, name, "enable-method", "psci"); | ||
89 | } | ||
90 | diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/hw/misc/xlnx-versal-crl.c | ||
93 | +++ b/hw/misc/xlnx-versal-crl.c | ||
94 | @@ -XXX,XX +XXX,XX @@ static void crl_reset_cpu(XlnxVersalCRL *s, ARMCPU *armcpu, | ||
95 | bool rst_old, bool rst_new) | ||
96 | { | ||
97 | if (rst_new) { | ||
98 | - arm_set_cpu_off(armcpu->mp_affinity); | ||
99 | + arm_set_cpu_off(arm_cpu_mp_affinity(armcpu)); | ||
100 | } else { | ||
101 | - arm_set_cpu_on_and_reset(armcpu->mp_affinity); | ||
102 | + arm_set_cpu_on_and_reset(arm_cpu_mp_affinity(armcpu)); | ||
103 | } | ||
104 | } | ||
105 | |||
106 | diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/arm/arm-powerctl.c | ||
109 | +++ b/target/arm/arm-powerctl.c | ||
110 | @@ -XXX,XX +XXX,XX @@ CPUState *arm_get_cpu_by_id(uint64_t id) | ||
111 | CPU_FOREACH(cpu) { | ||
112 | ARMCPU *armcpu = ARM_CPU(cpu); | ||
113 | |||
114 | - if (armcpu->mp_affinity == id) { | ||
115 | + if (arm_cpu_mp_affinity(armcpu) == id) { | ||
116 | return cpu; | ||
117 | } | 170 | } |
118 | } | 171 | } |
119 | diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/target/arm/hvf/hvf.c | ||
122 | +++ b/target/arm/hvf/hvf.c | ||
123 | @@ -XXX,XX +XXX,XX @@ static void hvf_raise_exception(CPUState *cpu, uint32_t excp, | ||
124 | |||
125 | static void hvf_psci_cpu_off(ARMCPU *arm_cpu) | ||
126 | { | ||
127 | - int32_t ret = arm_set_cpu_off(arm_cpu->mp_affinity); | ||
128 | + int32_t ret = arm_set_cpu_off(arm_cpu_mp_affinity(arm_cpu)); | ||
129 | assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS); | ||
130 | } | ||
131 | |||
132 | @@ -XXX,XX +XXX,XX @@ static bool hvf_handle_psci_call(CPUState *cpu) | ||
133 | int32_t ret = 0; | ||
134 | |||
135 | trace_hvf_psci_call(param[0], param[1], param[2], param[3], | ||
136 | - arm_cpu->mp_affinity); | ||
137 | + arm_cpu_mp_affinity(arm_cpu)); | ||
138 | |||
139 | switch (param[0]) { | ||
140 | case QEMU_PSCI_0_2_FN_PSCI_VERSION: | ||
141 | diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/target/arm/tcg/psci.c | ||
144 | +++ b/target/arm/tcg/psci.c | ||
145 | @@ -XXX,XX +XXX,XX @@ err: | ||
146 | return; | ||
147 | |||
148 | cpu_off: | ||
149 | - ret = arm_set_cpu_off(cpu->mp_affinity); | ||
150 | + ret = arm_set_cpu_off(arm_cpu_mp_affinity(cpu)); | ||
151 | /* notreached */ | ||
152 | /* sanity check in case something failed */ | ||
153 | assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS); | ||
154 | -- | 172 | -- |
155 | 2.34.1 | 173 | 2.34.1 |
156 | 174 | ||
157 | 175 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Missed in commit 2d56be5a29 ("target: Declare | 3 | Replace with fp_status[FPST_A32]. As this was the last of the |
4 | FOO_CPU_TYPE_NAME/SUFFIX in 'cpu-qom.h'"). See | 4 | old structures, we can remove the anonymous union and struct. |
5 | it for more details. | ||
6 | 5 | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | Message-id: 20240118200643.29037-12-philmd@linaro.org | 8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org |
9 | [PMM: tweak to account for change to is_ebf()] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 11 | --- |
12 | target/arm/cpu-qom.h | 3 +++ | 12 | target/arm/cpu.h | 7 +------ |
13 | target/arm/cpu.h | 2 -- | 13 | target/arm/cpu.c | 2 +- |
14 | 2 files changed, 3 insertions(+), 2 deletions(-) | 14 | target/arm/tcg/vec_helper.c | 2 +- |
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
15 | 17 | ||
16 | diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/cpu-qom.h | ||
19 | +++ b/target/arm/cpu-qom.h | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef struct AArch64CPUClass AArch64CPUClass; | ||
21 | DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU, | ||
22 | TYPE_AARCH64_CPU) | ||
23 | |||
24 | +#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU | ||
25 | +#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) | ||
26 | + | ||
27 | #endif | ||
28 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
29 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/target/arm/cpu.h | 20 | --- a/target/arm/cpu.h |
31 | +++ b/target/arm/cpu.h | 21 | +++ b/target/arm/cpu.h |
32 | @@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); | 22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
33 | #define ARM_CPUID_TI915T 0x54029152 | 23 | uint32_t scratch[8]; |
34 | #define ARM_CPUID_TI925T 0x54029252 | 24 | |
35 | 25 | /* There are a number of distinct float control structures. */ | |
36 | -#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU | 26 | - union { |
37 | -#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) | 27 | - float_status fp_status[FPST_COUNT]; |
38 | #define CPU_RESOLVING_TYPE TYPE_ARM_CPU | 28 | - struct { |
39 | 29 | - float_status fp_status_a32; | |
40 | #define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU | 30 | - }; |
31 | - }; | ||
32 | + float_status fp_status[FPST_COUNT]; | ||
33 | |||
34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | ||
46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
54 | */ | ||
55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
56 | |||
57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; | ||
59 | set_default_nan_mode(true, statusp); | ||
60 | |||
61 | if (ebf) { | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
87 | } | ||
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
94 | } | ||
95 | if (changed & FPCR_FZ) { | ||
96 | bool ftz_enabled = val & FPCR_FZ; | ||
97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
105 | /* | ||
106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
134 | |||
41 | -- | 135 | -- |
42 | 2.34.1 | 136 | 2.34.1 |
43 | 137 | ||
44 | 138 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | hw/arm/xilinx_zynq.c calls tswap32() which is declared | 3 | Select on index instead of pointer. |
4 | in "exec/tswap.h". Include it in order to avoid this error when | 4 | No functional change. |
5 | refactoring unrelated headers: | ||
6 | 5 | ||
7 | hw/arm/xilinx_zynq.c:103:31: error: call to undeclared function 'tswap32'; | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | board_setup_blob[n] = tswap32(board_setup_blob[n]); | 8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org |
10 | ^ | ||
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20240118200643.29037-3-philmd@linaro.org | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 10 | --- |
17 | hw/arm/xilinx_zynq.c | 1 + | 11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ |
18 | 1 file changed, 1 insertion(+) | 12 | 1 file changed, 14 insertions(+), 26 deletions(-) |
19 | 13 | ||
20 | diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c | 14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
21 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/arm/xilinx_zynq.c | 16 | --- a/target/arm/tcg/mve_helper.c |
23 | +++ b/hw/arm/xilinx_zynq.c | 17 | +++ b/target/arm/tcg/mve_helper.c |
24 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
25 | #include "hw/qdev-clock.h" | 19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
26 | #include "sysemu/reset.h" | 20 | continue; \ |
27 | #include "qom/object.h" | 21 | } \ |
28 | +#include "exec/tswap.h" | 22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
29 | 23 | - &env->vfp.fp_status[FPST_STD]; \ | |
30 | #define TYPE_ZYNQ_MACHINE MACHINE_TYPE_NAME("xilinx-zynq-a9") | 24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
31 | OBJECT_DECLARE_SIMPLE_TYPE(ZynqMachineState, ZYNQ_MACHINE) | 25 | if (!(mask & 1)) { \ |
26 | /* We need the result but without updating flags */ \ | ||
27 | scratch_fpst = *fpst; \ | ||
28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
29 | r[e] = 0; \ | ||
30 | continue; \ | ||
31 | } \ | ||
32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
33 | - &env->vfp.fp_status[FPST_STD]; \ | ||
34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
35 | if (!(tm & 1)) { \ | ||
36 | /* We need the result but without updating flags */ \ | ||
37 | scratch_fpst = *fpst; \ | ||
38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
40 | continue; \ | ||
41 | } \ | ||
42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
43 | - &env->vfp.fp_status[FPST_STD]; \ | ||
44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
45 | if (!(mask & 1)) { \ | ||
46 | /* We need the result but without updating flags */ \ | ||
47 | scratch_fpst = *fpst; \ | ||
48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
50 | continue; \ | ||
51 | } \ | ||
52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
53 | - &env->vfp.fp_status[FPST_STD]; \ | ||
54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
55 | fpst1 = fpst0; \ | ||
56 | if (!(mask & 1)) { \ | ||
57 | scratch_fpst = *fpst0; \ | ||
58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
60 | continue; \ | ||
61 | } \ | ||
62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
63 | - &env->vfp.fp_status[FPST_STD]; \ | ||
64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
65 | if (!(mask & 1)) { \ | ||
66 | /* We need the result but without updating flags */ \ | ||
67 | scratch_fpst = *fpst; \ | ||
68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
70 | continue; \ | ||
71 | } \ | ||
72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
73 | - &env->vfp.fp_status[FPST_STD]; \ | ||
74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
75 | if (!(mask & 1)) { \ | ||
76 | /* We need the result but without updating flags */ \ | ||
77 | scratch_fpst = *fpst; \ | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
79 | unsigned e; \ | ||
80 | TYPE *m = vm; \ | ||
81 | TYPE ra = (TYPE)ra_in; \ | ||
82 | - float_status *fpst = (ESIZE == 2) ? \ | ||
83 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
84 | - &env->vfp.fp_status[FPST_STD]; \ | ||
85 | + float_status *fpst = \ | ||
86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
88 | if (mask & 1) { \ | ||
89 | TYPE v = m[H##ESIZE(e)]; \ | ||
90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
91 | if ((mask & emask) == 0) { \ | ||
92 | continue; \ | ||
93 | } \ | ||
94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.fp_status[FPST_STD]; \ | ||
96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
97 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
32 | -- | 142 | -- |
33 | 2.34.1 | 143 | 2.34.1 |
34 | 144 | ||
35 | 145 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | hw/arm/exynos4210.c calls tswap32() which is declared | 3 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. |
4 | in "exec/tswap.h". Include it in order to avoid this error when | ||
5 | refactoring unrelated headers: | ||
6 | 4 | ||
7 | hw/arm/exynos4210.c:499:22: error: call to undeclared function 'tswap32'; | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | smpboot[n] = tswap32(smpboot[n]); | 7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org |
10 | ^ | ||
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20240118200643.29037-2-philmd@linaro.org | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 9 | --- |
17 | hw/arm/exynos4210.c | 1 + | 10 | target/arm/vfp_helper.c | 10 +++++----- |
18 | 1 file changed, 1 insertion(+) | 11 | 1 file changed, 5 insertions(+), 5 deletions(-) |
19 | 12 | ||
20 | diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c | 13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
21 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/arm/exynos4210.c | 15 | --- a/target/arm/vfp_helper.c |
23 | +++ b/hw/arm/exynos4210.c | 16 | +++ b/target/arm/vfp_helper.c |
24 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
25 | 18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | |
26 | #include "qemu/osdep.h" | 19 | { \ |
27 | #include "qapi/error.h" | 20 | softfloat_to_vfp_compare(env, \ |
28 | +#include "exec/tswap.h" | 21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
29 | #include "cpu.h" | 22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
30 | #include "hw/cpu/a9mpcore.h" | 23 | } \ |
31 | #include "hw/irq.h" | 24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
25 | { \ | ||
26 | softfloat_to_vfp_compare(env, \ | ||
27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | ||
29 | } | ||
30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) | ||
34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) | ||
35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) | ||
36 | #undef DO_VFP_cmp | ||
37 | |||
38 | /* Integer to float and float to integer conversions */ | ||
32 | -- | 39 | -- |
33 | 2.34.1 | 40 | 2.34.1 |
34 | 41 | ||
35 | 42 | diff view generated by jsdifflib |
1 | From: Guenter Roeck <linux@roeck-us.net> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add watchdog timer support to Allwinner-R40 and Bananapi. | 3 | Read the bit from the source, rather than from the proxy via |
4 | The watchdog timer is added as an overlay to the Timer | 4 | get_flush_inputs_to_zero. This makes it clear that it does |
5 | module memory map. | 5 | not matter which of the float_status structures is used. |
6 | 6 | ||
7 | Signed-off-by: Guenter Roeck <linux@roeck-us.net> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Reviewed-by: Strahinja Jankovic <strahinja.p.jankovic@gmail.com> | 8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org |
9 | Message-id: 20240115182757.1095012-4-linux@roeck-us.net | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 11 | --- |
12 | docs/system/arm/bananapi_m2u.rst | 2 +- | 12 | target/arm/tcg/vec_helper.c | 12 ++++++------ |
13 | include/hw/arm/allwinner-r40.h | 3 +++ | 13 | 1 file changed, 6 insertions(+), 6 deletions(-) |
14 | hw/arm/allwinner-r40.c | 8 ++++++++ | ||
15 | hw/arm/Kconfig | 1 + | ||
16 | 4 files changed, 13 insertions(+), 1 deletion(-) | ||
17 | 14 | ||
18 | diff --git a/docs/system/arm/bananapi_m2u.rst b/docs/system/arm/bananapi_m2u.rst | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/docs/system/arm/bananapi_m2u.rst | 17 | --- a/target/arm/tcg/vec_helper.c |
21 | +++ b/docs/system/arm/bananapi_m2u.rst | 18 | +++ b/target/arm/tcg/vec_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ The Banana Pi M2U machine supports the following devices: | 19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
23 | * SATA | 20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
24 | * TWI (I2C) | 21 | |
25 | * USB 2.0 | 22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
26 | + * Hardware Watchdog | 23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
27 | 24 | + env->vfp.fpcr & FPCR_FZ16); | |
28 | Limitations | 25 | } |
29 | """"""""""" | 26 | |
30 | @@ -XXX,XX +XXX,XX @@ Currently, Banana Pi M2U does *not* support the following features: | 27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
31 | 28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | |
32 | - Graphical output via HDMI, GPU and/or the Display Engine | 29 | } |
33 | - Audio output | 30 | } |
34 | -- Hardware Watchdog | 31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
35 | - Real Time Clock | 32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
36 | 33 | + env->vfp.fpcr & FPCR_FZ16); | |
37 | Also see the 'unimplemented' array in the Allwinner R40 SoC module | 34 | } |
38 | diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h | 35 | |
39 | index XXXXXXX..XXXXXXX 100644 | 36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
40 | --- a/include/hw/arm/allwinner-r40.h | 37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
41 | +++ b/include/hw/arm/allwinner-r40.h | 38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
42 | @@ -XXX,XX +XXX,XX @@ | 39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
43 | #include "hw/net/allwinner-sun8i-emac.h" | 40 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
44 | #include "hw/usb/hcd-ohci.h" | 41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
45 | #include "hw/usb/hcd-ehci.h" | 42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
46 | +#include "hw/watchdog/allwinner-wdt.h" | 43 | int negx = 0, negf = 0; |
47 | #include "target/arm/cpu.h" | 44 | |
48 | #include "sysemu/block-backend.h" | 45 | if (is_s) { |
49 | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | |
50 | @@ -XXX,XX +XXX,XX @@ enum { | 47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
51 | AW_R40_DEV_OHCI2, | 48 | |
52 | AW_R40_DEV_CCU, | 49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
53 | AW_R40_DEV_PIT, | 50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
54 | + AW_R40_DEV_WDT, | 51 | + env->vfp.fpcr & FPCR_FZ16); |
55 | AW_R40_DEV_UART0, | 52 | } |
56 | AW_R40_DEV_UART1, | 53 | |
57 | AW_R40_DEV_UART2, | 54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
58 | @@ -XXX,XX +XXX,XX @@ struct AwR40State { | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
59 | const hwaddr *memmap; | 56 | } |
60 | AwSRAMCState sramc; | 57 | } |
61 | AwA10PITState timer; | 58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
62 | + AwWdtState wdt; | 59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
63 | AllwinnerAHCIState sata; | 60 | + env->vfp.fpcr & FPCR_FZ16); |
64 | AwSdHostState mmc[AW_R40_NUM_MMCS]; | 61 | } |
65 | EHCISysBusState ehci[AW_R40_NUM_USB]; | 62 | |
66 | diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c | 63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
67 | index XXXXXXX..XXXXXXX 100644 | 64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
68 | --- a/hw/arm/allwinner-r40.c | 65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
69 | +++ b/hw/arm/allwinner-r40.c | 66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); |
70 | @@ -XXX,XX +XXX,XX @@ const hwaddr allwinner_r40_memmap[] = { | 67 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
71 | [AW_R40_DEV_OHCI2] = 0x01c1c400, | 68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
72 | [AW_R40_DEV_CCU] = 0x01c20000, | 69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
73 | [AW_R40_DEV_PIT] = 0x01c20c00, | 70 | int negx = 0, negf = 0; |
74 | + [AW_R40_DEV_WDT] = 0x01c20c90, | 71 | |
75 | [AW_R40_DEV_UART0] = 0x01c28000, | 72 | if (is_s) { |
76 | [AW_R40_DEV_UART1] = 0x01c28400, | ||
77 | [AW_R40_DEV_UART2] = 0x01c28800, | ||
78 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_init(Object *obj) | ||
79 | object_property_add_alias(obj, "clk1-freq", OBJECT(&s->timer), | ||
80 | "clk1-freq"); | ||
81 | |||
82 | + object_initialize_child(obj, "wdt", &s->wdt, TYPE_AW_WDT_SUN4I); | ||
83 | + | ||
84 | object_initialize_child(obj, "ccu", &s->ccu, TYPE_AW_R40_CCU); | ||
85 | |||
86 | for (int i = 0; i < AW_R40_NUM_MMCS; i++) { | ||
87 | @@ -XXX,XX +XXX,XX @@ static void allwinner_r40_realize(DeviceState *dev, Error **errp) | ||
88 | sysbus_connect_irq(SYS_BUS_DEVICE(&s->emac), 0, | ||
89 | qdev_get_gpio_in(DEVICE(&s->gic), AW_R40_GIC_SPI_EMAC)); | ||
90 | |||
91 | + /* WDT */ | ||
92 | + sysbus_realize(SYS_BUS_DEVICE(&s->wdt), &error_fatal); | ||
93 | + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->wdt), 0, | ||
94 | + allwinner_r40_memmap[AW_R40_DEV_WDT], 1); | ||
95 | + | ||
96 | /* Unimplemented devices */ | ||
97 | for (unsigned i = 0; i < ARRAY_SIZE(r40_unimplemented); i++) { | ||
98 | create_unimplemented_device(r40_unimplemented[i].device_name, | ||
99 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig | ||
100 | index XXXXXXX..XXXXXXX 100644 | ||
101 | --- a/hw/arm/Kconfig | ||
102 | +++ b/hw/arm/Kconfig | ||
103 | @@ -XXX,XX +XXX,XX @@ config ALLWINNER_R40 | ||
104 | select AHCI | ||
105 | select ALLWINNER_SRAMC | ||
106 | select ALLWINNER_A10_PIT | ||
107 | + select ALLWINNER_WDT | ||
108 | select AXP2XX_PMU | ||
109 | select SERIAL | ||
110 | select ARM_TIMER | ||
111 | -- | 73 | -- |
112 | 2.34.1 | 74 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The TUSB6010 USB controller is soldered on the N800 and N810 | 3 | Sink common code from the callers into do_fmlal |
4 | tablets, thus is always present. | 4 | and do_fmlal_idx. Reorder the arguments to minimize |
5 | the re-sorting from the caller's arguments. | ||
5 | 6 | ||
6 | This is a migration compatibility break for the n800/n810 | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | machines started with the '-usb none' option. | 8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org |
8 | |||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-id: 20240119215106.45776-3-philmd@linaro.org | ||
11 | [PMM: fixed commit message typo] | ||
12 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 11 | --- |
15 | hw/arm/nseries.c | 4 +--- | 12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ |
16 | 1 file changed, 1 insertion(+), 3 deletions(-) | 13 | 1 file changed, 16 insertions(+), 12 deletions(-) |
17 | 14 | ||
18 | diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/hw/arm/nseries.c | 17 | --- a/target/arm/tcg/vec_helper.c |
21 | +++ b/hw/arm/nseries.c | 18 | +++ b/target/arm/tcg/vec_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ static void n8x0_init(MachineState *machine, | 19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
23 | n8x0_spi_setup(s); | 20 | * as there is not yet SVE versions that might use blocking. |
24 | n8x0_dss_setup(s); | 21 | */ |
25 | n8x0_cbus_setup(s); | 22 | |
26 | - if (machine_usb(machine)) { | 23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
27 | - n8x0_usb_setup(s); | 24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
28 | - } | 25 | +static void do_fmlal(float32 *d, void *vn, void *vm, |
29 | + n8x0_usb_setup(s); | 26 | + CPUARMState *env, uint32_t desc, |
30 | 27 | + ARMFPStatusFlavour fpst_idx, | |
31 | if (machine->kernel_filename) { | 28 | + uint64_t negx, int negf) |
32 | /* Or at the linux loader. */ | 29 | { |
30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
42 | } | ||
43 | |||
44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
46 | negx = 0x8000800080008000ull; | ||
47 | } | ||
48 | } | ||
49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
50 | - env->vfp.fpcr & FPCR_FZ16); | ||
51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
52 | } | ||
53 | |||
54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
56 | } | ||
57 | } | ||
58 | |||
59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, | ||
62 | + CPUARMState *env, uint32_t desc, | ||
63 | + ARMFPStatusFlavour fpst_idx, | ||
64 | + uint64_t negx, int negf) | ||
65 | { | ||
66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
68 | intptr_t i, oprsz = simd_oprsz(desc); | ||
69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
74 | |||
75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
76 | - env->vfp.fpcr & FPCR_FZ16); | ||
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
78 | } | ||
79 | |||
80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
82 | negx = 0x8000800080008000ull; | ||
83 | } | ||
84 | } | ||
85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
86 | - env->vfp.fpcr & FPCR_FZ16); | ||
87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
88 | } | ||
89 | |||
90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
33 | -- | 91 | -- |
34 | 2.34.1 | 92 | 2.34.1 |
35 | |||
36 | diff view generated by jsdifflib |