1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches | 1 | v2: dropped PMCCNTR patch |
---|---|---|---|
2 | (plus a fix for a target/alpha latent bug that would otherwise | ||
3 | be revealed by the fpu changes), because 68 patches is already | ||
4 | longer than I prefer to send in at one time... | ||
5 | 2 | ||
6 | thanks | 3 | The following changes since commit 0f397dcfecc9211d12c2c720c01eb32f0eaa7d23: |
7 | -- PMM | ||
8 | 4 | ||
9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: | 5 | Merge tag 'pull-nbd-2024-08-08' of https://repo.or.cz/qemu/ericb into staging (2024-08-09 08:40:37 +1000) |
10 | |||
11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) | ||
12 | 6 | ||
13 | are available in the Git repository at: | 7 | are available in the Git repository at: |
14 | 8 | ||
15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 | 9 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240812 |
16 | 10 | ||
17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: | 11 | for you to fetch changes up to ed5031ad5d4c4c3b6eee6ab21aa95ccfc9dffdd4: |
18 | 12 | ||
19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) | 13 | arm/virt: place power button pin number on a define (2024-08-12 11:40:16 +0100) |
20 | 14 | ||
21 | ---------------------------------------------------------------- | 15 | ---------------------------------------------------------------- |
22 | target-arm queue: | 16 | * Fix BTI versus CF_PCREL |
23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags | 17 | * include: Fix typo in name of MAKE_IDENTFIER macro |
24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES | 18 | * docs: Various txt-to-rST conversions |
19 | * hw/core/ptimer: fix timer zero period condition for freq > 1GHz | ||
20 | * arm/virt: place power button pin number on a define | ||
25 | 21 | ||
26 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
27 | Peter Maydell (49): | 23 | Eric Blake (1): |
28 | target/alpha: Don't corrupt error_code with unknown softfloat flags | 24 | docs: Typo fix in live disk backup |
29 | fpu: Add float_class_denormal | ||
30 | fpu: Implement float_flag_input_denormal_used | ||
31 | fpu: allow flushing of output denormals to be after rounding | ||
32 | target/arm: Define FPCR AH, FIZ, NEP bits | ||
33 | target/arm: Implement FPCR.FIZ handling | ||
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
77 | 25 | ||
78 | Richard Henderson (19): | 26 | Jianzhou Yue (1): |
79 | target/arm: Handle FPCR.AH in vector FCMLA | 27 | hw/core/ptimer: fix timer zero period condition for freq > 1GHz |
80 | target/arm: Handle FPCR.AH in FCMLA by index | ||
81 | target/arm: Handle FPCR.AH in SVE FCMLA | ||
82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) | ||
83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) | ||
84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) | ||
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
98 | 28 | ||
99 | docs/system/arm/emulation.rst | 2 + | 29 | Mauro Carvalho Chehab (1): |
100 | include/fpu/softfloat-helpers.h | 11 + | 30 | arm/virt: place power button pin number on a define |
101 | include/fpu/softfloat-types.h | 25 ++ | 31 | |
102 | target/arm/cpu-features.h | 10 + | 32 | Peter Maydell (6): |
103 | target/arm/cpu.h | 97 +++-- | 33 | include: Fix typo in name of MAKE_IDENTFIER macro |
104 | target/arm/helper.h | 26 ++ | 34 | docs/specs/rocker.txt: Convert to rST |
105 | target/arm/internals.h | 6 + | 35 | docs/interop/nbd.txt: Convert to rST |
106 | target/arm/tcg/helper-a64.h | 13 + | 36 | docs/interop/parallels.txt: Convert to rST |
107 | target/arm/tcg/helper-sve.h | 120 ++++++ | 37 | docs/interop/prl-xml.txt: Convert to rST |
108 | target/arm/tcg/translate-a64.h | 13 + | 38 | docs/interop/prl-xml.rst: Fix minor grammar nits |
109 | target/arm/tcg/translate.h | 54 +-- | 39 | |
110 | target/arm/tcg/vec_internal.h | 35 ++ | 40 | Richard Henderson (1): |
111 | target/mips/fpu_helper.h | 6 + | 41 | target/arm: Fix BTI versus CF_PCREL |
112 | fpu/softfloat.c | 66 +++- | 42 | |
113 | target/alpha/cpu.c | 7 + | 43 | MAINTAINERS | 7 +- |
114 | target/alpha/fpu_helper.c | 2 + | 44 | docs/interop/index.rst | 3 + |
115 | target/arm/cpu.c | 46 +-- | 45 | docs/interop/live-block-operations.rst | 4 +- |
116 | target/arm/helper.c | 2 +- | 46 | docs/interop/nbd.rst | 89 ++++++++++++ |
117 | target/arm/tcg/cpu64.c | 2 + | 47 | docs/interop/nbd.txt | 72 ---------- |
118 | target/arm/tcg/helper-a64.c | 151 ++++---- | 48 | docs/interop/{parallels.txt => parallels.rst} | 108 ++++++++------- |
119 | target/arm/tcg/hflags.c | 13 + | 49 | docs/interop/prl-xml.rst | 192 ++++++++++++++++++++++++++ |
120 | target/arm/tcg/mve_helper.c | 44 +-- | 50 | docs/interop/prl-xml.txt | 158 --------------------- |
121 | target/arm/tcg/sme_helper.c | 4 +- | 51 | docs/specs/index.rst | 1 + |
122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- | 52 | docs/specs/{rocker.txt => rocker.rst} | 181 ++++++++++++------------ |
123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- | 53 | include/hw/arm/virt.h | 3 + |
124 | target/arm/tcg/translate-sve.c | 193 +++++++--- | 54 | include/qapi/qmp/qobject.h | 2 +- |
125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ | 55 | include/qemu/atomic.h | 2 +- |
126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- | 56 | include/qemu/compiler.h | 2 +- |
127 | target/hppa/fpu_helper.c | 11 + | 57 | include/qemu/osdep.h | 6 +- |
128 | target/i386/tcg/fpu_helper.c | 8 + | 58 | target/arm/tcg/helper-a64.h | 3 + |
129 | target/mips/msa.c | 9 + | 59 | target/arm/tcg/translate.h | 2 - |
130 | target/ppc/cpu_init.c | 3 + | 60 | hw/arm/virt-acpi-build.c | 6 +- |
131 | target/rx/cpu.c | 8 + | 61 | hw/arm/virt.c | 7 +- |
132 | target/sh4/cpu.c | 8 + | 62 | hw/core/ptimer.c | 4 +- |
133 | target/tricore/helper.c | 1 + | 63 | target/arm/tcg/helper-a64.c | 39 ++++++ |
134 | tests/fp/fp-bench.c | 1 + | 64 | target/arm/tcg/translate-a64.c | 64 ++------- |
135 | fpu/softfloat-parts.c.inc | 127 +++++-- | 65 | tests/unit/ptimer-test.c | 33 +++++ |
136 | 37 files changed, 2325 insertions(+), 709 deletions(-) | 66 | 23 files changed, 547 insertions(+), 441 deletions(-) |
67 | create mode 100644 docs/interop/nbd.rst | ||
68 | delete mode 100644 docs/interop/nbd.txt | ||
69 | rename docs/interop/{parallels.txt => parallels.rst} (72%) | ||
70 | create mode 100644 docs/interop/prl-xml.rst | ||
71 | delete mode 100644 docs/interop/prl-xml.txt | ||
72 | rename docs/specs/{rocker.txt => rocker.rst} (91%) | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In do_cvttq() we set env->error_code with what is supposed to be a | ||
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
9 | 1 | ||
10 | This is harmless now, but when we add new floating point exception | ||
11 | flags to softfloat it will cause problems. Add an else clause to the | ||
12 | if-ladder to make it ignore any float exception flags it doesn't care | ||
13 | about. | ||
14 | |||
15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha | ||
16 | when the commit adding float_flag_input_denormal_used lands. | ||
17 | |||
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
23 | --- | ||
24 | target/alpha/fpu_helper.c | 2 ++ | ||
25 | 1 file changed, 2 insertions(+) | ||
26 | |||
27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/alpha/fpu_helper.c | ||
30 | +++ b/target/alpha/fpu_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) | ||
32 | exc = FPCR_INV; | ||
33 | } else if (exc & float_flag_inexact) { | ||
34 | exc = FPCR_INE; | ||
35 | + } else { | ||
36 | + exc = 0; | ||
37 | } | ||
38 | } | ||
39 | env->error_code = exc; | ||
40 | -- | ||
41 | 2.34.1 | ||
42 | |||
43 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Currently in softfloat we canonicalize input denormals and so the | ||
2 | code that implements floating point operations does not need to care | ||
3 | whether the input value was originally normal or denormal. However, | ||
4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: | ||
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
9 | 1 | ||
10 | So we need to track that the input was a non-squashed denormal. To | ||
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
17 | |||
18 | There should be no behavioural change in this commit. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | --- | ||
23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- | ||
25 | 2 files changed, 54 insertions(+), 18 deletions(-) | ||
26 | |||
27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/fpu/softfloat.c | ||
30 | +++ b/fpu/softfloat.c | ||
31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, | ||
32 | /* | ||
33 | * Classify a floating point number. Everything above float_class_qnan | ||
34 | * is a NaN so cls >= float_class_qnan is any NaN. | ||
35 | + * | ||
36 | + * Note that we canonicalize denormals, so most code should treat | ||
37 | + * class_normal and class_denormal identically. | ||
38 | */ | ||
39 | |||
40 | typedef enum __attribute__ ((__packed__)) { | ||
41 | float_class_unclassified, | ||
42 | float_class_zero, | ||
43 | float_class_normal, | ||
44 | + float_class_denormal, /* input was a non-squashed denormal */ | ||
45 | float_class_inf, | ||
46 | float_class_qnan, /* all NaNs from here */ | ||
47 | float_class_snan, | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { | ||
49 | enum { | ||
50 | float_cmask_zero = float_cmask(float_class_zero), | ||
51 | float_cmask_normal = float_cmask(float_class_normal), | ||
52 | + float_cmask_denormal = float_cmask(float_class_denormal), | ||
53 | float_cmask_inf = float_cmask(float_class_inf), | ||
54 | float_cmask_qnan = float_cmask(float_class_qnan), | ||
55 | float_cmask_snan = float_cmask(float_class_snan), | ||
56 | |||
57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, | ||
58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, | ||
59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, | ||
60 | }; | ||
61 | |||
62 | /* Flags for parts_minmax. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) | ||
64 | return c == float_class_qnan; | ||
65 | } | ||
66 | |||
67 | +/* | ||
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
72 | +{ | ||
73 | + return !(cmask & ~float_cmask_anynorm); | ||
74 | +} | ||
75 | + | ||
76 | +static inline bool is_anynorm(FloatClass c) | ||
77 | +{ | ||
78 | + return float_cmask(c) & float_cmask_anynorm; | ||
79 | +} | ||
80 | + | ||
81 | /* | ||
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
169 | { | ||
170 | - if (likely(p->cls == float_class_normal)) { | ||
171 | + if (likely(is_anynorm(p->cls))) { | ||
172 | parts_uncanon_normal(p, s, fmt); | ||
173 | } else { | ||
174 | switch (p->cls) { | ||
175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
176 | |||
177 | if (a->sign != b_sign) { | ||
178 | /* Subtraction */ | ||
179 | - if (likely(ab_mask == float_cmask_normal)) { | ||
180 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
181 | if (parts_sub_normal(a, b)) { | ||
182 | return a; | ||
183 | } | ||
184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
185 | } | ||
186 | } else { | ||
187 | /* Addition */ | ||
188 | - if (likely(ab_mask == float_cmask_normal)) { | ||
189 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
190 | parts_add_normal(a, b); | ||
191 | return a; | ||
192 | } | ||
193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
194 | } | ||
195 | |||
196 | if (b->cls == float_class_zero) { | ||
197 | - g_assert(a->cls == float_class_normal); | ||
198 | + g_assert(is_anynorm(a->cls)); | ||
199 | return a; | ||
200 | } | ||
201 | |||
202 | g_assert(a->cls == float_class_zero); | ||
203 | - g_assert(b->cls == float_class_normal); | ||
204 | + g_assert(is_anynorm(b->cls)); | ||
205 | return_b: | ||
206 | b->sign = b_sign; | ||
207 | return b; | ||
208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
210 | bool sign = a->sign ^ b->sign; | ||
211 | |||
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
351 | -- | ||
352 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to | ||
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
5 | 1 | ||
6 | Note that we only set this when an input denormal is actually used by | ||
7 | the operation: if the operation results in Invalid Operation or | ||
8 | Divide By Zero or the result is a NaN because some other input was a | ||
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | input_denormal_used. | ||
11 | |||
12 | We mostly do not need to adjust the hardfloat codepaths to deal with | ||
13 | this flag, because almost all hardfloat operations are already gated | ||
14 | on the input not being a denormal, and will fall back to softfloat | ||
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | include/fpu/softfloat-types.h | 7 ++++ | ||
23 | fpu/softfloat.c | 38 +++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- | ||
25 | 3 files changed, 107 insertions(+), 6 deletions(-) | ||
26 | |||
27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/fpu/softfloat-types.h | ||
30 | +++ b/include/fpu/softfloat-types.h | ||
31 | @@ -XXX,XX +XXX,XX @@ enum { | ||
32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ | ||
33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ | ||
34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ | ||
35 | + /* | ||
36 | + * An input was denormal and we used it (without flushing it to zero). | ||
37 | + * Not set if we do not actually use the denormal input (e.g. | ||
38 | + * because some other input was a NaN, or because the operation | ||
39 | + * wasn't actually carried out (divide-by-zero; invalid)) | ||
40 | + */ | ||
41 | + float_flag_input_denormal_used = 0x4000, | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/fpu/softfloat.c | ||
48 | +++ b/fpu/softfloat.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
50 | float16_params_ahp.frac_size + 1); | ||
51 | break; | ||
52 | |||
53 | - case float_class_normal: | ||
54 | case float_class_denormal: | ||
55 | + float_raise(float_flag_input_denormal_used, s); | ||
56 | + break; | ||
57 | + case float_class_normal: | ||
58 | case float_class_zero: | ||
59 | break; | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) | ||
62 | if (is_nan(a->cls)) { | ||
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
68 | } | ||
69 | |||
70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
72 | if (is_nan(a->cls)) { | ||
73 | parts_return_nan(a, s); | ||
74 | } | ||
75 | + if (a->cls == float_class_denormal) { | ||
76 | + float_raise(float_flag_input_denormal_used, s); | ||
77 | + } | ||
78 | } | ||
79 | |||
80 | #define parts_float_to_float(P, S) \ | ||
81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/fpu/softfloat-parts.c.inc | ||
144 | +++ b/fpu/softfloat-parts.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
146 | bool b_sign = b->sign ^ subtract; | ||
147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
148 | |||
149 | + /* | ||
150 | + * For addition and subtraction, we will consume an | ||
151 | + * input denormal unless the other input is a NaN. | ||
152 | + */ | ||
153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == | ||
154 | + float_cmask_denormal) { | ||
155 | + float_raise(float_flag_input_denormal_used, s); | ||
156 | + } | ||
157 | + | ||
158 | if (a->sign != b_sign) { | ||
159 | /* Subtraction */ | ||
160 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
162 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
163 | FloatPartsW tmp; | ||
164 | |||
165 | + if (ab_mask & float_cmask_denormal) { | ||
166 | + float_raise(float_flag_input_denormal_used, s); | ||
167 | + } | ||
168 | + | ||
169 | frac_mulw(&tmp, a, b); | ||
170 | frac_truncjam(a, &tmp); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
173 | } | ||
174 | |||
175 | /* Multiply by 0 or Inf */ | ||
176 | + if (ab_mask & float_cmask_denormal) { | ||
177 | + float_raise(float_flag_input_denormal_used, s); | ||
178 | + } | ||
179 | + | ||
180 | if (ab_mask & float_cmask_inf) { | ||
181 | a->cls = float_class_inf; | ||
182 | a->sign = sign; | ||
183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
184 | if (flags & float_muladd_negate_result) { | ||
185 | a->sign ^= 1; | ||
186 | } | ||
187 | + | ||
188 | + /* | ||
189 | + * All result types except for "return the default NaN | ||
190 | + * because this is an Invalid Operation" go through here; | ||
191 | + * this matches the set of cases where we consumed a | ||
192 | + * denormal input. | ||
193 | + */ | ||
194 | + if (abc_mask & float_cmask_denormal) { | ||
195 | + float_raise(float_flag_input_denormal_used, s); | ||
196 | + } | ||
197 | return a; | ||
198 | |||
199 | return_sub_zero: | ||
200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
201 | bool sign = a->sign ^ b->sign; | ||
202 | |||
203 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
204 | + if (ab_mask & float_cmask_denormal) { | ||
205 | + float_raise(float_flag_input_denormal_used, s); | ||
206 | + } | ||
207 | a->sign = sign; | ||
208 | a->exp -= b->exp + frac_div(a, b); | ||
209 | return a; | ||
210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
211 | return parts_pick_nan(a, b, s); | ||
212 | } | ||
213 | |||
214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { | ||
215 | + float_raise(float_flag_input_denormal_used, s); | ||
216 | + } | ||
217 | + | ||
218 | a->sign = sign; | ||
219 | |||
220 | /* Inf / X */ | ||
221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
223 | |||
224 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
225 | + if (ab_mask & float_cmask_denormal) { | ||
226 | + float_raise(float_flag_input_denormal_used, s); | ||
227 | + } | ||
228 | frac_modrem(a, b, mod_quot); | ||
229 | return a; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
232 | return a; | ||
233 | } | ||
234 | |||
235 | + if (ab_mask & float_cmask_denormal) { | ||
236 | + float_raise(float_flag_input_denormal_used, s); | ||
237 | + } | ||
238 | + | ||
239 | /* N % Inf; 0 % N */ | ||
240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | ||
241 | return a; | ||
242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
243 | if (unlikely(a->cls != float_class_normal)) { | ||
244 | switch (a->cls) { | ||
245 | case float_class_denormal: | ||
246 | + if (!a->sign) { | ||
247 | + /* -ve denormal will be InvalidOperation */ | ||
248 | + float_raise(float_flag_input_denormal_used, status); | ||
249 | + } | ||
250 | break; | ||
251 | case float_class_snan: | ||
252 | case float_class_qnan: | ||
253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
254 | if ((flags & (minmax_isnum | minmax_isnumber)) | ||
255 | && !(ab_mask & float_cmask_snan) | ||
256 | && (ab_mask & ~float_cmask_qnan)) { | ||
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
259 | + } | ||
260 | return is_nan(a->cls) ? b : a; | ||
261 | } | ||
262 | |||
263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
264 | return parts_pick_nan(a, b, s); | ||
265 | } | ||
266 | |||
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
280 | + } | ||
281 | + | ||
282 | if (a->sign != b->sign) { | ||
283 | goto a_sign; | ||
284 | } | ||
285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
286 | return float_relation_unordered; | ||
287 | } | ||
288 | |||
289 | + if (ab_mask & float_cmask_denormal) { | ||
290 | + float_raise(float_flag_input_denormal_used, s); | ||
291 | + } | ||
292 | + | ||
293 | if (ab_mask & float_cmask_zero) { | ||
294 | if (ab_mask == float_cmask_zero) { | ||
295 | return float_relation_equal; | ||
296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
297 | case float_class_zero: | ||
298 | case float_class_inf: | ||
299 | break; | ||
300 | - case float_class_normal: | ||
301 | case float_class_denormal: | ||
302 | + float_raise(float_flag_input_denormal_used, s); | ||
303 | + /* fall through */ | ||
304 | + case float_class_normal: | ||
305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
306 | break; | ||
307 | default: | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
309 | if (unlikely(a->cls != float_class_normal)) { | ||
310 | switch (a->cls) { | ||
311 | case float_class_denormal: | ||
312 | + if (!a->sign) { | ||
313 | + /* -ve denormal will be InvalidOperation */ | ||
314 | + float_raise(float_flag_input_denormal_used, s); | ||
315 | + } | ||
316 | break; | ||
317 | case float_class_snan: | ||
318 | case float_class_qnan: | ||
319 | -- | ||
320 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Currently we handle flushing of output denormals in uncanon_normal | ||
2 | always before we deal with rounding. This works for architectures | ||
3 | that detect tininess before rounding, but is usually not the right | ||
4 | place when the architecture detects tininess after rounding. For | ||
5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes | ||
6 | outputs to be flushed to zero "when it detects a floating-point | ||
7 | underflow condition". This means that we mustn't flush to zero if | ||
8 | the input is such that after rounding it is no longer tiny. | ||
9 | 1 | ||
10 | At least one of our guest architectures does underflow detection | ||
11 | after rounding but flushing of denormals before rounding (MIPS MSA); | ||
12 | this means we need to have a config knob for this that is separate | ||
13 | from our existing tininess_before_rounding setting. | ||
14 | |||
15 | Add an ftz_detection flag. For consistency with | ||
16 | tininess_before_rounding, we make it default to "detect ftz after | ||
17 | rounding"; this means that we need to explicitly set the flag to | ||
18 | "detect ftz before rounding" on every existing architecture that sets | ||
19 | flush_to_zero, so that this commit has no behaviour change. | ||
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
37 | |||
38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
40 | --- | ||
41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ | ||
42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ | ||
43 | target/mips/fpu_helper.h | 6 ++++++ | ||
44 | target/alpha/cpu.c | 7 +++++++ | ||
45 | target/arm/cpu.c | 1 + | ||
46 | target/hppa/fpu_helper.c | 11 +++++++++++ | ||
47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ | ||
48 | target/mips/msa.c | 9 +++++++++ | ||
49 | target/ppc/cpu_init.c | 3 +++ | ||
50 | target/rx/cpu.c | 8 ++++++++ | ||
51 | target/sh4/cpu.c | 8 ++++++++ | ||
52 | target/tricore/helper.c | 1 + | ||
53 | tests/fp/fp-bench.c | 1 + | ||
54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ | ||
55 | 14 files changed, 107 insertions(+), 6 deletions(-) | ||
56 | |||
57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/fpu/softfloat-helpers.h | ||
60 | +++ b/include/fpu/softfloat-helpers.h | ||
61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) | ||
62 | status->flush_inputs_to_zero = val; | ||
63 | } | ||
64 | |||
65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, | ||
66 | + float_status *status) | ||
67 | +{ | ||
68 | + status->ftz_detection = d; | ||
69 | +} | ||
70 | + | ||
71 | static inline void set_default_nan_mode(bool val, float_status *status) | ||
72 | { | ||
73 | status->default_nan_mode = val; | ||
74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | ||
75 | return status->default_nan_mode; | ||
76 | } | ||
77 | |||
78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) | ||
79 | +{ | ||
80 | + return status->ftz_detection; | ||
81 | +} | ||
82 | + | ||
83 | #endif /* SOFTFLOAT_HELPERS_H */ | ||
84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/fpu/softfloat-types.h | ||
87 | +++ b/include/fpu/softfloat-types.h | ||
88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
89 | float_infzeronan_suppress_invalid = (1 << 7), | ||
90 | } FloatInfZeroNaNRule; | ||
91 | |||
92 | +/* | ||
93 | + * When flush_to_zero is set, should we detect denormal results to | ||
94 | + * be flushed before or after rounding? For most architectures this | ||
95 | + * should be set to match the tininess_before_rounding setting, | ||
96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before | ||
97 | + * rounding but tininess after rounding. | ||
98 | + * | ||
99 | + * This enum is arranged so that the default if the target doesn't | ||
100 | + * configure it matches the default for tininess_before_rounding | ||
101 | + * (i.e. "after rounding"). | ||
102 | + */ | ||
103 | +typedef enum __attribute__((__packed__)) { | ||
104 | + float_ftz_after_rounding = 0, | ||
105 | + float_ftz_before_rounding = 1, | ||
106 | +} FloatFTZDetection; | ||
107 | + | ||
108 | /* | ||
109 | * Floating Point Status. Individual architectures may maintain | ||
110 | * several versions of float_status for different functions. The | ||
111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | ||
112 | bool tininess_before_rounding; | ||
113 | /* should denormalised results go to zero and set output_denormal_flushed? */ | ||
114 | bool flush_to_zero; | ||
115 | + /* do we detect and flush denormal results before or after rounding? */ | ||
116 | + FloatFTZDetection ftz_detection; | ||
117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ | ||
118 | bool flush_inputs_to_zero; | ||
119 | bool default_nan_mode; | ||
120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/mips/fpu_helper.h | ||
123 | +++ b/target/mips/fpu_helper.h | ||
124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) | ||
125 | */ | ||
126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
127 | &env->active_fpu.fp_status); | ||
128 | + /* | ||
129 | + * TODO: the spec doesn't say clearly whether FTZ happens before | ||
130 | + * or after rounding for normal FPU operations. | ||
131 | + */ | ||
132 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
133 | + &env->active_fpu.fp_status); | ||
134 | } | ||
135 | |||
136 | /* MSA */ | ||
137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/alpha/cpu.c | ||
140 | +++ b/target/alpha/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
143 | /* Default NaN: sign bit clear, msb frac bit set */ | ||
144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
145 | + /* | ||
146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so | ||
148 | + * this should be float_ftz_after_rounding to match the | ||
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
161 | { | ||
162 | set_float_detect_tininess(float_tininess_before_rounding, s); | ||
163 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/hppa/fpu_helper.c | ||
170 | +++ b/target/hppa/fpu_helper.c | ||
171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); | ||
173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ | ||
174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); | ||
175 | + /* | ||
176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing | ||
177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" | ||
178 | + * for consistency with tininess detection. | ||
179 | + */ | ||
180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
181 | + /* | ||
182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should | ||
183 | + * detect tininess before rounding, but we don't set that here so we | ||
184 | + * get the default tininess after rounding. | ||
185 | + */ | ||
186 | } | ||
187 | |||
188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/i386/tcg/fpu_helper.c | ||
192 | +++ b/target/i386/tcg/fpu_helper.c | ||
193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) | ||
194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); | ||
195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); | ||
196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); | ||
197 | + /* | ||
198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM | ||
199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush | ||
200 | + * when we detect underflow, which x86 does after rounding). | ||
201 | + */ | ||
202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); | ||
204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); | ||
205 | } | ||
206 | |||
207 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
208 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/mips/msa.c | ||
211 | +++ b/target/mips/msa.c | ||
212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
213 | /* tininess detected after rounding.*/ | ||
214 | set_float_detect_tininess(float_tininess_after_rounding, | ||
215 | &env->active_tc.msa_fp_status); | ||
216 | + /* | ||
217 | + * MSACSR.FS detects tiny results to flush to zero before rounding | ||
218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD | ||
219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it | ||
220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 | ||
221 | + * table 3.3). | ||
222 | + */ | ||
223 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
224 | + &env->active_tc.msa_fp_status); | ||
225 | |||
226 | /* | ||
227 | * According to MIPS specifications, if one of the two operands is | ||
228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
229 | index XXXXXXX..XXXXXXX 100644 | ||
230 | --- a/target/ppc/cpu_init.c | ||
231 | +++ b/target/ppc/cpu_init.c | ||
232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
233 | /* tininess for underflow is detected before rounding */ | ||
234 | set_float_detect_tininess(float_tininess_before_rounding, | ||
235 | &env->fp_status); | ||
236 | + /* Similarly for flush-to-zero */ | ||
237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
238 | + | ||
239 | /* | ||
240 | * PowerPC propagation rules: | ||
241 | * 1. A if it sNaN or qNaN | ||
242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/rx/cpu.c | ||
245 | +++ b/target/rx/cpu.c | ||
246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
248 | /* Default NaN value: sign bit clear, set frac msb */ | ||
249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
250 | + /* | ||
251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear | ||
252 | + * on whether flush-to-zero should happen before or after rounding, but | ||
253 | + * section 1.3.2 says that it happens when underflow is detected, and | ||
254 | + * implies that underflow is detected after rounding. So this may not | ||
255 | + * be the correct setting. | ||
256 | + */ | ||
257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
258 | } | ||
259 | |||
260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/target/sh4/cpu.c | ||
264 | +++ b/target/sh4/cpu.c | ||
265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) | ||
266 | set_default_nan_mode(1, &env->fp_status); | ||
267 | /* sign bit clear, set all frac bits other than msb */ | ||
268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); | ||
269 | + /* | ||
270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether | ||
271 | + * it detects tininess before or after rounding. Section 6.4 is clear | ||
272 | + * that flush-to-zero happens when the result underflows, though, so | ||
273 | + * either this should be "detect ftz after rounding" or else we should | ||
274 | + * be setting "detect tininess before rounding". | ||
275 | + */ | ||
276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
277 | } | ||
278 | |||
279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) | ||
280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/tricore/helper.c | ||
283 | +++ b/target/tricore/helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) | ||
285 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
286 | set_flush_to_zero(1, &env->fp_status); | ||
287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | ||
288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
289 | set_default_nan_mode(1, &env->fp_status); | ||
290 | /* Default NaN pattern: sign bit clear, frac msb set */ | ||
291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/fp/fp-bench.c | ||
295 | +++ b/tests/fp/fp-bench.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); | ||
298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); | ||
299 | set_float_default_nan_pattern(0b01000000, &soft_status); | ||
300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); | ||
301 | |||
302 | f = bench_funcs[operation][precision]; | ||
303 | g_assert(f); | ||
304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
305 | index XXXXXXX..XXXXXXX 100644 | ||
306 | --- a/fpu/softfloat-parts.c.inc | ||
307 | +++ b/fpu/softfloat-parts.c.inc | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
309 | p->frac_lo &= ~round_mask; | ||
310 | } | ||
311 | frac_shr(p, frac_shift); | ||
312 | - } else if (s->flush_to_zero) { | ||
313 | + } else if (s->flush_to_zero && | ||
314 | + s->ftz_detection == float_ftz_before_rounding) { | ||
315 | flags |= float_flag_output_denormal_flushed; | ||
316 | p->cls = float_class_zero; | ||
317 | exp = 0; | ||
318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; | ||
320 | frac_shr(p, frac_shift); | ||
321 | |||
322 | - if (is_tiny && (flags & float_flag_inexact)) { | ||
323 | - flags |= float_flag_underflow; | ||
324 | - } | ||
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
340 | } | ||
341 | } | ||
342 | p->exp = exp; | ||
343 | -- | ||
344 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The Armv8.7 FEAT_AFP feature defines three new control bits in | ||
2 | the FPCR: | ||
3 | * FPCR.AH: "alternate floating point mode"; this changes floating | ||
4 | point behaviour in a variety of ways, including: | ||
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, denormals detected after rounding | ||
7 | with an unbounded exponent has been applied are flushed to zero | ||
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
15 | 1 | ||
16 | This commit defines the new bits in the FPCR, and allows them to be | ||
17 | read or written when FEAT_AFP is implemented. Actual behaviour | ||
18 | changes will be implemented in subsequent commits. | ||
19 | |||
20 | Note that these are the first FPCR bits which don't appear in the | ||
21 | AArch32 FPSCR view of the register, and which share bit positions | ||
22 | with FPSR bits. | ||
23 | |||
24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | --- | ||
27 | target/arm/cpu-features.h | 5 +++++ | ||
28 | target/arm/cpu.h | 3 +++ | ||
29 | target/arm/vfp_helper.c | 11 ++++++++--- | ||
30 | 3 files changed, 16 insertions(+), 3 deletions(-) | ||
31 | |||
32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/cpu-features.h | ||
35 | +++ b/target/arm/cpu-features.h | ||
36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) | ||
37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; | ||
38 | } | ||
39 | |||
40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) | ||
41 | +{ | ||
42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; | ||
43 | +} | ||
44 | + | ||
45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | ||
46 | { | ||
47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | ||
48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/cpu.h | ||
51 | +++ b/target/arm/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
53 | */ | ||
54 | |||
55 | /* FPCR bits */ | ||
56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ | ||
57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ | ||
58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ | ||
59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ | ||
60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ | ||
61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
67 | if (!cpu_isar_feature(any_fp16, cpu)) { | ||
68 | val &= ~FPCR_FZ16; | ||
69 | } | ||
70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { | ||
71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); | ||
72 | + } | ||
73 | |||
74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { | ||
75 | val &= ~FPCR_EBF; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
77 | * We don't implement trapped exception handling, so the | ||
78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) | ||
79 | * | ||
80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF | ||
81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits | ||
82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, | ||
83 | + * FIZ, AH, and NEP. | ||
84 | + * Len, Stride and LTPSIZE we just handled. Store those bits | ||
85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI | ||
86 | * bits. | ||
87 | */ | ||
88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; | ||
89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | | ||
90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; | ||
91 | env->vfp.fpcr &= ~mask; | ||
92 | env->vfp.fpcr |= val; | ||
93 | } | ||
94 | -- | ||
95 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects | ||
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
5 | 1 | ||
6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal | ||
7 | only because of FPCR.FIZ then we should *not* set the cumulative | ||
8 | exception bit FPSR.IDC. | ||
9 | |||
10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only | ||
11 | applies when FPCR.AH is 0. | ||
12 | |||
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal_flushed status flag into FPSR reads when it is the | ||
16 | result only of FPCR.FIZ. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- | ||
22 | 1 file changed, 50 insertions(+), 10 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/vfp_helper.c | ||
27 | +++ b/target/arm/vfp_helper.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
29 | |||
30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
31 | { | ||
32 | - uint32_t i = 0; | ||
33 | + uint32_t a32_flags = 0, a64_flags = 0; | ||
34 | |||
35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
40 | /* FZ16 does not generate an input denormal exception. */ | ||
41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
65 | } | ||
66 | |||
67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
70 | } | ||
71 | |||
72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
73 | +{ | ||
74 | + /* | ||
75 | + * Synchronize any pending exception-flag information in the | ||
76 | + * float_status values into env->vfp.fpsr, and then clear out | ||
77 | + * the float_status data. | ||
78 | + */ | ||
79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); | ||
80 | + vfp_clear_float_status_exc_flags(env); | ||
81 | +} | ||
82 | + | ||
83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
84 | { | ||
85 | uint64_t changed = env->vfp.fpcr; | ||
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
87 | if (changed & FPCR_FZ) { | ||
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
104 | } | ||
105 | if (changed & FPCR_DN) { | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
119 | } | ||
120 | |||
121 | #else | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH is set, various behaviours of AArch64 floating point | ||
2 | operations which are controlled by softfloat config settings change: | ||
3 | * tininess and ftz detection before/after rounding | ||
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
7 | 1 | ||
8 | When the guest changes the value of the AH bit, switch these config | ||
9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status | ||
10 | fields. | ||
11 | |||
12 | This requires us to make the arm_set_default_fp_behaviours() function | ||
13 | global, since we now need to call it from cpu.c and vfp_helper.c; we | ||
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
16 | |||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | --- | ||
20 | target/arm/internals.h | 4 +++ | ||
21 | target/arm/cpu.c | 23 ---------------- | ||
22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- | ||
23 | 3 files changed, 61 insertions(+), 24 deletions(-) | ||
24 | |||
25 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/internals.h | ||
28 | +++ b/target/arm/internals.h | ||
29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); | ||
30 | * all EL1" scope; this covers stage 1 and stage 2. | ||
31 | */ | ||
32 | int alle1_tlbmask(CPUARMState *env); | ||
33 | + | ||
34 | +/* Set the float_status behaviour to match the Arm defaults */ | ||
35 | +void arm_set_default_fp_behaviours(float_status *s); | ||
36 | + | ||
37 | #endif | ||
38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/cpu.c | ||
41 | +++ b/target/arm/cpu.c | ||
42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); | ||
44 | } | ||
45 | |||
46 | -/* | ||
47 | - * Set the float_status behaviour to match the Arm defaults: | ||
48 | - * * tininess-before-rounding | ||
49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
58 | - */ | ||
59 | -static void arm_set_default_fp_behaviours(float_status *s) | ||
60 | -{ | ||
61 | - set_float_detect_tininess(float_tininess_before_rounding, s); | ||
62 | - set_float_ftz_detection(float_ftz_before_rounding, s); | ||
63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
66 | - set_float_default_nan_pattern(0b01000000, s); | ||
67 | -} | ||
68 | - | ||
69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
70 | { | ||
71 | /* Reset a single ARMCPRegInfo register */ | ||
72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/arm/vfp_helper.c | ||
75 | +++ b/target/arm/vfp_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #include "exec/helper-proto.h" | ||
78 | #include "internals.h" | ||
79 | #include "cpu-features.h" | ||
80 | +#include "fpu/softfloat.h" | ||
81 | #ifdef CONFIG_TCG | ||
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
89 | |||
90 | +/* | ||
91 | + * Set the float_status behaviour to match the Arm defaults: | ||
92 | + * * tininess-before-rounding | ||
93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b. | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
102 | + */ | ||
103 | +void arm_set_default_fp_behaviours(float_status *s) | ||
104 | +{ | ||
105 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
106 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
110 | + set_float_default_nan_pattern(0b01000000, s); | ||
111 | +} | ||
112 | + | ||
113 | +/* | ||
114 | + * Set the float_status behaviour to match the FEAT_AFP | ||
115 | + * FPCR.AH=1 requirements: | ||
116 | + * * tininess-after-rounding | ||
117 | + * * 2-input NaN propagation prefers the first NaN | ||
118 | + * * 3-input NaN propagation prefers a over b over c | ||
119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't | ||
120 | + * set Invalid for a QNaN | ||
121 | + * * default NaN has sign bit set, msb frac bit set | ||
122 | + */ | ||
123 | +static void arm_set_ah_fp_behaviours(float_status *s) | ||
124 | +{ | ||
125 | + set_float_detect_tininess(float_tininess_after_rounding, s); | ||
126 | + set_float_ftz_detection(float_ftz_after_rounding, s); | ||
127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); | ||
128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); | ||
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
133 | + | ||
134 | #ifdef CONFIG_TCG | ||
135 | |||
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
140 | } | ||
141 | + if (changed & FPCR_AH) { | ||
142 | + bool ah_enabled = val & FPCR_AH; | ||
143 | + | ||
144 | + if (ah_enabled) { | ||
145 | + /* Change behaviours for A64 FP operations */ | ||
146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
148 | + } else { | ||
149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
151 | + } | ||
152 | + } | ||
153 | /* | ||
154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
155 | * we must sync the float_status flags into vfp.fpsr now (under the | ||
156 | -- | ||
157 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
5 | 1 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | We are going to need to generate different code in some cases when | ||
2 | FPCR.AH is 1. For example: | ||
3 | * Floating point neg and abs must not flip the sign bit of NaNs | ||
4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various | ||
5 | BFCVT and BFM bfloat16 ops) need to use a different float_status | ||
6 | to the usual one | ||
7 | 1 | ||
8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at | ||
9 | translate time. | ||
10 | |||
11 | Because we now have a bit in FPCR that affects codegen, we can't mark | ||
12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes | ||
13 | to it will now end the TB and trigger a regeneration of hflags. | ||
14 | |||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | --- | ||
18 | target/arm/cpu.h | 1 + | ||
19 | target/arm/tcg/translate.h | 2 ++ | ||
20 | target/arm/helper.c | 2 +- | ||
21 | target/arm/tcg/hflags.c | 4 ++++ | ||
22 | target/arm/tcg/translate-a64.c | 1 + | ||
23 | 5 files changed, 9 insertions(+), 1 deletion(-) | ||
24 | |||
25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/cpu.h | ||
28 | +++ b/target/arm/cpu.h | ||
29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) | ||
30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
34 | |||
35 | /* | ||
36 | * Helpers for using the above. Note that only the A64 accessors use | ||
37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate.h | ||
40 | +++ b/target/arm/tcg/translate.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
42 | bool nv2_mem_e20; | ||
43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ | ||
44 | bool nv2_mem_be; | ||
45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
46 | + bool fpcr_ah; | ||
47 | /* | ||
48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
49 | * < 0, set by the current instruction. | ||
50 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/arm/helper.c | ||
53 | +++ b/target/arm/helper.c | ||
54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { | ||
55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, | ||
56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, | ||
57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, | ||
58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, | ||
59 | + .access = PL0_RW, .type = ARM_CP_FPU, | ||
60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, | ||
61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, | ||
62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, | ||
63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/tcg/hflags.c | ||
66 | +++ b/target/arm/tcg/hflags.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); | ||
69 | } | ||
70 | |||
71 | + if (env->vfp.fpcr & FPCR_AH) { | ||
72 | + DP_TBFLAG_A64(flags, AH, 1); | ||
73 | + } | ||
74 | + | ||
75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
76 | } | ||
77 | |||
78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/target/arm/tcg/translate-a64.c | ||
81 | +++ b/target/arm/tcg/translate-a64.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); | ||
84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
87 | dc->vec_len = 0; | ||
88 | dc->vec_stride = 0; | ||
89 | dc->cp_regs = arm_cpu->cp_regs; | ||
90 | -- | ||
91 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, the behaviour of some instructions changes: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | ||
3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which | ||
5 | QEMU does not yet implement) | ||
6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
7 | 1 | ||
8 | The behaviour change is: | ||
9 | * the instructions do not update the FPSR cumulative exception flags | ||
10 | * trapped floating point exceptions are disabled (a no-op for QEMU, | ||
11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) | ||
12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode | ||
13 | * denormalized inputs and outputs are always flushed to zero, as if | ||
14 | FPCR.{FZ,FIZ} is {1,1} | ||
15 | * FPCR.FZ16 is still honoured for half-precision inputs | ||
16 | |||
17 | (See the Arm ARM DDI0487L.a section A1.5.9.) | ||
18 | |||
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPSR is updated. In subsequent commits | ||
28 | we will arrange to use this new fp_status field for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
33 | --- | ||
34 | target/arm/cpu.h | 15 +++++++++++++++ | ||
35 | target/arm/internals.h | 2 ++ | ||
36 | target/arm/tcg/translate.h | 14 ++++++++++++++ | ||
37 | target/arm/cpu.c | 4 ++++ | ||
38 | target/arm/vfp_helper.c | 13 ++++++++++++- | ||
39 | 5 files changed, 47 insertions(+), 1 deletion(-) | ||
40 | |||
41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/cpu.h | ||
44 | +++ b/target/arm/cpu.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
46 | * standard_fp_status : the ARM "Standard FPSCR Value" | ||
47 | * standard_fp_status_fp16 : used for half-precision | ||
48 | * calculations with the ARM "Standard FPSCR Value" | ||
49 | + * ah_fp_status: used for the A64 insns which change behaviour | ||
50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
51 | + * and the reciprocal and square root estimate/step insns) | ||
52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
54 | + * and the reciprocal and square root estimate/step insns); | ||
55 | + * for half-precision | ||
56 | * | ||
57 | * Half-precision operations are governed by a separate | ||
58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
81 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/arm/internals.h | ||
84 | +++ b/target/arm/internals.h | ||
85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); | ||
86 | |||
87 | /* Set the float_status behaviour to match the Arm defaults */ | ||
88 | void arm_set_default_fp_behaviours(float_status *s); | ||
89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ | ||
90 | +void arm_set_ah_fp_behaviours(float_status *s); | ||
91 | |||
92 | #endif | ||
93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/tcg/translate.h | ||
96 | +++ b/target/arm/tcg/translate.h | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
98 | FPST_A64, | ||
99 | FPST_A32_F16, | ||
100 | FPST_A64_F16, | ||
101 | + FPST_AH, | ||
102 | + FPST_AH_F16, | ||
103 | FPST_STD, | ||
104 | FPST_STD_F16, | ||
105 | } ARMFPStatusFlavour; | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
108 | * FPST_A64_F16 | ||
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
154 | */ | ||
155 | -static void arm_set_ah_fp_behaviours(float_status *s) | ||
156 | +void arm_set_ah_fp_behaviours(float_status *s) | ||
157 | { | ||
158 | set_float_detect_tininess(float_tininess_after_rounding, s); | ||
159 | set_float_ftz_detection(float_ftz_after_rounding, s); | ||
160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
164 | + /* | ||
165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
166 | + * they are used for insns that must not set the cumulative exception bits. | ||
167 | + */ | ||
168 | + | ||
169 | /* | ||
170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
178 | } | ||
179 | |||
180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
190 | } | ||
191 | if (changed & FPCR_FZ) { | ||
192 | bool ftz_enabled = val & FPCR_FZ; | ||
193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
199 | } | ||
200 | if (changed & FPCR_AH) { | ||
201 | bool ah_enabled = val & FPCR_AH; | ||
202 | -- | ||
203 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use | ||
2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get | ||
3 | the required behaviour changes. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.h | 13 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- | ||
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/translate-a64.h | ||
16 | +++ b/target/arm/tcg/translate-a64.h | ||
17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) | ||
18 | return ret; | ||
19 | } | ||
20 | |||
21 | +/* | ||
22 | + * Return the ARMFPStatusFlavour to use based on element size and | ||
23 | + * whether FPCR.AH is set. | ||
24 | + */ | ||
25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) | ||
26 | +{ | ||
27 | + if (s->fpcr_ah) { | ||
28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; | ||
29 | + } else { | ||
30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
43 | */ | ||
44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
45 | - int rm, bool is_fp16, int data, | ||
46 | + int rm, ARMFPStatusFlavour fpsttype, int data, | ||
47 | gen_helper_gvec_3_ptr *fn) | ||
48 | { | ||
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
71 | } | ||
72 | break; | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
74 | if (fp_access_check(s)) { | ||
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
79 | write_fp_sreg(s, a->rd, t0); | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
83 | if (fp_access_check(s)) { | ||
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
142 | } | ||
143 | |||
144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
145 | - esz == MO_16, data, fns[esz - 1]); | ||
146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, | ||
147 | + data, fns[esz - 1]); | ||
148 | return true; | ||
149 | } | ||
150 | |||
151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
152 | + gen_helper_gvec_3_ptr * const fns[3]) | ||
153 | +{ | ||
154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, | ||
155 | + a->esz == MO_16 ? | ||
156 | + FPST_A64_F16 : FPST_A64); | ||
157 | +} | ||
158 | + | ||
159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
160 | + gen_helper_gvec_3_ptr * const f[3]) | ||
161 | +{ | ||
162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
163 | + select_ah_fpst(s, a->esz)); | ||
164 | +} | ||
165 | + | ||
166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { | ||
167 | gen_helper_gvec_fadd_h, | ||
168 | gen_helper_gvec_fadd_s, | ||
169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
170 | gen_helper_gvec_recps_s, | ||
171 | gen_helper_gvec_recps_d, | ||
172 | }; | ||
173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) | ||
174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
175 | |||
176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
177 | gen_helper_gvec_rsqrts_h, | ||
178 | gen_helper_gvec_rsqrts_s, | ||
179 | gen_helper_gvec_rsqrts_d, | ||
180 | }; | ||
181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) | ||
182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
183 | |||
184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
185 | gen_helper_gvec_faddp_h, | ||
186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | ||
187 | } | ||
188 | |||
189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
190 | - esz == MO_16, a->idx, fns[esz - 1]); | ||
191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
192 | + a->idx, fns[esz - 1]); | ||
193 | return true; | ||
194 | } | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { | ||
197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); | ||
198 | } FPScalar1; | ||
199 | |||
200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
201 | - const FPScalar1 *f, int rmode) | ||
202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
203 | + const FPScalar1 *f, int rmode, | ||
204 | + ARMFPStatusFlavour fpsttype) | ||
205 | { | ||
206 | TCGv_i32 tcg_rmode = NULL; | ||
207 | TCGv_ptr fpst; | ||
208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
209 | return check == 0; | ||
210 | } | ||
211 | |||
212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
406 | -- | ||
407 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
4 | 1 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFMLALB, BFMLALT | ||
3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
4 | 1 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour | ||
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | ||
10 | the boolean. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- | ||
16 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
17 | 2 files changed, 17 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/translate-a64.c | ||
22 | +++ b/target/arm/tcg/translate-a64.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, | ||
24 | * an out-of-line helper. | ||
25 | */ | ||
26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
27 | - int rm, int ra, bool is_fp16, int data, | ||
28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, | ||
29 | + int data, | ||
30 | gen_helper_gvec_4_ptr *fn) | ||
31 | { | ||
32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), | ||
35 | vec_full_reg_offset(s, rn), | ||
36 | vec_full_reg_offset(s, rm), | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | ||
38 | } | ||
39 | if (fp_access_check(s)) { | ||
40 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, | ||
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
58 | } | ||
59 | |||
60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
61 | - esz == MO_16, (a->idx << 1) | neg, | ||
62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
63 | + (a->idx << 1) | neg, | ||
64 | fns[esz - 1]); | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/translate-sve.c | ||
90 | +++ b/target/arm/tcg/translate-sve.c | ||
91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, | ||
92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
93 | { | ||
94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | ||
95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); | ||
96 | + a->rd, a->rn, a->rm, a->ra, sel, | ||
97 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
98 | } | ||
99 | |||
100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | ||
101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | ||
102 | { | ||
103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | ||
104 | a->rd, a->rn, a->rm, a->ra, | ||
105 | - (a->index << 1) | sel, FPST_A64); | ||
106 | + (a->index << 1) | sel, | ||
107 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
108 | } | ||
109 | |||
110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so | ||
2 | that instead of zeroing the high elements of a vector register when | ||
3 | we write the output of a scalar operation to it, we instead merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
6 | 1 | ||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/tcg/translate.h | 2 ++ | ||
16 | target/arm/tcg/hflags.c | 9 +++++++++ | ||
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
19 | |||
20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/cpu.h | ||
23 | +++ b/target/arm/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
29 | |||
30 | /* | ||
31 | * Helpers for using the above. Note that only the A64 accessors use | ||
32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/tcg/translate.h | ||
35 | +++ b/target/arm/tcg/translate.h | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
37 | bool nv2_mem_be; | ||
38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
39 | bool fpcr_ah; | ||
40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ | ||
41 | + bool fpcr_nep; | ||
42 | /* | ||
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
64 | } | ||
65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/arm/tcg/translate-a64.c | ||
68 | +++ b/target/arm/tcg/translate-a64.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); | ||
74 | dc->vec_len = 0; | ||
75 | dc->vec_stride = 0; | ||
76 | dc->cp_regs = arm_cpu->cp_regs; | ||
77 | -- | ||
78 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the | ||
2 | behaviour of the writeback of the result for most SIMD scalar | ||
3 | operations, so that instead of zeroing the upper part of the result | ||
4 | register it merges the upper elements from one of the input | ||
5 | registers. | ||
6 | 1 | ||
7 | Provide new functions write_fp_*reg_merging() which can be used | ||
8 | instead of the existing write_fp_*reg() functions when we want this | ||
9 | "merge the result with one of the input registers if FPCR.NEP is | ||
10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). | ||
11 | |||
12 | Note that (as documented in the description of the FPCR.NEP bit) | ||
13 | which input register to use as the merge source varies by | ||
14 | instruction: for these 2-input scalar operations, the comparison | ||
15 | instructions take from Rm, not Rn. | ||
16 | |||
17 | We'll extend this to also provide the merging behaviour for | ||
18 | the remaining scalar insns in subsequent commits. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | --- | ||
23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- | ||
24 | 1 file changed, 91 insertions(+), 26 deletions(-) | ||
25 | |||
26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/tcg/translate-a64.c | ||
29 | +++ b/target/arm/tcg/translate-a64.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) | ||
31 | write_fp_dreg(s, reg, tmp); | ||
32 | } | ||
33 | |||
34 | +/* | ||
35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: | ||
36 | + * - if FPCR.NEP == 0, clear the high elements of reg | ||
37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg | ||
38 | + * (i.e. merge the result with those high elements) | ||
39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). | ||
40 | + */ | ||
41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, | ||
42 | + TCGv_i64 v) | ||
43 | +{ | ||
44 | + if (!s->fpcr_nep) { | ||
45 | + write_fp_dreg(s, reg, v); | ||
46 | + return; | ||
47 | + } | ||
48 | + | ||
49 | + /* | ||
50 | + * Move from mergereg to reg; this sets the high elements and | ||
51 | + * clears the bits above 128 as a side effect. | ||
52 | + */ | ||
53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
54 | + vec_full_reg_offset(s, mergereg), | ||
55 | + 16, vec_full_reg_size(s)); | ||
56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); | ||
57 | +} | ||
58 | + | ||
59 | +/* | ||
60 | + * Write a single-prec result, but only clear the higher elements of the | ||
61 | + * destination register if FPCR.NEP is 0; otherwise copy them from mergereg. | ||
62 | + */ | ||
63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, | ||
64 | + TCGv_i32 v) | ||
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
68 | + return; | ||
69 | + } | ||
70 | + | ||
71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
72 | + vec_full_reg_offset(s, mergereg), | ||
73 | + 16, vec_full_reg_size(s)); | ||
74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); | ||
75 | +} | ||
76 | + | ||
77 | +/* | ||
78 | + * Write a half-prec result, but only clear the higher elements of the | ||
79 | + * destination register if FPCR.NEP is 0; otherwise copy them from mergereg. | ||
80 | + * The caller must ensure that the top 16 bits of v are zero. | ||
81 | + */ | ||
82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, | ||
83 | + TCGv_i32 v) | ||
84 | +{ | ||
85 | + if (!s->fpcr_nep) { | ||
86 | + write_fp_sreg(s, reg, v); | ||
87 | + return; | ||
88 | + } | ||
89 | + | ||
90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
91 | + vec_full_reg_offset(s, mergereg), | ||
92 | + 16, vec_full_reg_size(s)); | ||
93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); | ||
94 | +} | ||
95 | + | ||
96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ | ||
97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, | ||
98 | GVecGen2Fn *gvec_fn, int vece) | ||
99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
100 | } FPScalar; | ||
101 | |||
102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
103 | - const FPScalar *f, | ||
104 | + const FPScalar *f, int mergereg, | ||
105 | ARMFPStatusFlavour fpsttype) | ||
106 | { | ||
107 | switch (a->esz) { | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
112 | - write_fp_dreg(s, a->rd, t0); | ||
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
114 | } | ||
115 | break; | ||
116 | case MO_32: | ||
117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
121 | - write_fp_sreg(s, a->rd, t0); | ||
122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); | ||
123 | } | ||
124 | break; | ||
125 | case MO_16: | ||
126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
132 | } | ||
133 | break; | ||
134 | default: | ||
135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
136 | return true; | ||
137 | } | ||
138 | |||
139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
141 | + int mergereg) | ||
142 | { | ||
143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | ||
144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
145 | a->esz == MO_16 ? | ||
146 | FPST_A64_F16 : FPST_A64); | ||
147 | } | ||
148 | |||
149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
151 | + int mergereg) | ||
152 | { | ||
153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
155 | + select_ah_fpst(s, a->esz)); | ||
156 | } | ||
157 | |||
158 | static const FPScalar f_scalar_fadd = { | ||
159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { | ||
160 | gen_helper_vfp_adds, | ||
161 | gen_helper_vfp_addd, | ||
162 | }; | ||
163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) | ||
164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) | ||
165 | |||
166 | static const FPScalar f_scalar_fsub = { | ||
167 | gen_helper_vfp_subh, | ||
168 | gen_helper_vfp_subs, | ||
169 | gen_helper_vfp_subd, | ||
170 | }; | ||
171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) | ||
172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) | ||
173 | |||
174 | static const FPScalar f_scalar_fdiv = { | ||
175 | gen_helper_vfp_divh, | ||
176 | gen_helper_vfp_divs, | ||
177 | gen_helper_vfp_divd, | ||
178 | }; | ||
179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) | ||
180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) | ||
181 | |||
182 | static const FPScalar f_scalar_fmul = { | ||
183 | gen_helper_vfp_mulh, | ||
184 | gen_helper_vfp_muls, | ||
185 | gen_helper_vfp_muld, | ||
186 | }; | ||
187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) | ||
188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) | ||
189 | |||
190 | static const FPScalar f_scalar_fmax = { | ||
191 | gen_helper_vfp_maxh, | ||
192 | gen_helper_vfp_maxs, | ||
193 | gen_helper_vfp_maxd, | ||
194 | }; | ||
195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) | ||
196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
197 | |||
198 | static const FPScalar f_scalar_fmin = { | ||
199 | gen_helper_vfp_minh, | ||
200 | gen_helper_vfp_mins, | ||
201 | gen_helper_vfp_mind, | ||
202 | }; | ||
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
306 | -- | ||
307 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works | ||
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
8 | 1 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
13 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/translate-a64.c | ||
18 | +++ b/target/arm/tcg/translate-a64.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { | ||
20 | }; | ||
21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
22 | |||
23 | -static const FPScalar1 f_scalar_bfcvt = { | ||
24 | - .gen_s = gen_helper_bfcvt, | ||
25 | -}; | ||
26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | ||
28 | +{ | ||
29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; | ||
30 | + TCGv_i32 t32; | ||
31 | + int check; | ||
32 | + | ||
33 | + if (!dc_isar_feature(aa64_bf16, s)) { | ||
34 | + return false; | ||
35 | + } | ||
36 | + | ||
37 | + check = fp_access_check_scalar_hsd(s, a->esz); | ||
38 | + | ||
39 | + if (check <= 0) { | ||
40 | + return check == 0; | ||
41 | + } | ||
42 | + | ||
43 | + t32 = read_fp_sreg(s, a->rn); | ||
44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); | ||
45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
46 | + return true; | ||
47 | +} | ||
48 | |||
49 | static const FPScalar1 f_scalar_frint32 = { | ||
50 | NULL, | ||
51 | -- | ||
52 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 1-input scalar operations. | ||
2 | 1 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ | ||
7 | 1 file changed, 14 insertions(+), 12 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
14 | case MO_64: | ||
15 | t64 = read_fp_dreg(s, a->rn); | ||
16 | f->gen_d(t64, t64, fpst); | ||
17 | - write_fp_dreg(s, a->rd, t64); | ||
18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); | ||
19 | break; | ||
20 | case MO_32: | ||
21 | t32 = read_fp_sreg(s, a->rn); | ||
22 | f->gen_s(t32, t32, fpst); | ||
23 | - write_fp_sreg(s, a->rd, t32); | ||
24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); | ||
25 | break; | ||
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
99 | + } | ||
100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); | ||
101 | return true; | ||
102 | } | ||
103 | -- | ||
104 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). | ||
2 | 1 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
7 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
14 | } else { | ||
15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); | ||
16 | } | ||
17 | - write_fp_dreg(s, rd, tcg_double); | ||
18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); | ||
19 | break; | ||
20 | |||
21 | case MO_32: | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
40 | -- | ||
41 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) | ||
2 | is always double-to-single and must honour FPCR.NEP. Implement this | ||
3 | directly in a trans function rather than using | ||
4 | do_2misc_narrow_scalar(). | ||
5 | 1 | ||
6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for | ||
7 | the FCVTXN (vector) insn, so we move those down in the file to | ||
8 | where they are used. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ | ||
14 | 1 file changed, 28 insertions(+), 15 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/tcg/translate-a64.c | ||
19 | +++ b/target/arm/tcg/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { | ||
21 | }; | ||
22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) | ||
23 | |||
24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | ||
26 | { | ||
27 | - /* | ||
28 | - * 64 bit to 32 bit float conversion | ||
29 | - * with von Neumann rounding (round to odd) | ||
30 | - */ | ||
31 | - TCGv_i32 tmp = tcg_temp_new_i32(); | ||
32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
33 | - tcg_gen_extu_i32_i64(d, tmp); | ||
34 | + if (fp_access_check(s)) { | ||
35 | + /* | ||
36 | + * 64 bit to 32 bit float conversion | ||
37 | + * with von Neumann rounding (round to odd) | ||
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
45 | } | ||
46 | |||
47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
48 | - NULL, | ||
49 | - NULL, | ||
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
53 | - | ||
54 | #undef WRAP_ENV | ||
55 | |||
56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) | ||
57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | ||
58 | tcg_gen_extu_i32_i64(d, tmp); | ||
59 | } | ||
60 | |||
61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
62 | +{ | ||
63 | + /* | ||
64 | + * 64 bit to 32 bit float conversion | ||
65 | + * with von Neumann rounding (round to odd) | ||
66 | + */ | ||
67 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
69 | + tcg_gen_extu_i32_i64(d, tmp); | ||
70 | +} | ||
71 | + | ||
72 | static ArithOneOp * const f_vector_fcvtn[] = { | ||
73 | NULL, | ||
74 | gen_fcvtn_hs, | ||
75 | gen_fcvtn_sd, | ||
76 | }; | ||
77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
78 | + NULL, | ||
79 | + NULL, | ||
80 | + gen_fcvtxn_sd, | ||
81 | +}; | ||
82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) | ||
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
84 | |||
85 | -- | ||
86 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by | ||
2 | element instructions; these both need to merge the result with the Rn | ||
3 | register when FPCR.NEP is set. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
16 | |||
17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | ||
18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
25 | |||
26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); | ||
27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
34 | |||
35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | ||
36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | -- | ||
43 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special | ||
2 | cases: | ||
3 | 1 | ||
4 | * comparing two zeroes (even of different sign) or comparing a NaN | ||
5 | with anything always returns the second argument (possibly | ||
6 | squashed to zero) | ||
7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 | ||
8 | |||
9 | Implement these semantics in new helper functions and select them at | ||
10 | translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. | ||
11 | (We will convert the other FMAX and FMIN insns in subsequent | ||
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
15 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | target/arm/tcg/helper-a64.h | 7 +++++++ | ||
20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ | ||
21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- | ||
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/tcg/helper-a64.h | ||
27 | +++ b/target/arm/tcg/helper-a64.h | ||
28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) | ||
29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) | ||
30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) | ||
31 | |||
32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) | ||
33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) | ||
34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) | ||
35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) | ||
36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) | ||
37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | ||
38 | + | ||
39 | DEF_HELPER_2(exception_return, void, env, i64) | ||
40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) | ||
41 | |||
42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/helper-a64.c | ||
45 | +++ b/target/arm/tcg/helper-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) | ||
47 | return r; | ||
48 | } | ||
49 | |||
50 | +/* | ||
51 | + * AH=1 min/max have some odd special cases: | ||
52 | + * comparing two zeroes (regardless of sign), (NaN, anything), | ||
53 | + * or (anything, NaN) should return the second argument (possibly | ||
54 | + * squashed to zero). | ||
55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. | ||
56 | + */ | ||
57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ | ||
58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
59 | + { \ | ||
60 | + bool save; \ | ||
61 | + CTYPE r; \ | ||
62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ | ||
65 | + return b; \ | ||
66 | + } \ | ||
67 | + if (FLOATTYPE ## _is_any_nan(a) || \ | ||
68 | + FLOATTYPE ## _is_any_nan(b)) { \ | ||
69 | + float_raise(float_flag_invalid, fpst); \ | ||
70 | + return b; \ | ||
71 | + } \ | ||
72 | + save = get_flush_to_zero(fpst); \ | ||
73 | + set_flush_to_zero(false, fpst); \ | ||
74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ | ||
75 | + set_flush_to_zero(save, fpst); \ | ||
76 | + return r; \ | ||
77 | + } | ||
78 | + | ||
79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) | ||
80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) | ||
81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) | ||
82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) | ||
83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) | ||
84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) | ||
85 | + | ||
86 | /* 64-bit versions of the CRC helpers. Note that although the operation | ||
87 | * (and the prototypes of crc32c() and crc32() mean that only the bottom | ||
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
94 | select_ah_fpst(s, a->esz)); | ||
95 | } | ||
96 | |||
97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, | ||
99 | + const FPScalar *fnormal, | ||
100 | + const FPScalar *fah, | ||
101 | + int mergereg) | ||
102 | +{ | ||
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
104 | +} | ||
105 | + | ||
106 | static const FPScalar f_scalar_fadd = { | ||
107 | gen_helper_vfp_addh, | ||
108 | gen_helper_vfp_adds, | ||
109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { | ||
110 | gen_helper_vfp_maxs, | ||
111 | gen_helper_vfp_maxd, | ||
112 | }; | ||
113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
114 | +static const FPScalar f_scalar_fmax_ah = { | ||
115 | + gen_helper_vfp_ah_maxh, | ||
116 | + gen_helper_vfp_ah_maxs, | ||
117 | + gen_helper_vfp_ah_maxd, | ||
118 | +}; | ||
119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) | ||
120 | |||
121 | static const FPScalar f_scalar_fmin = { | ||
122 | gen_helper_vfp_minh, | ||
123 | gen_helper_vfp_mins, | ||
124 | gen_helper_vfp_mind, | ||
125 | }; | ||
126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
127 | +static const FPScalar f_scalar_fmin_ah = { | ||
128 | + gen_helper_vfp_ah_minh, | ||
129 | + gen_helper_vfp_ah_mins, | ||
130 | + gen_helper_vfp_ah_mind, | ||
131 | +}; | ||
132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fmaxnm = { | ||
135 | gen_helper_vfp_maxnumh, | ||
136 | -- | ||
137 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by | ||
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar vfp_ah_min* and vfp_ah_max* helpers on each element. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- | ||
10 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
11 | 3 files changed, 41 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/helper-sve.h | ||
16 | +++ b/target/arm/tcg/helper-sve.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
19 | void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
22 | + void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
24 | + void, ptr, ptr, ptr, fpst, i32) | ||
25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, fpst, i32) | ||
27 | + | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, | ||
29 | + void, ptr, ptr, ptr, fpst, i32) | ||
30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
31 | + void, ptr, ptr, ptr, fpst, i32) | ||
32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, fpst, i32) | ||
34 | + | ||
35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
36 | i64, ptr, ptr, fpst, i32) | ||
37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/tcg/translate-a64.c | ||
41 | +++ b/target/arm/tcg/translate-a64.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
43 | FPST_A64_F16 : FPST_A64); | ||
44 | } | ||
45 | |||
46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
47 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
48 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
49 | +{ | ||
50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
51 | +} | ||
52 | + | ||
53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
54 | gen_helper_gvec_3_ptr * const f[3]) | ||
55 | { | ||
56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { | ||
57 | gen_helper_gvec_fmax_s, | ||
58 | gen_helper_gvec_fmax_d, | ||
59 | }; | ||
60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) | ||
61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { | ||
62 | + gen_helper_gvec_ah_fmax_h, | ||
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
101 | |||
102 | -- | ||
103 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for FMAXV and FMINV. These are the | ||
2 | "recursively reduce all lanes of a vector to a scalar result" insns; | ||
3 | we just need to use the _ah_ helper for the reduction step when | ||
4 | FPCR.AH == 1. | ||
5 | 1 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- | ||
10 | 1 file changed, 18 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/translate-a64.c | ||
15 | +++ b/target/arm/tcg/translate-a64.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, | ||
17 | } | ||
18 | |||
19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | ||
20 | - NeonGenTwoSingleOpFn *fn) | ||
21 | + NeonGenTwoSingleOpFn *fnormal, | ||
22 | + NeonGenTwoSingleOpFn *fah) | ||
23 | { | ||
24 | if (fp_access_check(s)) { | ||
25 | MemOp esz = a->esz; | ||
26 | int elts = (a->q ? 16 : 8) >> esz; | ||
27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); | ||
29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | ||
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) | ||
37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) | ||
38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) | ||
39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) | ||
40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, | ||
41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) | ||
42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, | ||
43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) | ||
44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, | ||
45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) | ||
46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, | ||
47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) | ||
48 | |||
49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) | ||
50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) | ||
51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) | ||
52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) | ||
53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, | ||
54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) | ||
55 | +TRANS(FMINNMV_s, do_fp_reduction, a, | ||
56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) | ||
57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) | ||
58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) | ||
59 | |||
60 | /* | ||
61 | * Floating-point Immediate | ||
62 | -- | ||
63 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the pairwise floating | ||
2 | point minimum/maximum insns FMINP and FMAXP. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ | ||
10 | 3 files changed, 45 insertions(+), 4 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { | ||
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, | ||
73 | + const FPScalar *fnormal, | ||
74 | + const FPScalar *fah) | ||
75 | +{ | ||
76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); | ||
77 | +} | ||
78 | + | ||
79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) | ||
80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) | ||
81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) | ||
82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) | ||
83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) | ||
84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) | ||
85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) | ||
86 | |||
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
99 | + | ||
100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) | ||
101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) | ||
102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) | ||
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
108 | -- | ||
109 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV | ||
2 | vector-reduction-to-scalar max/min operations. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, | ||
18 | i64, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, | ||
21 | + i64, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, | ||
23 | + i64, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, | ||
25 | + i64, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, | ||
28 | + i64, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | ||
30 | + i64, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, | ||
32 | + i64, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ | ||
42 | uintptr_t half = n / 2; \ | ||
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
52 | } | ||
53 | |||
54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) | ||
58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) | ||
59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) | ||
60 | |||
61 | /* Identity is floatN_default_nan, without the function call. */ | ||
62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) | ||
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
133 | -- | ||
134 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip | ||
2 | its sign bit. This means we can no longer use gen_vfp_neg*() | ||
3 | everywhere but must instead generate slightly more complex code when | ||
4 | FPCR.AH is set. | ||
5 | 1 | ||
6 | Make this change for the scalar FNEG and for those places in | ||
7 | translate-a64.c which were previously directly calling | ||
8 | gen_vfp_neg*(). | ||
9 | |||
10 | This change in semantics also affects any other instruction whose | ||
11 | pseudocode calls FPNeg(); in following commits we extend this | ||
12 | change to the other affected instructions. | ||
13 | |||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- | ||
18 | 1 file changed, 114 insertions(+), 11 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); | ||
26 | } | ||
27 | |||
28 | +/* | ||
29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. | ||
30 | + * These functions implement | ||
31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) | ||
32 | + * which for float32 is | ||
33 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) | ||
34 | + * and similarly for the other float sizes. | ||
35 | + */ | ||
36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) | ||
37 | +{ | ||
38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
39 | + | ||
40 | + gen_vfp_negh(chs_s, s); | ||
41 | + gen_vfp_absh(abs_s, s); | ||
42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
43 | + abs_s, tcg_constant_i32(0x7c00), | ||
44 | + s, chs_s); | ||
45 | +} | ||
46 | + | ||
47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) | ||
48 | +{ | ||
49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
50 | + | ||
51 | + gen_vfp_negs(chs_s, s); | ||
52 | + gen_vfp_abss(abs_s, s); | ||
53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
54 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
55 | + s, chs_s); | ||
56 | +} | ||
57 | + | ||
58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
59 | +{ | ||
60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); | ||
61 | + | ||
62 | + gen_vfp_negd(chs_s, s); | ||
63 | + gen_vfp_absd(abs_s, s); | ||
64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
66 | + s, chs_s); | ||
67 | +} | ||
68 | + | ||
69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
70 | +{ | ||
71 | + if (dc->fpcr_ah) { | ||
72 | + gen_vfp_ah_negh(d, s); | ||
73 | + } else { | ||
74 | + gen_vfp_negh(d, s); | ||
75 | + } | ||
76 | +} | ||
77 | + | ||
78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
79 | +{ | ||
80 | + if (dc->fpcr_ah) { | ||
81 | + gen_vfp_ah_negs(d, s); | ||
82 | + } else { | ||
83 | + gen_vfp_negs(d, s); | ||
84 | + } | ||
85 | +} | ||
86 | + | ||
87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) | ||
88 | +{ | ||
89 | + if (dc->fpcr_ah) { | ||
90 | + gen_vfp_ah_negd(d, s); | ||
91 | + } else { | ||
92 | + gen_vfp_negd(d, s); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier | ||
97 | * than the 32 bit equivalent. | ||
98 | */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
100 | gen_vfp_negd(d, d); | ||
101 | } | ||
102 | |||
103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
104 | +{ | ||
105 | + gen_helper_vfp_mulh(d, n, m, s); | ||
106 | + gen_vfp_ah_negh(d, d); | ||
107 | +} | ||
108 | + | ||
109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
110 | +{ | ||
111 | + gen_helper_vfp_muls(d, n, m, s); | ||
112 | + gen_vfp_ah_negs(d, d); | ||
113 | +} | ||
114 | + | ||
115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
116 | +{ | ||
117 | + gen_helper_vfp_muld(d, n, m, s); | ||
118 | + gen_vfp_ah_negd(d, d); | ||
119 | +} | ||
120 | + | ||
121 | static const FPScalar f_scalar_fnmul = { | ||
122 | gen_fnmul_h, | ||
123 | gen_fnmul_s, | ||
124 | gen_fnmul_d, | ||
125 | }; | ||
126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
127 | +static const FPScalar f_scalar_ah_fnmul = { | ||
128 | + gen_fnmul_ah_h, | ||
129 | + gen_fnmul_ah_s, | ||
130 | + gen_fnmul_ah_d, | ||
131 | +}; | ||
132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fcmeq = { | ||
135 | gen_helper_advsimd_ceq_f16, | ||
136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
137 | |||
138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); | ||
139 | if (neg) { | ||
140 | - gen_vfp_negd(t1, t1); | ||
141 | + gen_vfp_maybe_ah_negd(s, t1, t1); | ||
142 | } | ||
143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
146 | |||
147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); | ||
148 | if (neg) { | ||
149 | - gen_vfp_negs(t1, t1); | ||
150 | + gen_vfp_maybe_ah_negs(s, t1, t1); | ||
151 | } | ||
152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
155 | |||
156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); | ||
157 | if (neg) { | ||
158 | - gen_vfp_negh(t1, t1); | ||
159 | + gen_vfp_maybe_ah_negh(s, t1, t1); | ||
160 | } | ||
161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
162 | fpstatus_ptr(FPST_A64_F16)); | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); | ||
165 | |||
166 | if (neg_a) { | ||
167 | - gen_vfp_negd(ta, ta); | ||
168 | + gen_vfp_maybe_ah_negd(s, ta, ta); | ||
169 | } | ||
170 | if (neg_n) { | ||
171 | - gen_vfp_negd(tn, tn); | ||
172 | + gen_vfp_maybe_ah_negd(s, tn, tn); | ||
173 | } | ||
174 | fpst = fpstatus_ptr(FPST_A64); | ||
175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); | ||
178 | |||
179 | if (neg_a) { | ||
180 | - gen_vfp_negs(ta, ta); | ||
181 | + gen_vfp_maybe_ah_negs(s, ta, ta); | ||
182 | } | ||
183 | if (neg_n) { | ||
184 | - gen_vfp_negs(tn, tn); | ||
185 | + gen_vfp_maybe_ah_negs(s, tn, tn); | ||
186 | } | ||
187 | fpst = fpstatus_ptr(FPST_A64); | ||
188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); | ||
191 | |||
192 | if (neg_a) { | ||
193 | - gen_vfp_negh(ta, ta); | ||
194 | + gen_vfp_maybe_ah_negh(s, ta, ta); | ||
195 | } | ||
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
203 | return true; | ||
204 | } | ||
205 | |||
206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, | ||
207 | + const FPScalar1Int *fnormal, | ||
208 | + const FPScalar1Int *fah) | ||
209 | +{ | ||
210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); | ||
211 | +} | ||
212 | + | ||
213 | static const FPScalar1Int f_scalar_fmov = { | ||
214 | tcg_gen_mov_i32, | ||
215 | tcg_gen_mov_i32, | ||
216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { | ||
217 | gen_vfp_negs, | ||
218 | gen_vfp_negd, | ||
219 | }; | ||
220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
221 | +static const FPScalar1Int f_scalar_ah_fneg = { | ||
222 | + gen_vfp_ah_negh, | ||
223 | + gen_vfp_ah_negs, | ||
224 | + gen_vfp_ah_negd, | ||
225 | +}; | ||
226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) | ||
227 | |||
228 | typedef struct FPScalar1 { | ||
229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
230 | -- | ||
231 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should | ||
2 | not change its sign bit. This means we can no longer use | ||
3 | gen_vfp_abs*() everywhere but must instead generate slightly more | ||
4 | complex code when FPCR.AH is set. | ||
5 | 1 | ||
6 | Implement these semantics for scalar FABS and FABD. This change also | ||
7 | affects all other instructions whose pseudocode calls FPAbs(); we | ||
8 | will extend the change to those instructions in following commits. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- | ||
14 | 1 file changed, 67 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/tcg/translate-a64.c | ||
19 | +++ b/target/arm/tcg/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
21 | s, chs_s); | ||
22 | } | ||
23 | |||
24 | +/* | ||
25 | + * These functions implement | ||
26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) | ||
27 | + * which for float32 is | ||
28 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) | ||
29 | + * and similarly for the other float sizes. | ||
30 | + */ | ||
31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) | ||
32 | +{ | ||
33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
34 | + | ||
35 | + gen_vfp_absh(abs_s, s); | ||
36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
37 | + abs_s, tcg_constant_i32(0x7c00), | ||
38 | + s, abs_s); | ||
39 | +} | ||
40 | + | ||
41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) | ||
42 | +{ | ||
43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
44 | + | ||
45 | + gen_vfp_abss(abs_s, s); | ||
46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
47 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
48 | + s, abs_s); | ||
49 | +} | ||
50 | + | ||
51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) | ||
52 | +{ | ||
53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); | ||
54 | + | ||
55 | + gen_vfp_absd(abs_s, s); | ||
56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
58 | + s, abs_s); | ||
59 | +} | ||
60 | + | ||
61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
62 | { | ||
63 | if (dc->fpcr_ah) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
65 | gen_vfp_absd(d, d); | ||
66 | } | ||
67 | |||
68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
69 | +{ | ||
70 | + gen_helper_vfp_subh(d, n, m, s); | ||
71 | + gen_vfp_ah_absh(d, d); | ||
72 | +} | ||
73 | + | ||
74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
75 | +{ | ||
76 | + gen_helper_vfp_subs(d, n, m, s); | ||
77 | + gen_vfp_ah_abss(d, d); | ||
78 | +} | ||
79 | + | ||
80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
81 | +{ | ||
82 | + gen_helper_vfp_subd(d, n, m, s); | ||
83 | + gen_vfp_ah_absd(d, d); | ||
84 | +} | ||
85 | + | ||
86 | static const FPScalar f_scalar_fabd = { | ||
87 | gen_fabd_h, | ||
88 | gen_fabd_s, | ||
89 | gen_fabd_d, | ||
90 | }; | ||
91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
92 | +static const FPScalar f_scalar_ah_fabd = { | ||
93 | + gen_fabd_ah_h, | ||
94 | + gen_fabd_ah_s, | ||
95 | + gen_fabd_ah_d, | ||
96 | +}; | ||
97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) | ||
98 | |||
99 | static const FPScalar f_scalar_frecps = { | ||
100 | gen_helper_recpsf_f16, | ||
101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { | ||
102 | gen_vfp_abss, | ||
103 | gen_vfp_absd, | ||
104 | }; | ||
105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
106 | +static const FPScalar1Int f_scalar_ah_fabs = { | ||
107 | + gen_vfp_ah_absh, | ||
108 | + gen_vfp_ah_abss, | ||
109 | + gen_vfp_ah_absd, | ||
110 | +}; | ||
111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) | ||
112 | |||
113 | static const FPScalar1Int f_scalar_fneg = { | ||
114 | gen_vfp_negh, | ||
115 | -- | ||
116 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Split the handling of vector FABD so that it calls a different set | ||
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 4 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ | ||
11 | 3 files changed, 33 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/helper.h | ||
16 | +++ b/target/arm/helper.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
52 | } | ||
53 | |||
54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) | ||
56 | +{ | ||
57 | + float16 r = float16_sub(op1, op2, stat); | ||
58 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
59 | +} | ||
60 | + | ||
61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) | ||
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
66 | + | ||
67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) | ||
68 | +{ | ||
69 | + float64 r = float64_sub(op1, op2, stat); | ||
70 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
71 | +} | ||
72 | + | ||
73 | /* | ||
74 | * Reciprocal step. These are the AArch32 version which uses a | ||
75 | * non-fused multiply-and-subtract. | ||
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't change the sign of a NaN" | ||
2 | semantics. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign | ||
2 | of a NaN" semantics. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 7 +++++++ | ||
8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 2 +- | ||
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
36 | } | ||
37 | |||
38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) | ||
40 | +{ | ||
41 | + float16 r = float16_sub(op1, op2, stat); | ||
42 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
43 | +} | ||
44 | + | ||
45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) | ||
46 | +{ | ||
47 | + float32 r = float32_sub(op1, op2, stat); | ||
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
49 | +} | ||
50 | + | ||
51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) | ||
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
65 | { | ||
66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
79 | -- | ||
80 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
6 | 1 | ||
7 | The construction of neg_imag and neg_real was done to make it easy | ||
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
13 | |||
14 | Note that there was a naming issue with neg_imag and neg_real. | ||
15 | They were named backward, with neg_imag being non-zero for rot=1, | ||
16 | and vice versa. This was combined with reversed usage within the | ||
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ | ||
23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- | ||
24 | target/arm/tcg/translate-sve.c | 2 +- | ||
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
26 | |||
27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/vec_internal.h | ||
30 | +++ b/target/arm/tcg/vec_internal.h | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
41 | */ | ||
42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
43 | |||
44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
45 | +{ | ||
46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
47 | +} | ||
48 | + | ||
49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | ||
50 | +{ | ||
51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | ||
52 | +} | ||
53 | + | ||
54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | ||
55 | +{ | ||
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
58 | + | ||
59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | ||
60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/sve_helper.c | ||
63 | +++ b/target/arm/tcg/sve_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
65 | { | ||
66 | intptr_t j, i = simd_oprsz(desc); | ||
67 | uint64_t *g = vg; | ||
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
164 | -- | ||
165 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into | ||
3 | the SIMD data field passed to the helper and using that to decide | ||
4 | whether to negate the values. | ||
5 | 1 | ||
6 | The construction of neg_imag and neg_real was done to make it easy | ||
7 | to apply both in parallel with two simple logical operations. This | ||
8 | changed with FPCR.AH, which is more complex than that. Switch to | ||
9 | an approach closer to the pseudocode, where we extract the rot | ||
10 | parameter from the SIMD data word and negate the appropriate | ||
11 | input value. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | target/arm/tcg/translate-a64.c | 10 +++++-- | ||
17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- | ||
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { | ||
25 | gen_helper_gvec_fcadds, | ||
26 | gen_helper_gvec_fcaddd, | ||
27 | }; | ||
28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) | ||
29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) | ||
30 | +/* | ||
31 | + * Encode FPCR.AH into the data so the helper knows whether the | ||
32 | + * negations it does should avoid flipping the sign bit on a NaN | ||
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
135 | -- | ||
136 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle the FPCR.AH semantics that we do not change the sign of an | ||
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
5 | 1 | ||
6 | Since the extra helper functions would be very repetitive if written | ||
7 | out longhand, we condense them and the existing non-AH helpers into | ||
8 | being emitted via macros. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/helper-a64.h | 6 ++ | ||
14 | target/arm/tcg/vec_internal.h | 18 ++++++ | ||
15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- | ||
16 | target/arm/tcg/translate-a64.c | 25 +++++-- | ||
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/helper-a64.h | ||
22 | +++ b/target/arm/tcg/helper-a64.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) | ||
24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/vec_internal.h | ||
42 | +++ b/target/arm/tcg/vec_internal.h | ||
43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
44 | */ | ||
45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
46 | |||
47 | +/* | ||
48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. | ||
49 | + */ | ||
50 | +static inline float16 float16_ah_chs(float16 a) | ||
51 | +{ | ||
52 | + return float16_is_any_nan(a) ? a : float16_chs(a); | ||
53 | +} | ||
54 | + | ||
55 | +static inline float32 float32_ah_chs(float32 a) | ||
56 | +{ | ||
57 | + return float32_is_any_nan(a) ? a : float32_chs(a); | ||
58 | +} | ||
59 | + | ||
60 | +static inline float64 float64_ah_chs(float64 a) | ||
61 | +{ | ||
62 | + return float64_is_any_nan(a) ? a : float64_chs(a); | ||
63 | +} | ||
64 | + | ||
65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
66 | { | ||
67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/helper-a64.c | ||
71 | +++ b/target/arm/tcg/helper-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #ifdef CONFIG_USER_ONLY | ||
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
82 | } | ||
83 | |||
84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
85 | +/* | ||
86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
87 | * versions, these do a fully fused multiply-add or | ||
88 | * multiply-add-and-halve. | ||
89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. | ||
90 | */ | ||
91 | - | ||
92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
93 | -{ | ||
94 | - a = float16_squash_input_denormal(a, fpst); | ||
95 | - b = float16_squash_input_denormal(b, fpst); | ||
96 | - | ||
97 | - a = float16_chs(a); | ||
98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
112 | } | ||
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
232 | }; | ||
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
258 | -- | ||
259 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in the vector versions of FRECPS and FRSQRTS, by implementing | ||
3 | new vector wrappers that call the _ah_ scalar helpers. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- | ||
10 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
11 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/helper-sve.h | ||
17 | +++ b/target/arm/tcg/helper-sve.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
20 | void, ptr, ptr, ptr, fpst, i32) | ||
21 | |||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, fpst, i32) | ||
28 | + | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, | ||
34 | + void, ptr, ptr, ptr, fpst, i32) | ||
35 | + | ||
36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
37 | void, ptr, ptr, ptr, fpst, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
45 | } | ||
46 | |||
47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
48 | - gen_helper_gvec_3_ptr * const f[3]) | ||
49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
50 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
52 | { | ||
53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, | ||
55 | select_ah_fpst(s, a->esz)); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
59 | gen_helper_gvec_recps_s, | ||
60 | gen_helper_gvec_recps_d, | ||
61 | }; | ||
62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { | ||
64 | + gen_helper_gvec_ah_recps_h, | ||
65 | + gen_helper_gvec_ah_recps_s, | ||
66 | + gen_helper_gvec_ah_recps_d, | ||
67 | +}; | ||
68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) | ||
69 | |||
70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
71 | gen_helper_gvec_rsqrts_h, | ||
72 | gen_helper_gvec_rsqrts_s, | ||
73 | gen_helper_gvec_rsqrts_d, | ||
74 | }; | ||
75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { | ||
77 | + gen_helper_gvec_ah_rsqrts_h, | ||
78 | + gen_helper_gvec_ah_rsqrts_s, | ||
79 | + gen_helper_gvec_ah_rsqrts_d, | ||
80 | +}; | ||
81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) | ||
82 | |||
83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
84 | gen_helper_gvec_faddp_h, | ||
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/tcg/translate-sve.c | ||
88 | +++ b/target/arm/tcg/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
90 | NULL, gen_helper_gvec_##name##_h, \ | ||
91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
92 | }; \ | ||
93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ | ||
95 | + NULL, gen_helper_gvec_ah_##name##_h, \ | ||
96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ | ||
97 | + }; \ | ||
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
114 | + | ||
115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) | ||
116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) | ||
117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) | ||
118 | + | ||
119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS | ||
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
5 | 1 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] | ||
8 | fixed] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/helper.h | ||
20 | +++ b/target/arm/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
23 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | + | ||
32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | + | ||
39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
40 | void, ptr, ptr, ptr, ptr, i32) | ||
41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/translate-a64.c | ||
45 | +++ b/target/arm/tcg/translate-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
47 | |||
48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
49 | { | ||
50 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
51 | - gen_helper_gvec_fmla_idx_h, | ||
52 | - gen_helper_gvec_fmla_idx_s, | ||
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/arm/tcg/translate-sve.c | ||
80 | +++ b/target/arm/tcg/translate-sve.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
82 | *** SVE Floating Point Multiply-Add Indexed Group | ||
83 | */ | ||
84 | |||
85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
86 | -{ | ||
87 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
88 | - NULL, | ||
89 | - gen_helper_gvec_fmla_idx_h, | ||
90 | - gen_helper_gvec_fmla_idx_s, | ||
91 | - gen_helper_gvec_fmla_idx_d, | ||
92 | - }; | ||
93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
94 | - (a->index << 1) | sub, | ||
95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
96 | -} | ||
97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
98 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
100 | +}; | ||
101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
104 | |||
105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
108 | + { NULL, NULL }, | ||
109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
112 | +}; | ||
113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
114 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
115 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | *** SVE Floating Point Multiply Indexed Group | ||
120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/arm/tcg/vec_helper.c | ||
123 | +++ b/target/arm/tcg/vec_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
125 | |||
126 | #undef DO_FMUL_IDX | ||
127 | |||
128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
131 | float_status *stat, uint32_t desc) \ | ||
132 | { \ | ||
133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ | ||
137 | + intptr_t idx = simd_data(desc); \ | ||
138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
141 | TYPE mm = m[H(i + idx)]; \ | ||
142 | for (j = 0; j < segment; j++) { \ | ||
143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ | ||
144 | - mm, a[i + j], 0, stat); \ | ||
145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
150 | } | ||
151 | |||
152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
158 | + | ||
159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
162 | + | ||
163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
166 | |||
167 | #undef DO_FMLA_IDX | ||
168 | |||
169 | -- | ||
170 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in FMLS (vector), by implementing a new set of helpers for | ||
3 | the AH=1 case. | ||
4 | 1 | ||
5 | The float_muladd_negate_product flag produces the same result | ||
6 | as negating either of the multiplication operands, assuming | ||
7 | neither of the operands is a NaN. But since FEAT_AFP does not | ||
8 | negate NaNs, this behaviour is exactly what we need. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/helper.h | 4 ++++ | ||
14 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ | ||
16 | 3 files changed, 32 insertions(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/helper.h | ||
21 | +++ b/target/arm/helper.h | ||
22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
25 | |||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
29 | + | ||
30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, | ||
31 | void, ptr, ptr, ptr, fpst, i32) | ||
32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, | ||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/translate-a64.c | ||
36 | +++ b/target/arm/tcg/translate-a64.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { | ||
38 | gen_helper_gvec_vfms_s, | ||
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
57 | } | ||
58 | |||
59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, | ||
60 | + float_status *stat) | ||
61 | +{ | ||
62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
63 | +} | ||
64 | + | ||
65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, | ||
66 | + float_status *stat) | ||
67 | +{ | ||
68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
69 | +} | ||
70 | + | ||
71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
72 | + float_status *stat) | ||
73 | +{ | ||
74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
75 | +} | ||
76 | + | ||
77 | #define DO_MULADD(NAME, FUNC, TYPE) \ | ||
78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ | ||
79 | float_status *stat, uint32_t desc) \ | ||
80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) | ||
81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) | ||
82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) | ||
83 | |||
84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) | ||
85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) | ||
86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) | ||
87 | + | ||
88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. | ||
89 | * For AdvSIMD, there is of course only one such vector segment. | ||
90 | */ | ||
91 | -- | ||
92 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the | ||
2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case | ||
3 | which end up passing muladd negation flags to the do_fmla_zpzzz_* functions | ||
4 | that do the work. | ||
5 | 1 | ||
6 | The float*_muladd functions have a flags argument that can | ||
7 | perform optional negation of various operand. We don't use | ||
8 | that for "normal" arm fmla, because the muladd flags are not | ||
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
11 | |||
12 | The non-AH helpers pass in a zero flags argument and control the | ||
13 | negation via the neg1 and neg3 arguments; the AH helpers always pass | ||
14 | in neg1 and neg3 as zero and control the negation via the flags | ||
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/tcg/helper-sve.h | 21 ++++++++ | ||
22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- | ||
23 | target/arm/tcg/translate-sve.c | 18 ++++--- | ||
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
25 | |||
26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/tcg/helper-sve.h | ||
29 | +++ b/target/arm/tcg/helper-sve.h | ||
30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | |||
34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, | ||
46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
47 | + | ||
48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/tcg/sve_helper.c | ||
61 | +++ b/target/arm/tcg/sve_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
63 | |||
64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
65 | float_status *status, uint32_t desc, | ||
66 | - uint16_t neg1, uint16_t neg3) | ||
67 | + uint16_t neg1, uint16_t neg3, int flags) | ||
68 | { | ||
69 | intptr_t i = simd_oprsz(desc); | ||
70 | uint64_t *g = vg; | ||
71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | ||
73 | e2 = *(uint16_t *)(vm + H1_2(i)); | ||
74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | ||
75 | - r = float16_muladd(e1, e2, e3, 0, status); | ||
76 | + r = float16_muladd(e1, e2, e3, flags, status); | ||
77 | *(uint16_t *)(vd + H1_2(i)) = r; | ||
78 | } | ||
79 | } while (i & 63); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
82 | void *vg, float_status *status, uint32_t desc) | ||
83 | { | ||
84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
86 | } | ||
87 | |||
88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
89 | void *vg, float_status *status, uint32_t desc) | ||
90 | { | ||
91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); | ||
92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | ||
93 | } | ||
94 | |||
95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
96 | void *vg, float_status *status, uint32_t desc) | ||
97 | { | ||
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); | ||
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | ||
100 | } | ||
101 | |||
102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
103 | void *vg, float_status *status, uint32_t desc) | ||
104 | { | ||
105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); | ||
106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | ||
107 | +} | ||
108 | + | ||
109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
110 | + void *vg, float_status *status, uint32_t desc) | ||
111 | +{ | ||
112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
113 | + float_muladd_negate_product); | ||
114 | +} | ||
115 | + | ||
116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
117 | + void *vg, float_status *status, uint32_t desc) | ||
118 | +{ | ||
119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
120 | + float_muladd_negate_product | float_muladd_negate_c); | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
124 | + void *vg, float_status *status, uint32_t desc) | ||
125 | +{ | ||
126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
298 | |||
299 | -- | ||
300 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when | ||
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field | ||
3 | and use that to determine whether to do the negation. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- | ||
9 | target/arm/tcg/translate-sve.c | 4 ++-- | ||
10 | 2 files changed, 17 insertions(+), 5 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/sve_helper.c | ||
15 | +++ b/target/arm/tcg/sve_helper.c | ||
16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) | ||
17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
18 | { | ||
19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; | ||
20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
21 | uint16_t *d = vd, *n = vn, *m = vm; | ||
22 | for (i = 0; i < opr_sz; i += 1) { | ||
23 | uint16_t nn = n[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
25 | if (mm & 1) { | ||
26 | nn = float16_one; | ||
27 | } | ||
28 | - d[i] = nn ^ (mm & 2) << 14; | ||
29 | + if (mm & 2) { | ||
30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | ||
31 | + } | ||
32 | + d[i] = nn; | ||
33 | } | ||
34 | } | ||
35 | |||
36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
37 | { | ||
38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; | ||
39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
40 | uint32_t *d = vd, *n = vn, *m = vm; | ||
41 | for (i = 0; i < opr_sz; i += 1) { | ||
42 | uint32_t nn = n[i]; | ||
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
44 | if (mm & 1) { | ||
45 | nn = float32_one; | ||
46 | } | ||
47 | - d[i] = nn ^ (mm & 2) << 30; | ||
48 | + if (mm & 2) { | ||
49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | ||
50 | + } | ||
51 | + d[i] = nn; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
56 | { | ||
57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; | ||
58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
59 | uint64_t *d = vd, *n = vn, *m = vm; | ||
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
92 | |||
93 | /* | ||
94 | *** SVE Predicate Logical Operations Group | ||
95 | -- | ||
96 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when | ||
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, | ||
3 | so we can select the correct behaviour. | ||
4 | 1 | ||
5 | Because the operand is known to be negative, negating the operand | ||
6 | is the same as taking the absolute value. Defer this to the muladd | ||
7 | operation via flags, so that it happens after NaN detection, which | ||
8 | is correct for FPCR.AH. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- | ||
14 | target/arm/tcg/translate-sve.c | 3 ++- | ||
15 | 2 files changed, 35 insertions(+), 10 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/tcg/sve_helper.c | ||
20 | +++ b/target/arm/tcg/sve_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, | ||
22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
23 | }; | ||
24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); | ||
25 | - intptr_t x = simd_data(desc); | ||
26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
28 | float16 *d = vd, *n = vn, *m = vm; | ||
29 | + | ||
30 | for (i = 0; i < opr_sz; i++) { | ||
31 | float16 mm = m[i]; | ||
32 | intptr_t xx = x; | ||
33 | + int flags = 0; | ||
34 | + | ||
35 | if (float16_is_neg(mm)) { | ||
36 | - mm = float16_abs(mm); | ||
37 | + if (fpcr_ah) { | ||
38 | + flags = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + mm = float16_abs(mm); | ||
41 | + } | ||
42 | xx += 8; | ||
43 | } | ||
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, | ||
50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, | ||
51 | }; | ||
52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); | ||
53 | - intptr_t x = simd_data(desc); | ||
54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
56 | float32 *d = vd, *n = vn, *m = vm; | ||
57 | + | ||
58 | for (i = 0; i < opr_sz; i++) { | ||
59 | float32 mm = m[i]; | ||
60 | intptr_t xx = x; | ||
61 | + int flags = 0; | ||
62 | + | ||
63 | if (float32_is_neg(mm)) { | ||
64 | - mm = float32_abs(mm); | ||
65 | + if (fpcr_ah) { | ||
66 | + flags = float_muladd_negate_product; | ||
67 | + } else { | ||
68 | + mm = float32_abs(mm); | ||
69 | + } | ||
70 | xx += 8; | ||
71 | } | ||
72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); | ||
73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, | ||
78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, | ||
79 | }; | ||
80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); | ||
81 | - intptr_t x = simd_data(desc); | ||
82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
84 | float64 *d = vd, *n = vn, *m = vm; | ||
85 | + | ||
86 | for (i = 0; i < opr_sz; i++) { | ||
87 | float64 mm = m[i]; | ||
88 | intptr_t xx = x; | ||
89 | + int flags = 0; | ||
90 | + | ||
91 | if (float64_is_neg(mm)) { | ||
92 | - mm = float64_abs(mm); | ||
93 | + if (fpcr_ah) { | ||
94 | + flags = float_muladd_negate_product; | ||
95 | + } else { | ||
96 | + mm = float64_abs(mm); | ||
97 | + } | ||
98 | xx += 8; | ||
99 | } | ||
100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | ||
101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/target/arm/tcg/translate-sve.c | ||
108 | +++ b/target/arm/tcg/translate-sve.c | ||
109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | ||
110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, | ||
111 | }; | ||
112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | ||
113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, | ||
114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, | ||
115 | + a->imm | (s->fpcr_ah << 3), | ||
116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | -- | ||
120 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH | ||
4 | is set. Handle this by passing FPCR.AH to the helper via the | ||
5 | SIMD data field, and use this to select whether to do the | ||
6 | negation via XOR or via the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 40 insertions(+), 28 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
23 | |||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - a->rot, fn[a->esz]); | ||
27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); | ||
28 | return true; | ||
29 | } | ||
30 | |||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | - uint32_t neg_real = flip ^ neg_imag; | ||
41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
43 | + uint32_t negf_real = flip ^ negf_imag; | ||
44 | + float16 negx_imag, negx_real; | ||
45 | uintptr_t i; | ||
46 | |||
47 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
48 | - neg_real <<= 15; | ||
49 | - neg_imag <<= 15; | ||
50 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
51 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
55 | |||
56 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
57 | float16 e2 = n[H2(i + flip)]; | ||
58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
60 | float16 e4 = e2; | ||
61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
63 | |||
64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
68 | } | ||
69 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
72 | uintptr_t opr_sz = simd_oprsz(desc); | ||
73 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
76 | - uint32_t neg_real = flip ^ neg_imag; | ||
77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
79 | + uint32_t negf_real = flip ^ negf_imag; | ||
80 | + float32 negx_imag, negx_real; | ||
81 | uintptr_t i; | ||
82 | |||
83 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
84 | - neg_real <<= 31; | ||
85 | - neg_imag <<= 31; | ||
86 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
87 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
91 | |||
92 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
93 | float32 e2 = n[H4(i + flip)]; | ||
94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
96 | float32 e4 = e2; | ||
97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
99 | |||
100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
104 | } | ||
105 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
108 | uintptr_t opr_sz = simd_oprsz(desc); | ||
109 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
112 | - uint64_t neg_real = flip ^ neg_imag; | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_real, negx_imag; | ||
117 | uintptr_t i; | ||
118 | |||
119 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
120 | - neg_real <<= 63; | ||
121 | - neg_imag <<= 63; | ||
122 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
127 | |||
128 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
129 | float64 e2 = n[i + flip]; | ||
130 | - float64 e1 = m[i + flip] ^ neg_real; | ||
131 | + float64 e1 = m[i + flip] ^ negx_real; | ||
132 | float64 e4 = e2; | ||
133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
135 | |||
136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
140 | } | ||
141 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
142 | } | ||
143 | -- | ||
144 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | The negation step in FCMLA by index mustn't negate a NaN when | ||
4 | FPCR.AH is set. Use the same approach as vector FCMLA of | ||
5 | passing in FPCR.AH and using it to select whether to negate | ||
6 | by XOR or by the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
23 | if (fp_access_check(s)) { | ||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - (a->idx << 2) | a->rot, fn); | ||
27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); | ||
28 | } | ||
29 | return true; | ||
30 | } | ||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
42 | - uint32_t neg_real = flip ^ neg_imag; | ||
43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
44 | + uint32_t negf_real = flip ^ negf_imag; | ||
45 | intptr_t elements = opr_sz / sizeof(float16); | ||
46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
47 | + float16 negx_imag, negx_real; | ||
48 | intptr_t i, j; | ||
49 | |||
50 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
51 | - neg_real <<= 15; | ||
52 | - neg_imag <<= 15; | ||
53 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
54 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
58 | |||
59 | for (i = 0; i < elements; i += eltspersegment) { | ||
60 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
61 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
62 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
64 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
66 | |||
67 | for (j = i; j < i + eltspersegment; j += 2) { | ||
68 | float16 e2 = n[H2(j + flip)]; | ||
69 | float16 e4 = e2; | ||
70 | |||
71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
75 | } | ||
76 | } | ||
77 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
79 | uintptr_t opr_sz = simd_oprsz(desc); | ||
80 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
85 | - uint32_t neg_real = flip ^ neg_imag; | ||
86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
87 | + uint32_t negf_real = flip ^ negf_imag; | ||
88 | intptr_t elements = opr_sz / sizeof(float32); | ||
89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
90 | + float32 negx_imag, negx_real; | ||
91 | intptr_t i, j; | ||
92 | |||
93 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
94 | - neg_real <<= 31; | ||
95 | - neg_imag <<= 31; | ||
96 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
97 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
101 | |||
102 | for (i = 0; i < elements; i += eltspersegment) { | ||
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is | ||
4 | set. Use the same approach as we did for A64 FCMLA of passing in | ||
5 | FPCR.AH and using it to select whether to negate by XOR or by the | ||
6 | muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- | ||
14 | target/arm/tcg/translate-sve.c | 2 +- | ||
15 | 2 files changed, 43 insertions(+), 28 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/tcg/sve_helper.c | ||
20 | +++ b/target/arm/tcg/sve_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
22 | void *vg, float_status *status, uint32_t desc) | ||
23 | { | ||
24 | intptr_t j, i = simd_oprsz(desc); | ||
25 | - unsigned rot = simd_data(desc); | ||
26 | - bool flip = rot & 1; | ||
27 | - float16 neg_imag, neg_real; | ||
28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/arm/tcg/translate-sve.c | ||
165 | +++ b/target/arm/tcg/translate-sve.c | ||
166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | ||
167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | ||
168 | }; | ||
169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | ||
170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | ||
171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), | ||
172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
173 | |||
174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
175 | -- | ||
176 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN | ||
4 | in FMLSL by element and vector, using the usual trick of | ||
5 | negating by XOR when AH=0 and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- | ||
18 | 1 file changed, 46 insertions(+), 25 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) | ||
25 | */ | ||
26 | |||
27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
28 | - uint32_t desc, bool fz16) | ||
29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
30 | { | ||
31 | intptr_t i, oprsz = simd_oprsz(desc); | ||
32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | uint64_t n_4, m_4; | ||
36 | |||
37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
38 | - n_4 = load4_f16(vn, is_q, is_2); | ||
39 | + /* | ||
40 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
41 | + * Negate all inputs for AH=0 FMLSL at once. | ||
42 | + */ | ||
43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
44 | m_4 = load4_f16(vm, is_q, is_2); | ||
45 | |||
46 | - /* Negate all inputs for FMLSL at once. */ | ||
47 | - if (is_s) { | ||
48 | - n_4 ^= 0x8000800080008000ull; | ||
49 | - } | ||
50 | - | ||
51 | for (i = 0; i < oprsz / 4; i++) { | ||
52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); | ||
54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
56 | } | ||
57 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
58 | } | ||
59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
61 | CPUARMState *env, uint32_t desc) | ||
62 | { | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
66 | + | ||
67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
69 | } | ||
70 | |||
71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
72 | CPUARMState *env, uint32_t desc) | ||
73 | { | ||
74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint64_t negx = 0; | ||
77 | + int negf = 0; | ||
78 | + | ||
79 | + if (is_s) { | ||
80 | + if (env->vfp.fpcr & FPCR_AH) { | ||
81 | + negf = float_muladd_negate_product; | ||
82 | + } else { | ||
83 | + negx = 0x8000800080008000ull; | ||
84 | + } | ||
85 | + } | ||
86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | } | ||
92 | |||
93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
94 | - uint32_t desc, bool fz16) | ||
95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
96 | { | ||
97 | intptr_t i, oprsz = simd_oprsz(desc); | ||
98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
101 | int is_q = oprsz == 16; | ||
102 | uint64_t n_4; | ||
103 | float32 m_1; | ||
104 | |||
105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
106 | - n_4 = load4_f16(vn, is_q, is_2); | ||
107 | - | ||
108 | - /* Negate all inputs for FMLSL at once. */ | ||
109 | - if (is_s) { | ||
110 | - n_4 ^= 0x8000800080008000ull; | ||
111 | - } | ||
112 | - | ||
113 | + /* | ||
114 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
115 | + * Negate all inputs for AH=0 FMLSL at once. | ||
116 | + */ | ||
117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); | ||
119 | |||
120 | for (i = 0; i < oprsz / 4; i++) { | ||
121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
124 | } | ||
125 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
126 | } | ||
127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
129 | CPUARMState *env, uint32_t desc) | ||
130 | { | ||
131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
134 | + | ||
135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
137 | } | ||
138 | |||
139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
140 | CPUARMState *env, uint32_t desc) | ||
141 | { | ||
142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
144 | + uint64_t negx = 0; | ||
145 | + int negf = 0; | ||
146 | + | ||
147 | + if (is_s) { | ||
148 | + if (env->vfp.fpcr & FPCR_AH) { | ||
149 | + negf = float_muladd_negate_product; | ||
150 | + } else { | ||
151 | + negx = 0x8000800080008000ull; | ||
152 | + } | ||
153 | + } | ||
154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
156 | } | ||
157 | |||
158 | -- | ||
159 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE | ||
4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 | ||
5 | and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
18 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
25 | CPUARMState *env, uint32_t desc) | ||
26 | { | ||
27 | intptr_t i, j, oprsz = simd_oprsz(desc); | ||
28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
32 | float_status *status = &env->vfp.fp_status_a64; | ||
33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
34 | + int negx = 0, negf = 0; | ||
35 | + | ||
36 | + if (is_s) { | ||
37 | + if (env->vfp.fpcr & FPCR_AH) { | ||
38 | + negf = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + negx = 0x8000; | ||
41 | + } | ||
42 | + } | ||
43 | |||
44 | for (i = 0; i < oprsz; i += 16) { | ||
45 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
47 | |||
48 | for (j = 0; j < 16; j += sizeof(float32)) { | ||
49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; | ||
50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; | ||
51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
52 | float32 aa = *(float32 *)(va + H1_4(i + j)); | ||
53 | |||
54 | *(float32 *)(vd + H1_4(i + j)) = | ||
55 | - float32_muladd(nn, mm, aa, 0, status); | ||
56 | + float32_muladd(nn, mm, aa, negf, status); | ||
57 | } | ||
58 | } | ||
59 | } | ||
60 | -- | ||
61 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE | ||
4 | FMLSL (vectors), using the usual trick of negating by XOR when AH=0 | ||
5 | and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org | ||
13 | [PMM: tweaked commit message] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
18 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
25 | CPUARMState *env, uint32_t desc) | ||
26 | { | ||
27 | intptr_t i, oprsz = simd_oprsz(desc); | ||
28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
31 | float_status *status = &env->vfp.fp_status_a64; | ||
32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
33 | + int negx = 0, negf = 0; | ||
34 | + | ||
35 | + if (is_s) { | ||
36 | + if (env->vfp.fpcr & FPCR_AH) { | ||
37 | + negf = float_muladd_negate_product; | ||
38 | + } else { | ||
39 | + negx = 0x8000; | ||
40 | + } | ||
41 | + } | ||
42 | |||
43 | for (i = 0; i < oprsz; i += sizeof(float32)) { | ||
44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; | ||
45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
49 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
50 | |||
51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | -- | ||
57 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we | ||
2 | can enable FEAT_AFP for '-cpu max', and document that we support it. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | docs/system/arm/emulation.rst | 1 + | ||
8 | target/arm/tcg/cpu64.c | 1 + | ||
9 | 2 files changed, 2 insertions(+) | ||
10 | |||
11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/docs/system/arm/emulation.rst | ||
14 | +++ b/docs/system/arm/emulation.rst | ||
15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | ||
16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) | ||
17 | - FEAT_AdvSIMD (Advanced SIMD Extension) | ||
18 | - FEAT_AES (AESD and AESE instructions) | ||
19 | +- FEAT_AFP (Alternate floating-point behavior) | ||
20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) | ||
21 | - FEAT_ASID16 (16 bit ASID) | ||
22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) | ||
23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/tcg/cpu64.c | ||
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ | ||
29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ | ||
30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ | ||
31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ | ||
32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ | ||
33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ | ||
34 | cpu->isar.id_aa64mmfr1 = t; | ||
35 | -- | ||
36 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | FEAT_RPRES implements an "increased precision" variant of the single | ||
2 | precision FRECPE and FRSQRTE instructions, increasing the result from | ||
3 | an 8 bit to a 12 bit mantissa. This applies only when FPCR.AH == 1. | ||
4 | Note that the halfprec and double versions of these insns retain the | ||
5 | 8 bit precision regardless. | ||
6 | 1 | ||
7 | In this commit we add all the plumbing to make these instructions | ||
8 | call a new helper function when the increased-precision is in | ||
9 | effect. In the following commit we will provide the actual change | ||
10 | in behaviour in the helpers. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/cpu-features.h | 5 +++++ | ||
16 | target/arm/helper.h | 4 ++++ | ||
17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- | ||
18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- | ||
19 | target/arm/tcg/vec_helper.c | 2 ++ | ||
20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- | ||
21 | 6 files changed, 85 insertions(+), 8 deletions(-) | ||
22 | |||
23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/cpu-features.h | ||
26 | +++ b/target/arm/cpu-features.h | ||
27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) | ||
28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); | ||
29 | } | ||
30 | |||
31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) | ||
32 | +{ | ||
33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); | ||
34 | +} | ||
35 | + | ||
36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) | ||
37 | { | ||
38 | /* We always set the AdvSIMD and FP fields identically. */ | ||
39 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/helper.h | ||
42 | +++ b/target/arm/helper.h | ||
43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) | ||
44 | |||
45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) | ||
54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) | ||
55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
56 | |||
57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
61 | |||
62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
66 | |||
67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/translate-a64.c | ||
71 | +++ b/target/arm/tcg/translate-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
73 | gen_helper_recpe_f32, | ||
74 | gen_helper_recpe_f64, | ||
75 | }; | ||
76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
77 | +static const FPScalar1 f_scalar_frecpe_rpres = { | ||
78 | + gen_helper_recpe_f16, | ||
79 | + gen_helper_recpe_rpres_f32, | ||
80 | + gen_helper_recpe_f64, | ||
81 | +}; | ||
82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, | ||
83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) | ||
85 | |||
86 | static const FPScalar1 f_scalar_frecpx = { | ||
87 | gen_helper_frecpx_f16, | ||
88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { | ||
89 | gen_helper_rsqrte_f32, | ||
90 | gen_helper_rsqrte_f64, | ||
91 | }; | ||
92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { | ||
94 | + gen_helper_rsqrte_f16, | ||
95 | + gen_helper_rsqrte_rpres_f32, | ||
96 | + gen_helper_rsqrte_f64, | ||
97 | +}; | ||
98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, | ||
99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) | ||
101 | |||
102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
103 | { | ||
104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
105 | gen_helper_gvec_frecpe_s, | ||
106 | gen_helper_gvec_frecpe_d, | ||
107 | }; | ||
108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { | ||
110 | + gen_helper_gvec_frecpe_h, | ||
111 | + gen_helper_gvec_frecpe_rpres_s, | ||
112 | + gen_helper_gvec_frecpe_d, | ||
113 | +}; | ||
114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) | ||
116 | |||
117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
118 | gen_helper_gvec_frsqrte_h, | ||
119 | gen_helper_gvec_frsqrte_s, | ||
120 | gen_helper_gvec_frsqrte_d, | ||
121 | }; | ||
122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { | ||
124 | + gen_helper_gvec_frsqrte_h, | ||
125 | + gen_helper_gvec_frsqrte_rpres_s, | ||
126 | + gen_helper_gvec_frsqrte_d, | ||
127 | +}; | ||
128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) | ||
130 | |||
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
162 | |||
163 | /* | ||
164 | *** SVE Floating Point Compare with Zero Group | ||
165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/arm/tcg/vec_helper.c | ||
168 | +++ b/target/arm/tcg/vec_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ | ||
170 | |||
171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) | ||
172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) | ||
173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) | ||
174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | ||
175 | |||
176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) | ||
177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) | ||
178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) | ||
179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) | ||
180 | |||
181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) | ||
182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/vfp_helper.c | ||
185 | +++ b/target/arm/vfp_helper.c | ||
186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
187 | return make_float16(f16_val); | ||
188 | } | ||
189 | |||
190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
191 | +/* | ||
192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant | ||
193 | + * which is used when FPCR.AH == 1. | ||
194 | + */ | ||
195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
196 | { | ||
197 | float32 f32 = float32_squash_input_denormal(input, fpst); | ||
198 | uint32_t f32_val = float32_val(f32); | ||
199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
200 | return make_float32(f32_val); | ||
201 | } | ||
202 | |||
203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
204 | +{ | ||
205 | + return do_recpe_f32(input, fpst, false); | ||
206 | +} | ||
207 | + | ||
208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) | ||
209 | +{ | ||
210 | + return do_recpe_f32(input, fpst, true); | ||
211 | +} | ||
212 | + | ||
213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
214 | { | ||
215 | float64 f64 = float64_squash_input_denormal(input, fpst); | ||
216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
217 | return make_float16(val); | ||
218 | } | ||
219 | |||
220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
221 | +/* | ||
222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant | ||
223 | + * which is used when FPCR.AH == 1. | ||
224 | + */ | ||
225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
226 | { | ||
227 | float32 f32 = float32_squash_input_denormal(input, s); | ||
228 | uint32_t val = float32_val(f32); | ||
229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
230 | return make_float32(val); | ||
231 | } | ||
232 | |||
233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
234 | +{ | ||
235 | + return do_rsqrte_f32(input, s, false); | ||
236 | +} | ||
237 | + | ||
238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | ||
239 | +{ | ||
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
246 | -- | ||
247 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the increased precision variation of FRECPE. In the | ||
2 | pseudocode this corresponds to the handling of the | ||
3 | "increasedprecision" boolean in the FPRecipEstimate() and | ||
4 | RecipEstimate() functions. | ||
5 | 1 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ | ||
10 | 1 file changed, 46 insertions(+), 8 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/vfp_helper.c | ||
15 | +++ b/target/arm/vfp_helper.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
17 | return r; | ||
18 | } | ||
19 | |||
20 | +/* | ||
21 | + * Increased precision version: | ||
22 | + * input is a 13 bit fixed point number | ||
23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. | ||
24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 | ||
25 | + */ | ||
26 | +static int recip_estimate_incprec(int input) | ||
27 | +{ | ||
28 | + int a, b, r; | ||
29 | + assert(2048 <= input && input < 4096); | ||
30 | + a = (input * 2) + 1; | ||
31 | + /* | ||
32 | + * The pseudocode expresses this as an operation on infinite | ||
33 | + * precision reals where it calculates 2^25 / a and then looks | ||
34 | + * at the error between that and the rounded-down-to-integer | ||
35 | + * value to see if it should instead round up. We instead | ||
36 | + * follow the same approach as the pseudocode for the 8-bit | ||
37 | + * precision version, and calculate (2 * (2^25 / a)) as an | ||
38 | + * integer so we can do the "add one and halve" to round it. | ||
39 | + * So the 1 << 26 here is correct. | ||
40 | + */ | ||
41 | + b = (1 << 26) / a; | ||
42 | + r = (b + 1) >> 1; | ||
43 | + assert(4096 <= r && r < 8192); | ||
44 | + return r; | ||
45 | +} | ||
46 | + | ||
47 | /* | ||
48 | * Common wrapper to call recip_estimate | ||
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
51 | * callee. | ||
52 | */ | ||
53 | |||
54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, | ||
56 | + bool increasedprecision) | ||
57 | { | ||
58 | uint32_t scaled, estimate; | ||
59 | uint64_t result_frac; | ||
60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
61 | } | ||
62 | } | ||
63 | |||
64 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
66 | - estimate = recip_estimate(scaled); | ||
67 | + if (increasedprecision) { | ||
68 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
70 | + estimate = recip_estimate_incprec(scaled); | ||
71 | + } else { | ||
72 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
74 | + estimate = recip_estimate(scaled); | ||
75 | + } | ||
76 | |||
77 | result_exp = exp_off - *exp; | ||
78 | - result_frac = deposit64(0, 44, 8, estimate); | ||
79 | + if (increasedprecision) { | ||
80 | + result_frac = deposit64(0, 40, 12, estimate); | ||
81 | + } else { | ||
82 | + result_frac = deposit64(0, 44, 8, estimate); | ||
83 | + } | ||
84 | if (result_exp == 0) { | ||
85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); | ||
86 | } else if (result_exp == -1) { | ||
87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
88 | } | ||
89 | |||
90 | f64_frac = call_recip_estimate(&f16_exp, 29, | ||
91 | - ((uint64_t) f16_frac) << (52 - 10)); | ||
92 | + ((uint64_t) f16_frac) << (52 - 10), false); | ||
93 | |||
94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ | ||
95 | f16_val = deposit32(0, 15, 1, f16_sign); | ||
96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
97 | } | ||
98 | |||
99 | f64_frac = call_recip_estimate(&f32_exp, 253, | ||
100 | - ((uint64_t) f32_frac) << (52 - 23)); | ||
101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); | ||
102 | |||
103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ | ||
104 | f32_val = deposit32(0, 31, 1, f32_sign); | ||
105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); | ||
107 | } | ||
108 | |||
109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); | ||
110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); | ||
111 | |||
112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ | ||
113 | f64_val = deposit64(0, 63, 1, f64_sign); | ||
114 | -- | ||
115 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Implement the increased precision variation of FRSQRTE. In the | ||
2 | pseudocode this corresponds to the handling of the | ||
3 | "increasedprecision" boolean in the FPRSqrtEstimate() and | ||
4 | RecipSqrtEstimate() functions. | ||
5 | 1 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- | ||
10 | 1 file changed, 64 insertions(+), 13 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/vfp_helper.c | ||
15 | +++ b/target/arm/vfp_helper.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) | ||
17 | return estimate; | ||
18 | } | ||
19 | |||
20 | +static int do_recip_sqrt_estimate_incprec(int a) | ||
21 | +{ | ||
22 | + /* | ||
23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate | ||
24 | + * in terms of an infinite-precision floating point calculation of a | ||
25 | + * square root. We implement this using the same kind of pure integer | ||
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
27 | + */ | ||
28 | + int64_t b, estimate; | ||
29 | |||
30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
31 | + assert(1024 <= a && a < 4096); | ||
32 | + if (a < 2048) { | ||
33 | + a = a * 2 + 1; | ||
34 | + } else { | ||
35 | + a = (a >> 1) << 1; | ||
36 | + a = (a + 1) * 2; | ||
37 | + } | ||
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
43 | + | ||
44 | + assert(4096 <= estimate && estimate < 8192); | ||
45 | + | ||
46 | + return estimate; | ||
47 | +} | ||
48 | + | ||
49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, | ||
50 | + bool increasedprecision) | ||
51 | { | ||
52 | int estimate; | ||
53 | uint32_t scaled; | ||
54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
55 | frac = extract64(frac, 0, 51) << 1; | ||
56 | } | ||
57 | |||
58 | - if (*exp & 1) { | ||
59 | - /* scaled = UInt('01':fraction<51:45>) */ | ||
60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
61 | + if (increasedprecision) { | ||
62 | + if (*exp & 1) { | ||
63 | + /* scaled = UInt('01':fraction<51:42>) */ | ||
64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); | ||
65 | + } else { | ||
66 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
90 | + } | ||
91 | } | ||
92 | |||
93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
95 | |||
96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); | ||
97 | |||
98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); | ||
99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); | ||
100 | |||
101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ | ||
102 | val = deposit32(0, 15, 1, f16_sign); | ||
103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
104 | |||
105 | f64_frac = ((uint64_t) f32_frac) << 29; | ||
106 | |||
107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); | ||
108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); | ||
109 | |||
110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ | ||
111 | + /* | ||
112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) | ||
113 | + * or for increased precision | ||
114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) | ||
115 | + */ | ||
116 | val = deposit32(0, 31, 1, f32_sign); | ||
117 | val = deposit32(val, 23, 8, f32_exp); | ||
118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
119 | + if (rpres) { | ||
120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); | ||
121 | + } else { | ||
122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
123 | + } | ||
124 | return make_float32(val); | ||
125 | } | ||
126 | |||
127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
128 | return float64_zero; | ||
129 | } | ||
130 | |||
131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); | ||
132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | ||
133 | |||
134 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ | ||
135 | val = deposit64(0, 61, 1, f64_sign); | ||
136 | -- | ||
137 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Now the emulation is complete, we can enable FEAT_RPRES for the 'max' | ||
2 | CPU type. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | docs/system/arm/emulation.rst | 1 + | ||
8 | target/arm/tcg/cpu64.c | 1 + | ||
9 | 2 files changed, 2 insertions(+) | ||
10 | |||
11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/docs/system/arm/emulation.rst | ||
14 | +++ b/docs/system/arm/emulation.rst | ||
15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | ||
16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) | ||
17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) | ||
18 | - FEAT_RNG (Random number generator) | ||
19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) | ||
20 | - FEAT_S2FWB (Stage 2 forced Write-Back) | ||
21 | - FEAT_SB (Speculation Barrier) | ||
22 | - FEAT_SEL2 (Secure EL2) | ||
23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/tcg/cpu64.c | ||
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | cpu->isar.id_aa64isar1 = t; | ||
29 | |||
30 | t = cpu->isar.id_aa64isar2; | ||
31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ | ||
32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ | ||
33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ | ||
34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ | ||
35 | -- | ||
36 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Move ARMFPStatusFlavour to cpu.h so that it can be used to index | ||
4 | the new vfp.fp_status[] array. For now, place the array in an | ||
5 | anonymous union with the existing structures. Adjust the order | ||
6 | of the existing structures to match the enum. | ||
7 | |||
8 | Simplify fpstatus_ptr() using the new array. | ||
9 | |||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- | ||
16 | target/arm/tcg/translate.h | 64 +------------------- | ||
17 | 2 files changed, 70 insertions(+), 113 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/cpu.h | ||
22 | +++ b/target/arm/cpu.h | ||
23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; | ||
24 | |||
25 | typedef struct NVICState NVICState; | ||
26 | |||
27 | +/* | ||
28 | + * Enum for indexing vfp.fp_status[]. | ||
29 | + * | ||
30 | + * FPST_A32: is the "normal" fp status for AArch32 insns | ||
31 | + * FPST_A64: is the "normal" fp status for AArch64 insns | ||
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR.FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR" is default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest, and is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/tcg/translate.h | ||
159 | +++ b/target/arm/tcg/translate.h | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | ||
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
162 | } | ||
163 | |||
164 | -/* | ||
165 | - * Enum for argument to fpstatus_ptr(). | ||
166 | - */ | ||
167 | -typedef enum ARMFPStatusFlavour { | ||
168 | - FPST_A32, | ||
169 | - FPST_A64, | ||
170 | - FPST_A32_F16, | ||
171 | - FPST_A64_F16, | ||
172 | - FPST_AH, | ||
173 | - FPST_AH_F16, | ||
174 | - FPST_STD, | ||
175 | - FPST_STD_F16, | ||
176 | -} ARMFPStatusFlavour; | ||
177 | - | ||
178 | /** | ||
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
180 | * | ||
181 | * We have multiple softfloat float_status fields in the Arm CPU state struct | ||
182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | ||
183 | * been set up to point to the requested field in the CPU state struct. | ||
184 | - * The options are: | ||
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
204 | */ | ||
205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
206 | { | ||
207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | ||
208 | - int offset; | ||
209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | ||
210 | |||
211 | - switch (flavour) { | ||
212 | - case FPST_A32: | ||
213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | ||
214 | - break; | ||
215 | - case FPST_A64: | ||
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
236 | - default: | ||
237 | - g_assert_not_reached(); | ||
238 | - } | ||
239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | ||
240 | return statusptr; | ||
241 | } | ||
242 | -- | ||
243 | 2.34.1 | ||
244 | |||
245 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace standard_fp_status_f16 with fp_status[FPST_STD_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 4 ++-- | ||
12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ | ||
13 | target/arm/vfp_helper.c | 8 ++++---- | ||
14 | 4 files changed, 18 insertions(+), 19 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/cpu.h | ||
19 | +++ b/target/arm/cpu.h | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
21 | float_status ah_fp_status; | ||
22 | float_status ah_fp_status_f16; | ||
23 | float_status standard_fp_status; | ||
24 | - float_status standard_fp_status_f16; | ||
25 | }; | ||
26 | }; | ||
27 | |||
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); | ||
34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | ||
35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); | ||
36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); | ||
37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/tcg/mve_helper.c | ||
51 | +++ b/target/arm/tcg/mve_helper.c | ||
52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
54 | continue; \ | ||
55 | } \ | ||
56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
58 | &env->vfp.standard_fp_status; \ | ||
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
194 | } | ||
195 | if (changed & FPCR_FZ) { | ||
196 | -- | ||
197 | 2.34.1 | ||
198 | |||
199 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace standard_fp_status with fp_status[FPST_STD]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 8 ++++---- | ||
12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- | ||
13 | target/arm/tcg/vec_helper.c | 4 ++-- | ||
14 | target/arm/vfp_helper.c | 4 ++-- | ||
15 | 5 files changed, 22 insertions(+), 23 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/cpu.h | ||
20 | +++ b/target/arm/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
22 | float_status fp_status_f16_a64; | ||
23 | float_status ah_fp_status; | ||
24 | float_status ah_fp_status_f16; | ||
25 | - float_status standard_fp_status; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | env->sau.ctrl = 0; | ||
35 | } | ||
36 | |||
37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); | ||
38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | ||
39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); | ||
40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/mve_helper.c | ||
54 | +++ b/target/arm/tcg/mve_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
56 | continue; \ | ||
57 | } \ | ||
58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
59 | - &env->vfp.standard_fp_status; \ | ||
60 | + &env->vfp.fp_status[FPST_STD]; \ | ||
61 | if (!(mask & 1)) { \ | ||
62 | /* We need the result but without updating flags */ \ | ||
63 | scratch_fpst = *fpst; \ | ||
64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
65 | continue; \ | ||
66 | } \ | ||
67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
192 | } | ||
193 | |||
194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
197 | |||
198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
201 | } | ||
202 | |||
203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/target/arm/vfp_helper.c | ||
206 | +++ b/target/arm/vfp_helper.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
208 | uint32_t a32_flags = 0, a64_flags = 0; | ||
209 | |||
210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
213 | /* FZ16 does not generate an input denormal exception. */ | ||
214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
215 | & ~float_flag_input_denormal_flushed); | ||
216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
225 | -- | ||
226 | 2.34.1 | ||
227 | |||
228 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace ah_fp_status_f16 with fp_status[FPST_AH_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 3 +-- | ||
11 | target/arm/cpu.c | 2 +- | ||
12 | target/arm/vfp_helper.c | 10 +++++----- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/cpu.h | ||
18 | +++ b/target/arm/cpu.h | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; | ||
20 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
23 | - * which means we need an ah_fp_status_f16 as well. | ||
24 | + * which means we need an FPST_AH_F16 as well. | ||
25 | * | ||
26 | * To avoid having to transfer exception bits around, we simply | ||
27 | * say that the FPSCR cumulative exception flags are the logical | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_f16_a32; | ||
30 | float_status fp_status_f16_a64; | ||
31 | float_status ah_fp_status; | ||
32 | - float_status ah_fp_status_f16; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
46 | |||
47 | #ifndef CONFIG_USER_ONLY | ||
48 | if (kvm_enabled()) { | ||
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/vfp_helper.c | ||
52 | +++ b/target/arm/vfp_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
56 | /* | ||
57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
59 | * they are used for insns that must not set the cumulative exception bits. | ||
60 | */ | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
68 | } | ||
69 | |||
70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_FZ) { | ||
84 | bool ftz_enabled = val & FPCR_FZ; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
91 | } | ||
92 | if (changed & FPCR_AH) { | ||
93 | bool ah_enabled = val & FPCR_AH; | ||
94 | -- | ||
95 | 2.34.1 | ||
96 | |||
97 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace ah_fp_status with fp_status[FPST_AH]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 3 +-- | ||
11 | target/arm/cpu.c | 6 +++--- | ||
12 | target/arm/vfp_helper.c | 6 +++--- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/cpu.h | ||
18 | +++ b/target/arm/cpu.h | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; | ||
20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
21 | * using a fixed value for it. | ||
22 | * | ||
23 | - * The ah_fp_status is needed because some insns have different | ||
24 | + * FPST_AH is needed because some insns have different | ||
25 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_a64; | ||
30 | float_status fp_status_f16_a32; | ||
31 | float_status fp_status_f16_a64; | ||
32 | - float_status ah_fp_status; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
73 | } | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_AH) { | ||
84 | -- | ||
85 | 2.34.1 | ||
86 | |||
87 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace fp_status_f16_a64 with fp_status[FPST_A64_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 2 +- | ||
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 9 ++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 14 insertions(+), 16 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/cpu.h | ||
20 | +++ b/target/arm/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
22 | float_status fp_status_a32; | ||
23 | float_status fp_status_a64; | ||
24 | float_status fp_status_f16_a32; | ||
25 | - float_status fp_status_f16_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | } | ||
61 | } | ||
62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
65 | } | ||
66 | |||
67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
71 | float_status *status = &env->vfp.fp_status_a64; | ||
72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
74 | int negx = 0, negf = 0; | ||
75 | |||
76 | if (is_s) { | ||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | } | ||
79 | } | ||
80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
99 | - | ||
100 | for (i = 0; i < oprsz; i += 16) { | ||
101 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/vfp_helper.c | ||
106 | +++ b/target/arm/vfp_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
108 | & ~float_flag_input_denormal_flushed); | ||
109 | |||
110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
131 | } | ||
132 | if (changed & FPCR_FZ16) { | ||
133 | bool ftz_enabled = val & FPCR_FZ16; | ||
134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
144 | } | ||
145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
165 | } | ||
166 | /* | ||
167 | -- | ||
168 | 2.34.1 | ||
169 | |||
170 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace fp_status_f16_a32 with fp_status[FPST_A32_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 2 +- | ||
12 | target/arm/tcg/vec_helper.c | 4 ++-- | ||
13 | target/arm/vfp_helper.c | 14 +++++++------- | ||
14 | 4 files changed, 10 insertions(+), 11 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/cpu.h | ||
19 | +++ b/target/arm/cpu.h | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
21 | struct { | ||
22 | float_status fp_status_a32; | ||
23 | float_status fp_status_a64; | ||
24 | - float_status fp_status_f16_a32; | ||
25 | }; | ||
26 | }; | ||
27 | |||
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
51 | } | ||
52 | |||
53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
56 | |||
57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
60 | } | ||
61 | |||
62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/vfp_helper.c | ||
66 | +++ b/target/arm/vfp_helper.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
70 | /* FZ16 does not generate an input denormal exception. */ | ||
71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
73 | & ~float_flag_input_denormal_flushed); | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
75 | & ~float_flag_input_denormal_flushed); | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
77 | */ | ||
78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | } | ||
87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
92 | } | ||
93 | if (changed & FPCR_FZ16) { | ||
94 | bool ftz_enabled = val & FPCR_FZ16; | ||
95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
115 | softfloat_to_vfp_compare(env, \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) | ||
119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
122 | #undef DO_VFP_cmp | ||
123 | -- | ||
124 | 2.34.1 | ||
125 | |||
126 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace fp_status_a64 with fp_status[FPST_A64]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 2 +- | ||
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 10 +++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 15 insertions(+), 16 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/cpu.h | ||
20 | +++ b/target/arm/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
22 | float_status fp_status[FPST_COUNT]; | ||
23 | struct { | ||
24 | float_status fp_status_a32; | ||
25 | - float_status fp_status_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * round-to-odd -- see above. | ||
48 | */ | ||
49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
50 | - fpst_std = env->vfp.fp_status_a64; | ||
51 | + fpst_std = env->vfp.fp_status[FPST_A64]; | ||
52 | set_default_nan_mode(true, &fpst_std); | ||
53 | set_default_nan_mode(true, &fpst_f16); | ||
54 | fpst_odd = fpst_std; | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
62 | } | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
66 | } | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | intptr_t i, oprsz = simd_oprsz(desc); | ||
70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
72 | - float_status *status = &env->vfp.fp_status_a64; | ||
73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
75 | int negx = 0, negf = 0; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | negx = 0x8000800080008000ull; | ||
79 | } | ||
80 | } | ||
81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
96 | */ | ||
97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
98 | |||
99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; | ||
100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
101 | set_default_nan_mode(true, statusp); | ||
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/arm/vfp_helper.c | ||
107 | +++ b/target/arm/vfp_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
110 | & ~float_flag_input_denormal_flushed); | ||
111 | |||
112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); | ||
114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
116 | /* | ||
117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
118 | * be the architecturally up-to-date exception flag information first. | ||
119 | */ | ||
120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
127 | break; | ||
128 | } | ||
129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
134 | } | ||
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
166 | } else { | ||
167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
170 | } | ||
171 | } | ||
172 | -- | ||
173 | 2.34.1 | ||
174 | |||
175 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Replace fp_status_a32 with fp_status[FPST_A32]. As this was the last of the | ||
4 | old structures, we can remove the anonymous union and struct. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org | ||
9 | [PMM: tweak to account for change to is_ebf()] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/cpu.h | 7 +------ | ||
13 | target/arm/cpu.c | 2 +- | ||
14 | target/arm/tcg/vec_helper.c | 2 +- | ||
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/cpu.h | ||
21 | +++ b/target/arm/cpu.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
23 | uint32_t scratch[8]; | ||
24 | |||
25 | /* There are a number of distinct float control structures. */ | ||
26 | - union { | ||
27 | - float_status fp_status[FPST_COUNT]; | ||
28 | - struct { | ||
29 | - float_status fp_status_a32; | ||
30 | - }; | ||
31 | - }; | ||
32 | + float_status fp_status[FPST_COUNT]; | ||
33 | |||
34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | ||
46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
54 | */ | ||
55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
56 | |||
57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; | ||
59 | set_default_nan_mode(true, statusp); | ||
60 | |||
61 | if (ebf) { | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
87 | } | ||
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
94 | } | ||
95 | if (changed & FPCR_FZ) { | ||
96 | bool ftz_enabled = val & FPCR_FZ; | ||
97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
105 | /* | ||
106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
134 | |||
135 | -- | ||
136 | 2.34.1 | ||
137 | |||
138 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Select the float_status by fp_status[] index instead of by pointer. | ||
4 | No functional change. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ | ||
12 | 1 file changed, 14 insertions(+), 26 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/mve_helper.c | ||
17 | +++ b/target/arm/tcg/mve_helper.c | ||
18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
20 | continue; \ | ||
21 | } \ | ||
22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
23 | - &env->vfp.fp_status[FPST_STD]; \ | ||
24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
25 | if (!(mask & 1)) { \ | ||
26 | /* We need the result but without updating flags */ \ | ||
27 | scratch_fpst = *fpst; \ | ||
28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
29 | r[e] = 0; \ | ||
30 | continue; \ | ||
31 | } \ | ||
32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
33 | - &env->vfp.fp_status[FPST_STD]; \ | ||
34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
35 | if (!(tm & 1)) { \ | ||
36 | /* We need the result but without updating flags */ \ | ||
37 | scratch_fpst = *fpst; \ | ||
38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
40 | continue; \ | ||
41 | } \ | ||
42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
43 | - &env->vfp.fp_status[FPST_STD]; \ | ||
44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
45 | if (!(mask & 1)) { \ | ||
46 | /* We need the result but without updating flags */ \ | ||
47 | scratch_fpst = *fpst; \ | ||
48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
50 | continue; \ | ||
51 | } \ | ||
52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
53 | - &env->vfp.fp_status[FPST_STD]; \ | ||
54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
55 | fpst1 = fpst0; \ | ||
56 | if (!(mask & 1)) { \ | ||
57 | scratch_fpst = *fpst0; \ | ||
58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
60 | continue; \ | ||
61 | } \ | ||
62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
63 | - &env->vfp.fp_status[FPST_STD]; \ | ||
64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
65 | if (!(mask & 1)) { \ | ||
66 | /* We need the result but without updating flags */ \ | ||
67 | scratch_fpst = *fpst; \ | ||
68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
70 | continue; \ | ||
71 | } \ | ||
72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
73 | - &env->vfp.fp_status[FPST_STD]; \ | ||
74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
75 | if (!(mask & 1)) { \ | ||
76 | /* We need the result but without updating flags */ \ | ||
77 | scratch_fpst = *fpst; \ | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
79 | unsigned e; \ | ||
80 | TYPE *m = vm; \ | ||
81 | TYPE ra = (TYPE)ra_in; \ | ||
82 | - float_status *fpst = (ESIZE == 2) ? \ | ||
83 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
84 | - &env->vfp.fp_status[FPST_STD]; \ | ||
85 | + float_status *fpst = \ | ||
86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
88 | if (mask & 1) { \ | ||
89 | TYPE v = m[H##ESIZE(e)]; \ | ||
90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
91 | if ((mask & emask) == 0) { \ | ||
92 | continue; \ | ||
93 | } \ | ||
94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.fp_status[FPST_STD]; \ | ||
96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
97 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
142 | -- | ||
143 | 2.34.1 | ||
144 | |||
145 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Pass the ARMFPStatusFlavour index to DO_VFP_cmp instead of fp_status[FOO]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/vfp_helper.c | 10 +++++----- | ||
11 | 1 file changed, 5 insertions(+), 5 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/vfp_helper.c | ||
16 | +++ b/target/arm/vfp_helper.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) | ||
18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
19 | { \ | ||
20 | softfloat_to_vfp_compare(env, \ | ||
21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ | ||
22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ | ||
23 | } \ | ||
24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
25 | { \ | ||
26 | softfloat_to_vfp_compare(env, \ | ||
27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | ||
29 | } | ||
30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) | ||
34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) | ||
35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) | ||
36 | #undef DO_VFP_cmp | ||
37 | |||
38 | /* Integer to float and float to integer conversions */ | ||
39 | -- | ||
40 | 2.34.1 | ||
41 | |||
42 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Read the FZ16 bit from the source (FPCR), rather than from the proxy via | ||
4 | get_flush_inputs_to_zero. This makes it clear that it does | ||
5 | not matter which of the float_status structures is used. | ||
6 | |||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org | ||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/vec_helper.c | 12 ++++++------ | ||
13 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/vec_helper.c | ||
18 | +++ b/target/arm/tcg/vec_helper.c | ||
19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
21 | |||
22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
24 | + env->vfp.fpcr & FPCR_FZ16); | ||
25 | } | ||
26 | |||
27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
29 | } | ||
30 | } | ||
31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
33 | + env->vfp.fpcr & FPCR_FZ16); | ||
34 | } | ||
35 | |||
36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
40 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
43 | int negx = 0, negf = 0; | ||
44 | |||
45 | if (is_s) { | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
48 | |||
49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
51 | + env->vfp.fpcr & FPCR_FZ16); | ||
52 | } | ||
53 | |||
54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
56 | } | ||
57 | } | ||
58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
60 | + env->vfp.fpcr & FPCR_FZ16); | ||
61 | } | ||
62 | |||
63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
67 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
70 | int negx = 0, negf = 0; | ||
71 | |||
72 | if (is_s) { | ||
73 | -- | ||
74 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Sink common code from the callers into do_fmlal | ||
4 | and do_fmlal_idx. Reorder the arguments to minimize | ||
5 | the re-sorting from the caller's arguments. | ||
6 | |||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org | ||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ | ||
13 | 1 file changed, 16 insertions(+), 12 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/vec_helper.c | ||
18 | +++ b/target/arm/tcg/vec_helper.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) | ||
20 | * as there is not yet SVE versions that might use blocking. | ||
21 | */ | ||
22 | |||
23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
25 | +static void do_fmlal(float32 *d, void *vn, void *vm, | ||
26 | + CPUARMState *env, uint32_t desc, | ||
27 | + ARMFPStatusFlavour fpst_idx, | ||
28 | + uint64_t negx, int negf) | ||
29 | { | ||
30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
42 | } | ||
43 | |||
44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
46 | negx = 0x8000800080008000ull; | ||
47 | } | ||
48 | } | ||
49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
50 | - env->vfp.fpcr & FPCR_FZ16); | ||
51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
52 | } | ||
53 | |||
54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
56 | } | ||
57 | } | ||
58 | |||
59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, | ||
62 | + CPUARMState *env, uint32_t desc, | ||
63 | + ARMFPStatusFlavour fpst_idx, | ||
64 | + uint64_t negx, int negf) | ||
65 | { | ||
66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
68 | intptr_t i, oprsz = simd_oprsz(desc); | ||
69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
74 | |||
75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
76 | - env->vfp.fpcr & FPCR_FZ16); | ||
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
78 | } | ||
79 | |||
80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
82 | negx = 0x8000800080008000ull; | ||
83 | } | ||
84 | } | ||
85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
86 | - env->vfp.fpcr & FPCR_FZ16); | ||
87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
88 | } | ||
89 | |||
90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | -- | ||
92 | 2.34.1 | diff view generated by jsdifflib |