1 | Hi Peter, | 1 | Hi Peter, |
---|---|---|---|
2 | 2 | ||
3 | After reviewing your AFP patch set, and starting to rebase SME2 work | 3 | I know you've sent a PR with some of this, but I don't have a |
4 | on top of that (since I'd been skipping the FPCR.AH portion of SME2), | 4 | complete tree against which to rebase. So this is still |
5 | here are some cleanups which might be folded back into the original | ||
6 | change for FPCR.AH or left as separate follow-ups. | ||
7 | 5 | ||
8 | All of the FPST frobbing is because I add FPST_ZA (not included here) | 6 | Based-on: 20250124162836.2332150-1-peter.maydell@linaro.org |
9 | which is like FPST_FPCR_A64, but with default-nans and whose cumulative | 7 | ("[PATCH 00/76] target/arm: Implement FEAT_AFP and FEAT_RPRES") |
10 | exception flags are ignored. Thus it does not overlap FPST_FPCR_AH | ||
11 | semantics. | ||
12 | 8 | ||
13 | I've not tested this extraction heavily (i.e. just make check). | 9 | Up to patch 22 is unchanged; after patch 22 is all new. |
14 | Do you have further tests for AH=1? | 10 | |
11 | Most of them simplify floatN_muladd to use float_muladd_negate_*. | ||
12 | Five of them (25, 26, 27, 31, 32) address functions missed in | ||
13 | your v1 patchset. | ||
14 | |||
15 | Patch 30 was easier to write with the reversion in patch 29 first. | ||
16 | Obviously I wouldn't expect you to apply it as-is. | ||
15 | 17 | ||
16 | 18 | ||
17 | r~ | 19 | r~ |
18 | 20 | ||
19 | 21 | ||
20 | Based-on: 20250124162836.2332150-1-peter.maydell@linaro.org | 22 | Richard Henderson (34): |
21 | ("[PATCH 00/76] target/arm: Implement FEAT_AFP and FEAT_RPRES") | ||
22 | |||
23 | |||
24 | Richard Henderson (22): | ||
25 | target/arm: Rename FPST_FPCR_A32 to FPST_A32 | 23 | target/arm: Rename FPST_FPCR_A32 to FPST_A32 |
26 | target/arm: Rename FPST_FPCR_A64 to FPST_A64 | 24 | target/arm: Rename FPST_FPCR_A64 to FPST_A64 |
27 | target/arm: Rename FPST_FPCR_F16_A32 to FPST_A32_F16 | 25 | target/arm: Rename FPST_FPCR_F16_A32 to FPST_A32_F16 |
28 | target/arm: Rename FPST_FPCR_F16_A64 to FPST_A64_F16 | 26 | target/arm: Rename FPST_FPCR_F16_A64 to FPST_A64_F16 |
29 | target/arm: Rename FPST_FPCR_AH* to FPST_AH* | 27 | target/arm: Rename FPST_FPCR_AH* to FPST_AH* |
... | ... | ||
39 | target/arm: Simplify fp_status indexing in mve_helper.c | 37 | target/arm: Simplify fp_status indexing in mve_helper.c |
40 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | 38 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c |
41 | target/arm: Move float*_ah_chs to vec_internal.h | 39 | target/arm: Move float*_ah_chs to vec_internal.h |
42 | target/arm: Introduce float*_maybe_ah_chs | 40 | target/arm: Introduce float*_maybe_ah_chs |
43 | target/arm: Use float*_maybe_ah_chs in sve_ftssel_* | 41 | target/arm: Use float*_maybe_ah_chs in sve_ftssel_* |
44 | target/arm: Use float*_maybe_ah_chs in sve_ftmad_* | 42 | target/arm: Use float*_maybe_ah_chs in sve_fcadd_* |
45 | target/arm: Use float*_maybe_ah_chs in sve_ftmad_* | 43 | target/arm: Use float*_maybe_ah_chs in sve_fcadd_* |
46 | target/arm: Use flags for AH negation in do_fmla_zpzzz_* | 44 | target/arm: Use flags for AH negation in do_fmla_zpzzz_* |
45 | target/arm: Use flags for AH negation in sve_ftmad_* | ||
46 | target/arm: Use flags for AH negation in float*_ah_mulsub_f | ||
47 | target/arm: Handle FPCR.AH in gvec_fcmla[hsd] | ||
48 | target/arm: Handle FPCR.AH in gvec_fcmla[hs]_idx | ||
49 | target/arm: Handle FPCR.AH in sve_fcmla_zpzzz_* | ||
50 | target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls | ||
51 | Revert "target/arm: Handle FPCR.AH in FMLSL" | ||
52 | target/arm: Handle FPCR.AH in gvec_fmlal_a64 | ||
53 | target/arm: Handle FPCR.AH in sve2_fmlal_zzxw_s | ||
54 | target/arm: Handle FPCR.AH in sve2_fmlal_zzzw_s | ||
55 | target/arm: Read fz16 from env->vfp.fpcr | ||
56 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
47 | 57 | ||
48 | target/arm/cpu.h | 107 ++++++++++++----------- | 58 | target/arm/cpu.h | 107 +++++----- |
49 | target/arm/tcg/translate.h | 68 +-------------- | 59 | target/arm/helper.h | 14 ++ |
50 | target/arm/tcg/vec_internal.h | 35 ++++++++ | 60 | target/arm/tcg/translate.h | 68 +------ |
51 | target/arm/cpu.c | 28 +++--- | 61 | target/arm/tcg/vec_internal.h | 35 ++++ |
52 | target/arm/tcg/helper-a64.c | 15 +--- | 62 | target/arm/cpu.c | 28 +-- |
53 | target/arm/tcg/mve_helper.c | 44 ++++------ | 63 | target/arm/tcg/helper-a64.c | 15 +- |
64 | target/arm/tcg/mve_helper.c | 44 ++--- | ||
54 | target/arm/tcg/sme_helper.c | 4 +- | 65 | target/arm/tcg/sme_helper.c | 4 +- |
55 | target/arm/tcg/sve_helper.c | 150 ++++++++++++++------------------- | 66 | target/arm/tcg/sve_helper.c | 234 +++++++++++----------- |
56 | target/arm/tcg/translate-a64.c | 100 +++++++++++----------- | 67 | target/arm/tcg/translate-a64.c | 125 ++++++------ |
57 | target/arm/tcg/translate-sme.c | 4 +- | 68 | target/arm/tcg/translate-sme.c | 4 +- |
58 | target/arm/tcg/translate-sve.c | 126 +++++++++++++-------------- | 69 | target/arm/tcg/translate-sve.c | 157 +++++++-------- |
59 | target/arm/tcg/translate-vfp.c | 78 ++++++++--------- | 70 | target/arm/tcg/translate-vfp.c | 78 ++++---- |
60 | target/arm/tcg/vec_helper.c | 26 +++--- | 71 | target/arm/tcg/vec_helper.c | 346 +++++++++++++++++---------------- |
61 | target/arm/vfp_helper.c | 94 ++++++++++----------- | 72 | target/arm/vfp_helper.c | 94 ++++----- |
62 | 14 files changed, 405 insertions(+), 474 deletions(-) | 73 | 15 files changed, 681 insertions(+), 672 deletions(-) |
63 | 74 | ||
64 | -- | 75 | -- |
65 | 2.43.0 | 76 | 2.43.0 |
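
[The float_muladd_negate_* simplification the cover letter refers to, sketched for orientation. This is an illustration only, not a hunk from the series; the helper names are invented, and it assumes QEMU's "fpu/softfloat.h" API (float32_muladd, float32_chs and the float_muladd_negate_* flags).]

    #include "fpu/softfloat.h"

    /* Before: negate one input by hand.  Under FPCR.AH == 1 that negation
     * must not flip the sign of a NaN operand, hence the AH-aware
     * float*_ah_chs / float*_maybe_ah_chs variants in the earlier patches. */
    static float32 fmls32_by_chs(float32 d, float32 n, float32 m,
                                 float_status *st)
    {
        /* d - n * m, computed as (-n) * m + d */
        return float32_muladd(float32_chs(n), m, d, 0, st);
    }

    /* After: let the fused op negate the product via a flag, so no
     * (possibly NaN) input value is negated up front. */
    static float32 fmls32_by_flag(float32 d, float32 n, float32 m,
                                  float_status *st)
    {
        return float32_muladd(n, m, d, float_muladd_negate_product, st);
    }

Per the cover letter, most of the new patches are this change applied to the existing muladd-based helpers.
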
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/translate.h | 6 ++-- | 3 | target/arm/tcg/translate.h | 6 ++-- |
4 | target/arm/tcg/translate-vfp.c | 54 +++++++++++++++++----------------- | 4 | target/arm/tcg/translate-vfp.c | 54 +++++++++++++++++----------------- |
5 | 2 files changed, 30 insertions(+), 30 deletions(-) | 5 | 2 files changed, 30 insertions(+), 30 deletions(-) |
6 | 6 | ||
7 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 7 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
8 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/arm/tcg/translate.h | 9 | --- a/target/arm/tcg/translate.h |
10 | +++ b/target/arm/tcg/translate.h | 10 | +++ b/target/arm/tcg/translate.h |
11 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | 11 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) |
12 | * Enum for argument to fpstatus_ptr(). | 12 | * Enum for argument to fpstatus_ptr(). |
13 | */ | 13 | */ |
14 | typedef enum ARMFPStatusFlavour { | 14 | typedef enum ARMFPStatusFlavour { |
15 | - FPST_FPCR_A32, | 15 | - FPST_FPCR_A32, |
16 | + FPST_A32, | 16 | + FPST_A32, |
17 | FPST_FPCR_A64, | 17 | FPST_FPCR_A64, |
18 | FPST_FPCR_F16_A32, | 18 | FPST_FPCR_F16_A32, |
19 | FPST_FPCR_F16_A64, | 19 | FPST_FPCR_F16_A64, |
20 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 20 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
21 | * been set up to point to the requested field in the CPU state struct. | 21 | * been set up to point to the requested field in the CPU state struct. |
22 | * The options are: | 22 | * The options are: |
23 | * | 23 | * |
24 | - * FPST_FPCR_A32 | 24 | - * FPST_FPCR_A32 |
25 | + * FPST_A32 | 25 | + * FPST_A32 |
26 | * for AArch32 non-FP16 operations controlled by the FPCR | 26 | * for AArch32 non-FP16 operations controlled by the FPCR |
27 | * FPST_FPCR_A64 | 27 | * FPST_FPCR_A64 |
28 | * for AArch64 non-FP16 operations controlled by the FPCR | 28 | * for AArch64 non-FP16 operations controlled by the FPCR |
29 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 29 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
30 | int offset; | 30 | int offset; |
31 | 31 | ||
32 | switch (flavour) { | 32 | switch (flavour) { |
33 | - case FPST_FPCR_A32: | 33 | - case FPST_FPCR_A32: |
34 | + case FPST_A32: | 34 | + case FPST_A32: |
35 | offset = offsetof(CPUARMState, vfp.fp_status_a32); | 35 | offset = offsetof(CPUARMState, vfp.fp_status_a32); |
36 | break; | 36 | break; |
37 | case FPST_FPCR_A64: | 37 | case FPST_FPCR_A64: |
38 | diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c | 38 | diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c |
39 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
40 | --- a/target/arm/tcg/translate-vfp.c | 40 | --- a/target/arm/tcg/translate-vfp.c |
41 | +++ b/target/arm/tcg/translate-vfp.c | 41 | +++ b/target/arm/tcg/translate-vfp.c |
42 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) | 42 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) |
43 | if (sz == 1) { | 43 | if (sz == 1) { |
44 | fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 44 | fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
45 | } else { | 45 | } else { |
46 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 46 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
47 | + fpst = fpstatus_ptr(FPST_A32); | 47 | + fpst = fpstatus_ptr(FPST_A32); |
48 | } | 48 | } |
49 | 49 | ||
50 | tcg_rmode = gen_set_rmode(rounding, fpst); | 50 | tcg_rmode = gen_set_rmode(rounding, fpst); |
51 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) | 51 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) |
52 | if (sz == 1) { | 52 | if (sz == 1) { |
53 | fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 53 | fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
54 | } else { | 54 | } else { |
55 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 55 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
56 | + fpst = fpstatus_ptr(FPST_A32); | 56 | + fpst = fpstatus_ptr(FPST_A32); |
57 | } | 57 | } |
58 | 58 | ||
59 | tcg_shift = tcg_constant_i32(0); | 59 | tcg_shift = tcg_constant_i32(0); |
60 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, | 60 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, |
61 | f0 = tcg_temp_new_i32(); | 61 | f0 = tcg_temp_new_i32(); |
62 | f1 = tcg_temp_new_i32(); | 62 | f1 = tcg_temp_new_i32(); |
63 | fd = tcg_temp_new_i32(); | 63 | fd = tcg_temp_new_i32(); |
64 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 64 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
65 | + fpst = fpstatus_ptr(FPST_A32); | 65 | + fpst = fpstatus_ptr(FPST_A32); |
66 | 66 | ||
67 | vfp_load_reg32(f0, vn); | 67 | vfp_load_reg32(f0, vn); |
68 | vfp_load_reg32(f1, vm); | 68 | vfp_load_reg32(f1, vm); |
69 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, | 69 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, |
70 | f0 = tcg_temp_new_i64(); | 70 | f0 = tcg_temp_new_i64(); |
71 | f1 = tcg_temp_new_i64(); | 71 | f1 = tcg_temp_new_i64(); |
72 | fd = tcg_temp_new_i64(); | 72 | fd = tcg_temp_new_i64(); |
73 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 73 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
74 | + fpst = fpstatus_ptr(FPST_A32); | 74 | + fpst = fpstatus_ptr(FPST_A32); |
75 | 75 | ||
76 | vfp_load_reg64(f0, vn); | 76 | vfp_load_reg64(f0, vn); |
77 | vfp_load_reg64(f1, vm); | 77 | vfp_load_reg64(f1, vm); |
78 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) | 78 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) |
79 | /* VFNMA, VFNMS */ | 79 | /* VFNMA, VFNMS */ |
80 | gen_vfp_negs(vd, vd); | 80 | gen_vfp_negs(vd, vd); |
81 | } | 81 | } |
82 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 82 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
83 | + fpst = fpstatus_ptr(FPST_A32); | 83 | + fpst = fpstatus_ptr(FPST_A32); |
84 | gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); | 84 | gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); |
85 | vfp_store_reg32(vd, a->vd); | 85 | vfp_store_reg32(vd, a->vd); |
86 | return true; | 86 | return true; |
87 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) | 87 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) |
88 | /* VFNMA, VFNMS */ | 88 | /* VFNMA, VFNMS */ |
89 | gen_vfp_negd(vd, vd); | 89 | gen_vfp_negd(vd, vd); |
90 | } | 90 | } |
91 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 91 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
92 | + fpst = fpstatus_ptr(FPST_A32); | 92 | + fpst = fpstatus_ptr(FPST_A32); |
93 | gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); | 93 | gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); |
94 | vfp_store_reg64(vd, a->vd); | 94 | vfp_store_reg64(vd, a->vd); |
95 | return true; | 95 | return true; |
96 | @@ -XXX,XX +XXX,XX @@ static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) | 96 | @@ -XXX,XX +XXX,XX @@ static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) |
97 | 97 | ||
98 | static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) | 98 | static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) |
99 | { | 99 | { |
100 | - gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); | 100 | - gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); |
101 | + gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32)); | 101 | + gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32)); |
102 | } | 102 | } |
103 | 103 | ||
104 | static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) | 104 | static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) |
105 | { | 105 | { |
106 | - gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); | 106 | - gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); |
107 | + gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32)); | 107 | + gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32)); |
108 | } | 108 | } |
109 | 109 | ||
110 | DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) | 110 | DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) |
111 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) | 111 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) |
112 | return true; | 112 | return true; |
113 | } | 113 | } |
114 | 114 | ||
115 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 115 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
116 | + fpst = fpstatus_ptr(FPST_A32); | 116 | + fpst = fpstatus_ptr(FPST_A32); |
117 | ahp_mode = get_ahp_flag(); | 117 | ahp_mode = get_ahp_flag(); |
118 | tmp = tcg_temp_new_i32(); | 118 | tmp = tcg_temp_new_i32(); |
119 | /* The T bit tells us if we want the low or high 16 bits of Vm */ | 119 | /* The T bit tells us if we want the low or high 16 bits of Vm */ |
120 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) | 120 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) |
121 | return true; | 121 | return true; |
122 | } | 122 | } |
123 | 123 | ||
124 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 124 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
125 | + fpst = fpstatus_ptr(FPST_A32); | 125 | + fpst = fpstatus_ptr(FPST_A32); |
126 | ahp_mode = get_ahp_flag(); | 126 | ahp_mode = get_ahp_flag(); |
127 | tmp = tcg_temp_new_i32(); | 127 | tmp = tcg_temp_new_i32(); |
128 | /* The T bit tells us if we want the low or high 16 bits of Vm */ | 128 | /* The T bit tells us if we want the low or high 16 bits of Vm */ |
129 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) | 129 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) |
130 | return true; | 130 | return true; |
131 | } | 131 | } |
132 | 132 | ||
133 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 133 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
134 | + fpst = fpstatus_ptr(FPST_A32); | 134 | + fpst = fpstatus_ptr(FPST_A32); |
135 | tmp = tcg_temp_new_i32(); | 135 | tmp = tcg_temp_new_i32(); |
136 | 136 | ||
137 | vfp_load_reg32(tmp, a->vm); | 137 | vfp_load_reg32(tmp, a->vm); |
138 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) | 138 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) |
139 | return true; | 139 | return true; |
140 | } | 140 | } |
141 | 141 | ||
142 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 142 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
143 | + fpst = fpstatus_ptr(FPST_A32); | 143 | + fpst = fpstatus_ptr(FPST_A32); |
144 | ahp_mode = get_ahp_flag(); | 144 | ahp_mode = get_ahp_flag(); |
145 | tmp = tcg_temp_new_i32(); | 145 | tmp = tcg_temp_new_i32(); |
146 | 146 | ||
147 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) | 147 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) |
148 | return true; | 148 | return true; |
149 | } | 149 | } |
150 | 150 | ||
151 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 151 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
152 | + fpst = fpstatus_ptr(FPST_A32); | 152 | + fpst = fpstatus_ptr(FPST_A32); |
153 | ahp_mode = get_ahp_flag(); | 153 | ahp_mode = get_ahp_flag(); |
154 | tmp = tcg_temp_new_i32(); | 154 | tmp = tcg_temp_new_i32(); |
155 | vm = tcg_temp_new_i64(); | 155 | vm = tcg_temp_new_i64(); |
156 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) | 156 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) |
157 | 157 | ||
158 | tmp = tcg_temp_new_i32(); | 158 | tmp = tcg_temp_new_i32(); |
159 | vfp_load_reg32(tmp, a->vm); | 159 | vfp_load_reg32(tmp, a->vm); |
160 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 160 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
161 | + fpst = fpstatus_ptr(FPST_A32); | 161 | + fpst = fpstatus_ptr(FPST_A32); |
162 | gen_helper_rints(tmp, tmp, fpst); | 162 | gen_helper_rints(tmp, tmp, fpst); |
163 | vfp_store_reg32(tmp, a->vd); | 163 | vfp_store_reg32(tmp, a->vd); |
164 | return true; | 164 | return true; |
165 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) | 165 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) |
166 | 166 | ||
167 | tmp = tcg_temp_new_i64(); | 167 | tmp = tcg_temp_new_i64(); |
168 | vfp_load_reg64(tmp, a->vm); | 168 | vfp_load_reg64(tmp, a->vm); |
169 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 169 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
170 | + fpst = fpstatus_ptr(FPST_A32); | 170 | + fpst = fpstatus_ptr(FPST_A32); |
171 | gen_helper_rintd(tmp, tmp, fpst); | 171 | gen_helper_rintd(tmp, tmp, fpst); |
172 | vfp_store_reg64(tmp, a->vd); | 172 | vfp_store_reg64(tmp, a->vd); |
173 | return true; | 173 | return true; |
174 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) | 174 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) |
175 | 175 | ||
176 | tmp = tcg_temp_new_i32(); | 176 | tmp = tcg_temp_new_i32(); |
177 | vfp_load_reg32(tmp, a->vm); | 177 | vfp_load_reg32(tmp, a->vm); |
178 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 178 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
179 | + fpst = fpstatus_ptr(FPST_A32); | 179 | + fpst = fpstatus_ptr(FPST_A32); |
180 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); | 180 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); |
181 | gen_helper_rints(tmp, tmp, fpst); | 181 | gen_helper_rints(tmp, tmp, fpst); |
182 | gen_restore_rmode(tcg_rmode, fpst); | 182 | gen_restore_rmode(tcg_rmode, fpst); |
183 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) | 183 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) |
184 | 184 | ||
185 | tmp = tcg_temp_new_i64(); | 185 | tmp = tcg_temp_new_i64(); |
186 | vfp_load_reg64(tmp, a->vm); | 186 | vfp_load_reg64(tmp, a->vm); |
187 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 187 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
188 | + fpst = fpstatus_ptr(FPST_A32); | 188 | + fpst = fpstatus_ptr(FPST_A32); |
189 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); | 189 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); |
190 | gen_helper_rintd(tmp, tmp, fpst); | 190 | gen_helper_rintd(tmp, tmp, fpst); |
191 | gen_restore_rmode(tcg_rmode, fpst); | 191 | gen_restore_rmode(tcg_rmode, fpst); |
192 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) | 192 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) |
193 | 193 | ||
194 | tmp = tcg_temp_new_i32(); | 194 | tmp = tcg_temp_new_i32(); |
195 | vfp_load_reg32(tmp, a->vm); | 195 | vfp_load_reg32(tmp, a->vm); |
196 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 196 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
197 | + fpst = fpstatus_ptr(FPST_A32); | 197 | + fpst = fpstatus_ptr(FPST_A32); |
198 | gen_helper_rints_exact(tmp, tmp, fpst); | 198 | gen_helper_rints_exact(tmp, tmp, fpst); |
199 | vfp_store_reg32(tmp, a->vd); | 199 | vfp_store_reg32(tmp, a->vd); |
200 | return true; | 200 | return true; |
201 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) | 201 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) |
202 | 202 | ||
203 | tmp = tcg_temp_new_i64(); | 203 | tmp = tcg_temp_new_i64(); |
204 | vfp_load_reg64(tmp, a->vm); | 204 | vfp_load_reg64(tmp, a->vm); |
205 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 205 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
206 | + fpst = fpstatus_ptr(FPST_A32); | 206 | + fpst = fpstatus_ptr(FPST_A32); |
207 | gen_helper_rintd_exact(tmp, tmp, fpst); | 207 | gen_helper_rintd_exact(tmp, tmp, fpst); |
208 | vfp_store_reg64(tmp, a->vd); | 208 | vfp_store_reg64(tmp, a->vd); |
209 | return true; | 209 | return true; |
210 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) | 210 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) |
211 | vm = tcg_temp_new_i32(); | 211 | vm = tcg_temp_new_i32(); |
212 | vd = tcg_temp_new_i64(); | 212 | vd = tcg_temp_new_i64(); |
213 | vfp_load_reg32(vm, a->vm); | 213 | vfp_load_reg32(vm, a->vm); |
214 | - gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); | 214 | - gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); |
215 | + gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32)); | 215 | + gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32)); |
216 | vfp_store_reg64(vd, a->vd); | 216 | vfp_store_reg64(vd, a->vd); |
217 | return true; | 217 | return true; |
218 | } | 218 | } |
219 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) | 219 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) |
220 | vd = tcg_temp_new_i32(); | 220 | vd = tcg_temp_new_i32(); |
221 | vm = tcg_temp_new_i64(); | 221 | vm = tcg_temp_new_i64(); |
222 | vfp_load_reg64(vm, a->vm); | 222 | vfp_load_reg64(vm, a->vm); |
223 | - gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); | 223 | - gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_FPCR_A32)); |
224 | + gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32)); | 224 | + gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32)); |
225 | vfp_store_reg32(vd, a->vd); | 225 | vfp_store_reg32(vd, a->vd); |
226 | return true; | 226 | return true; |
227 | } | 227 | } |
228 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) | 228 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) |
229 | 229 | ||
230 | vm = tcg_temp_new_i32(); | 230 | vm = tcg_temp_new_i32(); |
231 | vfp_load_reg32(vm, a->vm); | 231 | vfp_load_reg32(vm, a->vm); |
232 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 232 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
233 | + fpst = fpstatus_ptr(FPST_A32); | 233 | + fpst = fpstatus_ptr(FPST_A32); |
234 | if (a->s) { | 234 | if (a->s) { |
235 | /* i32 -> f32 */ | 235 | /* i32 -> f32 */ |
236 | gen_helper_vfp_sitos(vm, vm, fpst); | 236 | gen_helper_vfp_sitos(vm, vm, fpst); |
237 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) | 237 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) |
238 | vm = tcg_temp_new_i32(); | 238 | vm = tcg_temp_new_i32(); |
239 | vd = tcg_temp_new_i64(); | 239 | vd = tcg_temp_new_i64(); |
240 | vfp_load_reg32(vm, a->vm); | 240 | vfp_load_reg32(vm, a->vm); |
241 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 241 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
242 | + fpst = fpstatus_ptr(FPST_A32); | 242 | + fpst = fpstatus_ptr(FPST_A32); |
243 | if (a->s) { | 243 | if (a->s) { |
244 | /* i32 -> f64 */ | 244 | /* i32 -> f64 */ |
245 | gen_helper_vfp_sitod(vd, vm, fpst); | 245 | gen_helper_vfp_sitod(vd, vm, fpst); |
246 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) | 246 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) |
247 | vd = tcg_temp_new_i32(); | 247 | vd = tcg_temp_new_i32(); |
248 | vfp_load_reg32(vd, a->vd); | 248 | vfp_load_reg32(vd, a->vd); |
249 | 249 | ||
250 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 250 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
251 | + fpst = fpstatus_ptr(FPST_A32); | 251 | + fpst = fpstatus_ptr(FPST_A32); |
252 | shift = tcg_constant_i32(frac_bits); | 252 | shift = tcg_constant_i32(frac_bits); |
253 | 253 | ||
254 | /* Switch on op:U:sx bits */ | 254 | /* Switch on op:U:sx bits */ |
255 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) | 255 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) |
256 | vd = tcg_temp_new_i64(); | 256 | vd = tcg_temp_new_i64(); |
257 | vfp_load_reg64(vd, a->vd); | 257 | vfp_load_reg64(vd, a->vd); |
258 | 258 | ||
259 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 259 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
260 | + fpst = fpstatus_ptr(FPST_A32); | 260 | + fpst = fpstatus_ptr(FPST_A32); |
261 | shift = tcg_constant_i32(frac_bits); | 261 | shift = tcg_constant_i32(frac_bits); |
262 | 262 | ||
263 | /* Switch on op:U:sx bits */ | 263 | /* Switch on op:U:sx bits */ |
264 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) | 264 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) |
265 | return true; | 265 | return true; |
266 | } | 266 | } |
267 | 267 | ||
268 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 268 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
269 | + fpst = fpstatus_ptr(FPST_A32); | 269 | + fpst = fpstatus_ptr(FPST_A32); |
270 | vm = tcg_temp_new_i32(); | 270 | vm = tcg_temp_new_i32(); |
271 | vfp_load_reg32(vm, a->vm); | 271 | vfp_load_reg32(vm, a->vm); |
272 | 272 | ||
273 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) | 273 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) |
274 | return true; | 274 | return true; |
275 | } | 275 | } |
276 | 276 | ||
277 | - fpst = fpstatus_ptr(FPST_FPCR_A32); | 277 | - fpst = fpstatus_ptr(FPST_FPCR_A32); |
278 | + fpst = fpstatus_ptr(FPST_A32); | 278 | + fpst = fpstatus_ptr(FPST_A32); |
279 | vm = tcg_temp_new_i64(); | 279 | vm = tcg_temp_new_i64(); |
280 | vd = tcg_temp_new_i32(); | 280 | vd = tcg_temp_new_i32(); |
281 | vfp_load_reg64(vm, a->vm); | 281 | vfp_load_reg64(vm, a->vm); |
282 | -- | 282 | -- |
283 | 2.43.0 | 283 | 2.43.0 |
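
[The rename above is the first of five such renames in the shortlog. Once all five land, the flavour enum should read roughly as below -- a reconstruction from the shortlog for orientation, not the exact final hunk; entries the renames do not touch are elided.]

    typedef enum ARMFPStatusFlavour {
        FPST_A32,        /* was FPST_FPCR_A32 */
        FPST_A64,        /* was FPST_FPCR_A64 */
        FPST_A32_F16,    /* was FPST_FPCR_F16_A32 */
        FPST_A64_F16,    /* was FPST_FPCR_F16_A64 */
        FPST_AH,         /* was FPST_FPCR_AH */
        FPST_AH_F16,     /* was FPST_FPCR_AH_F16 */
        /* ... flavours not renamed by this series ... */
    } ARMFPStatusFlavour;
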
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/translate.h | 8 +-- | 3 | target/arm/tcg/translate.h | 8 +-- |
4 | target/arm/tcg/translate-a64.c | 78 +++++++++++++-------------- | 4 | target/arm/tcg/translate-a64.c | 78 +++++++++++++-------------- |
5 | target/arm/tcg/translate-sme.c | 4 +- | 5 | target/arm/tcg/translate-sme.c | 4 +- |
6 | target/arm/tcg/translate-sve.c | 98 +++++++++++++++++----------------- | 6 | target/arm/tcg/translate-sve.c | 98 +++++++++++++++++----------------- |
7 | 4 files changed, 94 insertions(+), 94 deletions(-) | 7 | 4 files changed, 94 insertions(+), 94 deletions(-) |
8 | 8 | ||
9 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 9 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
10 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/arm/tcg/translate.h | 11 | --- a/target/arm/tcg/translate.h |
12 | +++ b/target/arm/tcg/translate.h | 12 | +++ b/target/arm/tcg/translate.h |
13 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | 13 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) |
14 | */ | 14 | */ |
15 | typedef enum ARMFPStatusFlavour { | 15 | typedef enum ARMFPStatusFlavour { |
16 | FPST_A32, | 16 | FPST_A32, |
17 | - FPST_FPCR_A64, | 17 | - FPST_FPCR_A64, |
18 | + FPST_A64, | 18 | + FPST_A64, |
19 | FPST_FPCR_F16_A32, | 19 | FPST_FPCR_F16_A32, |
20 | FPST_FPCR_F16_A64, | 20 | FPST_FPCR_F16_A64, |
21 | FPST_FPCR_AH, | 21 | FPST_FPCR_AH, |
22 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 22 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
23 | * | 23 | * |
24 | * FPST_A32 | 24 | * FPST_A32 |
25 | * for AArch32 non-FP16 operations controlled by the FPCR | 25 | * for AArch32 non-FP16 operations controlled by the FPCR |
26 | - * FPST_FPCR_A64 | 26 | - * FPST_FPCR_A64 |
27 | + * FPST_A64 | 27 | + * FPST_A64 |
28 | * for AArch64 non-FP16 operations controlled by the FPCR | 28 | * for AArch64 non-FP16 operations controlled by the FPCR |
29 | * FPST_FPCR_F16_A32 | 29 | * FPST_FPCR_F16_A32 |
30 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | 30 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
31 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 31 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
32 | case FPST_A32: | 32 | case FPST_A32: |
33 | offset = offsetof(CPUARMState, vfp.fp_status_a32); | 33 | offset = offsetof(CPUARMState, vfp.fp_status_a32); |
34 | break; | 34 | break; |
35 | - case FPST_FPCR_A64: | 35 | - case FPST_FPCR_A64: |
36 | + case FPST_A64: | 36 | + case FPST_A64: |
37 | offset = offsetof(CPUARMState, vfp.fp_status_a64); | 37 | offset = offsetof(CPUARMState, vfp.fp_status_a64); |
38 | break; | 38 | break; |
39 | case FPST_FPCR_F16_A32: | 39 | case FPST_FPCR_F16_A32: |
40 | @@ -XXX,XX +XXX,XX @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) | 40 | @@ -XXX,XX +XXX,XX @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) |
41 | if (s->fpcr_ah) { | 41 | if (s->fpcr_ah) { |
42 | return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; | 42 | return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; |
43 | } else { | 43 | } else { |
44 | - return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64; | 44 | - return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64; |
45 | + return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64; | 45 | + return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64; |
46 | } | 46 | } |
47 | } | 47 | } |
48 | 48 | ||
49 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | 49 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
50 | index XXXXXXX..XXXXXXX 100644 | 50 | index XXXXXXX..XXXXXXX 100644 |
51 | --- a/target/arm/tcg/translate-a64.c | 51 | --- a/target/arm/tcg/translate-a64.c |
52 | +++ b/target/arm/tcg/translate-a64.c | 52 | +++ b/target/arm/tcg/translate-a64.c |
53 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | 53 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
54 | { | 54 | { |
55 | return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | 55 | return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, |
56 | a->esz == MO_16 ? | 56 | a->esz == MO_16 ? |
57 | - FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 57 | - FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
58 | + FPST_FPCR_F16_A64 : FPST_A64); | 58 | + FPST_FPCR_F16_A64 : FPST_A64); |
59 | } | 59 | } |
60 | 60 | ||
61 | static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | 61 | static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, |
62 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | 62 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, |
63 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | 63 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); |
64 | TCGv_i64 t1 = tcg_constant_i64(0); | 64 | TCGv_i64 t1 = tcg_constant_i64(0); |
65 | if (swap) { | 65 | if (swap) { |
66 | - f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64)); | 66 | - f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64)); |
67 | + f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); | 67 | + f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); |
68 | } else { | 68 | } else { |
69 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 69 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
70 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 70 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
71 | } | 71 | } |
72 | write_fp_dreg(s, a->rd, t0); | 72 | write_fp_dreg(s, a->rd, t0); |
73 | } | 73 | } |
74 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | 74 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, |
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | 75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); |
76 | TCGv_i32 t1 = tcg_constant_i32(0); | 76 | TCGv_i32 t1 = tcg_constant_i32(0); |
77 | if (swap) { | 77 | if (swap) { |
78 | - f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64)); | 78 | - f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64)); |
79 | + f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); | 79 | + f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); |
80 | } else { | 80 | } else { |
81 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 81 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
82 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 82 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
83 | } | 83 | } |
84 | write_fp_sreg(s, a->rd, t0); | 84 | write_fp_sreg(s, a->rd, t0); |
85 | } | 85 | } |
86 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | 86 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
87 | { | 87 | { |
88 | return do_fp3_vector_with_fpsttype(s, a, data, fns, | 88 | return do_fp3_vector_with_fpsttype(s, a, data, fns, |
89 | a->esz == MO_16 ? | 89 | a->esz == MO_16 ? |
90 | - FPST_FPCR_F16_A64 :FPST_FPCR_A64); | 90 | - FPST_FPCR_F16_A64 :FPST_FPCR_A64); |
91 | + FPST_FPCR_F16_A64 :FPST_A64); | 91 | + FPST_FPCR_F16_A64 :FPST_A64); |
92 | } | 92 | } |
93 | 93 | ||
94 | static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | 94 | static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
95 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | 95 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) |
96 | if (fp_access_check(s)) { | 96 | if (fp_access_check(s)) { |
97 | /* Q bit selects BFMLALB vs BFMLALT. */ | 97 | /* Q bit selects BFMLALB vs BFMLALT. */ |
98 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | 98 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, |
99 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64, a->q, | 99 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64, a->q, |
100 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q, | 100 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q, |
101 | gen_helper_gvec_bfmlal); | 101 | gen_helper_gvec_bfmlal); |
102 | } | 102 | } |
103 | return true; | 103 | return true; |
104 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | 104 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) |
105 | } | 105 | } |
106 | 106 | ||
107 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 107 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
108 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, | 108 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, |
109 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 109 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
110 | a->rot, fn[a->esz]); | 110 | a->rot, fn[a->esz]); |
111 | return true; | 111 | return true; |
112 | } | 112 | } |
113 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | 113 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
114 | TCGv_i64 t1 = tcg_temp_new_i64(); | 114 | TCGv_i64 t1 = tcg_temp_new_i64(); |
115 | 115 | ||
116 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | 116 | read_vec_element(s, t1, a->rm, a->idx, MO_64); |
117 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 117 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
118 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 118 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
119 | write_fp_dreg_merging(s, a->rd, a->rn, t0); | 119 | write_fp_dreg_merging(s, a->rd, a->rn, t0); |
120 | } | 120 | } |
121 | break; | 121 | break; |
122 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | 122 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
123 | TCGv_i32 t1 = tcg_temp_new_i32(); | 123 | TCGv_i32 t1 = tcg_temp_new_i32(); |
124 | 124 | ||
125 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); | 125 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); |
126 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 126 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
127 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 127 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
128 | write_fp_sreg_merging(s, a->rd, a->rn, t0); | 128 | write_fp_sreg_merging(s, a->rd, a->rn, t0); |
129 | } | 129 | } |
130 | break; | 130 | break; |
131 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | 131 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
132 | if (neg) { | 132 | if (neg) { |
133 | gen_vfp_maybe_ah_negd(s, t1, t1); | 133 | gen_vfp_maybe_ah_negd(s, t1, t1); |
134 | } | 134 | } |
135 | - gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64)); | 135 | - gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64)); |
136 | + gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | 136 | + gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); |
137 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | 137 | write_fp_dreg_merging(s, a->rd, a->rd, t0); |
138 | } | 138 | } |
139 | break; | 139 | break; |
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | 140 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
141 | if (neg) { | 141 | if (neg) { |
142 | gen_vfp_maybe_ah_negs(s, t1, t1); | 142 | gen_vfp_maybe_ah_negs(s, t1, t1); |
143 | } | 143 | } |
144 | - gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64)); | 144 | - gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64)); |
145 | + gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | 145 | + gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); |
146 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | 146 | write_fp_sreg_merging(s, a->rd, a->rd, t0); |
147 | } | 147 | } |
148 | break; | 148 | break; |
149 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | 149 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, |
150 | } | 150 | } |
151 | 151 | ||
152 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | 152 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
153 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, | 153 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, |
154 | + esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 154 | + esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
155 | a->idx, fns[esz - 1]); | 155 | a->idx, fns[esz - 1]); |
156 | return true; | 156 | return true; |
157 | } | 157 | } |
158 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | 158 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) |
159 | } | 159 | } |
160 | 160 | ||
161 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 161 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
162 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, | 162 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, |
163 | + esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 163 | + esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
164 | (s->fpcr_ah << 5) | (a->idx << 1) | neg, | 164 | (s->fpcr_ah << 5) | (a->idx << 1) | neg, |
165 | fns[esz - 1]); | 165 | fns[esz - 1]); |
166 | return true; | 166 | return true; |
167 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | 167 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) |
168 | if (fp_access_check(s)) { | 168 | if (fp_access_check(s)) { |
169 | /* Q bit selects BFMLALB vs BFMLALT. */ | 169 | /* Q bit selects BFMLALB vs BFMLALT. */ |
170 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | 170 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, |
171 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64, | 171 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64, |
172 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, | 172 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, |
173 | (a->idx << 1) | a->q, | 173 | (a->idx << 1) | a->q, |
174 | gen_helper_gvec_bfmlal_idx); | 174 | gen_helper_gvec_bfmlal_idx); |
175 | } | 175 | } |
176 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | 176 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) |
177 | } | 177 | } |
178 | if (fp_access_check(s)) { | 178 | if (fp_access_check(s)) { |
179 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 179 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
180 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, | 180 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64, |
181 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 181 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
182 | (a->idx << 2) | a->rot, fn); | 182 | (a->idx << 2) | a->rot, fn); |
183 | } | 183 | } |
184 | return true; | 184 | return true; |
185 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | 185 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) |
186 | 186 | ||
187 | read_vec_element(s, t0, a->rn, 0, MO_64); | 187 | read_vec_element(s, t0, a->rn, 0, MO_64); |
188 | read_vec_element(s, t1, a->rn, 1, MO_64); | 188 | read_vec_element(s, t1, a->rn, 1, MO_64); |
189 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 189 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
190 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 190 | + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
191 | write_fp_dreg(s, a->rd, t0); | 191 | write_fp_dreg(s, a->rd, t0); |
192 | } | 192 | } |
193 | break; | 193 | break; |
194 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | 194 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) |
195 | 195 | ||
196 | read_vec_element_i32(s, t0, a->rn, 0, MO_32); | 196 | read_vec_element_i32(s, t0, a->rn, 0, MO_32); |
197 | read_vec_element_i32(s, t1, a->rn, 1, MO_32); | 197 | read_vec_element_i32(s, t1, a->rn, 1, MO_32); |
198 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); | 198 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64)); |
199 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | 199 | + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
200 | write_fp_sreg(s, a->rd, t0); | 200 | write_fp_sreg(s, a->rd, t0); |
201 | } | 201 | } |
202 | break; | 202 | break; |
203 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | 203 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
204 | if (neg_n) { | 204 | if (neg_n) { |
205 | gen_vfp_maybe_ah_negd(s, tn, tn); | 205 | gen_vfp_maybe_ah_negd(s, tn, tn); |
206 | } | 206 | } |
207 | - fpst = fpstatus_ptr(FPST_FPCR_A64); | 207 | - fpst = fpstatus_ptr(FPST_FPCR_A64); |
208 | + fpst = fpstatus_ptr(FPST_A64); | 208 | + fpst = fpstatus_ptr(FPST_A64); |
209 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | 209 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); |
210 | write_fp_dreg_merging(s, a->rd, a->ra, ta); | 210 | write_fp_dreg_merging(s, a->rd, a->ra, ta); |
211 | } | 211 | } |
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | 212 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
213 | if (neg_n) { | 213 | if (neg_n) { |
214 | gen_vfp_maybe_ah_negs(s, tn, tn); | 214 | gen_vfp_maybe_ah_negs(s, tn, tn); |
215 | } | 215 | } |
216 | - fpst = fpstatus_ptr(FPST_FPCR_A64); | 216 | - fpst = fpstatus_ptr(FPST_FPCR_A64); |
217 | + fpst = fpstatus_ptr(FPST_A64); | 217 | + fpst = fpstatus_ptr(FPST_A64); |
218 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | 218 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); |
219 | write_fp_sreg_merging(s, a->rd, a->ra, ta); | 219 | write_fp_sreg_merging(s, a->rd, a->ra, ta); |
220 | } | 220 | } |
221 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | 221 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, |
222 | if (fp_access_check(s)) { | 222 | if (fp_access_check(s)) { |
223 | MemOp esz = a->esz; | 223 | MemOp esz = a->esz; |
224 | int elts = (a->q ? 16 : 8) >> esz; | 224 | int elts = (a->q ? 16 : 8) >> esz; |
225 | - TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 225 | - TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
226 | + TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 226 | + TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
227 | TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | 227 | TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, |
228 | s->fpcr_ah ? fah : fnormal); | 228 | s->fpcr_ah ? fah : fnormal); |
229 | write_fp_sreg(s, a->rd, res); | 229 | write_fp_sreg(s, a->rd, res); |
230 | @@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size, | 230 | @@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size, |
231 | bool cmp_with_zero, bool signal_all_nans) | 231 | bool cmp_with_zero, bool signal_all_nans) |
232 | { | 232 | { |
233 | TCGv_i64 tcg_flags = tcg_temp_new_i64(); | 233 | TCGv_i64 tcg_flags = tcg_temp_new_i64(); |
234 | - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 234 | - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
235 | + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 235 | + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
236 | 236 | ||
237 | if (size == MO_64) { | 237 | if (size == MO_64) { |
238 | TCGv_i64 tcg_vn, tcg_vm; | 238 | TCGv_i64 tcg_vn, tcg_vm; |
239 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | 239 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, |
240 | { | 240 | { |
241 | return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | 241 | return do_fp1_scalar_with_fpsttype(s, a, f, rmode, |
242 | a->esz == MO_16 ? | 242 | a->esz == MO_16 ? |
243 | - FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 243 | - FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
244 | + FPST_FPCR_F16_A64 : FPST_A64); | 244 | + FPST_FPCR_F16_A64 : FPST_A64); |
245 | } | 245 | } |
246 | 246 | ||
247 | static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | 247 | static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, |
248 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | 248 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) |
249 | 249 | ||
250 | static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | 250 | static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) |
251 | { | 251 | { |
252 | - ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64; | 252 | - ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64; |
253 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64; | 253 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64; |
254 | TCGv_i32 t32; | 254 | TCGv_i32 t32; |
255 | int check; | 255 | int check; |
256 | 256 | ||
257 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | 257 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) |
258 | if (fp_access_check(s)) { | 258 | if (fp_access_check(s)) { |
259 | TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); | 259 | TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); |
260 | TCGv_i64 tcg_rd = tcg_temp_new_i64(); | 260 | TCGv_i64 tcg_rd = tcg_temp_new_i64(); |
261 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 261 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
262 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 262 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
263 | 263 | ||
264 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | 264 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); |
265 | write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | 265 | write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); |
266 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | 266 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) |
267 | if (fp_access_check(s)) { | 267 | if (fp_access_check(s)) { |
268 | TCGv_i32 tmp = read_fp_sreg(s, a->rn); | 268 | TCGv_i32 tmp = read_fp_sreg(s, a->rn); |
269 | TCGv_i32 ahp = get_ahp_flag(); | 269 | TCGv_i32 ahp = get_ahp_flag(); |
270 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 270 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
271 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 271 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
272 | 272 | ||
273 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | 273 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); |
274 | /* write_fp_hreg_merging is OK here because top half of result is zero */ | 274 | /* write_fp_hreg_merging is OK here because top half of result is zero */ |
275 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | 275 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) |
276 | if (fp_access_check(s)) { | 276 | if (fp_access_check(s)) { |
277 | TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); | 277 | TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); |
278 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); | 278 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); |
279 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 279 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
280 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 280 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
281 | 281 | ||
282 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | 282 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); |
283 | write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | 283 | write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); |
284 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | 284 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) |
285 | TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); | 285 | TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); |
286 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); | 286 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); |
287 | TCGv_i32 ahp = get_ahp_flag(); | 287 | TCGv_i32 ahp = get_ahp_flag(); |
288 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 288 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
289 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 289 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
290 | 290 | ||
291 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | 291 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); |
292 | /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | 292 | /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ |
293 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | 293 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, |
294 | TCGv_i32 tcg_shift, tcg_single; | 294 | TCGv_i32 tcg_shift, tcg_single; |
295 | TCGv_i64 tcg_double; | 295 | TCGv_i64 tcg_double; |
296 | 296 | ||
297 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 297 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
298 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 298 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
299 | tcg_shift = tcg_constant_i32(shift); | 299 | tcg_shift = tcg_constant_i32(shift); |
300 | 300 | ||
301 | switch (esz) { | 301 | switch (esz) { |
302 | @@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, | 302 | @@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, |
303 | TCGv_ptr tcg_fpstatus; | 303 | TCGv_ptr tcg_fpstatus; |
304 | TCGv_i32 tcg_shift, tcg_rmode, tcg_single; | 304 | TCGv_i32 tcg_shift, tcg_rmode, tcg_single; |
305 | 305 | ||
306 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 306 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
307 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 307 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
308 | tcg_shift = tcg_constant_i32(shift); | 308 | tcg_shift = tcg_constant_i32(shift); |
309 | tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); | 309 | tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); |
310 | 310 | ||
311 | @@ -XXX,XX +XXX,XX @@ static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) | 311 | @@ -XXX,XX +XXX,XX @@ static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) |
312 | } | 312 | } |
313 | if (fp_access_check(s)) { | 313 | if (fp_access_check(s)) { |
314 | TCGv_i64 t = read_fp_dreg(s, a->rn); | 314 | TCGv_i64 t = read_fp_dreg(s, a->rn); |
315 | - TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR_A64); | 315 | - TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR_A64); |
316 | + TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); | 316 | + TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); |
317 | 317 | ||
318 | gen_helper_fjcvtzs(t, t, fpstatus); | 318 | gen_helper_fjcvtzs(t, t, fpstatus); |
319 | 319 | ||
320 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | 320 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) |
321 | */ | 321 | */ |
322 | TCGv_i64 src = read_fp_dreg(s, a->rn); | 322 | TCGv_i64 src = read_fp_dreg(s, a->rn); |
323 | TCGv_i32 dst = tcg_temp_new_i32(); | 323 | TCGv_i32 dst = tcg_temp_new_i32(); |
324 | - gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_FPCR_A64)); | 324 | - gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_FPCR_A64)); |
325 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | 325 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); |
326 | write_fp_sreg_merging(s, a->rd, a->rd, dst); | 326 | write_fp_sreg_merging(s, a->rd, a->rd, dst); |
327 | } | 327 | } |
328 | return true; | 328 | return true; |
329 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) | 329 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) |
330 | { | 330 | { |
331 | TCGv_i32 tcg_lo = tcg_temp_new_i32(); | 331 | TCGv_i32 tcg_lo = tcg_temp_new_i32(); |
332 | TCGv_i32 tcg_hi = tcg_temp_new_i32(); | 332 | TCGv_i32 tcg_hi = tcg_temp_new_i32(); |
333 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 333 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
334 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 334 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
335 | TCGv_i32 ahp = get_ahp_flag(); | 335 | TCGv_i32 ahp = get_ahp_flag(); |
336 | 336 | ||
337 | tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); | 337 | tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); |
338 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) | 338 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) |
339 | static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | 339 | static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) |
340 | { | 340 | { |
341 | TCGv_i32 tmp = tcg_temp_new_i32(); | 341 | TCGv_i32 tmp = tcg_temp_new_i32(); |
342 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 342 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
343 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 343 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
344 | 344 | ||
345 | gen_helper_vfp_fcvtsd(tmp, n, fpst); | 345 | gen_helper_vfp_fcvtsd(tmp, n, fpst); |
346 | tcg_gen_extu_i32_i64(d, tmp); | 346 | tcg_gen_extu_i32_i64(d, tmp); |
347 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | 347 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) |
348 | * with von Neumann rounding (round to odd) | 348 | * with von Neumann rounding (round to odd) |
349 | */ | 349 | */ |
350 | TCGv_i32 tmp = tcg_temp_new_i32(); | 350 | TCGv_i32 tmp = tcg_temp_new_i32(); |
351 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_FPCR_A64)); | 351 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_FPCR_A64)); |
352 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | 352 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); |
353 | tcg_gen_extu_i32_i64(d, tmp); | 353 | tcg_gen_extu_i32_i64(d, tmp); |
354 | } | 354 | } |
355 | 355 | ||
356 | @@ -XXX,XX +XXX,XX @@ TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | 356 | @@ -XXX,XX +XXX,XX @@ TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) |
357 | 357 | ||
358 | static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | 358 | static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) |
359 | { | 359 | { |
360 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); | 360 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64); |
361 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | 361 | + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); |
362 | TCGv_i32 tmp = tcg_temp_new_i32(); | 362 | TCGv_i32 tmp = tcg_temp_new_i32(); |
363 | gen_helper_bfcvt_pair(tmp, n, fpst); | 363 | gen_helper_bfcvt_pair(tmp, n, fpst); |
364 | tcg_gen_extu_i32_i64(d, tmp); | 364 | tcg_gen_extu_i32_i64(d, tmp); |
365 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, | 365 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, |
366 | return check == 0; | 366 | return check == 0; |
367 | } | 367 | } |
368 | 368 | ||
369 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 369 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
370 | + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 370 | + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
371 | if (rmode >= 0) { | 371 | if (rmode >= 0) { |
372 | tcg_rmode = gen_set_rmode(rmode, fpst); | 372 | tcg_rmode = gen_set_rmode(rmode, fpst); |
373 | } | 373 | } |
374 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | 374 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, |
375 | { | 375 | { |
376 | return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | 376 | return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, |
377 | esz == MO_16 ? FPST_FPCR_F16_A64 : | 377 | esz == MO_16 ? FPST_FPCR_F16_A64 : |
378 | - FPST_FPCR_A64); | 378 | - FPST_FPCR_A64); |
379 | + FPST_A64); | 379 | + FPST_A64); |
380 | } | 380 | } |
381 | 381 | ||
382 | static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | 382 | static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, |
383 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | 383 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) |
384 | TCGv_i32 tcg_op = tcg_temp_new_i32(); | 384 | TCGv_i32 tcg_op = tcg_temp_new_i32(); |
385 | int srcelt = a->q ? 2 : 0; | 385 | int srcelt = a->q ? 2 : 0; |
386 | 386 | ||
387 | - fpst = fpstatus_ptr(FPST_FPCR_A64); | 387 | - fpst = fpstatus_ptr(FPST_FPCR_A64); |
388 | + fpst = fpstatus_ptr(FPST_A64); | 388 | + fpst = fpstatus_ptr(FPST_A64); |
389 | 389 | ||
390 | for (pass = 0; pass < 2; pass++) { | 390 | for (pass = 0; pass < 2; pass++) { |
391 | tcg_res[pass] = tcg_temp_new_i64(); | 391 | tcg_res[pass] = tcg_temp_new_i64(); |
392 | diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c | 392 | diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c |
393 | index XXXXXXX..XXXXXXX 100644 | 393 | index XXXXXXX..XXXXXXX 100644 |
394 | --- a/target/arm/tcg/translate-sme.c | 394 | --- a/target/arm/tcg/translate-sme.c |
395 | +++ b/target/arm/tcg/translate-sme.c | 395 | +++ b/target/arm/tcg/translate-sme.c |
396 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, | 396 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, |
397 | TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, | 397 | TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, |
398 | MO_32, gen_helper_sme_fmopa_h) | 398 | MO_32, gen_helper_sme_fmopa_h) |
399 | TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, | 399 | TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, |
400 | - MO_32, FPST_FPCR_A64, gen_helper_sme_fmopa_s) | 400 | - MO_32, FPST_FPCR_A64, gen_helper_sme_fmopa_s) |
401 | + MO_32, FPST_A64, gen_helper_sme_fmopa_s) | 401 | + MO_32, FPST_A64, gen_helper_sme_fmopa_s) |
402 | TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, | 402 | TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, |
403 | - MO_64, FPST_FPCR_A64, gen_helper_sme_fmopa_d) | 403 | - MO_64, FPST_FPCR_A64, gen_helper_sme_fmopa_d) |
404 | + MO_64, FPST_A64, gen_helper_sme_fmopa_d) | 404 | + MO_64, FPST_A64, gen_helper_sme_fmopa_d) |
405 | 405 | ||
406 | TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa) | 406 | TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa) |
407 | 407 | ||
408 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | 408 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
409 | index XXXXXXX..XXXXXXX 100644 | 409 | index XXXXXXX..XXXXXXX 100644 |
410 | --- a/target/arm/tcg/translate-sve.c | 410 | --- a/target/arm/tcg/translate-sve.c |
411 | +++ b/target/arm/tcg/translate-sve.c | 411 | +++ b/target/arm/tcg/translate-sve.c |
412 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | 412 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, |
413 | arg_rrr_esz *a, int data) | 413 | arg_rrr_esz *a, int data) |
414 | { | 414 | { |
415 | return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | 415 | return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, |
416 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 416 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
417 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 417 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
418 | } | 418 | } |
419 | 419 | ||
420 | static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | 420 | static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, |
421 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, | 421 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, |
422 | arg_rprr_esz *a) | 422 | arg_rprr_esz *a) |
423 | { | 423 | { |
424 | return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, | 424 | return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, |
425 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 425 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
426 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 426 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
427 | } | 427 | } |
428 | 428 | ||
429 | /* Invoke a vector expander on two Zregs and an immediate. */ | 429 | /* Invoke a vector expander on two Zregs and an immediate. */ |
430 | @@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | 430 | @@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) |
431 | }; | 431 | }; |
432 | return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | 432 | return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, |
433 | (s->fpcr_ah << 5) | (a->index << 1) | sub, | 433 | (s->fpcr_ah << 5) | (a->index << 1) | sub, |
434 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 434 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
435 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 435 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
436 | } | 436 | } |
437 | 437 | ||
438 | TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | 438 | TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) |
439 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { | 439 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { |
440 | }; | 440 | }; |
441 | TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, | 441 | TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, |
442 | fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, | 442 | fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, |
443 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 443 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
444 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 444 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
445 | 445 | ||
446 | /* | 446 | /* |
447 | *** SVE Floating Point Fast Reduction Group | 447 | *** SVE Floating Point Fast Reduction Group |
448 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | 448 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, |
449 | 449 | ||
450 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); | 450 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); |
451 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); | 451 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); |
452 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 452 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
453 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 453 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
454 | 454 | ||
455 | fn(temp, t_zn, t_pg, status, t_desc); | 455 | fn(temp, t_zn, t_pg, status, t_desc); |
456 | 456 | ||
457 | @@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, | 457 | @@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, |
458 | if (sve_access_check(s)) { | 458 | if (sve_access_check(s)) { |
459 | unsigned vsz = vec_full_reg_size(s); | 459 | unsigned vsz = vec_full_reg_size(s); |
460 | TCGv_ptr status = | 460 | TCGv_ptr status = |
461 | - fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 461 | - fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
462 | + fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 462 | + fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
463 | 463 | ||
464 | tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), | 464 | tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), |
465 | vec_full_reg_offset(s, a->rn), | 465 | vec_full_reg_offset(s, a->rn), |
466 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | 466 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { |
467 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | 467 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, |
468 | ftmad_fns[a->esz], a->rd, a->rn, a->rm, | 468 | ftmad_fns[a->esz], a->rd, a->rn, a->rm, |
469 | a->imm | (s->fpcr_ah << 3), | 469 | a->imm | (s->fpcr_ah << 3), |
470 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 470 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
471 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 471 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
472 | 472 | ||
473 | /* | 473 | /* |
474 | *** SVE Floating Point Accumulating Reduction Group | 474 | *** SVE Floating Point Accumulating Reduction Group |
475 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | 475 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) |
476 | t_pg = tcg_temp_new_ptr(); | 476 | t_pg = tcg_temp_new_ptr(); |
477 | tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); | 477 | tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); |
478 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); | 478 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); |
479 | - t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 479 | - t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
480 | + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 480 | + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
481 | t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); | 481 | t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); |
482 | 482 | ||
483 | fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); | 483 | fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); |
484 | @@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, | 484 | @@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, |
485 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); | 485 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); |
486 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); | 486 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); |
487 | 487 | ||
488 | - status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 488 | - status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
489 | + status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64); | 489 | + status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64); |
490 | desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); | 490 | desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); |
491 | fn(t_zd, t_zn, t_pg, scalar, status, desc); | 491 | fn(t_zd, t_zn, t_pg, scalar, status, desc); |
492 | } | 492 | } |
493 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, | 493 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, |
494 | } | 494 | } |
495 | if (sve_access_check(s)) { | 495 | if (sve_access_check(s)) { |
496 | unsigned vsz = vec_full_reg_size(s); | 496 | unsigned vsz = vec_full_reg_size(s); |
497 | - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 497 | - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
498 | + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 498 | + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
499 | tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), | 499 | tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), |
500 | vec_full_reg_offset(s, a->rn), | 500 | vec_full_reg_offset(s, a->rn), |
501 | vec_full_reg_offset(s, a->rm), | 501 | vec_full_reg_offset(s, a->rm), |
502 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | 502 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { |
503 | }; | 503 | }; |
504 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | 504 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], |
505 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | 505 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), |
506 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 506 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
507 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 507 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
508 | 508 | ||
509 | #define DO_FMLA(NAME, name, ah_name) \ | 509 | #define DO_FMLA(NAME, name, ah_name) \ |
510 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | 510 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ |
511 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | 511 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], |
512 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | 512 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ |
513 | s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | 513 | s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ |
514 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | 514 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ |
515 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 515 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
516 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 516 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
517 | 517 | ||
518 | /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | 518 | /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ |
519 | DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | 519 | DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) |
520 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | 520 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { |
521 | }; | 521 | }; |
522 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | 522 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], |
523 | a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | 523 | a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, |
524 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 524 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
525 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 525 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
526 | 526 | ||
527 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | 527 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { |
528 | NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL | 528 | NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL |
529 | }; | 529 | }; |
530 | TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], | 530 | TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], |
531 | a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, | 531 | a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, |
532 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 532 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
533 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 533 | + a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
534 | 534 | ||
535 | /* | 535 | /* |
536 | *** SVE Floating Point Unary Operations Predicated Group | 536 | *** SVE Floating Point Unary Operations Predicated Group |
537 | */ | 537 | */ |
538 | 538 | ||
539 | TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, | 539 | TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, |
540 | - gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR_A64) | 540 | - gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR_A64) |
541 | + gen_helper_sve_fcvt_sh, a, 0, FPST_A64) | 541 | + gen_helper_sve_fcvt_sh, a, 0, FPST_A64) |
542 | TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | 542 | TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, |
543 | gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64) | 543 | gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64) |
544 | 544 | ||
545 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 545 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
546 | gen_helper_sve_bfcvt, a, 0, | 546 | gen_helper_sve_bfcvt, a, 0, |
547 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64) | 547 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64) |
548 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) | 548 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) |
549 | 549 | ||
550 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 550 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
551 | - gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR_A64) | 551 | - gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR_A64) |
552 | + gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | 552 | + gen_helper_sve_fcvt_dh, a, 0, FPST_A64) |
553 | TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, | 553 | TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, |
554 | gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64) | 554 | gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64) |
555 | TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 555 | TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
556 | - gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR_A64) | 556 | - gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR_A64) |
557 | + gen_helper_sve_fcvt_ds, a, 0, FPST_A64) | 557 | + gen_helper_sve_fcvt_ds, a, 0, FPST_A64) |
558 | TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 558 | TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
559 | - gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR_A64) | 559 | - gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR_A64) |
560 | + gen_helper_sve_fcvt_sd, a, 0, FPST_A64) | 560 | + gen_helper_sve_fcvt_sd, a, 0, FPST_A64) |
561 | 561 | ||
562 | TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 562 | TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
563 | gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64) | 563 | gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64) |
564 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, | 564 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, |
565 | gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64) | 565 | gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64) |
566 | 566 | ||
567 | TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 567 | TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
568 | - gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR_A64) | 568 | - gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR_A64) |
569 | + gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) | 569 | + gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) |
570 | TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 570 | TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
571 | - gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR_A64) | 571 | - gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR_A64) |
572 | + gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64) | 572 | + gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64) |
573 | TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 573 | TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
574 | - gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR_A64) | 574 | - gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR_A64) |
575 | + gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64) | 575 | + gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64) |
576 | TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 576 | TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
577 | - gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR_A64) | 577 | - gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR_A64) |
578 | + gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64) | 578 | + gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64) |
579 | TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 579 | TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
580 | - gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR_A64) | 580 | - gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR_A64) |
581 | + gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64) | 581 | + gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64) |
582 | TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 582 | TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
583 | - gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR_A64) | 583 | - gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR_A64) |
584 | + gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64) | 584 | + gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64) |
585 | 585 | ||
586 | TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 586 | TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, |
587 | - gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR_A64) | 587 | - gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR_A64) |
588 | + gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64) | 588 | + gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64) |
589 | TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 589 | TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, |
590 | - gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR_A64) | 590 | - gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR_A64) |
591 | + gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64) | 591 | + gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64) |
592 | 592 | ||
593 | static gen_helper_gvec_3_ptr * const frint_fns[] = { | 593 | static gen_helper_gvec_3_ptr * const frint_fns[] = { |
594 | NULL, | 594 | NULL, |
595 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = { | 595 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = { |
596 | gen_helper_sve_frint_d | 596 | gen_helper_sve_frint_d |
597 | }; | 597 | }; |
598 | TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], | 598 | TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], |
599 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 599 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
600 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 600 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
601 | 601 | ||
602 | static gen_helper_gvec_3_ptr * const frintx_fns[] = { | 602 | static gen_helper_gvec_3_ptr * const frintx_fns[] = { |
603 | NULL, | 603 | NULL, |
604 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = { | 604 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = { |
605 | gen_helper_sve_frintx_d | 605 | gen_helper_sve_frintx_d |
606 | }; | 606 | }; |
607 | TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], | 607 | TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], |
608 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 608 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
609 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 609 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
610 | 610 | ||
611 | static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, | 611 | static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, |
612 | ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) | 612 | ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) |
613 | @@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, | 613 | @@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, |
614 | } | 614 | } |
615 | 615 | ||
616 | vsz = vec_full_reg_size(s); | 616 | vsz = vec_full_reg_size(s); |
617 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); | 617 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64); |
618 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 618 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
619 | tmode = gen_set_rmode(mode, status); | 619 | tmode = gen_set_rmode(mode, status); |
620 | 620 | ||
621 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), | 621 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), |
622 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | 622 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { |
623 | gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, | 623 | gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, |
624 | }; | 624 | }; |
625 | TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], | 625 | TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], |
626 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 626 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
627 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 627 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
628 | 628 | ||
629 | TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 629 | TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
630 | gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64) | 630 | gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64) |
631 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 631 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
632 | gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64) | 632 | gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64) |
633 | 633 | ||
634 | TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 634 | TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
635 | - gen_helper_sve_scvt_ss, a, 0, FPST_FPCR_A64) | 635 | - gen_helper_sve_scvt_ss, a, 0, FPST_FPCR_A64) |
636 | + gen_helper_sve_scvt_ss, a, 0, FPST_A64) | 636 | + gen_helper_sve_scvt_ss, a, 0, FPST_A64) |
637 | TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 637 | TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
638 | - gen_helper_sve_scvt_ds, a, 0, FPST_FPCR_A64) | 638 | - gen_helper_sve_scvt_ds, a, 0, FPST_FPCR_A64) |
639 | + gen_helper_sve_scvt_ds, a, 0, FPST_A64) | 639 | + gen_helper_sve_scvt_ds, a, 0, FPST_A64) |
640 | 640 | ||
641 | TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 641 | TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
642 | - gen_helper_sve_scvt_sd, a, 0, FPST_FPCR_A64) | 642 | - gen_helper_sve_scvt_sd, a, 0, FPST_FPCR_A64) |
643 | + gen_helper_sve_scvt_sd, a, 0, FPST_A64) | 643 | + gen_helper_sve_scvt_sd, a, 0, FPST_A64) |
644 | TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 644 | TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, |
645 | - gen_helper_sve_scvt_dd, a, 0, FPST_FPCR_A64) | 645 | - gen_helper_sve_scvt_dd, a, 0, FPST_FPCR_A64) |
646 | + gen_helper_sve_scvt_dd, a, 0, FPST_A64) | 646 | + gen_helper_sve_scvt_dd, a, 0, FPST_A64) |
647 | 647 | ||
648 | TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 648 | TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
649 | gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64) | 649 | gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64) |
650 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 650 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
651 | gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64) | 651 | gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64) |
652 | 652 | ||
653 | TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 653 | TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
654 | - gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR_A64) | 654 | - gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR_A64) |
655 | + gen_helper_sve_ucvt_ss, a, 0, FPST_A64) | 655 | + gen_helper_sve_ucvt_ss, a, 0, FPST_A64) |
656 | TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 656 | TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
657 | - gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR_A64) | 657 | - gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR_A64) |
658 | + gen_helper_sve_ucvt_ds, a, 0, FPST_A64) | 658 | + gen_helper_sve_ucvt_ds, a, 0, FPST_A64) |
659 | TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 659 | TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
660 | - gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR_A64) | 660 | - gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR_A64) |
661 | + gen_helper_sve_ucvt_sd, a, 0, FPST_A64) | 661 | + gen_helper_sve_ucvt_sd, a, 0, FPST_A64) |
662 | 662 | ||
663 | TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 663 | TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, |
664 | - gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR_A64) | 664 | - gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR_A64) |
665 | + gen_helper_sve_ucvt_dd, a, 0, FPST_A64) | 665 | + gen_helper_sve_ucvt_dd, a, 0, FPST_A64) |
666 | 666 | ||
667 | /* | 667 | /* |
668 | *** SVE Memory - 32-bit Gather and Unsized Contiguous Group | 668 | *** SVE Memory - 32-bit Gather and Unsized Contiguous Group |
669 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) | 669 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) |
670 | 670 | ||
671 | TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, | 671 | TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, |
672 | gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, | 672 | gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, |
673 | - 0, FPST_FPCR_A64) | 673 | - 0, FPST_FPCR_A64) |
674 | + 0, FPST_A64) | 674 | + 0, FPST_A64) |
675 | TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, | 675 | TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, |
676 | gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, | 676 | gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, |
677 | - 0, FPST_FPCR_A64) | 677 | - 0, FPST_FPCR_A64) |
678 | + 0, FPST_A64) | 678 | + 0, FPST_A64) |
679 | 679 | ||
680 | static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { | 680 | static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { |
681 | NULL, gen_helper_sve2_sqdmlal_zzzw_h, | 681 | NULL, gen_helper_sve2_sqdmlal_zzzw_h, |
682 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, | 682 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, |
683 | gen_gvec_rax1, a) | 683 | gen_gvec_rax1, a) |
684 | 684 | ||
685 | TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, | 685 | TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, |
686 | - gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR_A64) | 686 | - gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR_A64) |
687 | + gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64) | 687 | + gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64) |
688 | TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | 688 | TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, |
689 | - gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR_A64) | 689 | - gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR_A64) |
690 | + gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | 690 | + gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) |
691 | 691 | ||
692 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 692 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
693 | gen_helper_sve_bfcvtnt, a, 0, | 693 | gen_helper_sve_bfcvtnt, a, 0, |
694 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64) | 694 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64) |
695 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) | 695 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) |
696 | 696 | ||
697 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | 697 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, |
698 | - gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR_A64) | 698 | - gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR_A64) |
699 | + gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | 699 | + gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) |
700 | TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, | 700 | TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, |
701 | - gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR_A64) | 701 | - gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR_A64) |
702 | + gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64) | 702 | + gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64) |
703 | 703 | ||
704 | TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, | 704 | TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, |
705 | FPROUNDING_ODD, gen_helper_sve_fcvt_ds) | 705 | FPROUNDING_ODD, gen_helper_sve_fcvt_ds) |
706 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = { | 706 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = { |
707 | gen_helper_flogb_s, gen_helper_flogb_d | 707 | gen_helper_flogb_s, gen_helper_flogb_d |
708 | }; | 708 | }; |
709 | TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], | 709 | TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], |
710 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) | 710 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) |
711 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 711 | + a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
712 | 712 | ||
713 | static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) | 713 | static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) |
714 | { | 714 | { |
715 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | 715 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) |
716 | { | 716 | { |
717 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | 717 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, |
718 | a->rd, a->rn, a->rm, a->ra, sel, | 718 | a->rd, a->rn, a->rm, a->ra, sel, |
719 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64); | 719 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64); |
720 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); | 720 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); |
721 | } | 721 | } |
722 | 722 | ||
723 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | 723 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) |
724 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | 724 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) |
725 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | 725 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, |
726 | a->rd, a->rn, a->rm, a->ra, | 726 | a->rd, a->rn, a->rm, a->ra, |
727 | (a->index << 1) | sel, | 727 | (a->index << 1) | sel, |
728 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64); | 728 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64); |
729 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); | 729 | + s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); |
730 | } | 730 | } |
731 | 731 | ||
732 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | 732 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) |
733 | -- | 733 | -- |
734 | 2.43.0 | 734 | 2.43.0 |
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/translate.h | 6 +++--- | 3 | target/arm/tcg/translate.h | 6 +++--- |
4 | target/arm/tcg/translate-vfp.c | 24 ++++++++++++------------ | 4 | target/arm/tcg/translate-vfp.c | 24 ++++++++++++------------ |
5 | 2 files changed, 15 insertions(+), 15 deletions(-) | 5 | 2 files changed, 15 insertions(+), 15 deletions(-) |
6 | 6 | ||
7 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 7 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
8 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/arm/tcg/translate.h | 9 | --- a/target/arm/tcg/translate.h |
10 | +++ b/target/arm/tcg/translate.h | 10 | +++ b/target/arm/tcg/translate.h |
11 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | 11 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) |
12 | typedef enum ARMFPStatusFlavour { | 12 | typedef enum ARMFPStatusFlavour { |
13 | FPST_A32, | 13 | FPST_A32, |
14 | FPST_A64, | 14 | FPST_A64, |
15 | - FPST_FPCR_F16_A32, | 15 | - FPST_FPCR_F16_A32, |
16 | + FPST_A32_F16, | 16 | + FPST_A32_F16, |
17 | FPST_FPCR_F16_A64, | 17 | FPST_FPCR_F16_A64, |
18 | FPST_FPCR_AH, | 18 | FPST_FPCR_AH, |
19 | FPST_FPCR_AH_F16, | 19 | FPST_FPCR_AH_F16, |
20 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 20 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
21 | * for AArch32 non-FP16 operations controlled by the FPCR | 21 | * for AArch32 non-FP16 operations controlled by the FPCR |
22 | * FPST_A64 | 22 | * FPST_A64 |
23 | * for AArch64 non-FP16 operations controlled by the FPCR | 23 | * for AArch64 non-FP16 operations controlled by the FPCR |
24 | - * FPST_FPCR_F16_A32 | 24 | - * FPST_FPCR_F16_A32 |
25 | + * FPST_A32_F16 | 25 | + * FPST_A32_F16 |
26 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | 26 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
27 | * FPST_FPCR_F16_A64 | 27 | * FPST_FPCR_F16_A64 |
28 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | 28 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used |
29 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 29 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
30 | case FPST_A64: | 30 | case FPST_A64: |
31 | offset = offsetof(CPUARMState, vfp.fp_status_a64); | 31 | offset = offsetof(CPUARMState, vfp.fp_status_a64); |
32 | break; | 32 | break; |
33 | - case FPST_FPCR_F16_A32: | 33 | - case FPST_FPCR_F16_A32: |
34 | + case FPST_A32_F16: | 34 | + case FPST_A32_F16: |
35 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | 35 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); |
36 | break; | 36 | break; |
37 | case FPST_FPCR_F16_A64: | 37 | case FPST_FPCR_F16_A64: |
38 | diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c | 38 | diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c |
39 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
40 | --- a/target/arm/tcg/translate-vfp.c | 40 | --- a/target/arm/tcg/translate-vfp.c |
41 | +++ b/target/arm/tcg/translate-vfp.c | 41 | +++ b/target/arm/tcg/translate-vfp.c |
42 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) | 42 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) |
43 | } | 43 | } |
44 | 44 | ||
45 | if (sz == 1) { | 45 | if (sz == 1) { |
46 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 46 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
47 | + fpst = fpstatus_ptr(FPST_A32_F16); | 47 | + fpst = fpstatus_ptr(FPST_A32_F16); |
48 | } else { | 48 | } else { |
49 | fpst = fpstatus_ptr(FPST_A32); | 49 | fpst = fpstatus_ptr(FPST_A32); |
50 | } | 50 | } |
51 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) | 51 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) |
52 | } | 52 | } |
53 | 53 | ||
54 | if (sz == 1) { | 54 | if (sz == 1) { |
55 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 55 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
56 | + fpst = fpstatus_ptr(FPST_A32_F16); | 56 | + fpst = fpstatus_ptr(FPST_A32_F16); |
57 | } else { | 57 | } else { |
58 | fpst = fpstatus_ptr(FPST_A32); | 58 | fpst = fpstatus_ptr(FPST_A32); |
59 | } | 59 | } |
60 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, | 60 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, |
61 | /* | 61 | /* |
62 | * Do a half-precision operation. Functionally this is | 62 | * Do a half-precision operation. Functionally this is |
63 | * the same as do_vfp_3op_sp(), except: | 63 | * the same as do_vfp_3op_sp(), except: |
64 | - * - it uses the FPST_FPCR_F16_A32 | 64 | - * - it uses the FPST_FPCR_F16_A32 |
65 | + * - it uses the FPST_A32_F16 | 65 | + * - it uses the FPST_A32_F16 |
66 | * - it doesn't need the VFP vector handling (fp16 is a | 66 | * - it doesn't need the VFP vector handling (fp16 is a |
67 | * v8 feature, and in v8 VFP vectors don't exist) | 67 | * v8 feature, and in v8 VFP vectors don't exist) |
68 | * - it does the aa32_fp16_arith feature test | 68 | * - it does the aa32_fp16_arith feature test |
69 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, | 69 | @@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, |
70 | f0 = tcg_temp_new_i32(); | 70 | f0 = tcg_temp_new_i32(); |
71 | f1 = tcg_temp_new_i32(); | 71 | f1 = tcg_temp_new_i32(); |
72 | fd = tcg_temp_new_i32(); | 72 | fd = tcg_temp_new_i32(); |
73 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 73 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
74 | + fpst = fpstatus_ptr(FPST_A32_F16); | 74 | + fpst = fpstatus_ptr(FPST_A32_F16); |
75 | 75 | ||
76 | vfp_load_reg16(f0, vn); | 76 | vfp_load_reg16(f0, vn); |
77 | vfp_load_reg16(f1, vm); | 77 | vfp_load_reg16(f1, vm); |
78 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) | 78 | @@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) |
79 | /* VFNMA, VFNMS */ | 79 | /* VFNMA, VFNMS */ |
80 | gen_vfp_negh(vd, vd); | 80 | gen_vfp_negh(vd, vd); |
81 | } | 81 | } |
82 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 82 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
83 | + fpst = fpstatus_ptr(FPST_A32_F16); | 83 | + fpst = fpstatus_ptr(FPST_A32_F16); |
84 | gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); | 84 | gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); |
85 | vfp_store_reg32(vd, a->vd); | 85 | vfp_store_reg32(vd, a->vd); |
86 | return true; | 86 | return true; |
87 | @@ -XXX,XX +XXX,XX @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2) | 87 | @@ -XXX,XX +XXX,XX @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2) |
88 | 88 | ||
89 | static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) | 89 | static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) |
90 | { | 90 | { |
91 | - gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16_A32)); | 91 | - gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16_A32)); |
92 | + gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16)); | 92 | + gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16)); |
93 | } | 93 | } |
94 | 94 | ||
95 | static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) | 95 | static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) |
96 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) | 96 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) |
97 | 97 | ||
98 | tmp = tcg_temp_new_i32(); | 98 | tmp = tcg_temp_new_i32(); |
99 | vfp_load_reg16(tmp, a->vm); | 99 | vfp_load_reg16(tmp, a->vm); |
100 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 100 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
101 | + fpst = fpstatus_ptr(FPST_A32_F16); | 101 | + fpst = fpstatus_ptr(FPST_A32_F16); |
102 | gen_helper_rinth(tmp, tmp, fpst); | 102 | gen_helper_rinth(tmp, tmp, fpst); |
103 | vfp_store_reg32(tmp, a->vd); | 103 | vfp_store_reg32(tmp, a->vd); |
104 | return true; | 104 | return true; |
105 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) | 105 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) |
106 | 106 | ||
107 | tmp = tcg_temp_new_i32(); | 107 | tmp = tcg_temp_new_i32(); |
108 | vfp_load_reg16(tmp, a->vm); | 108 | vfp_load_reg16(tmp, a->vm); |
109 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 109 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
110 | + fpst = fpstatus_ptr(FPST_A32_F16); | 110 | + fpst = fpstatus_ptr(FPST_A32_F16); |
111 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); | 111 | tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); |
112 | gen_helper_rinth(tmp, tmp, fpst); | 112 | gen_helper_rinth(tmp, tmp, fpst); |
113 | gen_restore_rmode(tcg_rmode, fpst); | 113 | gen_restore_rmode(tcg_rmode, fpst); |
114 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) | 114 | @@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) |
115 | 115 | ||
116 | tmp = tcg_temp_new_i32(); | 116 | tmp = tcg_temp_new_i32(); |
117 | vfp_load_reg16(tmp, a->vm); | 117 | vfp_load_reg16(tmp, a->vm); |
118 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 118 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
119 | + fpst = fpstatus_ptr(FPST_A32_F16); | 119 | + fpst = fpstatus_ptr(FPST_A32_F16); |
120 | gen_helper_rinth_exact(tmp, tmp, fpst); | 120 | gen_helper_rinth_exact(tmp, tmp, fpst); |
121 | vfp_store_reg32(tmp, a->vd); | 121 | vfp_store_reg32(tmp, a->vd); |
122 | return true; | 122 | return true; |
123 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) | 123 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) |
124 | 124 | ||
125 | vm = tcg_temp_new_i32(); | 125 | vm = tcg_temp_new_i32(); |
126 | vfp_load_reg32(vm, a->vm); | 126 | vfp_load_reg32(vm, a->vm); |
127 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 127 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
128 | + fpst = fpstatus_ptr(FPST_A32_F16); | 128 | + fpst = fpstatus_ptr(FPST_A32_F16); |
129 | if (a->s) { | 129 | if (a->s) { |
130 | /* i32 -> f16 */ | 130 | /* i32 -> f16 */ |
131 | gen_helper_vfp_sitoh(vm, vm, fpst); | 131 | gen_helper_vfp_sitoh(vm, vm, fpst); |
132 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) | 132 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) |
133 | vd = tcg_temp_new_i32(); | 133 | vd = tcg_temp_new_i32(); |
134 | vfp_load_reg32(vd, a->vd); | 134 | vfp_load_reg32(vd, a->vd); |
135 | 135 | ||
136 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 136 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
137 | + fpst = fpstatus_ptr(FPST_A32_F16); | 137 | + fpst = fpstatus_ptr(FPST_A32_F16); |
138 | shift = tcg_constant_i32(frac_bits); | 138 | shift = tcg_constant_i32(frac_bits); |
139 | 139 | ||
140 | /* Switch on op:U:sx bits */ | 140 | /* Switch on op:U:sx bits */ |
141 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) | 141 | @@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) |
142 | return true; | 142 | return true; |
143 | } | 143 | } |
144 | 144 | ||
145 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); | 145 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A32); |
146 | + fpst = fpstatus_ptr(FPST_A32_F16); | 146 | + fpst = fpstatus_ptr(FPST_A32_F16); |
147 | vm = tcg_temp_new_i32(); | 147 | vm = tcg_temp_new_i32(); |
148 | vfp_load_reg16(vm, a->vm); | 148 | vfp_load_reg16(vm, a->vm); |
149 | 149 | ||
150 | -- | 150 | -- |
151 | 2.43.0 | 151 | 2.43.0 | diff view generated by jsdifflib |
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/translate.h | 8 ++--- | 3 | target/arm/tcg/translate.h | 8 ++--- |
4 | target/arm/tcg/translate-a64.c | 44 +++++++++++------------ | 4 | target/arm/tcg/translate-a64.c | 44 +++++++++++------------ |
5 | target/arm/tcg/translate-sve.c | 66 +++++++++++++++++----------------- | 5 | target/arm/tcg/translate-sve.c | 66 +++++++++++++++++----------------- |
6 | 3 files changed, 59 insertions(+), 59 deletions(-) | 6 | 3 files changed, 59 insertions(+), 59 deletions(-) |
7 | 7 | ||
8 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 8 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
9 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
10 | --- a/target/arm/tcg/translate.h | 10 | --- a/target/arm/tcg/translate.h |
11 | +++ b/target/arm/tcg/translate.h | 11 | +++ b/target/arm/tcg/translate.h |
12 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 12 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
13 | FPST_A32, | 13 | FPST_A32, |
14 | FPST_A64, | 14 | FPST_A64, |
15 | FPST_A32_F16, | 15 | FPST_A32_F16, |
16 | - FPST_FPCR_F16_A64, | 16 | - FPST_FPCR_F16_A64, |
17 | + FPST_A64_F16, | 17 | + FPST_A64_F16, |
18 | FPST_FPCR_AH, | 18 | FPST_FPCR_AH, |
19 | FPST_FPCR_AH_F16, | 19 | FPST_FPCR_AH_F16, |
20 | FPST_STD, | 20 | FPST_STD, |
21 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 21 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
22 | * for AArch64 non-FP16 operations controlled by the FPCR | 22 | * for AArch64 non-FP16 operations controlled by the FPCR |
23 | * FPST_A32_F16 | 23 | * FPST_A32_F16 |
24 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | 24 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
25 | - * FPST_FPCR_F16_A64 | 25 | - * FPST_FPCR_F16_A64 |
26 | + * FPST_A64_F16 | 26 | + * FPST_A64_F16 |
27 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | 27 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used |
28 | * FPST_FPCR_AH: | 28 | * FPST_FPCR_AH: |
29 | * for AArch64 operations which change behaviour when AH=1 (specifically, | 29 | * for AArch64 operations which change behaviour when AH=1 (specifically, |
30 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 30 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
31 | case FPST_A32_F16: | 31 | case FPST_A32_F16: |
32 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | 32 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); |
33 | break; | 33 | break; |
34 | - case FPST_FPCR_F16_A64: | 34 | - case FPST_FPCR_F16_A64: |
35 | + case FPST_A64_F16: | 35 | + case FPST_A64_F16: |
36 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | 36 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); |
37 | break; | 37 | break; |
38 | case FPST_FPCR_AH: | 38 | case FPST_FPCR_AH: |
39 | @@ -XXX,XX +XXX,XX @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) | 39 | @@ -XXX,XX +XXX,XX @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) |
40 | if (s->fpcr_ah) { | 40 | if (s->fpcr_ah) { |
41 | return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; | 41 | return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; |
42 | } else { | 42 | } else { |
43 | - return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64; | 43 | - return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64; |
44 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | 44 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; |
45 | } | 45 | } |
46 | } | 46 | } |
47 | 47 | ||
48 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | 48 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
49 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/target/arm/tcg/translate-a64.c | 50 | --- a/target/arm/tcg/translate-a64.c |
51 | +++ b/target/arm/tcg/translate-a64.c | 51 | +++ b/target/arm/tcg/translate-a64.c |
52 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | 52 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
53 | { | 53 | { |
54 | return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | 54 | return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, |
55 | a->esz == MO_16 ? | 55 | a->esz == MO_16 ? |
56 | - FPST_FPCR_F16_A64 : FPST_A64); | 56 | - FPST_FPCR_F16_A64 : FPST_A64); |
57 | + FPST_A64_F16 : FPST_A64); | 57 | + FPST_A64_F16 : FPST_A64); |
58 | } | 58 | } |
59 | 59 | ||
60 | static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | 60 | static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, |
61 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | 61 | @@ -XXX,XX +XXX,XX @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, |
62 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | 62 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); |
63 | TCGv_i32 t1 = tcg_constant_i32(0); | 63 | TCGv_i32 t1 = tcg_constant_i32(0); |
64 | if (swap) { | 64 | if (swap) { |
65 | - f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16_A64)); | 65 | - f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16_A64)); |
66 | + f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); | 66 | + f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); |
67 | } else { | 67 | } else { |
68 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); | 68 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); |
69 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | 69 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); |
70 | } | 70 | } |
71 | write_fp_sreg(s, a->rd, t0); | 71 | write_fp_sreg(s, a->rd, t0); |
72 | } | 72 | } |
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | 73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
74 | { | 74 | { |
75 | return do_fp3_vector_with_fpsttype(s, a, data, fns, | 75 | return do_fp3_vector_with_fpsttype(s, a, data, fns, |
76 | a->esz == MO_16 ? | 76 | a->esz == MO_16 ? |
77 | - FPST_FPCR_F16_A64 :FPST_A64); | 77 | - FPST_FPCR_F16_A64 :FPST_A64); |
78 | + FPST_A64_F16 :FPST_A64); | 78 | + FPST_A64_F16 :FPST_A64); |
79 | } | 79 | } |
80 | 80 | ||
81 | static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | 81 | static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
82 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | 82 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) |
83 | } | 83 | } |
84 | 84 | ||
85 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 85 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
86 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 86 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
87 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | 87 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
88 | a->rot, fn[a->esz]); | 88 | a->rot, fn[a->esz]); |
89 | return true; | 89 | return true; |
90 | } | 90 | } |
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | 91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
92 | TCGv_i32 t1 = tcg_temp_new_i32(); | 92 | TCGv_i32 t1 = tcg_temp_new_i32(); |
93 | 93 | ||
94 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | 94 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); |
95 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); | 95 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); |
96 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | 96 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); |
97 | write_fp_hreg_merging(s, a->rd, a->rn, t0); | 97 | write_fp_hreg_merging(s, a->rd, a->rn, t0); |
98 | } | 98 | } |
99 | break; | 99 | break; |
100 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | 100 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
101 | gen_vfp_maybe_ah_negh(s, t1, t1); | 101 | gen_vfp_maybe_ah_negh(s, t1, t1); |
102 | } | 102 | } |
103 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | 103 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, |
104 | - fpstatus_ptr(FPST_FPCR_F16_A64)); | 104 | - fpstatus_ptr(FPST_FPCR_F16_A64)); |
105 | + fpstatus_ptr(FPST_A64_F16)); | 105 | + fpstatus_ptr(FPST_A64_F16)); |
106 | write_fp_hreg_merging(s, a->rd, a->rd, t0); | 106 | write_fp_hreg_merging(s, a->rd, a->rd, t0); |
107 | } | 107 | } |
108 | break; | 108 | break; |
109 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | 109 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, |
110 | } | 110 | } |
111 | 111 | ||
112 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | 112 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
113 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 113 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
114 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | 114 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
115 | a->idx, fns[esz - 1]); | 115 | a->idx, fns[esz - 1]); |
116 | return true; | 116 | return true; |
117 | } | 117 | } |
118 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | 118 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) |
119 | } | 119 | } |
120 | 120 | ||
121 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 121 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
122 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 122 | - esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
123 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | 123 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
124 | (s->fpcr_ah << 5) | (a->idx << 1) | neg, | 124 | (s->fpcr_ah << 5) | (a->idx << 1) | neg, |
125 | fns[esz - 1]); | 125 | fns[esz - 1]); |
126 | return true; | 126 | return true; |
127 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | 127 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) |
128 | } | 128 | } |
129 | if (fp_access_check(s)) { | 129 | if (fp_access_check(s)) { |
130 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | 130 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
131 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, | 131 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64, |
132 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | 132 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
133 | (a->idx << 2) | a->rot, fn); | 133 | (a->idx << 2) | a->rot, fn); |
134 | } | 134 | } |
135 | return true; | 135 | return true; |
136 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | 136 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) |
137 | 137 | ||
138 | read_vec_element_i32(s, t0, a->rn, 0, MO_16); | 138 | read_vec_element_i32(s, t0, a->rn, 0, MO_16); |
139 | read_vec_element_i32(s, t1, a->rn, 1, MO_16); | 139 | read_vec_element_i32(s, t1, a->rn, 1, MO_16); |
140 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); | 140 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64)); |
141 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | 141 | + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); |
142 | write_fp_sreg(s, a->rd, t0); | 142 | write_fp_sreg(s, a->rd, t0); |
143 | } | 143 | } |
144 | break; | 144 | break; |
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | 145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
146 | if (neg_n) { | 146 | if (neg_n) { |
147 | gen_vfp_maybe_ah_negh(s, tn, tn); | 147 | gen_vfp_maybe_ah_negh(s, tn, tn); |
148 | } | 148 | } |
149 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A64); | 149 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A64); |
150 | + fpst = fpstatus_ptr(FPST_A64_F16); | 150 | + fpst = fpstatus_ptr(FPST_A64_F16); |
151 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | 151 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); |
152 | write_fp_hreg_merging(s, a->rd, a->ra, ta); | 152 | write_fp_hreg_merging(s, a->rd, a->ra, ta); |
153 | } | 153 | } |
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | 154 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, |
155 | if (fp_access_check(s)) { | 155 | if (fp_access_check(s)) { |
156 | MemOp esz = a->esz; | 156 | MemOp esz = a->esz; |
157 | int elts = (a->q ? 16 : 8) >> esz; | 157 | int elts = (a->q ? 16 : 8) >> esz; |
158 | - TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 158 | - TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
159 | + TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 159 | + TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
160 | TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | 160 | TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, |
161 | s->fpcr_ah ? fah : fnormal); | 161 | s->fpcr_ah ? fah : fnormal); |
162 | write_fp_sreg(s, a->rd, res); | 162 | write_fp_sreg(s, a->rd, res); |
163 | @@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size, | 163 | @@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, int size, |
164 | bool cmp_with_zero, bool signal_all_nans) | 164 | bool cmp_with_zero, bool signal_all_nans) |
165 | { | 165 | { |
166 | TCGv_i64 tcg_flags = tcg_temp_new_i64(); | 166 | TCGv_i64 tcg_flags = tcg_temp_new_i64(); |
167 | - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 167 | - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
168 | + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); | 168 | + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); |
169 | 169 | ||
170 | if (size == MO_64) { | 170 | if (size == MO_64) { |
171 | TCGv_i64 tcg_vn, tcg_vm; | 171 | TCGv_i64 tcg_vn, tcg_vm; |
172 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | 172 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, |
173 | { | 173 | { |
174 | return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | 174 | return do_fp1_scalar_with_fpsttype(s, a, f, rmode, |
175 | a->esz == MO_16 ? | 175 | a->esz == MO_16 ? |
176 | - FPST_FPCR_F16_A64 : FPST_A64); | 176 | - FPST_FPCR_F16_A64 : FPST_A64); |
177 | + FPST_A64_F16 : FPST_A64); | 177 | + FPST_A64_F16 : FPST_A64); |
178 | } | 178 | } |
179 | 179 | ||
180 | static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | 180 | static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, |
181 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | 181 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) |
182 | if (fp_access_check(s)) { | 182 | if (fp_access_check(s)) { |
183 | TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); | 183 | TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); |
184 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); | 184 | TCGv_i32 tcg_rd = tcg_temp_new_i32(); |
185 | - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); | 185 | - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); |
186 | + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); | 186 | + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); |
187 | TCGv_i32 tcg_ahp = get_ahp_flag(); | 187 | TCGv_i32 tcg_ahp = get_ahp_flag(); |
188 | 188 | ||
189 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | 189 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); |
190 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | 190 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) |
191 | if (fp_access_check(s)) { | 191 | if (fp_access_check(s)) { |
192 | TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); | 192 | TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); |
193 | TCGv_i64 tcg_rd = tcg_temp_new_i64(); | 193 | TCGv_i64 tcg_rd = tcg_temp_new_i64(); |
194 | - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); | 194 | - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); |
195 | + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); | 195 | + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); |
196 | TCGv_i32 tcg_ahp = get_ahp_flag(); | 196 | TCGv_i32 tcg_ahp = get_ahp_flag(); |
197 | 197 | ||
198 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | 198 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); |
199 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | 199 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, |
200 | TCGv_i32 tcg_shift, tcg_single; | 200 | TCGv_i32 tcg_shift, tcg_single; |
201 | TCGv_i64 tcg_double; | 201 | TCGv_i64 tcg_double; |
202 | 202 | ||
203 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 203 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
204 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 204 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
205 | tcg_shift = tcg_constant_i32(shift); | 205 | tcg_shift = tcg_constant_i32(shift); |
206 | 206 | ||
207 | switch (esz) { | 207 | switch (esz) { |
208 | @@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, | 208 | @@ -XXX,XX +XXX,XX @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, |
209 | TCGv_ptr tcg_fpstatus; | 209 | TCGv_ptr tcg_fpstatus; |
210 | TCGv_i32 tcg_shift, tcg_rmode, tcg_single; | 210 | TCGv_i32 tcg_shift, tcg_rmode, tcg_single; |
211 | 211 | ||
212 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 212 | - tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
213 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 213 | + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
214 | tcg_shift = tcg_constant_i32(shift); | 214 | tcg_shift = tcg_constant_i32(shift); |
215 | tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); | 215 | tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); |
216 | 216 | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, | 217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, |
218 | return check == 0; | 218 | return check == 0; |
219 | } | 219 | } |
220 | 220 | ||
221 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 221 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
222 | + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 222 | + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
223 | if (rmode >= 0) { | 223 | if (rmode >= 0) { |
224 | tcg_rmode = gen_set_rmode(rmode, fpst); | 224 | tcg_rmode = gen_set_rmode(rmode, fpst); |
225 | } | 225 | } |
226 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | 226 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, |
227 | gen_helper_gvec_2_ptr * const fns[3]) | 227 | gen_helper_gvec_2_ptr * const fns[3]) |
228 | { | 228 | { |
229 | return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | 229 | return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, |
230 | - esz == MO_16 ? FPST_FPCR_F16_A64 : | 230 | - esz == MO_16 ? FPST_FPCR_F16_A64 : |
231 | + esz == MO_16 ? FPST_A64_F16 : | 231 | + esz == MO_16 ? FPST_A64_F16 : |
232 | FPST_A64); | 232 | FPST_A64); |
233 | } | 233 | } |
234 | 234 | ||
235 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | 235 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) |
236 | TCGv_i32 tcg_res[4]; | 236 | TCGv_i32 tcg_res[4]; |
237 | TCGv_i32 ahp = get_ahp_flag(); | 237 | TCGv_i32 ahp = get_ahp_flag(); |
238 | 238 | ||
239 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A64); | 239 | - fpst = fpstatus_ptr(FPST_FPCR_F16_A64); |
240 | + fpst = fpstatus_ptr(FPST_A64_F16); | 240 | + fpst = fpstatus_ptr(FPST_A64_F16); |
241 | 241 | ||
242 | for (pass = 0; pass < 4; pass++) { | 242 | for (pass = 0; pass < 4; pass++) { |
243 | tcg_res[pass] = tcg_temp_new_i32(); | 243 | tcg_res[pass] = tcg_temp_new_i32(); |
244 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | 244 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
245 | index XXXXXXX..XXXXXXX 100644 | 245 | index XXXXXXX..XXXXXXX 100644 |
246 | --- a/target/arm/tcg/translate-sve.c | 246 | --- a/target/arm/tcg/translate-sve.c |
247 | +++ b/target/arm/tcg/translate-sve.c | 247 | +++ b/target/arm/tcg/translate-sve.c |
248 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | 248 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, |
249 | arg_rrr_esz *a, int data) | 249 | arg_rrr_esz *a, int data) |
250 | { | 250 | { |
251 | return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | 251 | return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, |
252 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 252 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
253 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 253 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
254 | } | 254 | } |
255 | 255 | ||
256 | static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | 256 | static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, |
257 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, | 257 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, |
258 | arg_rprr_esz *a) | 258 | arg_rprr_esz *a) |
259 | { | 259 | { |
260 | return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, | 260 | return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, |
261 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 261 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
262 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 262 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
263 | } | 263 | } |
264 | 264 | ||
265 | /* Invoke a vector expander on two Zregs and an immediate. */ | 265 | /* Invoke a vector expander on two Zregs and an immediate. */ |
266 | @@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | 266 | @@ -XXX,XX +XXX,XX @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) |
267 | }; | 267 | }; |
268 | return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | 268 | return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, |
269 | (s->fpcr_ah << 5) | (a->index << 1) | sub, | 269 | (s->fpcr_ah << 5) | (a->index << 1) | sub, |
270 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 270 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
271 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 271 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
272 | } | 272 | } |
273 | 273 | ||
274 | TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | 274 | TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) |
275 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { | 275 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { |
276 | }; | 276 | }; |
277 | TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, | 277 | TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, |
278 | fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, | 278 | fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, |
279 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 279 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
280 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 280 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
281 | 281 | ||
282 | /* | 282 | /* |
283 | *** SVE Floating Point Fast Reduction Group | 283 | *** SVE Floating Point Fast Reduction Group |
284 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | 284 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, |
285 | 285 | ||
286 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); | 286 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); |
287 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); | 287 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); |
288 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 288 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
289 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 289 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
290 | 290 | ||
291 | fn(temp, t_zn, t_pg, status, t_desc); | 291 | fn(temp, t_zn, t_pg, status, t_desc); |
292 | 292 | ||
293 | @@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, | 293 | @@ -XXX,XX +XXX,XX @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, |
294 | if (sve_access_check(s)) { | 294 | if (sve_access_check(s)) { |
295 | unsigned vsz = vec_full_reg_size(s); | 295 | unsigned vsz = vec_full_reg_size(s); |
296 | TCGv_ptr status = | 296 | TCGv_ptr status = |
297 | - fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 297 | - fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
298 | + fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 298 | + fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
299 | 299 | ||
300 | tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), | 300 | tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), |
301 | vec_full_reg_offset(s, a->rn), | 301 | vec_full_reg_offset(s, a->rn), |
302 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | 302 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { |
303 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | 303 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, |
304 | ftmad_fns[a->esz], a->rd, a->rn, a->rm, | 304 | ftmad_fns[a->esz], a->rd, a->rn, a->rm, |
305 | a->imm | (s->fpcr_ah << 3), | 305 | a->imm | (s->fpcr_ah << 3), |
306 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 306 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
307 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 307 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
308 | 308 | ||
309 | /* | 309 | /* |
310 | *** SVE Floating Point Accumulating Reduction Group | 310 | *** SVE Floating Point Accumulating Reduction Group |
311 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | 311 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) |
312 | t_pg = tcg_temp_new_ptr(); | 312 | t_pg = tcg_temp_new_ptr(); |
313 | tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); | 313 | tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); |
314 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); | 314 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); |
315 | - t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 315 | - t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
316 | + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 316 | + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
317 | t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); | 317 | t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); |
318 | 318 | ||
319 | fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); | 319 | fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); |
320 | @@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, | 320 | @@ -XXX,XX +XXX,XX @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, |
321 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); | 321 | tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); |
322 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); | 322 | tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); |
323 | 323 | ||
324 | - status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64); | 324 | - status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64); |
325 | + status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | 325 | + status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); |
326 | desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); | 326 | desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); |
327 | fn(t_zd, t_zn, t_pg, scalar, status, desc); | 327 | fn(t_zd, t_zn, t_pg, scalar, status, desc); |
328 | } | 328 | } |
329 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, | 329 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, |
330 | } | 330 | } |
331 | if (sve_access_check(s)) { | 331 | if (sve_access_check(s)) { |
332 | unsigned vsz = vec_full_reg_size(s); | 332 | unsigned vsz = vec_full_reg_size(s); |
333 | - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 333 | - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
334 | + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 334 | + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
335 | tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), | 335 | tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), |
336 | vec_full_reg_offset(s, a->rn), | 336 | vec_full_reg_offset(s, a->rn), |
337 | vec_full_reg_offset(s, a->rm), | 337 | vec_full_reg_offset(s, a->rm), |
338 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | 338 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { |
339 | }; | 339 | }; |
340 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | 340 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], |
341 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | 341 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), |
342 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 342 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
343 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 343 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
344 | 344 | ||
345 | #define DO_FMLA(NAME, name, ah_name) \ | 345 | #define DO_FMLA(NAME, name, ah_name) \ |
346 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | 346 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ |
347 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | 347 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], |
348 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | 348 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ |
349 | s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | 349 | s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ |
350 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | 350 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ |
351 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 351 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
352 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 352 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
353 | 353 | ||
354 | /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | 354 | /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ |
355 | DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | 355 | DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) |
356 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | 356 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { |
357 | }; | 357 | }; |
358 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | 358 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], |
359 | a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | 359 | a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, |
360 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 360 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
361 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 361 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
362 | 362 | ||
363 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | 363 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { |
364 | NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL | 364 | NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL |
365 | }; | 365 | }; |
366 | TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], | 366 | TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], |
367 | a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, | 367 | a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, |
368 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 368 | - a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
369 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 369 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
370 | 370 | ||
371 | /* | 371 | /* |
372 | *** SVE Floating Point Unary Operations Predicated Group | 372 | *** SVE Floating Point Unary Operations Predicated Group |
373 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], | 373 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], |
374 | TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, | 374 | TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, |
375 | gen_helper_sve_fcvt_sh, a, 0, FPST_A64) | 375 | gen_helper_sve_fcvt_sh, a, 0, FPST_A64) |
376 | TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | 376 | TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, |
377 | - gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64) | 377 | - gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64) |
378 | + gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | 378 | + gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) |
379 | 379 | ||
380 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 380 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
381 | gen_helper_sve_bfcvt, a, 0, | 381 | gen_helper_sve_bfcvt, a, 0, |
382 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 382 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
383 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 383 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
384 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | 384 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) |
385 | TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, | 385 | TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, |
386 | - gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64) | 386 | - gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64) |
387 | + gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16) | 387 | + gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16) |
388 | TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, | 388 | TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, |
389 | gen_helper_sve_fcvt_ds, a, 0, FPST_A64) | 389 | gen_helper_sve_fcvt_ds, a, 0, FPST_A64) |
390 | TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, | 390 | TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, |
391 | gen_helper_sve_fcvt_sd, a, 0, FPST_A64) | 391 | gen_helper_sve_fcvt_sd, a, 0, FPST_A64) |
392 | 392 | ||
393 | TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 393 | TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
394 | - gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64) | 394 | - gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64) |
395 | + gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16) | 395 | + gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16) |
396 | TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 396 | TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
397 | - gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16_A64) | 397 | - gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16_A64) |
398 | + gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16) | 398 | + gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16) |
399 | TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | 399 | TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, |
400 | - gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16_A64) | 400 | - gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16_A64) |
401 | + gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16) | 401 | + gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16) |
402 | TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | 402 | TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, |
403 | - gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16_A64) | 403 | - gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16_A64) |
404 | + gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16) | 404 | + gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16) |
405 | TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, | 405 | TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, |
406 | - gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16_A64) | 406 | - gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16_A64) |
407 | + gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16) | 407 | + gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16) |
408 | TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, | 408 | TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, |
409 | - gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64) | 409 | - gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64) |
410 | + gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16) | 410 | + gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16) |
411 | 411 | ||
412 | TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 412 | TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
413 | gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) | 413 | gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) |
414 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = { | 414 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frint_fns[] = { |
415 | gen_helper_sve_frint_d | 415 | gen_helper_sve_frint_d |
416 | }; | 416 | }; |
417 | TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], | 417 | TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], |
418 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 418 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
419 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 419 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
420 | 420 | ||
421 | static gen_helper_gvec_3_ptr * const frintx_fns[] = { | 421 | static gen_helper_gvec_3_ptr * const frintx_fns[] = { |
422 | NULL, | 422 | NULL, |
423 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = { | 423 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = { |
424 | gen_helper_sve_frintx_d | 424 | gen_helper_sve_frintx_d |
425 | }; | 425 | }; |
426 | TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], | 426 | TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], |
427 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 427 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
428 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 428 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
429 | 429 | ||
430 | static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, | 430 | static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, |
431 | ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) | 431 | ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) |
432 | @@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, | 432 | @@ -XXX,XX +XXX,XX @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, |
433 | } | 433 | } |
434 | 434 | ||
435 | vsz = vec_full_reg_size(s); | 435 | vsz = vec_full_reg_size(s); |
436 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); | 436 | - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64); |
437 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | 437 | + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
438 | tmode = gen_set_rmode(mode, status); | 438 | tmode = gen_set_rmode(mode, status); |
439 | 439 | ||
440 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), | 440 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), |
441 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | 441 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { |
442 | gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, | 442 | gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, |
443 | }; | 443 | }; |
444 | TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], | 444 | TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], |
445 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 445 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
446 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 446 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
447 | 447 | ||
448 | TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 448 | TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
449 | - gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64) | 449 | - gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64) |
450 | + gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16) | 450 | + gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16) |
451 | TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, | 451 | TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, |
452 | - gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16_A64) | 452 | - gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16_A64) |
453 | + gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16) | 453 | + gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16) |
454 | TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 454 | TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
455 | - gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64) | 455 | - gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64) |
456 | + gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16) | 456 | + gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16) |
457 | 457 | ||
458 | TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 458 | TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
459 | gen_helper_sve_scvt_ss, a, 0, FPST_A64) | 459 | gen_helper_sve_scvt_ss, a, 0, FPST_A64) |
460 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 460 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, |
461 | gen_helper_sve_scvt_dd, a, 0, FPST_A64) | 461 | gen_helper_sve_scvt_dd, a, 0, FPST_A64) |
462 | 462 | ||
463 | TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, | 463 | TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, |
464 | - gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64) | 464 | - gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64) |
465 | + gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16) | 465 | + gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16) |
466 | TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, | 466 | TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, |
467 | - gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16_A64) | 467 | - gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16_A64) |
468 | + gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16) | 468 | + gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16) |
469 | TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 469 | TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
470 | - gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64) | 470 | - gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64) |
471 | + gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16) | 471 | + gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16) |
472 | 472 | ||
473 | TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, | 473 | TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, |
474 | gen_helper_sve_ucvt_ss, a, 0, FPST_A64) | 474 | gen_helper_sve_ucvt_ss, a, 0, FPST_A64) |
475 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = { | 475 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = { |
476 | gen_helper_flogb_s, gen_helper_flogb_d | 476 | gen_helper_flogb_s, gen_helper_flogb_d |
477 | }; | 477 | }; |
478 | TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], | 478 | TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], |
479 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) | 479 | - a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64) |
480 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | 480 | + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
481 | 481 | ||
482 | static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) | 482 | static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) |
483 | { | 483 | { |
484 | -- | 484 | -- |
485 | 2.43.0 | 485 | 2.43.0 |
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/translate.h | 14 +++++++------- | 3 | target/arm/tcg/translate.h | 14 +++++++------- |
4 | target/arm/tcg/translate-a64.c | 8 ++++---- | 4 | target/arm/tcg/translate-a64.c | 8 ++++---- |
5 | target/arm/tcg/translate-sve.c | 8 ++++---- | 5 | target/arm/tcg/translate-sve.c | 8 ++++---- |
6 | 3 files changed, 15 insertions(+), 15 deletions(-) | 6 | 3 files changed, 15 insertions(+), 15 deletions(-) |
7 | 7 | ||
8 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 8 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
9 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
10 | --- a/target/arm/tcg/translate.h | 10 | --- a/target/arm/tcg/translate.h |
11 | +++ b/target/arm/tcg/translate.h | 11 | +++ b/target/arm/tcg/translate.h |
12 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 12 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
13 | FPST_A64, | 13 | FPST_A64, |
14 | FPST_A32_F16, | 14 | FPST_A32_F16, |
15 | FPST_A64_F16, | 15 | FPST_A64_F16, |
16 | - FPST_FPCR_AH, | 16 | - FPST_FPCR_AH, |
17 | - FPST_FPCR_AH_F16, | 17 | - FPST_FPCR_AH_F16, |
18 | + FPST_AH, | 18 | + FPST_AH, |
19 | + FPST_AH_F16, | 19 | + FPST_AH_F16, |
20 | FPST_STD, | 20 | FPST_STD, |
21 | FPST_STD_F16, | 21 | FPST_STD_F16, |
22 | } ARMFPStatusFlavour; | 22 | } ARMFPStatusFlavour; |
23 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | 23 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
24 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | 24 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
25 | * FPST_A64_F16 | 25 | * FPST_A64_F16 |
26 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | 26 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used |
27 | - * FPST_FPCR_AH: | 27 | - * FPST_FPCR_AH: |
28 | + * FPST_AH: | 28 | + * FPST_AH: |
29 | * for AArch64 operations which change behaviour when AH=1 (specifically, | 29 | * for AArch64 operations which change behaviour when AH=1 (specifically, |
30 | * bfloat16 conversions and multiplies, and the reciprocal and square root | 30 | * bfloat16 conversions and multiplies, and the reciprocal and square root |
31 | * estimate/step insns) | 31 | * estimate/step insns) |
32 | - * FPST_FPCR_AH_F16: | 32 | - * FPST_FPCR_AH_F16: |
33 | + * FPST_AH_F16: | 33 | + * FPST_AH_F16: |
34 | * ditto, but for half-precision operations | 34 | * ditto, but for half-precision operations |
35 | * FPST_STD | 35 | * FPST_STD |
36 | * for A32/T32 Neon operations using the "standard FPSCR value" | 36 | * for A32/T32 Neon operations using the "standard FPSCR value" |
37 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 37 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
38 | case FPST_A64_F16: | 38 | case FPST_A64_F16: |
39 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | 39 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); |
40 | break; | 40 | break; |
41 | - case FPST_FPCR_AH: | 41 | - case FPST_FPCR_AH: |
42 | + case FPST_AH: | 42 | + case FPST_AH: |
43 | offset = offsetof(CPUARMState, vfp.ah_fp_status); | 43 | offset = offsetof(CPUARMState, vfp.ah_fp_status); |
44 | break; | 44 | break; |
45 | - case FPST_FPCR_AH_F16: | 45 | - case FPST_FPCR_AH_F16: |
46 | + case FPST_AH_F16: | 46 | + case FPST_AH_F16: |
47 | offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | 47 | offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); |
48 | break; | 48 | break; |
49 | case FPST_STD: | 49 | case FPST_STD: |
50 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 50 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
51 | static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) | 51 | static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz) |
52 | { | 52 | { |
53 | if (s->fpcr_ah) { | 53 | if (s->fpcr_ah) { |
54 | - return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; | 54 | - return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH; |
55 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; | 55 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; |
56 | } else { | 56 | } else { |
57 | return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | 57 | return esz == MO_16 ? FPST_A64_F16 : FPST_A64; |
58 | } | 58 | } |
59 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | 59 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
60 | index XXXXXXX..XXXXXXX 100644 | 60 | index XXXXXXX..XXXXXXX 100644 |
61 | --- a/target/arm/tcg/translate-a64.c | 61 | --- a/target/arm/tcg/translate-a64.c |
62 | +++ b/target/arm/tcg/translate-a64.c | 62 | +++ b/target/arm/tcg/translate-a64.c |
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | 63 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) |
64 | if (fp_access_check(s)) { | 64 | if (fp_access_check(s)) { |
65 | /* Q bit selects BFMLALB vs BFMLALT. */ | 65 | /* Q bit selects BFMLALB vs BFMLALT. */ |
66 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | 66 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, |
67 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q, | 67 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q, |
68 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | 68 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, |
69 | gen_helper_gvec_bfmlal); | 69 | gen_helper_gvec_bfmlal); |
70 | } | 70 | } |
71 | return true; | 71 | return true; |
72 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | 72 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) |
73 | if (fp_access_check(s)) { | 73 | if (fp_access_check(s)) { |
74 | /* Q bit selects BFMLALB vs BFMLALT. */ | 74 | /* Q bit selects BFMLALB vs BFMLALT. */ |
75 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | 75 | gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, |
76 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, | 76 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, |
77 | + s->fpcr_ah ? FPST_AH : FPST_A64, | 77 | + s->fpcr_ah ? FPST_AH : FPST_A64, |
78 | (a->idx << 1) | a->q, | 78 | (a->idx << 1) | a->q, |
79 | gen_helper_gvec_bfmlal_idx); | 79 | gen_helper_gvec_bfmlal_idx); |
80 | } | 80 | } |
81 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | 81 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) |
82 | 82 | ||
83 | static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | 83 | static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) |
84 | { | 84 | { |
85 | - ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64; | 85 | - ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64; |
86 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; | 86 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; |
87 | TCGv_i32 t32; | 87 | TCGv_i32 t32; |
88 | int check; | 88 | int check; |
89 | 89 | ||
90 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | 90 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) |
91 | 91 | ||
92 | static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | 92 | static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) |
93 | { | 93 | { |
94 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_AH); | 94 | - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_AH); |
95 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | 95 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); |
96 | TCGv_i32 tmp = tcg_temp_new_i32(); | 96 | TCGv_i32 tmp = tcg_temp_new_i32(); |
97 | gen_helper_bfcvt_pair(tmp, n, fpst); | 97 | gen_helper_bfcvt_pair(tmp, n, fpst); |
98 | tcg_gen_extu_i32_i64(d, tmp); | 98 | tcg_gen_extu_i32_i64(d, tmp); |
99 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | 99 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
100 | index XXXXXXX..XXXXXXX 100644 | 100 | index XXXXXXX..XXXXXXX 100644 |
101 | --- a/target/arm/tcg/translate-sve.c | 101 | --- a/target/arm/tcg/translate-sve.c |
102 | +++ b/target/arm/tcg/translate-sve.c | 102 | +++ b/target/arm/tcg/translate-sve.c |
103 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | 103 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, |
104 | 104 | ||
105 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 105 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
106 | gen_helper_sve_bfcvt, a, 0, | 106 | gen_helper_sve_bfcvt, a, 0, |
107 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) | 107 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) |
108 | + s->fpcr_ah ? FPST_AH : FPST_A64) | 108 | + s->fpcr_ah ? FPST_AH : FPST_A64) |
109 | 109 | ||
110 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | 110 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, |
111 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | 111 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) |
112 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | 112 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, |
113 | 113 | ||
114 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | 114 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, |
115 | gen_helper_sve_bfcvtnt, a, 0, | 115 | gen_helper_sve_bfcvtnt, a, 0, |
116 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) | 116 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64) |
117 | + s->fpcr_ah ? FPST_AH : FPST_A64) | 117 | + s->fpcr_ah ? FPST_AH : FPST_A64) |
118 | 118 | ||
119 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | 119 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, |
120 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | 120 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) |
121 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | 121 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) |
122 | { | 122 | { |
123 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | 123 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, |
124 | a->rd, a->rn, a->rm, a->ra, sel, | 124 | a->rd, a->rn, a->rm, a->ra, sel, |
125 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); | 125 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); |
126 | + s->fpcr_ah ? FPST_AH : FPST_A64); | 126 | + s->fpcr_ah ? FPST_AH : FPST_A64); |
127 | } | 127 | } |
128 | 128 | ||
129 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | 129 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) |
130 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | 130 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) |
131 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | 131 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, |
132 | a->rd, a->rn, a->rm, a->ra, | 132 | a->rd, a->rn, a->rm, a->ra, |
133 | (a->index << 1) | sel, | 133 | (a->index << 1) | sel, |
134 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); | 134 | - s->fpcr_ah ? FPST_FPCR_AH : FPST_A64); |
135 | + s->fpcr_ah ? FPST_AH : FPST_A64); | 135 | + s->fpcr_ah ? FPST_AH : FPST_A64); |
136 | } | 136 | } |
137 | 137 | ||
138 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | 138 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) |
139 | -- | 139 | -- |
140 | 2.43.0 | 140 | 2.43.0 | diff view generated by jsdifflib |
1 | Move ARMFPStatusFlavour to cpu.h with which to index | 1 | Move ARMFPStatusFlavour to cpu.h with which to index |
---|---|---|---|
2 | this array. For now, place the array in an anonymous | 2 | this array. For now, place the array in an anonymous |
3 | union with the existing structures. Adjust the order | 3 | union with the existing structures. Adjust the order |
4 | of the existing structures to match the enum. | 4 | of the existing structures to match the enum. |
5 | 5 | ||
6 | Simplify fpstatus_ptr() using the new array. | 6 | Simplify fpstatus_ptr() using the new array. |
7 | 7 | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | --- | 9 | --- |
10 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- | 10 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- |
11 | target/arm/tcg/translate.h | 64 +------------------- | 11 | target/arm/tcg/translate.h | 64 +------------------- |
12 | 2 files changed, 70 insertions(+), 113 deletions(-) | 12 | 2 files changed, 70 insertions(+), 113 deletions(-) |
13 | 13 | ||
14 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 14 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
15 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/cpu.h | 16 | --- a/target/arm/cpu.h |
17 | +++ b/target/arm/cpu.h | 17 | +++ b/target/arm/cpu.h |
18 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; | 18 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; |
19 | 19 | ||
20 | typedef struct NVICState NVICState; | 20 | typedef struct NVICState NVICState; |
21 | 21 | ||
22 | +/* | 22 | +/* |
23 | + * Enum for indexing vfp.fp_status[]. | 23 | + * Enum for indexing vfp.fp_status[]. |
24 | + * | 24 | + * |
25 | + * FPST_A32: is the "normal" fp status for AArch32 insns | 25 | + * FPST_A32: is the "normal" fp status for AArch32 insns |
26 | + * FPST_A64: is the "normal" fp status for AArch64 insns | 26 | + * FPST_A64: is the "normal" fp status for AArch64 insns |
27 | + * FPST_A32_F16: used for AArch32 half-precision calculations | 27 | + * FPST_A32_F16: used for AArch32 half-precision calculations |
28 | + * FPST_A64_F16: used for AArch64 half-precision calculations | 28 | + * FPST_A64_F16: used for AArch64 half-precision calculations |
29 | + * FPST_STD: the ARM "Standard FPSCR Value" | 29 | + * FPST_STD: the ARM "Standard FPSCR Value" |
30 | + * FPST_STD_F16: used for half-precision | 30 | + * FPST_STD_F16: used for half-precision |
31 | + * calculations with the ARM "Standard FPSCR Value" | 31 | + * calculations with the ARM "Standard FPSCR Value" |
32 | + * FPST_AH: used for the A64 insns which change behaviour | 32 | + * FPST_AH: used for the A64 insns which change behaviour |
33 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | 33 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, |
34 | + * and the reciprocal and square root estimate/step insns) | 34 | + * and the reciprocal and square root estimate/step insns) |
35 | + * FPST_AH_F16: used for the A64 insns which change behaviour | 35 | + * FPST_AH_F16: used for the A64 insns which change behaviour |
36 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | 36 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, |
37 | + * and the reciprocal and square root estimate/step insns); | 37 | + * and the reciprocal and square root estimate/step insns); |
38 | + * for half-precision | 38 | + * for half-precision |
39 | + * | 39 | + * |
40 | + * Half-precision operations are governed by a separate | 40 | + * Half-precision operations are governed by a separate |
41 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | 41 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate |
42 | + * status structure to control this. | 42 | + * status structure to control this. |
43 | + * | 43 | + * |
44 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, | 44 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, |
45 | + * round-to-nearest and is used by any operations (generally | 45 | + * round-to-nearest and is used by any operations (generally |
46 | + * Neon) which the architecture defines as controlled by the | 46 | + * Neon) which the architecture defines as controlled by the |
47 | + * standard FPSCR value rather than the FPSCR. | 47 | + * standard FPSCR value rather than the FPSCR. |
48 | + * | 48 | + * |
49 | + * The "standard FPSCR but for fp16 ops" is needed because | 49 | + * The "standard FPSCR but for fp16 ops" is needed because |
50 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | 50 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
51 | + * using a fixed value for it. | 51 | + * using a fixed value for it. |
52 | + * | 52 | + * |
53 | + * The ah_fp_status is needed because some insns have different | 53 | + * The ah_fp_status is needed because some insns have different |
54 | + * behaviour when FPCR.AH == 1: they don't update cumulative | 54 | + * behaviour when FPCR.AH == 1: they don't update cumulative |
55 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | 55 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
56 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | 56 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
57 | + * which means we need an ah_fp_status_f16 as well. | 57 | + * which means we need an ah_fp_status_f16 as well. |
58 | + * | 58 | + * |
59 | + * To avoid having to transfer exception bits around, we simply | 59 | + * To avoid having to transfer exception bits around, we simply |
60 | + * say that the FPSCR cumulative exception flags are the logical | 60 | + * say that the FPSCR cumulative exception flags are the logical |
61 | + * OR of the flags in the four fp statuses. This relies on the | 61 | + * OR of the flags in the four fp statuses. This relies on the |
62 | + * only thing which needs to read the exception flags being | 62 | + * only thing which needs to read the exception flags being |
63 | + * an explicit FPSCR read. | 63 | + * an explicit FPSCR read. |
64 | + */ | 64 | + */ |
65 | +typedef enum ARMFPStatusFlavour { | 65 | +typedef enum ARMFPStatusFlavour { |
66 | + FPST_A32, | 66 | + FPST_A32, |
67 | + FPST_A64, | 67 | + FPST_A64, |
68 | + FPST_A32_F16, | 68 | + FPST_A32_F16, |
69 | + FPST_A64_F16, | 69 | + FPST_A64_F16, |
70 | + FPST_AH, | 70 | + FPST_AH, |
71 | + FPST_AH_F16, | 71 | + FPST_AH_F16, |
72 | + FPST_STD, | 72 | + FPST_STD, |
73 | + FPST_STD_F16, | 73 | + FPST_STD_F16, |
74 | +} ARMFPStatusFlavour; | 74 | +} ARMFPStatusFlavour; |
75 | +#define FPST_COUNT 8 | 75 | +#define FPST_COUNT 8 |
76 | + | 76 | + |
77 | typedef struct CPUArchState { | 77 | typedef struct CPUArchState { |
78 | /* Regs for current mode. */ | 78 | /* Regs for current mode. */ |
79 | uint32_t regs[16]; | 79 | uint32_t regs[16]; |
80 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | 80 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
81 | /* Scratch space for aa32 neon expansion. */ | 81 | /* Scratch space for aa32 neon expansion. */ |
82 | uint32_t scratch[8]; | 82 | uint32_t scratch[8]; |
83 | 83 | ||
84 | - /* There are a number of distinct float control structures: | 84 | - /* There are a number of distinct float control structures: |
85 | - * | 85 | - * |
86 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | 86 | - * fp_status_a32: is the "normal" fp status for AArch32 insns |
87 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | 87 | - * fp_status_a64: is the "normal" fp status for AArch64 insns |
88 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | 88 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations |
89 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | 89 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations |
90 | - * standard_fp_status : the ARM "Standard FPSCR Value" | 90 | - * standard_fp_status : the ARM "Standard FPSCR Value" |
91 | - * standard_fp_status_fp16 : used for half-precision | 91 | - * standard_fp_status_fp16 : used for half-precision |
92 | - * calculations with the ARM "Standard FPSCR Value" | 92 | - * calculations with the ARM "Standard FPSCR Value" |
93 | - * ah_fp_status: used for the A64 insns which change behaviour | 93 | - * ah_fp_status: used for the A64 insns which change behaviour |
94 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | 94 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, |
95 | - * and the reciprocal and square root estimate/step insns) | 95 | - * and the reciprocal and square root estimate/step insns) |
96 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | 96 | - * ah_fp_status_f16: used for the A64 insns which change behaviour |
97 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | 97 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, |
98 | - * and the reciprocal and square root estimate/step insns); | 98 | - * and the reciprocal and square root estimate/step insns); |
99 | - * for half-precision | 99 | - * for half-precision |
100 | - * | 100 | - * |
101 | - * Half-precision operations are governed by a separate | 101 | - * Half-precision operations are governed by a separate |
102 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | 102 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate |
103 | - * status structure to control this. | 103 | - * status structure to control this. |
104 | - * | 104 | - * |
105 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | 105 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, |
106 | - * round-to-nearest and is used by any operations (generally | 106 | - * round-to-nearest and is used by any operations (generally |
107 | - * Neon) which the architecture defines as controlled by the | 107 | - * Neon) which the architecture defines as controlled by the |
108 | - * standard FPSCR value rather than the FPSCR. | 108 | - * standard FPSCR value rather than the FPSCR. |
109 | - * | 109 | - * |
110 | - * The "standard FPSCR but for fp16 ops" is needed because | 110 | - * The "standard FPSCR but for fp16 ops" is needed because |
111 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | 111 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
112 | - * using a fixed value for it. | 112 | - * using a fixed value for it. |
113 | - * | 113 | - * |
114 | - * The ah_fp_status is needed because some insns have different | 114 | - * The ah_fp_status is needed because some insns have different |
115 | - * behaviour when FPCR.AH == 1: they don't update cumulative | 115 | - * behaviour when FPCR.AH == 1: they don't update cumulative |
116 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | 116 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
117 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | 117 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
118 | - * which means we need an ah_fp_status_f16 as well. | 118 | - * which means we need an ah_fp_status_f16 as well. |
119 | - * | 119 | - * |
120 | - * To avoid having to transfer exception bits around, we simply | 120 | - * To avoid having to transfer exception bits around, we simply |
121 | - * say that the FPSCR cumulative exception flags are the logical | 121 | - * say that the FPSCR cumulative exception flags are the logical |
122 | - * OR of the flags in the four fp statuses. This relies on the | 122 | - * OR of the flags in the four fp statuses. This relies on the |
123 | - * only thing which needs to read the exception flags being | 123 | - * only thing which needs to read the exception flags being |
124 | - * an explicit FPSCR read. | 124 | - * an explicit FPSCR read. |
125 | - */ | 125 | - */ |
126 | - float_status fp_status_a32; | 126 | - float_status fp_status_a32; |
127 | - float_status fp_status_a64; | 127 | - float_status fp_status_a64; |
128 | - float_status fp_status_f16_a32; | 128 | - float_status fp_status_f16_a32; |
129 | - float_status fp_status_f16_a64; | 129 | - float_status fp_status_f16_a64; |
130 | - float_status standard_fp_status; | 130 | - float_status standard_fp_status; |
131 | - float_status standard_fp_status_f16; | 131 | - float_status standard_fp_status_f16; |
132 | - float_status ah_fp_status; | 132 | - float_status ah_fp_status; |
133 | - float_status ah_fp_status_f16; | 133 | - float_status ah_fp_status_f16; |
134 | + /* There are a number of distinct float control structures. */ | 134 | + /* There are a number of distinct float control structures. */ |
135 | + union { | 135 | + union { |
136 | + float_status fp_status[FPST_COUNT]; | 136 | + float_status fp_status[FPST_COUNT]; |
137 | + struct { | 137 | + struct { |
138 | + float_status fp_status_a32; | 138 | + float_status fp_status_a32; |
139 | + float_status fp_status_a64; | 139 | + float_status fp_status_a64; |
140 | + float_status fp_status_f16_a32; | 140 | + float_status fp_status_f16_a32; |
141 | + float_status fp_status_f16_a64; | 141 | + float_status fp_status_f16_a64; |
142 | + float_status ah_fp_status; | 142 | + float_status ah_fp_status; |
143 | + float_status ah_fp_status_f16; | 143 | + float_status ah_fp_status_f16; |
144 | + float_status standard_fp_status; | 144 | + float_status standard_fp_status; |
145 | + float_status standard_fp_status_f16; | 145 | + float_status standard_fp_status_f16; |
146 | + }; | 146 | + }; |
147 | + }; | 147 | + }; |
148 | 148 | ||
149 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | 149 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ |
150 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | 150 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ |
151 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | 151 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
152 | index XXXXXXX..XXXXXXX 100644 | 152 | index XXXXXXX..XXXXXXX 100644 |
153 | --- a/target/arm/tcg/translate.h | 153 | --- a/target/arm/tcg/translate.h |
154 | +++ b/target/arm/tcg/translate.h | 154 | +++ b/target/arm/tcg/translate.h |
155 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | 155 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) |
156 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | 156 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; |
157 | } | 157 | } |
158 | 158 | ||
159 | -/* | 159 | -/* |
160 | - * Enum for argument to fpstatus_ptr(). | 160 | - * Enum for argument to fpstatus_ptr(). |
161 | - */ | 161 | - */ |
162 | -typedef enum ARMFPStatusFlavour { | 162 | -typedef enum ARMFPStatusFlavour { |
163 | - FPST_A32, | 163 | - FPST_A32, |
164 | - FPST_A64, | 164 | - FPST_A64, |
165 | - FPST_A32_F16, | 165 | - FPST_A32_F16, |
166 | - FPST_A64_F16, | 166 | - FPST_A64_F16, |
167 | - FPST_AH, | 167 | - FPST_AH, |
168 | - FPST_AH_F16, | 168 | - FPST_AH_F16, |
169 | - FPST_STD, | 169 | - FPST_STD, |
170 | - FPST_STD_F16, | 170 | - FPST_STD_F16, |
171 | -} ARMFPStatusFlavour; | 171 | -} ARMFPStatusFlavour; |
172 | - | 172 | - |
173 | /** | 173 | /** |
174 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | 174 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field |
175 | * | 175 | * |
176 | * We have multiple softfloat float_status fields in the Arm CPU state struct | 176 | * We have multiple softfloat float_status fields in the Arm CPU state struct |
177 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | 177 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has |
178 | * been set up to point to the requested field in the CPU state struct. | 178 | * been set up to point to the requested field in the CPU state struct. |
179 | - * The options are: | 179 | - * The options are: |
180 | - * | 180 | - * |
181 | - * FPST_A32 | 181 | - * FPST_A32 |
182 | - * for AArch32 non-FP16 operations controlled by the FPCR | 182 | - * for AArch32 non-FP16 operations controlled by the FPCR |
183 | - * FPST_A64 | 183 | - * FPST_A64 |
184 | - * for AArch64 non-FP16 operations controlled by the FPCR | 184 | - * for AArch64 non-FP16 operations controlled by the FPCR |
185 | - * FPST_A32_F16 | 185 | - * FPST_A32_F16 |
186 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | 186 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
187 | - * FPST_A64_F16 | 187 | - * FPST_A64_F16 |
188 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | 188 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used |
189 | - * FPST_AH: | 189 | - * FPST_AH: |
190 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | 190 | - * for AArch64 operations which change behaviour when AH=1 (specifically, |
191 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | 191 | - * bfloat16 conversions and multiplies, and the reciprocal and square root |
192 | - * estimate/step insns) | 192 | - * estimate/step insns) |
193 | - * FPST_AH_F16: | 193 | - * FPST_AH_F16: |
194 | - * ditto, but for half-precision operations | 194 | - * ditto, but for half-precision operations |
195 | - * FPST_STD | 195 | - * FPST_STD |
196 | - * for A32/T32 Neon operations using the "standard FPSCR value" | 196 | - * for A32/T32 Neon operations using the "standard FPSCR value" |
197 | - * FPST_STD_F16 | 197 | - * FPST_STD_F16 |
198 | - * as FPST_STD, but where FPCR.FZ16 is to be used | 198 | - * as FPST_STD, but where FPCR.FZ16 is to be used |
199 | */ | 199 | */ |
200 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | 200 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
201 | { | 201 | { |
202 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | 202 | TCGv_ptr statusptr = tcg_temp_new_ptr(); |
203 | - int offset; | 203 | - int offset; |
204 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | 204 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); |
205 | 205 | ||
206 | - switch (flavour) { | 206 | - switch (flavour) { |
207 | - case FPST_A32: | 207 | - case FPST_A32: |
208 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | 208 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); |
209 | - break; | 209 | - break; |
210 | - case FPST_A64: | 210 | - case FPST_A64: |
211 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | 211 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); |
212 | - break; | 212 | - break; |
213 | - case FPST_A32_F16: | 213 | - case FPST_A32_F16: |
214 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | 214 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); |
215 | - break; | 215 | - break; |
216 | - case FPST_A64_F16: | 216 | - case FPST_A64_F16: |
217 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | 217 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); |
218 | - break; | 218 | - break; |
219 | - case FPST_AH: | 219 | - case FPST_AH: |
220 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | 220 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); |
221 | - break; | 221 | - break; |
222 | - case FPST_AH_F16: | 222 | - case FPST_AH_F16: |
223 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | 223 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); |
224 | - break; | 224 | - break; |
225 | - case FPST_STD: | 225 | - case FPST_STD: |
226 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | 226 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); |
227 | - break; | 227 | - break; |
228 | - case FPST_STD_F16: | 228 | - case FPST_STD_F16: |
229 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | 229 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); |
230 | - break; | 230 | - break; |
231 | - default: | 231 | - default: |
232 | - g_assert_not_reached(); | 232 | - g_assert_not_reached(); |
233 | - } | 233 | - } |
234 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | 234 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); |
235 | return statusptr; | 235 | return statusptr; |
236 | } | 236 | } |
237 | -- | 237 | -- |
238 | 2.43.0 | 238 | 2.43.0 |
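
(Not part of the series -- just a reviewer-side, stand-alone sketch, using a
dummy float_status and a trimmed-down flavour enum invented here, of how the
union overlay keeps the named fields and the indexed array aliased, and of
the offsetof() arithmetic the new fpstatus_ptr() boils down to.)

/*
 * Illustration only: "float_status", the enum and the struct below are
 * simplified stand-ins, not the real QEMU definitions.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { int exception_flags; } float_status;

typedef enum { FPST_A32, FPST_A64, FPST_STD, FPST_COUNT } Flavour;

typedef struct {
    union {
        float_status fp_status[FPST_COUNT];
        struct {
            float_status fp_status_a32;      /* aliases fp_status[FPST_A32] */
            float_status fp_status_a64;      /* aliases fp_status[FPST_A64] */
            float_status standard_fp_status; /* aliases fp_status[FPST_STD] */
        };
    };
} VFP;

/*
 * The patch writes this as offsetof(CPUARMState, vfp.fp_status[flavour]);
 * spelling it out as base offset plus scaled index is equivalent.
 */
static size_t fpstatus_offset(Flavour flavour)
{
    return offsetof(VFP, fp_status) + flavour * sizeof(float_status);
}

int main(void)
{
    VFP vfp = { 0 };

    /* A store through the named view is seen through the indexed view. */
    vfp.fp_status_a64.exception_flags = 0x5;
    assert(vfp.fp_status[FPST_A64].exception_flags == 0x5);

    /* The index-based offset matches the named field's offset. */
    assert(fpstatus_offset(FPST_A64) == offsetof(VFP, fp_status_a64));

    printf("FPST_A64 offset: %zu\n", fpstatus_offset(FPST_A64));
    return 0;
}

If the named-field order in the anonymous struct ever drifts out of sync with
the enum, the aliasing breaks silently, so a QEMU_BUILD_BUG_ON() per field (or
a comment tying the two orders together) might be worth folding in.
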
1 | Replace uses of standard_fp_status_f16 with fp_status[FPST_STD_F16]. | 1 | Replace uses of standard_fp_status_f16 with fp_status[FPST_STD_F16]. |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | --- | 4 | --- |
5 | target/arm/cpu.h | 1 - | 5 | target/arm/cpu.h | 1 - |
6 | target/arm/cpu.c | 4 ++-- | 6 | target/arm/cpu.c | 4 ++-- |
7 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ | 7 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ |
8 | target/arm/vfp_helper.c | 8 ++++---- | 8 | target/arm/vfp_helper.c | 8 ++++---- |
9 | 4 files changed, 18 insertions(+), 19 deletions(-) | 9 | 4 files changed, 18 insertions(+), 19 deletions(-) |
10 | 10 | ||
11 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 11 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
12 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/target/arm/cpu.h | 13 | --- a/target/arm/cpu.h |
14 | +++ b/target/arm/cpu.h | 14 | +++ b/target/arm/cpu.h |
15 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | 15 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
16 | float_status ah_fp_status; | 16 | float_status ah_fp_status; |
17 | float_status ah_fp_status_f16; | 17 | float_status ah_fp_status_f16; |
18 | float_status standard_fp_status; | 18 | float_status standard_fp_status; |
19 | - float_status standard_fp_status_f16; | 19 | - float_status standard_fp_status_f16; |
20 | }; | 20 | }; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 23 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
24 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/arm/cpu.c | 25 | --- a/target/arm/cpu.c |
26 | +++ b/target/arm/cpu.c | 26 | +++ b/target/arm/cpu.c |
27 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | 27 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
28 | set_flush_to_zero(1, &env->vfp.standard_fp_status); | 28 | set_flush_to_zero(1, &env->vfp.standard_fp_status); |
29 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | 29 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
30 | set_default_nan_mode(1, &env->vfp.standard_fp_status); | 30 | set_default_nan_mode(1, &env->vfp.standard_fp_status); |
31 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); | 31 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); |
32 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | 32 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | 33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | 34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
35 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | 35 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | 36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
37 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | 37 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
38 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | 38 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); |
39 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | 39 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
40 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | 40 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); |
41 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | 41 | set_flush_to_zero(1, &env->vfp.ah_fp_status); |
42 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | 42 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); |
43 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | 43 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
44 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/target/arm/tcg/mve_helper.c | 45 | --- a/target/arm/tcg/mve_helper.c |
46 | +++ b/target/arm/tcg/mve_helper.c | 46 | +++ b/target/arm/tcg/mve_helper.c |
47 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | 47 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
48 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 48 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
49 | continue; \ | 49 | continue; \ |
50 | } \ | 50 | } \ |
51 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 51 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
52 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 52 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
53 | &env->vfp.standard_fp_status; \ | 53 | &env->vfp.standard_fp_status; \ |
54 | if (!(mask & 1)) { \ | 54 | if (!(mask & 1)) { \ |
55 | /* We need the result but without updating flags */ \ | 55 | /* We need the result but without updating flags */ \ |
56 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | 56 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
57 | r[e] = 0; \ | 57 | r[e] = 0; \ |
58 | continue; \ | 58 | continue; \ |
59 | } \ | 59 | } \ |
60 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 60 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
61 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 61 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
62 | &env->vfp.standard_fp_status; \ | 62 | &env->vfp.standard_fp_status; \ |
63 | if (!(tm & 1)) { \ | 63 | if (!(tm & 1)) { \ |
64 | /* We need the result but without updating flags */ \ | 64 | /* We need the result but without updating flags */ \ |
65 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | 65 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) |
66 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 66 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
67 | continue; \ | 67 | continue; \ |
68 | } \ | 68 | } \ |
69 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 69 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
70 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 70 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
71 | &env->vfp.standard_fp_status; \ | 71 | &env->vfp.standard_fp_status; \ |
72 | if (!(mask & 1)) { \ | 72 | if (!(mask & 1)) { \ |
73 | /* We need the result but without updating flags */ \ | 73 | /* We need the result but without updating flags */ \ |
74 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | 74 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) |
75 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | 75 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ |
76 | continue; \ | 76 | continue; \ |
77 | } \ | 77 | } \ |
78 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 78 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
79 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 79 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
80 | &env->vfp.standard_fp_status; \ | 80 | &env->vfp.standard_fp_status; \ |
81 | fpst1 = fpst0; \ | 81 | fpst1 = fpst0; \ |
82 | if (!(mask & 1)) { \ | 82 | if (!(mask & 1)) { \ |
83 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | 83 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) |
84 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 84 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
85 | continue; \ | 85 | continue; \ |
86 | } \ | 86 | } \ |
87 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 87 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
88 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 88 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
89 | &env->vfp.standard_fp_status; \ | 89 | &env->vfp.standard_fp_status; \ |
90 | if (!(mask & 1)) { \ | 90 | if (!(mask & 1)) { \ |
91 | /* We need the result but without updating flags */ \ | 91 | /* We need the result but without updating flags */ \ |
92 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | 92 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) |
93 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 93 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
94 | continue; \ | 94 | continue; \ |
95 | } \ | 95 | } \ |
96 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 96 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
97 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 97 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
98 | &env->vfp.standard_fp_status; \ | 98 | &env->vfp.standard_fp_status; \ |
99 | if (!(mask & 1)) { \ | 99 | if (!(mask & 1)) { \ |
100 | /* We need the result but without updating flags */ \ | 100 | /* We need the result but without updating flags */ \ |
101 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | 101 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) |
102 | TYPE *m = vm; \ | 102 | TYPE *m = vm; \ |
103 | TYPE ra = (TYPE)ra_in; \ | 103 | TYPE ra = (TYPE)ra_in; \ |
104 | float_status *fpst = (ESIZE == 2) ? \ | 104 | float_status *fpst = (ESIZE == 2) ? \ |
105 | - &env->vfp.standard_fp_status_f16 : \ | 105 | - &env->vfp.standard_fp_status_f16 : \ |
106 | + &env->vfp.fp_status[FPST_STD_F16] : \ | 106 | + &env->vfp.fp_status[FPST_STD_F16] : \ |
107 | &env->vfp.standard_fp_status; \ | 107 | &env->vfp.standard_fp_status; \ |
108 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | 108 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
109 | if (mask & 1) { \ | 109 | if (mask & 1) { \ |
110 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 110 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
111 | if ((mask & emask) == 0) { \ | 111 | if ((mask & emask) == 0) { \ |
112 | continue; \ | 112 | continue; \ |
113 | } \ | 113 | } \ |
114 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 114 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
115 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 115 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
116 | &env->vfp.standard_fp_status; \ | 116 | &env->vfp.standard_fp_status; \ |
117 | if (!(mask & (1 << (e * ESIZE)))) { \ | 117 | if (!(mask & (1 << (e * ESIZE)))) { \ |
118 | /* We need the result but without updating flags */ \ | 118 | /* We need the result but without updating flags */ \ |
119 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 119 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
120 | if ((mask & emask) == 0) { \ | 120 | if ((mask & emask) == 0) { \ |
121 | continue; \ | 121 | continue; \ |
122 | } \ | 122 | } \ |
123 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 123 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
124 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 124 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
125 | &env->vfp.standard_fp_status; \ | 125 | &env->vfp.standard_fp_status; \ |
126 | if (!(mask & (1 << (e * ESIZE)))) { \ | 126 | if (!(mask & (1 << (e * ESIZE)))) { \ |
127 | /* We need the result but without updating flags */ \ | 127 | /* We need the result but without updating flags */ \ |
128 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | 128 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) |
129 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 129 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
130 | continue; \ | 130 | continue; \ |
131 | } \ | 131 | } \ |
132 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 132 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
133 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 133 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
134 | &env->vfp.standard_fp_status; \ | 134 | &env->vfp.standard_fp_status; \ |
135 | if (!(mask & 1)) { \ | 135 | if (!(mask & 1)) { \ |
136 | /* We need the result but without updating flags */ \ | 136 | /* We need the result but without updating flags */ \ |
137 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | 137 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) |
138 | float_status *fpst; \ | 138 | float_status *fpst; \ |
139 | float_status scratch_fpst; \ | 139 | float_status scratch_fpst; \ |
140 | float_status *base_fpst = (ESIZE == 2) ? \ | 140 | float_status *base_fpst = (ESIZE == 2) ? \ |
141 | - &env->vfp.standard_fp_status_f16 : \ | 141 | - &env->vfp.standard_fp_status_f16 : \ |
142 | + &env->vfp.fp_status[FPST_STD_F16] : \ | 142 | + &env->vfp.fp_status[FPST_STD_F16] : \ |
143 | &env->vfp.standard_fp_status; \ | 143 | &env->vfp.standard_fp_status; \ |
144 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | 144 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ |
145 | set_float_rounding_mode(rmode, base_fpst); \ | 145 | set_float_rounding_mode(rmode, base_fpst); \ |
146 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | 146 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) |
147 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 147 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
148 | continue; \ | 148 | continue; \ |
149 | } \ | 149 | } \ |
150 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | 150 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
151 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 151 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
152 | &env->vfp.standard_fp_status; \ | 152 | &env->vfp.standard_fp_status; \ |
153 | if (!(mask & 1)) { \ | 153 | if (!(mask & 1)) { \ |
154 | /* We need the result but without updating flags */ \ | 154 | /* We need the result but without updating flags */ \ |
155 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | 155 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
156 | index XXXXXXX..XXXXXXX 100644 | 156 | index XXXXXXX..XXXXXXX 100644 |
157 | --- a/target/arm/vfp_helper.c | 157 | --- a/target/arm/vfp_helper.c |
158 | +++ b/target/arm/vfp_helper.c | 158 | +++ b/target/arm/vfp_helper.c |
159 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | 159 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
160 | /* FZ16 does not generate an input denormal exception. */ | 160 | /* FZ16 does not generate an input denormal exception. */ |
161 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | 161 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
162 | & ~float_flag_input_denormal_flushed); | 162 | & ~float_flag_input_denormal_flushed); |
163 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | 163 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) |
164 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | 164 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) |
165 | & ~float_flag_input_denormal_flushed); | 165 | & ~float_flag_input_denormal_flushed); |
166 | 166 | ||
167 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | 167 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); |
168 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | 168 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
169 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | 169 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
170 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | 170 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); |
171 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | 171 | set_float_exception_flags(0, &env->vfp.standard_fp_status); |
172 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | 172 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); |
173 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | 173 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); |
174 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | 174 | set_float_exception_flags(0, &env->vfp.ah_fp_status); |
175 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | 175 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); |
176 | } | 176 | } |
177 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | 177 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
178 | bool ftz_enabled = val & FPCR_FZ16; | 178 | bool ftz_enabled = val & FPCR_FZ16; |
179 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | 179 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
180 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | 180 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
181 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | 181 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
182 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | 182 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
183 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | 183 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
184 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | 184 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
185 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | 185 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
186 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | 186 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
187 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | 187 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | 188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
189 | } | 189 | } |
190 | if (changed & FPCR_FZ) { | 190 | if (changed & FPCR_FZ) { |
191 | -- | 191 | -- |
192 | 2.43.0 | 192 | 2.43.0 |
1 | Replace uses of standard_fp_status with fp_status[FPST_STD]. | 1 | Replace uses of standard_fp_status with fp_status[FPST_STD]. |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | --- | 4 | --- |
5 | target/arm/cpu.h | 1 - | 5 | target/arm/cpu.h | 1 - |
6 | target/arm/cpu.c | 8 ++++---- | 6 | target/arm/cpu.c | 8 ++++---- |
7 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- | 7 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- |
8 | target/arm/tcg/vec_helper.c | 4 ++-- | 8 | target/arm/tcg/vec_helper.c | 4 ++-- |
9 | target/arm/vfp_helper.c | 4 ++-- | 9 | target/arm/vfp_helper.c | 4 ++-- |
10 | 5 files changed, 22 insertions(+), 23 deletions(-) | 10 | 5 files changed, 22 insertions(+), 23 deletions(-) |
11 | 11 | ||
12 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 12 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
13 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/arm/cpu.h | 14 | --- a/target/arm/cpu.h |
15 | +++ b/target/arm/cpu.h | 15 | +++ b/target/arm/cpu.h |
16 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | 16 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
17 | float_status fp_status_f16_a64; | 17 | float_status fp_status_f16_a64; |
18 | float_status ah_fp_status; | 18 | float_status ah_fp_status; |
19 | float_status ah_fp_status_f16; | 19 | float_status ah_fp_status_f16; |
20 | - float_status standard_fp_status; | 20 | - float_status standard_fp_status; |
21 | }; | 21 | }; |
22 | }; | 22 | }; |
23 | 23 | ||
24 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 24 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
25 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/target/arm/cpu.c | 26 | --- a/target/arm/cpu.c |
27 | +++ b/target/arm/cpu.c | 27 | +++ b/target/arm/cpu.c |
28 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | 28 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
29 | env->sau.ctrl = 0; | 29 | env->sau.ctrl = 0; |
30 | } | 30 | } |
31 | 31 | ||
32 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); | 32 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); |
33 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | 33 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
34 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); | 34 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
35 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); | 35 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
36 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | 36 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | 37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
38 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | 38 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | 40 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
41 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | 41 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
42 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | 42 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | 43 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | 44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
45 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | 45 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
46 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | 46 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
47 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
48 | --- a/target/arm/tcg/mve_helper.c | 48 | --- a/target/arm/tcg/mve_helper.c |
49 | +++ b/target/arm/tcg/mve_helper.c | 49 | +++ b/target/arm/tcg/mve_helper.c |
50 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | 50 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
51 | continue; \ | 51 | continue; \ |
52 | } \ | 52 | } \ |
53 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 53 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
54 | - &env->vfp.standard_fp_status; \ | 54 | - &env->vfp.standard_fp_status; \ |
55 | + &env->vfp.fp_status[FPST_STD]; \ | 55 | + &env->vfp.fp_status[FPST_STD]; \ |
56 | if (!(mask & 1)) { \ | 56 | if (!(mask & 1)) { \ |
57 | /* We need the result but without updating flags */ \ | 57 | /* We need the result but without updating flags */ \ |
58 | scratch_fpst = *fpst; \ | 58 | scratch_fpst = *fpst; \ |
59 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | 59 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
60 | continue; \ | 60 | continue; \ |
61 | } \ | 61 | } \ |
62 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 62 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
63 | - &env->vfp.standard_fp_status; \ | 63 | - &env->vfp.standard_fp_status; \ |
64 | + &env->vfp.fp_status[FPST_STD]; \ | 64 | + &env->vfp.fp_status[FPST_STD]; \ |
65 | if (!(tm & 1)) { \ | 65 | if (!(tm & 1)) { \ |
66 | /* We need the result but without updating flags */ \ | 66 | /* We need the result but without updating flags */ \ |
67 | scratch_fpst = *fpst; \ | 67 | scratch_fpst = *fpst; \ |
68 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | 68 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) |
69 | continue; \ | 69 | continue; \ |
70 | } \ | 70 | } \ |
71 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 71 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
72 | - &env->vfp.standard_fp_status; \ | 72 | - &env->vfp.standard_fp_status; \ |
73 | + &env->vfp.fp_status[FPST_STD]; \ | 73 | + &env->vfp.fp_status[FPST_STD]; \ |
74 | if (!(mask & 1)) { \ | 74 | if (!(mask & 1)) { \ |
75 | /* We need the result but without updating flags */ \ | 75 | /* We need the result but without updating flags */ \ |
76 | scratch_fpst = *fpst; \ | 76 | scratch_fpst = *fpst; \ |
77 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | 77 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) |
78 | continue; \ | 78 | continue; \ |
79 | } \ | 79 | } \ |
80 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 80 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
81 | - &env->vfp.standard_fp_status; \ | 81 | - &env->vfp.standard_fp_status; \ |
82 | + &env->vfp.fp_status[FPST_STD]; \ | 82 | + &env->vfp.fp_status[FPST_STD]; \ |
83 | fpst1 = fpst0; \ | 83 | fpst1 = fpst0; \ |
84 | if (!(mask & 1)) { \ | 84 | if (!(mask & 1)) { \ |
85 | scratch_fpst = *fpst0; \ | 85 | scratch_fpst = *fpst0; \ |
86 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | 86 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) |
87 | continue; \ | 87 | continue; \ |
88 | } \ | 88 | } \ |
89 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 89 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
90 | - &env->vfp.standard_fp_status; \ | 90 | - &env->vfp.standard_fp_status; \ |
91 | + &env->vfp.fp_status[FPST_STD]; \ | 91 | + &env->vfp.fp_status[FPST_STD]; \ |
92 | if (!(mask & 1)) { \ | 92 | if (!(mask & 1)) { \ |
93 | /* We need the result but without updating flags */ \ | 93 | /* We need the result but without updating flags */ \ |
94 | scratch_fpst = *fpst; \ | 94 | scratch_fpst = *fpst; \ |
95 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | 95 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) |
96 | continue; \ | 96 | continue; \ |
97 | } \ | 97 | } \ |
98 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 98 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
99 | - &env->vfp.standard_fp_status; \ | 99 | - &env->vfp.standard_fp_status; \ |
100 | + &env->vfp.fp_status[FPST_STD]; \ | 100 | + &env->vfp.fp_status[FPST_STD]; \ |
101 | if (!(mask & 1)) { \ | 101 | if (!(mask & 1)) { \ |
102 | /* We need the result but without updating flags */ \ | 102 | /* We need the result but without updating flags */ \ |
103 | scratch_fpst = *fpst; \ | 103 | scratch_fpst = *fpst; \ |
104 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | 104 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) |
105 | TYPE ra = (TYPE)ra_in; \ | 105 | TYPE ra = (TYPE)ra_in; \ |
106 | float_status *fpst = (ESIZE == 2) ? \ | 106 | float_status *fpst = (ESIZE == 2) ? \ |
107 | &env->vfp.fp_status[FPST_STD_F16] : \ | 107 | &env->vfp.fp_status[FPST_STD_F16] : \ |
108 | - &env->vfp.standard_fp_status; \ | 108 | - &env->vfp.standard_fp_status; \ |
109 | + &env->vfp.fp_status[FPST_STD]; \ | 109 | + &env->vfp.fp_status[FPST_STD]; \ |
110 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | 110 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
111 | if (mask & 1) { \ | 111 | if (mask & 1) { \ |
112 | TYPE v = m[H##ESIZE(e)]; \ | 112 | TYPE v = m[H##ESIZE(e)]; \ |
113 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 113 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
114 | continue; \ | 114 | continue; \ |
115 | } \ | 115 | } \ |
116 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 116 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
117 | - &env->vfp.standard_fp_status; \ | 117 | - &env->vfp.standard_fp_status; \ |
118 | + &env->vfp.fp_status[FPST_STD]; \ | 118 | + &env->vfp.fp_status[FPST_STD]; \ |
119 | if (!(mask & (1 << (e * ESIZE)))) { \ | 119 | if (!(mask & (1 << (e * ESIZE)))) { \ |
120 | /* We need the result but without updating flags */ \ | 120 | /* We need the result but without updating flags */ \ |
121 | scratch_fpst = *fpst; \ | 121 | scratch_fpst = *fpst; \ |
122 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 122 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
123 | continue; \ | 123 | continue; \ |
124 | } \ | 124 | } \ |
125 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 125 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
126 | - &env->vfp.standard_fp_status; \ | 126 | - &env->vfp.standard_fp_status; \ |
127 | + &env->vfp.fp_status[FPST_STD]; \ | 127 | + &env->vfp.fp_status[FPST_STD]; \ |
128 | if (!(mask & (1 << (e * ESIZE)))) { \ | 128 | if (!(mask & (1 << (e * ESIZE)))) { \ |
129 | /* We need the result but without updating flags */ \ | 129 | /* We need the result but without updating flags */ \ |
130 | scratch_fpst = *fpst; \ | 130 | scratch_fpst = *fpst; \ |
131 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | 131 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) |
132 | continue; \ | 132 | continue; \ |
133 | } \ | 133 | } \ |
134 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 134 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
135 | - &env->vfp.standard_fp_status; \ | 135 | - &env->vfp.standard_fp_status; \ |
136 | + &env->vfp.fp_status[FPST_STD]; \ | 136 | + &env->vfp.fp_status[FPST_STD]; \ |
137 | if (!(mask & 1)) { \ | 137 | if (!(mask & 1)) { \ |
138 | /* We need the result but without updating flags */ \ | 138 | /* We need the result but without updating flags */ \ |
139 | scratch_fpst = *fpst; \ | 139 | scratch_fpst = *fpst; \ |
140 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | 140 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) |
141 | float_status scratch_fpst; \ | 141 | float_status scratch_fpst; \ |
142 | float_status *base_fpst = (ESIZE == 2) ? \ | 142 | float_status *base_fpst = (ESIZE == 2) ? \ |
143 | &env->vfp.fp_status[FPST_STD_F16] : \ | 143 | &env->vfp.fp_status[FPST_STD_F16] : \ |
144 | - &env->vfp.standard_fp_status; \ | 144 | - &env->vfp.standard_fp_status; \ |
145 | + &env->vfp.fp_status[FPST_STD]; \ | 145 | + &env->vfp.fp_status[FPST_STD]; \ |
146 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | 146 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ |
147 | set_float_rounding_mode(rmode, base_fpst); \ | 147 | set_float_rounding_mode(rmode, base_fpst); \ |
148 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | 148 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
149 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | 149 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) |
150 | unsigned e; | 150 | unsigned e; |
151 | float_status *fpst; | 151 | float_status *fpst; |
152 | float_status scratch_fpst; | 152 | float_status scratch_fpst; |
153 | - float_status *base_fpst = &env->vfp.standard_fp_status; | 153 | - float_status *base_fpst = &env->vfp.standard_fp_status; |
154 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | 154 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; |
155 | bool old_fz = get_flush_to_zero(base_fpst); | 155 | bool old_fz = get_flush_to_zero(base_fpst); |
156 | set_flush_to_zero(false, base_fpst); | 156 | set_flush_to_zero(false, base_fpst); |
157 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | 157 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { |
158 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | 158 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) |
159 | unsigned e; | 159 | unsigned e; |
160 | float_status *fpst; | 160 | float_status *fpst; |
161 | float_status scratch_fpst; | 161 | float_status scratch_fpst; |
162 | - float_status *base_fpst = &env->vfp.standard_fp_status; | 162 | - float_status *base_fpst = &env->vfp.standard_fp_status; |
163 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | 163 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; |
164 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | 164 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); |
165 | set_flush_inputs_to_zero(false, base_fpst); | 165 | set_flush_inputs_to_zero(false, base_fpst); |
166 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | 166 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { |
167 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | 167 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) |
168 | continue; \ | 168 | continue; \ |
169 | } \ | 169 | } \ |
170 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 170 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
171 | - &env->vfp.standard_fp_status; \ | 171 | - &env->vfp.standard_fp_status; \ |
172 | + &env->vfp.fp_status[FPST_STD]; \ | 172 | + &env->vfp.fp_status[FPST_STD]; \ |
173 | if (!(mask & 1)) { \ | 173 | if (!(mask & 1)) { \ |
174 | /* We need the result but without updating flags */ \ | 174 | /* We need the result but without updating flags */ \ |
175 | scratch_fpst = *fpst; \ | 175 | scratch_fpst = *fpst; \ |
176 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | 176 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
177 | index XXXXXXX..XXXXXXX 100644 | 177 | index XXXXXXX..XXXXXXX 100644 |
178 | --- a/target/arm/tcg/vec_helper.c | 178 | --- a/target/arm/tcg/vec_helper.c |
179 | +++ b/target/arm/tcg/vec_helper.c | 179 | +++ b/target/arm/tcg/vec_helper.c |
180 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | 180 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
181 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | 181 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
182 | CPUARMState *env, uint32_t desc) | 182 | CPUARMState *env, uint32_t desc) |
183 | { | 183 | { |
184 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, | 184 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, |
185 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, | 185 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, |
186 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | 186 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
187 | } | 187 | } |
188 | 188 | ||
189 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | 189 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
190 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | 190 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
191 | CPUARMState *env, uint32_t desc) | 191 | CPUARMState *env, uint32_t desc) |
192 | { | 192 | { |
193 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | 193 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, |
194 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, | 194 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, |
195 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | 195 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
196 | } | 196 | } |
197 | 197 | ||
198 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | 198 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
199 | index XXXXXXX..XXXXXXX 100644 | 199 | index XXXXXXX..XXXXXXX 100644 |
200 | --- a/target/arm/vfp_helper.c | 200 | --- a/target/arm/vfp_helper.c |
201 | +++ b/target/arm/vfp_helper.c | 201 | +++ b/target/arm/vfp_helper.c |
202 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | 202 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
203 | uint32_t a32_flags = 0, a64_flags = 0; | 203 | uint32_t a32_flags = 0, a64_flags = 0; |
204 | 204 | ||
205 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | 205 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
206 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | 206 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); |
207 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | 207 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
208 | /* FZ16 does not generate an input denormal exception. */ | 208 | /* FZ16 does not generate an input denormal exception. */ |
209 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | 209 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
210 | & ~float_flag_input_denormal_flushed); | 210 | & ~float_flag_input_denormal_flushed); |
211 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | 211 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
212 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | 212 | set_float_exception_flags(0, &env->vfp.fp_status_a64); |
213 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | 213 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
214 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | 214 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); |
215 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | 215 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); |
216 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | 216 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); |
217 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | 217 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); |
218 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | 218 | set_float_exception_flags(0, &env->vfp.ah_fp_status); |
219 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | 219 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); |
220 | -- | 220 | -- |
221 | 2.43.0 | 221 | 2.43.0 |
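
(Again a reviewer-side sketch rather than a hunk from this series, with dummy
types, and deliberately ignoring the A32/A64 flag split and the FZ16
input-denormal masking that the real vfp_helper.c code keeps: once every
flavour is indexable, the cumulative-flag gathering and clearing described in
the cpu.h comment reduce to loops over fp_status[].)

/* Illustration only: dummy float_status, no A32/A64 split, no FZ16 masking. */
#include <stdio.h>

typedef struct { int exception_flags; } float_status;

enum { FPST_COUNT = 8 };

static float_status fp_status[FPST_COUNT]; /* stand-in for env->vfp.fp_status[] */

/* FPSCR cumulative flags as the logical OR over every flavour. */
static int gather_fp_flags(void)
{
    int flags = 0;

    for (int i = 0; i < FPST_COUNT; i++) {
        flags |= fp_status[i].exception_flags;
    }
    return flags;
}

/* Clearing on an FPSCR write is the same loop. */
static void clear_fp_flags(void)
{
    for (int i = 0; i < FPST_COUNT; i++) {
        fp_status[i].exception_flags = 0;
    }
}

int main(void)
{
    fp_status[1].exception_flags = 0x4; /* pretend one flavour raised something */
    fp_status[6].exception_flags = 0x1; /* ... and another one as well */
    printf("gathered %#x\n", gather_fp_flags());
    clear_fp_flags();
    printf("after clear %#x\n", gather_fp_flags());
    return 0;
}
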
1 | Replace uses of ah_fp_status_f16 with fp_status[FPST_AH_F16]. | 1 | Replace uses of ah_fp_status_f16 with fp_status[FPST_AH_F16]. |
---|---|---|---|

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h        |  3 +--
 target/arm/cpu.c        |  2 +-
 target/arm/vfp_helper.c | 10 +++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
  * behaviour when FPCR.AH == 1: they don't update cumulative
  * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
  * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
- * which means we need an ah_fp_status_f16 as well.
+ * which means we need an FPST_AH_F16 as well.
  *
  * To avoid having to transfer exception bits around, we simply
  * say that the FPSCR cumulative exception flags are the logical
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
             float_status fp_status_f16_a32;
             float_status fp_status_f16_a64;
             float_status ah_fp_status;
-            float_status ah_fp_status_f16;
         };
     };

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
     set_flush_to_zero(1, &env->vfp.ah_fp_status);
     set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
-    arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
+    arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);

 #ifndef CONFIG_USER_ONLY
     if (kvm_enabled()) {
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
     a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
                   & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
     /*
-     * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
+     * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
      * they are used for insns that must not set the cumulative exception bits.
      */

@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
     set_float_exception_flags(0, &env->vfp.ah_fp_status);
-    set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
 }

 static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
-        set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
     }
     if (changed & FPCR_FZ) {
         bool ftz_enabled = val & FPCR_FZ;
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
         set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
-        set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
     }
     if (changed & FPCR_AH) {
         bool ah_enabled = val & FPCR_AH;
--
2.43.0
Replace ah_fp_status with fp_status[FPST_AH].

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h        | 3 +--
 target/arm/cpu.c        | 6 +++---
 target/arm/vfp_helper.c | 6 +++---
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
  * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
  * using a fixed value for it.
  *
- * The ah_fp_status is needed because some insns have different
+ * FPST_AH is needed because some insns have different
  * behaviour when FPCR.AH == 1: they don't update cumulative
  * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
  * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
             float_status fp_status_a64;
             float_status fp_status_f16_a32;
             float_status fp_status_f16_a64;
-            float_status ah_fp_status;
         };
     };

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
     arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
-    arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
-    set_flush_to_zero(1, &env->vfp.ah_fp_status);
-    set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
+    arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
+    set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
+    set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
     arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);

 #ifndef CONFIG_USER_ONLY
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
     a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
                   & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
     /*
-     * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
+     * We do not merge in flags from FPST_AH or FPST_AH_F16, because
      * they are used for insns that must not set the cumulative exception bits.
      */

@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
     set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
-    set_float_exception_flags(0, &env->vfp.ah_fp_status);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
 }

@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
-        set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
     }
     if (changed & FPCR_AH) {
--
2.43.0
Replace fp_status_f16_a64 with fp_status[FPST_A64_F16].

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h            |  1 -
 target/arm/cpu.c            |  2 +-
 target/arm/tcg/sme_helper.c |  2 +-
 target/arm/tcg/vec_helper.c |  8 ++++----
 target/arm/vfp_helper.c     | 16 ++++++++--------
 5 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
             float_status fp_status_a32;
             float_status fp_status_a64;
             float_status fp_status_f16_a32;
-            float_status fp_status_f16_a64;
         };
     };

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
-    arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
+    arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
     arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
     set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
      * produces default NaNs. We also need a second copy of fp_status with
      * round-to-odd -- see above.
      */
-    fpst_f16 = env->vfp.fp_status_f16_a64;
+    fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
     fpst_std = env->vfp.fp_status_a64;
     set_default_nan_mode(true, &fpst_std);
     set_default_nan_mode(true, &fpst_f16);
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
                             CPUARMState *env, uint32_t desc)
 {
     do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
-             get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
+             get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
 }

 void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
     uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
     intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
     float_status *status = &env->vfp.fp_status_a64;
-    bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
+    bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);

     for (i = 0; i < oprsz; i += sizeof(float32)) {
         float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
                                 CPUARMState *env, uint32_t desc)
 {
     do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
-                 get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
+                 get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
 }

 void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
     intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
     intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
     float_status *status = &env->vfp.fp_status_a64;
-    bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
+    bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);

     for (i = 0; i < oprsz; i += 16) {
         float16 mm_16 = *(float16 *)(vm + i + idx);
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
                   & ~float_flag_input_denormal_flushed);

     a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
-    a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
+    a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
                   & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
     /*
      * We do not merge in flags from FPST_AH or FPST_AH_F16, because
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
     set_float_exception_flags(0, &env->vfp.fp_status_a32);
     set_float_exception_flags(0, &env->vfp.fp_status_a64);
     set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
-    set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         set_float_rounding_mode(i, &env->vfp.fp_status_a32);
         set_float_rounding_mode(i, &env->vfp.fp_status_a64);
         set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
-        set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
     }
     if (changed & FPCR_FZ16) {
         bool ftz_enabled = val & FPCR_FZ16;
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
     }
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
     }
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         if (ah_enabled) {
             /* Change behaviours for A64 FP operations */
             arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
-            arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
+            arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
         } else {
             arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
-            arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
+            arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
         }
     }
     /*
--
2.43.0
Replace fp_status_f16_a32 with fp_status[FPST_A32_F16].

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h            |  1 -
 target/arm/cpu.c            |  2 +-
 target/arm/tcg/vec_helper.c |  4 ++--
 target/arm/vfp_helper.c     | 14 +++++++-------
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
         struct {
             float_status fp_status_a32;
             float_status fp_status_a64;
-            float_status fp_status_f16_a32;
         };
     };

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
     arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
-    arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
+    arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
     arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
                             CPUARMState *env, uint32_t desc)
 {
     do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
-             get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
+             get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
 }

 void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
                                 CPUARMState *env, uint32_t desc)
 {
     do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
-                 get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
+                 get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
 }

 void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
     a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
     a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
     /* FZ16 does not generate an input denormal exception. */
-    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
+    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
                   & ~float_flag_input_denormal_flushed);
     a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
                   & ~float_flag_input_denormal_flushed);
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
      */
     set_float_exception_flags(0, &env->vfp.fp_status_a32);
     set_float_exception_flags(0, &env->vfp.fp_status_a64);
-    set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         }
         set_float_rounding_mode(i, &env->vfp.fp_status_a32);
         set_float_rounding_mode(i, &env->vfp.fp_status_a64);
-        set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
         set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
     }
     if (changed & FPCR_FZ16) {
         bool ftz_enabled = val & FPCR_FZ16;
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
         bool dnan_enabled = val & FPCR_DN;
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
     softfloat_to_vfp_compare(env, \
         FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
 }
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
 DO_VFP_cmp(s, float32, float32, fp_status_a32)
 DO_VFP_cmp(d, float64, float64, fp_status_a32)
 #undef DO_VFP_cmp
--
2.43.0
Replace fp_status_a64 with fp_status[FPST_A64].

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h            |  1 -
 target/arm/cpu.c            |  2 +-
 target/arm/tcg/sme_helper.c |  2 +-
 target/arm/tcg/vec_helper.c | 10 +++++-----
 target/arm/vfp_helper.c     | 16 ++++++++--------
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
         float_status fp_status[FPST_COUNT];
         struct {
             float_status fp_status_a32;
-            float_status fp_status_a64;
         };
     };

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
     set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
     set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
-    arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
+    arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
     arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
      * round-to-odd -- see above.
      */
     fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
-    fpst_std = env->vfp.fp_status_a64;
+    fpst_std = env->vfp.fp_status[FPST_A64];
     set_default_nan_mode(true, &fpst_std);
     set_default_nan_mode(true, &fpst_f16);
     fpst_odd = fpst_std;
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
 void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
                             CPUARMState *env, uint32_t desc)
 {
-    do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
+    do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
             get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
 }

@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
     intptr_t i, oprsz = simd_oprsz(desc);
     uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
     intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
-    float_status *status = &env->vfp.fp_status_a64;
+    float_status *status = &env->vfp.fp_status[FPST_A64];
     bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);

     for (i = 0; i < oprsz; i += sizeof(float32)) {
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
 void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
                                 CPUARMState *env, uint32_t desc)
 {
-    do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
+    do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
                 get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
 }

@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
     uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
     intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
     intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
-    float_status *status = &env->vfp.fp_status_a64;
+    float_status *status = &env->vfp.fp_status[FPST_A64];
     bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);

     for (i = 0; i < oprsz; i += 16) {
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
      */
     bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;

-    *statusp = env->vfp.fp_status_a64;
+    *statusp = env->vfp.fp_status[FPST_A64];
     set_default_nan_mode(true, statusp);

     if (ebf) {
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
     a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
                   & ~float_flag_input_denormal_flushed);

-    a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
+    a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
     a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
                   & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
     /*
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
      * be the architecturally up-to-date exception flag information first.
      */
     set_float_exception_flags(0, &env->vfp.fp_status_a32);
-    set_float_exception_flags(0, &env->vfp.fp_status_a64);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
     set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
             break;
         }
         set_float_rounding_mode(i, &env->vfp.fp_status_a32);
-        set_float_rounding_mode(i, &env->vfp.fp_status_a64);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
         set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
         set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
     }
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
     if (changed & FPCR_FZ) {
         bool ftz_enabled = val & FPCR_FZ;
         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
         /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
     }
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
140 | */ | 140 | */ |
141 | bool fitz_enabled = (val & FPCR_FIZ) || | 141 | bool fitz_enabled = (val & FPCR_FIZ) || |
142 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | 142 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; |
143 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | 143 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); |
144 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | 144 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); |
145 | } | 145 | } |
146 | if (changed & FPCR_DN) { | 146 | if (changed & FPCR_DN) { |
147 | bool dnan_enabled = val & FPCR_DN; | 147 | bool dnan_enabled = val & FPCR_DN; |
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | 148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); |
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | 149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | 150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); |
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | 151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | 152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | 153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); |
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | 154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
155 | 155 | ||
156 | if (ah_enabled) { | 156 | if (ah_enabled) { |
157 | /* Change behaviours for A64 FP operations */ | 157 | /* Change behaviours for A64 FP operations */ |
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | 158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); |
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | 159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
160 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | 160 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); |
161 | } else { | 161 | } else { |
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | 162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | 163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
164 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | 164 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); |
165 | } | 165 | } |
166 | } | 166 | } |
167 | -- | 167 | -- |
168 | 2.43.0 | 168 | 2.43.0 |
1 | Replace with fp_status[FPST_A32]. As this was the last of the | 1 | Replace with fp_status[FPST_A32]. As this was the last of the |
---|---|---|---|
2 | old structures, we can remove the anonymous union and struct. | 2 | old structures, we can remove the anonymous union and struct. |
3 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 5 | --- |
6 | target/arm/cpu.h | 7 +------ | 6 | target/arm/cpu.h | 7 +------ |
7 | target/arm/cpu.c | 2 +- | 7 | target/arm/cpu.c | 2 +- |
8 | target/arm/vfp_helper.c | 18 +++++++++--------- | 8 | target/arm/vfp_helper.c | 18 +++++++++--------- |
9 | 3 files changed, 11 insertions(+), 16 deletions(-) | 9 | 3 files changed, 11 insertions(+), 16 deletions(-) |
10 | 10 | ||
11 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 11 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
12 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/target/arm/cpu.h | 13 | --- a/target/arm/cpu.h |
14 | +++ b/target/arm/cpu.h | 14 | +++ b/target/arm/cpu.h |
15 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | 15 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
16 | uint32_t scratch[8]; | 16 | uint32_t scratch[8]; |
17 | 17 | ||
18 | /* There are a number of distinct float control structures. */ | 18 | /* There are a number of distinct float control structures. */ |
19 | - union { | 19 | - union { |
20 | - float_status fp_status[FPST_COUNT]; | 20 | - float_status fp_status[FPST_COUNT]; |
21 | - struct { | 21 | - struct { |
22 | - float_status fp_status_a32; | 22 | - float_status fp_status_a32; |
23 | - }; | 23 | - }; |
24 | - }; | 24 | - }; |
25 | + float_status fp_status[FPST_COUNT]; | 25 | + float_status fp_status[FPST_COUNT]; |
26 | 26 | ||
27 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | 27 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ |
28 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | 28 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ |
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
30 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/target/arm/cpu.c | 31 | --- a/target/arm/cpu.c |
32 | +++ b/target/arm/cpu.c | 32 | +++ b/target/arm/cpu.c |
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
34 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | 34 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | 35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
36 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | 36 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | 37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | 38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); |
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | 40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); |
42 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | 42 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
43 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/target/arm/vfp_helper.c | 44 | --- a/target/arm/vfp_helper.c |
45 | +++ b/target/arm/vfp_helper.c | 45 | +++ b/target/arm/vfp_helper.c |
46 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | 46 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
47 | { | 47 | { |
48 | uint32_t a32_flags = 0, a64_flags = 0; | 48 | uint32_t a32_flags = 0, a64_flags = 0; |
49 | 49 | ||
50 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | 50 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
51 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | 51 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); |
52 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | 52 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
53 | /* FZ16 does not generate an input denormal exception. */ | 53 | /* FZ16 does not generate an input denormal exception. */ |
54 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | 54 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) |
55 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | 55 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
56 | * values. The caller should have arranged for env->vfp.fpsr to | 56 | * values. The caller should have arranged for env->vfp.fpsr to |
57 | * be the architecturally up-to-date exception flag information first. | 57 | * be the architecturally up-to-date exception flag information first. |
58 | */ | 58 | */ |
59 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | 59 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); |
60 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | 60 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); |
61 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | 61 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); |
62 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | 62 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); |
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | 63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); |
64 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | 64 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
65 | i = float_round_to_zero; | 65 | i = float_round_to_zero; |
66 | break; | 66 | break; |
67 | } | 67 | } |
68 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | 68 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); |
69 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | 69 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); |
70 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | 70 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); |
71 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | 71 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); |
72 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | 72 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); |
73 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | 73 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
74 | } | 74 | } |
75 | if (changed & FPCR_FZ) { | 75 | if (changed & FPCR_FZ) { |
76 | bool ftz_enabled = val & FPCR_FZ; | 76 | bool ftz_enabled = val & FPCR_FZ; |
77 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | 77 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); |
78 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | 78 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); |
79 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | 79 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); |
80 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | 80 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ |
81 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | 81 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); |
82 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | 82 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); |
83 | } | 83 | } |
84 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | 84 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { |
85 | /* | 85 | /* |
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | 86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
87 | } | 87 | } |
88 | if (changed & FPCR_DN) { | 88 | if (changed & FPCR_DN) { |
89 | bool dnan_enabled = val & FPCR_DN; | 89 | bool dnan_enabled = val & FPCR_DN; |
90 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | 90 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); |
91 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | 91 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); |
92 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | 92 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); |
93 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | 93 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
94 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | 94 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
95 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | 95 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
96 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | 96 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ |
97 | } | 97 | } |
98 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | 98 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
99 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | 99 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) |
100 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | 100 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) |
101 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | 101 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) |
102 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | 102 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) |
103 | #undef DO_VFP_cmp | 103 | #undef DO_VFP_cmp |
104 | 104 | ||
105 | /* Integer to float and float to integer conversions */ | 105 | /* Integer to float and float to integer conversions */ |
106 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | 106 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) |
107 | 107 | ||
108 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | 108 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) |
109 | { | 109 | { |
110 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | 110 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); |
111 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | 111 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); |
112 | uint32_t result = pair; | 112 | uint32_t result = pair; |
113 | uint32_t z = (pair >> 32) == 0; | 113 | uint32_t z = (pair >> 32) == 0; |
114 | 114 | ||
115 | -- | 115 | -- |
116 | 2.43.0 | 116 | 2.43.0 |
1 | Select on index instead of pointer. | 1 | Select on index instead of pointer. |
---|---|---|---|
2 | No functional change. | 2 | No functional change. |
3 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 5 | --- |
6 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ | 6 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ |
7 | 1 file changed, 14 insertions(+), 26 deletions(-) | 7 | 1 file changed, 14 insertions(+), 26 deletions(-) |
8 | 8 | ||
9 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | 9 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
10 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/arm/tcg/mve_helper.c | 11 | --- a/target/arm/tcg/mve_helper.c |
12 | +++ b/target/arm/tcg/mve_helper.c | 12 | +++ b/target/arm/tcg/mve_helper.c |
13 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | 13 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
14 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 14 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
15 | continue; \ | 15 | continue; \ |
16 | } \ | 16 | } \ |
17 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 17 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
18 | - &env->vfp.fp_status[FPST_STD]; \ | 18 | - &env->vfp.fp_status[FPST_STD]; \ |
19 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 19 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
20 | if (!(mask & 1)) { \ | 20 | if (!(mask & 1)) { \ |
21 | /* We need the result but without updating flags */ \ | 21 | /* We need the result but without updating flags */ \ |
22 | scratch_fpst = *fpst; \ | 22 | scratch_fpst = *fpst; \ |
23 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | 23 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
24 | r[e] = 0; \ | 24 | r[e] = 0; \ |
25 | continue; \ | 25 | continue; \ |
26 | } \ | 26 | } \ |
27 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 27 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
28 | - &env->vfp.fp_status[FPST_STD]; \ | 28 | - &env->vfp.fp_status[FPST_STD]; \ |
29 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 29 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
30 | if (!(tm & 1)) { \ | 30 | if (!(tm & 1)) { \ |
31 | /* We need the result but without updating flags */ \ | 31 | /* We need the result but without updating flags */ \ |
32 | scratch_fpst = *fpst; \ | 32 | scratch_fpst = *fpst; \ |
33 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | 33 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) |
34 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 34 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
35 | continue; \ | 35 | continue; \ |
36 | } \ | 36 | } \ |
37 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 37 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
38 | - &env->vfp.fp_status[FPST_STD]; \ | 38 | - &env->vfp.fp_status[FPST_STD]; \ |
39 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 39 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
40 | if (!(mask & 1)) { \ | 40 | if (!(mask & 1)) { \ |
41 | /* We need the result but without updating flags */ \ | 41 | /* We need the result but without updating flags */ \ |
42 | scratch_fpst = *fpst; \ | 42 | scratch_fpst = *fpst; \ |
43 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | 43 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) |
44 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | 44 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ |
45 | continue; \ | 45 | continue; \ |
46 | } \ | 46 | } \ |
47 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 47 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
48 | - &env->vfp.fp_status[FPST_STD]; \ | 48 | - &env->vfp.fp_status[FPST_STD]; \ |
49 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 49 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
50 | fpst1 = fpst0; \ | 50 | fpst1 = fpst0; \ |
51 | if (!(mask & 1)) { \ | 51 | if (!(mask & 1)) { \ |
52 | scratch_fpst = *fpst0; \ | 52 | scratch_fpst = *fpst0; \ |
53 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | 53 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) |
54 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 54 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
55 | continue; \ | 55 | continue; \ |
56 | } \ | 56 | } \ |
57 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 57 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
58 | - &env->vfp.fp_status[FPST_STD]; \ | 58 | - &env->vfp.fp_status[FPST_STD]; \ |
59 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 59 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
60 | if (!(mask & 1)) { \ | 60 | if (!(mask & 1)) { \ |
61 | /* We need the result but without updating flags */ \ | 61 | /* We need the result but without updating flags */ \ |
62 | scratch_fpst = *fpst; \ | 62 | scratch_fpst = *fpst; \ |
63 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | 63 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) |
64 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 64 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
65 | continue; \ | 65 | continue; \ |
66 | } \ | 66 | } \ |
67 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 67 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
68 | - &env->vfp.fp_status[FPST_STD]; \ | 68 | - &env->vfp.fp_status[FPST_STD]; \ |
69 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 69 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
70 | if (!(mask & 1)) { \ | 70 | if (!(mask & 1)) { \ |
71 | /* We need the result but without updating flags */ \ | 71 | /* We need the result but without updating flags */ \ |
72 | scratch_fpst = *fpst; \ | 72 | scratch_fpst = *fpst; \ |
73 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | 73 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) |
74 | unsigned e; \ | 74 | unsigned e; \ |
75 | TYPE *m = vm; \ | 75 | TYPE *m = vm; \ |
76 | TYPE ra = (TYPE)ra_in; \ | 76 | TYPE ra = (TYPE)ra_in; \ |
77 | - float_status *fpst = (ESIZE == 2) ? \ | 77 | - float_status *fpst = (ESIZE == 2) ? \ |
78 | - &env->vfp.fp_status[FPST_STD_F16] : \ | 78 | - &env->vfp.fp_status[FPST_STD_F16] : \ |
79 | - &env->vfp.fp_status[FPST_STD]; \ | 79 | - &env->vfp.fp_status[FPST_STD]; \ |
80 | + float_status *fpst = \ | 80 | + float_status *fpst = \ |
81 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 81 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
82 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | 82 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
83 | if (mask & 1) { \ | 83 | if (mask & 1) { \ |
84 | TYPE v = m[H##ESIZE(e)]; \ | 84 | TYPE v = m[H##ESIZE(e)]; \ |
85 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 85 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
86 | if ((mask & emask) == 0) { \ | 86 | if ((mask & emask) == 0) { \ |
87 | continue; \ | 87 | continue; \ |
88 | } \ | 88 | } \ |
89 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 89 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
90 | - &env->vfp.fp_status[FPST_STD]; \ | 90 | - &env->vfp.fp_status[FPST_STD]; \ |
91 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 91 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
92 | if (!(mask & (1 << (e * ESIZE)))) { \ | 92 | if (!(mask & (1 << (e * ESIZE)))) { \ |
93 | /* We need the result but without updating flags */ \ | 93 | /* We need the result but without updating flags */ \ |
94 | scratch_fpst = *fpst; \ | 94 | scratch_fpst = *fpst; \ |
95 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | 95 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
96 | if ((mask & emask) == 0) { \ | 96 | if ((mask & emask) == 0) { \ |
97 | continue; \ | 97 | continue; \ |
98 | } \ | 98 | } \ |
99 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 99 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
100 | - &env->vfp.fp_status[FPST_STD]; \ | 100 | - &env->vfp.fp_status[FPST_STD]; \ |
101 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 101 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
102 | if (!(mask & (1 << (e * ESIZE)))) { \ | 102 | if (!(mask & (1 << (e * ESIZE)))) { \ |
103 | /* We need the result but without updating flags */ \ | 103 | /* We need the result but without updating flags */ \ |
104 | scratch_fpst = *fpst; \ | 104 | scratch_fpst = *fpst; \ |
105 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | 105 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) |
106 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 106 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
107 | continue; \ | 107 | continue; \ |
108 | } \ | 108 | } \ |
109 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 109 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
110 | - &env->vfp.fp_status[FPST_STD]; \ | 110 | - &env->vfp.fp_status[FPST_STD]; \ |
111 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 111 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
112 | if (!(mask & 1)) { \ | 112 | if (!(mask & 1)) { \ |
113 | /* We need the result but without updating flags */ \ | 113 | /* We need the result but without updating flags */ \ |
114 | scratch_fpst = *fpst; \ | 114 | scratch_fpst = *fpst; \ |
115 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | 115 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) |
116 | unsigned e; \ | 116 | unsigned e; \ |
117 | float_status *fpst; \ | 117 | float_status *fpst; \ |
118 | float_status scratch_fpst; \ | 118 | float_status scratch_fpst; \ |
119 | - float_status *base_fpst = (ESIZE == 2) ? \ | 119 | - float_status *base_fpst = (ESIZE == 2) ? \ |
120 | - &env->vfp.fp_status[FPST_STD_F16] : \ | 120 | - &env->vfp.fp_status[FPST_STD_F16] : \ |
121 | - &env->vfp.fp_status[FPST_STD]; \ | 121 | - &env->vfp.fp_status[FPST_STD]; \ |
122 | + float_status *base_fpst = \ | 122 | + float_status *base_fpst = \ |
123 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 123 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
124 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | 124 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ |
125 | set_float_rounding_mode(rmode, base_fpst); \ | 125 | set_float_rounding_mode(rmode, base_fpst); \ |
126 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | 126 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
127 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | 127 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) |
128 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | 128 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
129 | continue; \ | 129 | continue; \ |
130 | } \ | 130 | } \ |
131 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | 131 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
132 | - &env->vfp.fp_status[FPST_STD]; \ | 132 | - &env->vfp.fp_status[FPST_STD]; \ |
133 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | 133 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
134 | if (!(mask & 1)) { \ | 134 | if (!(mask & 1)) { \ |
135 | /* We need the result but without updating flags */ \ | 135 | /* We need the result but without updating flags */ \ |
136 | scratch_fpst = *fpst; \ | 136 | scratch_fpst = *fpst; \ |
137 | -- | 137 | -- |
138 | 2.43.0 | 138 | 2.43.0 |
1 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. | 1 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | --- | 4 | --- |
5 | target/arm/vfp_helper.c | 10 +++++----- | 5 | target/arm/vfp_helper.c | 10 +++++----- |
6 | 1 file changed, 5 insertions(+), 5 deletions(-) | 6 | 1 file changed, 5 insertions(+), 5 deletions(-) |
7 | 7 | ||
8 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | 8 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
9 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
10 | --- a/target/arm/vfp_helper.c | 10 | --- a/target/arm/vfp_helper.c |
11 | +++ b/target/arm/vfp_helper.c | 11 | +++ b/target/arm/vfp_helper.c |
12 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) | 12 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
13 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | 13 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
14 | { \ | 14 | { \ |
15 | softfloat_to_vfp_compare(env, \ | 15 | softfloat_to_vfp_compare(env, \ |
16 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ | 16 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
17 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ | 17 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
18 | } \ | 18 | } \ |
19 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | 19 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
20 | { \ | 20 | { \ |
21 | softfloat_to_vfp_compare(env, \ | 21 | softfloat_to_vfp_compare(env, \ |
22 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | 22 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ |
23 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | 23 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ |
24 | } | 24 | } |
25 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | 25 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
26 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | 26 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) |
27 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | 27 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) |
28 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) | 28 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) |
29 | +DO_VFP_cmp(s, float32, float32, FPST_A32) | 29 | +DO_VFP_cmp(s, float32, float32, FPST_A32) |
30 | +DO_VFP_cmp(d, float64, float64, FPST_A32) | 30 | +DO_VFP_cmp(d, float64, float64, FPST_A32) |
31 | #undef DO_VFP_cmp | 31 | #undef DO_VFP_cmp |
32 | 32 | ||
33 | /* Integer to float and float to integer conversions */ | 33 | /* Integer to float and float to integer conversions */ |
34 | -- | 34 | -- |
35 | 2.43.0 | 35 | 2.43.0 |
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/vec_internal.h | 20 ++++++++++++++++++++ | 3 | target/arm/tcg/vec_internal.h | 20 ++++++++++++++++++++ |
4 | target/arm/tcg/helper-a64.c | 15 +-------------- | 4 | target/arm/tcg/helper-a64.c | 15 +-------------- |
5 | 2 files changed, 21 insertions(+), 14 deletions(-) | 5 | 2 files changed, 21 insertions(+), 14 deletions(-) |
6 | 6 | ||
7 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | 7 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h |
8 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/arm/tcg/vec_internal.h | 9 | --- a/target/arm/tcg/vec_internal.h |
10 | +++ b/target/arm/tcg/vec_internal.h | 10 | +++ b/target/arm/tcg/vec_internal.h |
11 | @@ -XXX,XX +XXX,XX @@ | 11 | @@ -XXX,XX +XXX,XX @@ |
12 | #ifndef TARGET_ARM_VEC_INTERNAL_H | 12 | #ifndef TARGET_ARM_VEC_INTERNAL_H |
13 | #define TARGET_ARM_VEC_INTERNAL_H | 13 | #define TARGET_ARM_VEC_INTERNAL_H |
14 | 14 | ||
15 | +#include "fpu/softfloat.h" | 15 | +#include "fpu/softfloat.h" |
16 | + | 16 | + |
17 | /* | 17 | /* |
18 | * Note that vector data is stored in host-endian 64-bit chunks, | 18 | * Note that vector data is stored in host-endian 64-bit chunks, |
19 | * so addressing units smaller than that needs a host-endian fixup. | 19 | * so addressing units smaller than that needs a host-endian fixup. |
20 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | 20 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, |
21 | */ | 21 | */ |
22 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | 22 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); |
23 | 23 | ||
24 | +/* | 24 | +/* |
25 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. | 25 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. |
26 | + */ | 26 | + */ |
27 | +static inline float16 float16_ah_chs(float16 a) | 27 | +static inline float16 float16_ah_chs(float16 a) |
28 | +{ | 28 | +{ |
29 | + return float16_is_any_nan(a) ? a : float16_chs(a); | 29 | + return float16_is_any_nan(a) ? a : float16_chs(a); |
30 | +} | 30 | +} |
31 | + | 31 | + |
32 | +static inline float32 float32_ah_chs(float32 a) | 32 | +static inline float32 float32_ah_chs(float32 a) |
33 | +{ | 33 | +{ |
34 | + return float32_is_any_nan(a) ? a : float32_chs(a); | 34 | + return float32_is_any_nan(a) ? a : float32_chs(a); |
35 | +} | 35 | +} |
36 | + | 36 | + |
37 | +static inline float64 float64_ah_chs(float64 a) | 37 | +static inline float64 float64_ah_chs(float64 a) |
38 | +{ | 38 | +{ |
39 | + return float64_is_any_nan(a) ? a : float64_chs(a); | 39 | + return float64_is_any_nan(a) ? a : float64_chs(a); |
40 | +} | 40 | +} |
41 | + | 41 | + |
42 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | 42 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ |
43 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | 43 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c |
44 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/target/arm/tcg/helper-a64.c | 45 | --- a/target/arm/tcg/helper-a64.c |
46 | +++ b/target/arm/tcg/helper-a64.c | 46 | +++ b/target/arm/tcg/helper-a64.c |
47 | @@ -XXX,XX +XXX,XX @@ | 47 | @@ -XXX,XX +XXX,XX @@ |
48 | #ifdef CONFIG_USER_ONLY | 48 | #ifdef CONFIG_USER_ONLY |
49 | #include "user/page-protection.h" | 49 | #include "user/page-protection.h" |
50 | #endif | 50 | #endif |
51 | +#include "vec_internal.h" | 51 | +#include "vec_internal.h" |
52 | 52 | ||
53 | /* C2.4.7 Multiply and divide */ | 53 | /* C2.4.7 Multiply and divide */ |
54 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | 54 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ |
55 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | 55 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) |
56 | return -float64_lt(b, a, fpst); | 56 | return -float64_lt(b, a, fpst); |
57 | } | 57 | } |
58 | 58 | ||
59 | -static float16 float16_ah_chs(float16 a) | 59 | -static float16 float16_ah_chs(float16 a) |
60 | -{ | 60 | -{ |
61 | - return float16_is_any_nan(a) ? a : float16_chs(a); | 61 | - return float16_is_any_nan(a) ? a : float16_chs(a); |
62 | -} | 62 | -} |
63 | - | 63 | - |
64 | -static float32 float32_ah_chs(float32 a) | 64 | -static float32 float32_ah_chs(float32 a) |
65 | -{ | 65 | -{ |
66 | - return float32_is_any_nan(a) ? a : float32_chs(a); | 66 | - return float32_is_any_nan(a) ? a : float32_chs(a); |
67 | -} | 67 | -} |
68 | - | 68 | - |
69 | -static float64 float64_ah_chs(float64 a) | 69 | -static float64 float64_ah_chs(float64 a) |
70 | -{ | 70 | -{ |
71 | - return float64_is_any_nan(a) ? a : float64_chs(a); | 71 | - return float64_is_any_nan(a) ? a : float64_chs(a); |
72 | -} | 72 | -} |
73 | /* | 73 | /* |
74 | * Reciprocal step and sqrt step. Note that unlike the A32/T32 | 74 | * Reciprocal step and sqrt step. Note that unlike the A32/T32 |
75 | * versions, these do a fully fused multiply-add or | 75 | * versions, these do a fully fused multiply-add or |
76 | -- | 76 | -- |
77 | 2.43.0 | 77 | 2.43.0 |
1 | Add versions of float*_ah_chs which take fpcr_ah. | 1 | Add versions of float*_ah_chs which take fpcr_ah. |
---|---|---|---|
2 | These will help simplify some usages. | 2 | These will help simplify some usages. |
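
For example (a sketch of the intended call pattern; the concrete
conversions follow in the next patches), callers can collapse the
open-coded FPCR.AH NaN check into a single call:

    nn = float16_maybe_ah_chs(nn, fpcr_ah);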
3 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 5 | --- |
6 | target/arm/tcg/vec_internal.h | 15 +++++++++++++++ | 6 | target/arm/tcg/vec_internal.h | 15 +++++++++++++++ |
7 | 1 file changed, 15 insertions(+) | 7 | 1 file changed, 15 insertions(+) |
8 | 8 | ||
9 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | 9 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h |
10 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/arm/tcg/vec_internal.h | 11 | --- a/target/arm/tcg/vec_internal.h |
12 | +++ b/target/arm/tcg/vec_internal.h | 12 | +++ b/target/arm/tcg/vec_internal.h |
13 | @@ -XXX,XX +XXX,XX @@ static inline float64 float64_ah_chs(float64 a) | 13 | @@ -XXX,XX +XXX,XX @@ static inline float64 float64_ah_chs(float64 a) |
14 | return float64_is_any_nan(a) ? a : float64_chs(a); | 14 | return float64_is_any_nan(a) ? a : float64_chs(a); |
15 | } | 15 | } |
16 | 16 | ||
17 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | 17 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) |
18 | +{ | 18 | +{ |
19 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | 19 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); |
20 | +} | 20 | +} |
21 | + | 21 | + |
22 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | 22 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) |
23 | +{ | 23 | +{ |
24 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | 24 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); |
25 | +} | 25 | +} |
26 | + | 26 | + |
27 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | 27 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) |
28 | +{ | 28 | +{ |
29 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | 29 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); |
30 | +} | 30 | +} |
31 | + | 31 | + |
32 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | 32 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ |
33 | -- | 33 | -- |
34 | 2.43.0 | 34 | 2.43.0 |
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | --- | 2 | --- |
3 | target/arm/tcg/sve_helper.c | 12 ++++++------ | 3 | target/arm/tcg/sve_helper.c | 12 ++++++------ |
4 | 1 file changed, 6 insertions(+), 6 deletions(-) | 4 | 1 file changed, 6 insertions(+), 6 deletions(-) |
5 | 5 | ||
6 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | 6 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
7 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
8 | --- a/target/arm/tcg/sve_helper.c | 8 | --- a/target/arm/tcg/sve_helper.c |
9 | +++ b/target/arm/tcg/sve_helper.c | 9 | +++ b/target/arm/tcg/sve_helper.c |
10 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | 10 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
11 | if (mm & 1) { | 11 | if (mm & 1) { |
12 | nn = float16_one; | 12 | nn = float16_one; |
13 | } | 13 | } |
14 | - if ((mm & 2) && !(fpcr_ah && float16_is_any_nan(nn))) { | 14 | - if ((mm & 2) && !(fpcr_ah && float16_is_any_nan(nn))) { |
15 | - nn ^= (1 << 15); | 15 | - nn ^= (1 << 15); |
16 | + if (mm & 2) { | 16 | + if (mm & 2) { |
17 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | 17 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); |
18 | } | 18 | } |
19 | d[i] = nn; | 19 | d[i] = nn; |
20 | } | 20 | } |
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) |
22 | if (mm & 1) { | 22 | if (mm & 1) { |
23 | nn = float32_one; | 23 | nn = float32_one; |
24 | } | 24 | } |
25 | - if ((mm & 2) && !(fpcr_ah && float32_is_any_nan(nn))) { | 25 | - if ((mm & 2) && !(fpcr_ah && float32_is_any_nan(nn))) { |
26 | - nn ^= (1U << 31); | 26 | - nn ^= (1U << 31); |
27 | + if (mm & 2) { | 27 | + if (mm & 2) { |
28 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | 28 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); |
29 | } | 29 | } |
30 | d[i] = nn; | 30 | d[i] = nn; |
31 | } | 31 | } |
32 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | 32 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) |
33 | if (mm & 1) { | 33 | if (mm & 1) { |
34 | nn = float64_one; | 34 | nn = float64_one; |
35 | } | 35 | } |
36 | - if ((mm & 2) && !(fpcr_ah && float64_is_any_nan(nn))) { | 36 | - if ((mm & 2) && !(fpcr_ah && float64_is_any_nan(nn))) { |
37 | - nn ^= (1ULL << 63); | 37 | - nn ^= (1ULL << 63); |
38 | + if (mm & 2) { | 38 | + if (mm & 2) { |
39 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | 39 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); |
40 | } | 40 | } |
41 | d[i] = nn; | 41 | d[i] = nn; |
42 | } | 42 | } |
43 | -- | 43 | -- |
44 | 2.43.0 | 44 | 2.43.0 |
1 | The construction of neg_imag and neg_real was done to make it easy | 1 | The construction of neg_imag and neg_real was done to make it easy |
---|---|---|---|
2 | to apply both in parallel with two simple logical operations. This | 2 | to apply both in parallel with two simple logical operations. This |
3 | changed with FPCR.AH, which is more complex than that. | 3 | changed with FPCR.AH, which is more complex than that. |
4 | 4 | ||
5 | Note that there was a naming issue with neg_imag and neg_real. | 5 | Note that there was a naming issue with neg_imag and neg_real. |
6 | They were named backward, with neg_imag being non-zero for rot=1, | 6 | They were named backward, with neg_imag being non-zero for rot=1, |
7 | and vice versa. This was combined with reversed usage within the | 7 | and vice versa. This was combined with reversed usage within the |
8 | loop, so that the negation came out correct in the end. | 8 | loop, so that the negation came out correct in the end. |
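
To make that concrete, evaluating the removed lines below for rot == 1
gives

    neg_imag = float16_set_sign(0, 1);   /* 0x8000, sign bit set */
    neg_real = float16_chs(neg_imag);    /* 0x0000 */

and it is then neg_imag that the loop applies to e3, and neg_real to e1.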
9 | 9 | ||
10 | Using the rot variable introduced with fpcr_ah, it's easier to | 10 | Using the rot variable introduced with fpcr_ah, it's easier to |
11 | match the pseudocode for the instruction. | 11 | match the pseudocode for the instruction. |
12 | 12 | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
14 | --- | 14 | --- |
15 | target/arm/tcg/sve_helper.c | 33 ++++++++++++--------------------- | 15 | target/arm/tcg/sve_helper.c | 33 ++++++++++++--------------------- |
16 | 1 file changed, 12 insertions(+), 21 deletions(-) | 16 | 1 file changed, 12 insertions(+), 21 deletions(-) |
17 | 17 | ||
18 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | 18 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/arm/tcg/sve_helper.c | 20 | --- a/target/arm/tcg/sve_helper.c |
21 | +++ b/target/arm/tcg/sve_helper.c | 21 | +++ b/target/arm/tcg/sve_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | 22 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, |
23 | uint64_t *g = vg; | 23 | uint64_t *g = vg; |
24 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | 24 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); |
25 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | 25 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
26 | - float16 neg_imag = float16_set_sign(0, rot); | 26 | - float16 neg_imag = float16_set_sign(0, rot); |
27 | - float16 neg_real = float16_chs(neg_imag); | 27 | - float16 neg_real = float16_chs(neg_imag); |
28 | 28 | ||
29 | do { | 29 | do { |
30 | uint64_t pg = g[(i - 1) >> 6]; | 30 | uint64_t pg = g[(i - 1) >> 6]; |
31 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | 31 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, |
32 | e2 = *(float16 *)(vn + H1_2(j)); | 32 | e2 = *(float16 *)(vn + H1_2(j)); |
33 | e3 = *(float16 *)(vm + H1_2(i)); | 33 | e3 = *(float16 *)(vm + H1_2(i)); |
34 | 34 | ||
35 | - if (neg_real && !(fpcr_ah && float16_is_any_nan(e1))) { | 35 | - if (neg_real && !(fpcr_ah && float16_is_any_nan(e1))) { |
36 | - e1 ^= neg_real; | 36 | - e1 ^= neg_real; |
37 | - } | 37 | - } |
38 | - if (neg_imag && !(fpcr_ah && float16_is_any_nan(e3))) { | 38 | - if (neg_imag && !(fpcr_ah && float16_is_any_nan(e3))) { |
39 | - e3 ^= neg_imag; | 39 | - e3 ^= neg_imag; |
40 | + if (rot) { | 40 | + if (rot) { |
41 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | 41 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); |
42 | + } else { | 42 | + } else { |
43 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | 43 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); |
44 | } | 44 | } |
45 | 45 | ||
46 | if (likely((pg >> (i & 63)) & 1)) { | 46 | if (likely((pg >> (i & 63)) & 1)) { |
47 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | 47 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, |
48 | uint64_t *g = vg; | 48 | uint64_t *g = vg; |
49 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | 49 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); |
50 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | 50 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
51 | - float32 neg_imag = float32_set_sign(0, rot); | 51 | - float32 neg_imag = float32_set_sign(0, rot); |
52 | - float32 neg_real = float32_chs(neg_imag); | 52 | - float32 neg_real = float32_chs(neg_imag); |
53 | 53 | ||
54 | do { | 54 | do { |
55 | uint64_t pg = g[(i - 1) >> 6]; | 55 | uint64_t pg = g[(i - 1) >> 6]; |
56 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | 56 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, |
57 | e2 = *(float32 *)(vn + H1_2(j)); | 57 | e2 = *(float32 *)(vn + H1_2(j)); |
58 | e3 = *(float32 *)(vm + H1_2(i)); | 58 | e3 = *(float32 *)(vm + H1_2(i)); |
59 | 59 | ||
60 | - if (neg_real && !(fpcr_ah && float32_is_any_nan(e1))) { | 60 | - if (neg_real && !(fpcr_ah && float32_is_any_nan(e1))) { |
61 | - e1 ^= neg_real; | 61 | - e1 ^= neg_real; |
62 | - } | 62 | - } |
63 | - if (neg_imag && !(fpcr_ah && float32_is_any_nan(e3))) { | 63 | - if (neg_imag && !(fpcr_ah && float32_is_any_nan(e3))) { |
64 | - e3 ^= neg_imag; | 64 | - e3 ^= neg_imag; |
65 | + if (rot) { | 65 | + if (rot) { |
66 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | 66 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); |
67 | + } else { | 67 | + } else { |
68 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | 68 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); |
69 | } | 69 | } |
70 | 70 | ||
71 | if (likely((pg >> (i & 63)) & 1)) { | 71 | if (likely((pg >> (i & 63)) & 1)) { |
72 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | 72 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, |
73 | uint64_t *g = vg; | 73 | uint64_t *g = vg; |
74 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | 74 | bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); |
75 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | 75 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
76 | - float64 neg_imag = float64_set_sign(0, rot); | 76 | - float64 neg_imag = float64_set_sign(0, rot); |
77 | - float64 neg_real = float64_chs(neg_imag); | 77 | - float64 neg_real = float64_chs(neg_imag); |
78 | 78 | ||
79 | do { | 79 | do { |
80 | uint64_t pg = g[(i - 1) >> 6]; | 80 | uint64_t pg = g[(i - 1) >> 6]; |
81 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | 81 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, |
82 | e2 = *(float64 *)(vn + H1_2(j)); | 82 | e2 = *(float64 *)(vn + H1_2(j)); |
83 | e3 = *(float64 *)(vm + H1_2(i)); | 83 | e3 = *(float64 *)(vm + H1_2(i)); |
84 | 84 | ||
85 | - if (neg_real && !(fpcr_ah && float64_is_any_nan(e1))) { | 85 | - if (neg_real && !(fpcr_ah && float64_is_any_nan(e1))) { |
86 | - e1 ^= neg_real; | 86 | - e1 ^= neg_real; |
87 | - } | 87 | - } |
88 | - if (neg_imag && !(fpcr_ah && float64_is_any_nan(e3))) { | 88 | - if (neg_imag && !(fpcr_ah && float64_is_any_nan(e3))) { |
89 | - e3 ^= neg_imag; | 89 | - e3 ^= neg_imag; |
90 | + if (rot) { | 90 | + if (rot) { |
91 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | 91 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); |
92 | + } else { | 92 | + } else { |
93 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | 93 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); |
94 | } | 94 | } |
95 | 95 | ||
96 | if (likely((pg >> (i & 63)) & 1)) { | 96 | if (likely((pg >> (i & 63)) & 1)) { |
97 | -- | 97 | -- |
98 | 2.43.0 | 98 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | The construction of neg_imag and neg_real was done to make it easy | ||
2 | to apply both in parallel with two simple logical operations. This | ||
3 | changed with FPCR.AH, which is more complex than that. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/vec_helper.c | 51 +++++++++++-------------------------- | ||
8 | 1 file changed, 15 insertions(+), 36 deletions(-) | ||
9 | |||
10 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/arm/tcg/vec_helper.c | ||
13 | +++ b/target/arm/tcg/vec_helper.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
15 | float16 *d = vd; | ||
16 | float16 *n = vn; | ||
17 | float16 *m = vm; | ||
18 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
19 | - uint32_t neg_imag = neg_real ^ 1; | ||
20 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
21 | bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
22 | uintptr_t i; | ||
23 | |||
24 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
25 | - neg_real <<= 15; | ||
26 | - neg_imag <<= 15; | ||
27 | - | ||
28 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
29 | float16 e0 = n[H2(i)]; | ||
30 | float16 e1 = m[H2(i + 1)]; | ||
31 | float16 e2 = n[H2(i + 1)]; | ||
32 | float16 e3 = m[H2(i)]; | ||
33 | |||
34 | - /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */ | ||
35 | - if (!(fpcr_ah && float16_is_any_nan(e1))) { | ||
36 | - e1 ^= neg_imag; | ||
37 | - } | ||
38 | - if (!(fpcr_ah && float16_is_any_nan(e3))) { | ||
39 | - e3 ^= neg_real; | ||
40 | + if (rot) { | ||
41 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
42 | + } else { | ||
43 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
44 | } | ||
45 | |||
46 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
47 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
48 | float32 *d = vd; | ||
49 | float32 *n = vn; | ||
50 | float32 *m = vm; | ||
51 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | - uint32_t neg_imag = neg_real ^ 1; | ||
53 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
54 | bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
55 | uintptr_t i; | ||
56 | |||
57 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
58 | - neg_real <<= 31; | ||
59 | - neg_imag <<= 31; | ||
60 | - | ||
61 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
62 | float32 e0 = n[H4(i)]; | ||
63 | float32 e1 = m[H4(i + 1)]; | ||
64 | float32 e2 = n[H4(i + 1)]; | ||
65 | float32 e3 = m[H4(i)]; | ||
66 | |||
67 | - /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */ | ||
68 | - if (!(fpcr_ah && float32_is_any_nan(e1))) { | ||
69 | - e1 ^= neg_imag; | ||
70 | - } | ||
71 | - if (!(fpcr_ah && float32_is_any_nan(e3))) { | ||
72 | - e3 ^= neg_real; | ||
73 | + if (rot) { | ||
74 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
75 | + } else { | ||
76 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
77 | } | ||
78 | |||
79 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
80 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
81 | float64 *d = vd; | ||
82 | float64 *n = vn; | ||
83 | float64 *m = vm; | ||
84 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
85 | - uint64_t neg_imag = neg_real ^ 1; | ||
86 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
87 | bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
88 | uintptr_t i; | ||
89 | |||
90 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
91 | - neg_real <<= 63; | ||
92 | - neg_imag <<= 63; | ||
93 | - | ||
94 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
95 | float64 e0 = n[i]; | ||
96 | float64 e1 = m[i + 1]; | ||
97 | float64 e2 = n[i + 1]; | ||
98 | float64 e3 = m[i]; | ||
99 | |||
100 | - /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */ | ||
101 | - if (!(fpcr_ah && float64_is_any_nan(e1))) { | ||
102 | - e1 ^= neg_imag; | ||
103 | - } | ||
104 | - if (!(fpcr_ah && float64_is_any_nan(e3))) { | ||
105 | - e3 ^= neg_real; | ||
106 | + if (rot) { | ||
107 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
108 | + } else { | ||
109 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
110 | } | ||
111 | |||
112 | d[i] = float64_add(e0, e1, fpst); | ||
113 | -- | ||
114 | 2.43.0 | ||
1 | The float*_muladd functions have a flags argument that can | 1 | The float*_muladd functions have a flags argument that can |
---|---|---|---|
2 | perform optional negation of various operands. We don't use | 2 | perform optional negation of various operands. We don't use |
3 | that for "normal" arm fmla, because the muladd flags are not | 3 | that for "normal" arm fmla, because the muladd flags are not |
4 | applied when an input is a NaN. But since FEAT_AFP does not | 4 | applied when an input is a NaN. But since FEAT_AFP does not |
5 | negate NaNs, this behaviour is exactly what we need. | 5 | negate NaNs, this behaviour is exactly what we need. |
6 | 6 | ||
7 | Since we have separate helper entry points for the various | 7 | Since we have separate helper entry points for the various |
8 | fmla, fmls, fnmla, fnmls instructions, it's easy to just | 8 | fmla, fmls, fnmla, fnmls instructions, it's easy to just |
9 | pass down the exact values required so that no conditional | 9 | pass down the exact values required so that no conditional |
10 | branch is required within the inner loop. | 10 | branch is required within the inner loop. |
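As an illustration (a sketch, not part of the patch), the two formulations below agree whenever e1 is not a NaN; they differ only in that the XOR form also flips the sign of a NaN e1, which is the pre-AH behaviour:

    /* Sketch only: half-precision fmls written both ways. */
    static float16 fmls_h_via_xor(float16 e1, float16 e2, float16 e3,
                                  float_status *st)
    {
        /* Negate e1 by flipping its sign bit, NaN or not. */
        return float16_muladd(e1 ^ 0x8000, e2, e3, 0, st);
    }

    static float16 fmls_h_via_flag(float16 e1, float16 e2, float16 e3,
                                   float_status *st)
    {
        /* Let the muladd negate the product; NaN inputs keep their sign. */
        return float16_muladd(e1, e2, e3, float_muladd_negate_product, st);
    }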
11 | 11 | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
13 | --- | 13 | --- |
14 | target/arm/tcg/sve_helper.c | 93 +++++++++++++++++-------------------- | 14 | target/arm/tcg/sve_helper.c | 93 +++++++++++++++++-------------------- |
15 | 1 file changed, 42 insertions(+), 51 deletions(-) | 15 | 1 file changed, 42 insertions(+), 51 deletions(-) |
16 | 16 | ||
17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/tcg/sve_helper.c | 19 | --- a/target/arm/tcg/sve_helper.c |
20 | +++ b/target/arm/tcg/sve_helper.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | 21 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) |
22 | 22 | ||
23 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | 23 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
24 | float_status *status, uint32_t desc, | 24 | float_status *status, uint32_t desc, |
25 | - uint16_t neg1, uint16_t neg3, bool fpcr_ah) | 25 | - uint16_t neg1, uint16_t neg3, bool fpcr_ah) |
26 | + uint16_t neg1, uint16_t neg3, int flags) | 26 | + uint16_t neg1, uint16_t neg3, int flags) |
27 | { | 27 | { |
28 | intptr_t i = simd_oprsz(desc); | 28 | intptr_t i = simd_oprsz(desc); |
29 | uint64_t *g = vg; | 29 | uint64_t *g = vg; |
30 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | 30 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
31 | if (likely((pg >> (i & 63)) & 1)) { | 31 | if (likely((pg >> (i & 63)) & 1)) { |
32 | float16 e1, e2, e3, r; | 32 | float16 e1, e2, e3, r; |
33 | 33 | ||
34 | - e1 = *(uint16_t *)(vn + H1_2(i)); | 34 | - e1 = *(uint16_t *)(vn + H1_2(i)); |
35 | + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | 35 | + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; |
36 | e2 = *(uint16_t *)(vm + H1_2(i)); | 36 | e2 = *(uint16_t *)(vm + H1_2(i)); |
37 | - e3 = *(uint16_t *)(va + H1_2(i)); | 37 | - e3 = *(uint16_t *)(va + H1_2(i)); |
38 | - if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) { | 38 | - if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) { |
39 | - e1 ^= neg1; | 39 | - e1 ^= neg1; |
40 | - } | 40 | - } |
41 | - if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) { | 41 | - if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) { |
42 | - e3 ^= neg3; | 42 | - e3 ^= neg3; |
43 | - } | 43 | - } |
44 | - r = float16_muladd(e1, e2, e3, 0, status); | 44 | - r = float16_muladd(e1, e2, e3, 0, status); |
45 | + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | 45 | + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; |
46 | + r = float16_muladd(e1, e2, e3, flags, status); | 46 | + r = float16_muladd(e1, e2, e3, flags, status); |
47 | *(uint16_t *)(vd + H1_2(i)) = r; | 47 | *(uint16_t *)(vd + H1_2(i)) = r; |
48 | } | 48 | } |
49 | } while (i & 63); | 49 | } while (i & 63); |
50 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | 50 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
51 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 51 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
52 | void *vg, float_status *status, uint32_t desc) | 52 | void *vg, float_status *status, uint32_t desc) |
53 | { | 53 | { |
54 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false); | 54 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false); |
55 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | 55 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); |
56 | } | 56 | } |
57 | 57 | ||
58 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 58 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
59 | void *vg, float_status *status, uint32_t desc) | 59 | void *vg, float_status *status, uint32_t desc) |
60 | { | 60 | { |
61 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false); | 61 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false); |
62 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | 62 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); |
63 | } | 63 | } |
64 | 64 | ||
65 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 65 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
66 | void *vg, float_status *status, uint32_t desc) | 66 | void *vg, float_status *status, uint32_t desc) |
67 | { | 67 | { |
68 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false); | 68 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false); |
69 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | 69 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); |
70 | } | 70 | } |
71 | 71 | ||
72 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 72 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
73 | void *vg, float_status *status, uint32_t desc) | 73 | void *vg, float_status *status, uint32_t desc) |
74 | { | 74 | { |
75 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false); | 75 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false); |
76 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | 76 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); |
77 | } | 77 | } |
78 | 78 | ||
79 | void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 79 | void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
80 | void *vg, float_status *status, uint32_t desc) | 80 | void *vg, float_status *status, uint32_t desc) |
81 | { | 81 | { |
82 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true); | 82 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true); |
83 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | 83 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
84 | + float_muladd_negate_product); | 84 | + float_muladd_negate_product); |
85 | } | 85 | } |
86 | 86 | ||
87 | void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 87 | void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
88 | void *vg, float_status *status, uint32_t desc) | 88 | void *vg, float_status *status, uint32_t desc) |
89 | { | 89 | { |
90 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true); | 90 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true); |
91 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | 91 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
92 | + float_muladd_negate_product | float_muladd_negate_c); | 92 | + float_muladd_negate_product | float_muladd_negate_c); |
93 | } | 93 | } |
94 | 94 | ||
95 | void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | 95 | void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
96 | void *vg, float_status *status, uint32_t desc) | 96 | void *vg, float_status *status, uint32_t desc) |
97 | { | 97 | { |
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true); | 98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true); |
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | 99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
100 | + float_muladd_negate_c); | 100 | + float_muladd_negate_c); |
101 | } | 101 | } |
102 | 102 | ||
103 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | 103 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, |
104 | float_status *status, uint32_t desc, | 104 | float_status *status, uint32_t desc, |
105 | - uint32_t neg1, uint32_t neg3, bool fpcr_ah) | 105 | - uint32_t neg1, uint32_t neg3, bool fpcr_ah) |
106 | + uint32_t neg1, uint32_t neg3, int flags) | 106 | + uint32_t neg1, uint32_t neg3, int flags) |
107 | { | 107 | { |
108 | intptr_t i = simd_oprsz(desc); | 108 | intptr_t i = simd_oprsz(desc); |
109 | uint64_t *g = vg; | 109 | uint64_t *g = vg; |
110 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | 110 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, |
111 | if (likely((pg >> (i & 63)) & 1)) { | 111 | if (likely((pg >> (i & 63)) & 1)) { |
112 | float32 e1, e2, e3, r; | 112 | float32 e1, e2, e3, r; |
113 | 113 | ||
114 | - e1 = *(uint32_t *)(vn + H1_4(i)); | 114 | - e1 = *(uint32_t *)(vn + H1_4(i)); |
115 | + e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | 115 | + e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; |
116 | e2 = *(uint32_t *)(vm + H1_4(i)); | 116 | e2 = *(uint32_t *)(vm + H1_4(i)); |
117 | - e3 = *(uint32_t *)(va + H1_4(i)); | 117 | - e3 = *(uint32_t *)(va + H1_4(i)); |
118 | - if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) { | 118 | - if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) { |
119 | - e1 ^= neg1; | 119 | - e1 ^= neg1; |
120 | - } | 120 | - } |
121 | - if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) { | 121 | - if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) { |
122 | - e3 ^= neg3; | 122 | - e3 ^= neg3; |
123 | - } | 123 | - } |
124 | - r = float32_muladd(e1, e2, e3, 0, status); | 124 | - r = float32_muladd(e1, e2, e3, 0, status); |
125 | + e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | 125 | + e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; |
126 | + r = float32_muladd(e1, e2, e3, flags, status); | 126 | + r = float32_muladd(e1, e2, e3, flags, status); |
127 | *(uint32_t *)(vd + H1_4(i)) = r; | 127 | *(uint32_t *)(vd + H1_4(i)) = r; |
128 | } | 128 | } |
129 | } while (i & 63); | 129 | } while (i & 63); |
130 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | 130 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, |
131 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 131 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
132 | void *vg, float_status *status, uint32_t desc) | 132 | void *vg, float_status *status, uint32_t desc) |
133 | { | 133 | { |
134 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false); | 134 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false); |
135 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | 135 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); |
136 | } | 136 | } |
137 | 137 | ||
138 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 138 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
139 | void *vg, float_status *status, uint32_t desc) | 139 | void *vg, float_status *status, uint32_t desc) |
140 | { | 140 | { |
141 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false); | 141 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false); |
142 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | 142 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); |
143 | } | 143 | } |
144 | 144 | ||
145 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 145 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
146 | void *vg, float_status *status, uint32_t desc) | 146 | void *vg, float_status *status, uint32_t desc) |
147 | { | 147 | { |
148 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false); | 148 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false); |
149 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | 149 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); |
150 | } | 150 | } |
151 | 151 | ||
152 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 152 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
153 | void *vg, float_status *status, uint32_t desc) | 153 | void *vg, float_status *status, uint32_t desc) |
154 | { | 154 | { |
155 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false); | 155 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false); |
156 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | 156 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); |
157 | } | 157 | } |
158 | 158 | ||
159 | void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 159 | void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
160 | void *vg, float_status *status, uint32_t desc) | 160 | void *vg, float_status *status, uint32_t desc) |
161 | { | 161 | { |
162 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true); | 162 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true); |
163 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | 163 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, |
164 | + float_muladd_negate_product); | 164 | + float_muladd_negate_product); |
165 | } | 165 | } |
166 | 166 | ||
167 | void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 167 | void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
168 | void *vg, float_status *status, uint32_t desc) | 168 | void *vg, float_status *status, uint32_t desc) |
169 | { | 169 | { |
170 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true); | 170 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true); |
171 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | 171 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, |
172 | + float_muladd_negate_product | float_muladd_negate_c); | 172 | + float_muladd_negate_product | float_muladd_negate_c); |
173 | } | 173 | } |
174 | 174 | ||
175 | void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | 175 | void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, |
176 | void *vg, float_status *status, uint32_t desc) | 176 | void *vg, float_status *status, uint32_t desc) |
177 | { | 177 | { |
178 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true); | 178 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true); |
179 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | 179 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, |
180 | + float_muladd_negate_c); | 180 | + float_muladd_negate_c); |
181 | } | 181 | } |
182 | 182 | ||
183 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | 183 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, |
184 | float_status *status, uint32_t desc, | 184 | float_status *status, uint32_t desc, |
185 | - uint64_t neg1, uint64_t neg3, bool fpcr_ah) | 185 | - uint64_t neg1, uint64_t neg3, bool fpcr_ah) |
186 | + uint64_t neg1, uint64_t neg3, int flags) | 186 | + uint64_t neg1, uint64_t neg3, int flags) |
187 | { | 187 | { |
188 | intptr_t i = simd_oprsz(desc); | 188 | intptr_t i = simd_oprsz(desc); |
189 | uint64_t *g = vg; | 189 | uint64_t *g = vg; |
190 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | 190 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, |
191 | if (likely((pg >> (i & 63)) & 1)) { | 191 | if (likely((pg >> (i & 63)) & 1)) { |
192 | float64 e1, e2, e3, r; | 192 | float64 e1, e2, e3, r; |
193 | 193 | ||
194 | - e1 = *(uint64_t *)(vn + i); | 194 | - e1 = *(uint64_t *)(vn + i); |
195 | + e1 = *(uint64_t *)(vn + i) ^ neg1; | 195 | + e1 = *(uint64_t *)(vn + i) ^ neg1; |
196 | e2 = *(uint64_t *)(vm + i); | 196 | e2 = *(uint64_t *)(vm + i); |
197 | - e3 = *(uint64_t *)(va + i); | 197 | - e3 = *(uint64_t *)(va + i); |
198 | - if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) { | 198 | - if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) { |
199 | - e1 ^= neg1; | 199 | - e1 ^= neg1; |
200 | - } | 200 | - } |
201 | - if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) { | 201 | - if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) { |
202 | - e3 ^= neg3; | 202 | - e3 ^= neg3; |
203 | - } | 203 | - } |
204 | - r = float64_muladd(e1, e2, e3, 0, status); | 204 | - r = float64_muladd(e1, e2, e3, 0, status); |
205 | + e3 = *(uint64_t *)(va + i) ^ neg3; | 205 | + e3 = *(uint64_t *)(va + i) ^ neg3; |
206 | + r = float64_muladd(e1, e2, e3, flags, status); | 206 | + r = float64_muladd(e1, e2, e3, flags, status); |
207 | *(uint64_t *)(vd + i) = r; | 207 | *(uint64_t *)(vd + i) = r; |
208 | } | 208 | } |
209 | } while (i & 63); | 209 | } while (i & 63); |
210 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | 210 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, |
211 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 211 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
212 | void *vg, float_status *status, uint32_t desc) | 212 | void *vg, float_status *status, uint32_t desc) |
213 | { | 213 | { |
214 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false); | 214 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false); |
215 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | 215 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); |
216 | } | 216 | } |
217 | 217 | ||
218 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 218 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
219 | void *vg, float_status *status, uint32_t desc) | 219 | void *vg, float_status *status, uint32_t desc) |
220 | { | 220 | { |
221 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false); | 221 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false); |
222 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | 222 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); |
223 | } | 223 | } |
224 | 224 | ||
225 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 225 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
226 | void *vg, float_status *status, uint32_t desc) | 226 | void *vg, float_status *status, uint32_t desc) |
227 | { | 227 | { |
228 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false); | 228 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false); |
229 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | 229 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); |
230 | } | 230 | } |
231 | 231 | ||
232 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 232 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
233 | void *vg, float_status *status, uint32_t desc) | 233 | void *vg, float_status *status, uint32_t desc) |
234 | { | 234 | { |
235 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false); | 235 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false); |
236 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | 236 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); |
237 | } | 237 | } |
238 | 238 | ||
239 | void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 239 | void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
240 | void *vg, float_status *status, uint32_t desc) | 240 | void *vg, float_status *status, uint32_t desc) |
241 | { | 241 | { |
242 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true); | 242 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true); |
243 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | 243 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, |
244 | + float_muladd_negate_product); | 244 | + float_muladd_negate_product); |
245 | } | 245 | } |
246 | 246 | ||
247 | void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 247 | void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
248 | void *vg, float_status *status, uint32_t desc) | 248 | void *vg, float_status *status, uint32_t desc) |
249 | { | 249 | { |
250 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true); | 250 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true); |
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | 251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, |
252 | + float_muladd_negate_product | float_muladd_negate_c); | 252 | + float_muladd_negate_product | float_muladd_negate_c); |
253 | } | 253 | } |
254 | 254 | ||
255 | void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | 255 | void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, |
256 | void *vg, float_status *status, uint32_t desc) | 256 | void *vg, float_status *status, uint32_t desc) |
257 | { | 257 | { |
258 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true); | 258 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true); |
259 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | 259 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, |
260 | + float_muladd_negate_c); | 260 | + float_muladd_negate_c); |
261 | } | 261 | } |
262 | 262 | ||
263 | /* Two operand floating-point comparison controlled by a predicate. | 263 | /* Two operand floating-point comparison controlled by a predicate. |
264 | -- | 264 | -- |
265 | 2.43.0 | 265 | 2.43.0 |
1 | Since we know the operand is negative, absolute value | 1 | Because the operand is known to be negative, negating the operand |
---|---|---|---|
2 | can be had by negating rather than abs per se. | 2 | is the same as taking the absolute value. Defer this to the muladd |
3 | operation via flags, so that it happens after NaN detection, which | ||
4 | is correct for FPCR.AH. | ||
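A sketch of the identity being relied on (not part of the patch):

    /* For a non-NaN mm with the sign bit set:
     *   float16_muladd(n, float16_abs(mm), c, 0, s)
     *     == float16_muladd(n, float16_chs(mm), c, 0, s)
     *     == float16_muladd(n, mm, c, float_muladd_negate_product, s)
     * When mm is a NaN, only the flag form leaves its sign alone, which is
     * what FPCR.AH == 1 requires, since the flags are applied after NaN
     * detection inside the muladd.
     */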
3 | 5 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 7 | --- |
6 | target/arm/tcg/sve_helper.c | 12 +++--------- | 8 | target/arm/tcg/sve_helper.c | 27 +++++++++++++++++++++------ |
7 | 1 file changed, 3 insertions(+), 9 deletions(-) | 9 | 1 file changed, 21 insertions(+), 6 deletions(-) |
8 | 10 | ||
9 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | 11 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
10 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/arm/tcg/sve_helper.c | 13 | --- a/target/arm/tcg/sve_helper.c |
12 | +++ b/target/arm/tcg/sve_helper.c | 14 | +++ b/target/arm/tcg/sve_helper.c |
13 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, | 15 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, |
16 | intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
17 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
18 | float16 *d = vd, *n = vn, *m = vm; | ||
19 | + | ||
20 | for (i = 0; i < opr_sz; i++) { | ||
14 | float16 mm = m[i]; | 21 | float16 mm = m[i]; |
15 | intptr_t xx = x; | 22 | intptr_t xx = x; |
23 | + int flags = 0; | ||
24 | + | ||
16 | if (float16_is_neg(mm)) { | 25 | if (float16_is_neg(mm)) { |
17 | - if (!(fpcr_ah && float16_is_any_nan(mm))) { | 26 | - if (!(fpcr_ah && float16_is_any_nan(mm))) { |
18 | - mm = float16_abs(mm); | 27 | + if (fpcr_ah) { |
19 | - } | 28 | + flags = float_muladd_negate_product; |
20 | + mm = float16_maybe_ah_chs(mm, fpcr_ah); | 29 | + } else { |
30 | mm = float16_abs(mm); | ||
31 | } | ||
21 | xx += 8; | 32 | xx += 8; |
22 | } | 33 | } |
23 | d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | 34 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); |
35 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
36 | } | ||
37 | } | ||
38 | |||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, | 39 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, |
40 | intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
41 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
42 | float32 *d = vd, *n = vn, *m = vm; | ||
43 | + | ||
44 | for (i = 0; i < opr_sz; i++) { | ||
25 | float32 mm = m[i]; | 45 | float32 mm = m[i]; |
26 | intptr_t xx = x; | 46 | intptr_t xx = x; |
47 | + int flags = 0; | ||
48 | + | ||
27 | if (float32_is_neg(mm)) { | 49 | if (float32_is_neg(mm)) { |
28 | - if (!(fpcr_ah && float32_is_any_nan(mm))) { | 50 | - if (!(fpcr_ah && float32_is_any_nan(mm))) { |
29 | - mm = float32_abs(mm); | 51 | + if (fpcr_ah) { |
30 | - } | 52 | + flags = float_muladd_negate_product; |
31 | + mm = float32_maybe_ah_chs(mm, fpcr_ah); | 53 | + } else { |
54 | mm = float32_abs(mm); | ||
55 | } | ||
32 | xx += 8; | 56 | xx += 8; |
33 | } | 57 | } |
34 | d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); | 58 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); |
59 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); | ||
60 | } | ||
61 | } | ||
62 | |||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, | 63 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, |
64 | intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
65 | bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
66 | float64 *d = vd, *n = vn, *m = vm; | ||
67 | + | ||
68 | for (i = 0; i < opr_sz; i++) { | ||
36 | float64 mm = m[i]; | 69 | float64 mm = m[i]; |
37 | intptr_t xx = x; | 70 | intptr_t xx = x; |
71 | + int flags = 0; | ||
72 | + | ||
38 | if (float64_is_neg(mm)) { | 73 | if (float64_is_neg(mm)) { |
39 | - if (!(fpcr_ah && float64_is_any_nan(mm))) { | 74 | - if (!(fpcr_ah && float64_is_any_nan(mm))) { |
40 | - mm = float64_abs(mm); | 75 | + if (fpcr_ah) { |
41 | - } | 76 | + flags = float_muladd_negate_product; |
42 | + mm = float64_maybe_ah_chs(mm, fpcr_ah); | 77 | + } else { |
78 | mm = float64_abs(mm); | ||
79 | } | ||
43 | xx += 8; | 80 | xx += 8; |
44 | } | 81 | } |
45 | d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | 82 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); |
83 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); | ||
84 | } | ||
85 | } | ||
86 | |||
46 | -- | 87 | -- |
47 | 2.43.0 | 88 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | The float_muladd_negate_product flag produces the same result | ||
2 | as negating either of the multiplication operands, assuming | ||
3 | neither of the operands is a NaN. But since FEAT_AFP does not | ||
4 | negate NaNs, this behaviour is exactly what we need. | ||
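In other words (a sketch based on the behaviour described above, not part of the patch):

    /* The conditional negation being removed:
     *   op1 = float16_is_any_nan(op1) ? op1 : float16_chs(op1);
     *   r   = float16_muladd(op1, op2, dest, 0, stat);
     * gives the same result as letting the muladd do the negation:
     *   r   = float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
     * because the muladd negation flags are not applied to NaN inputs.
     */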
1 | 5 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/vec_helper.c | 9 +++------ | ||
9 | 1 file changed, 3 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/vec_helper.c | ||
14 | +++ b/target/arm/tcg/vec_helper.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
16 | static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, | ||
17 | float_status *stat) | ||
18 | { | ||
19 | - op1 = float16_is_any_nan(op1) ? op1 : float16_chs(op1); | ||
20 | - return float16_muladd(op1, op2, dest, 0, stat); | ||
21 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
22 | } | ||
23 | |||
24 | static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, | ||
25 | float_status *stat) | ||
26 | { | ||
27 | - op1 = float32_is_any_nan(op1) ? op1 : float32_chs(op1); | ||
28 | - return float32_muladd(op1, op2, dest, 0, stat); | ||
29 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
30 | } | ||
31 | |||
32 | static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
33 | float_status *stat) | ||
34 | { | ||
35 | - op1 = float64_is_any_nan(op1) ? op1 : float64_chs(op1); | ||
36 | - return float64_muladd(op1, op2, dest, 0, stat); | ||
37 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
38 | } | ||
39 | |||
40 | #define DO_MULADD(NAME, FUNC, TYPE) \ | ||
41 | -- | ||
42 | 2.43.0 | ||
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/translate-a64.c | 2 +- | ||
4 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- | ||
5 | 2 files changed, 40 insertions(+), 28 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/target/arm/tcg/translate-a64.c | ||
10 | +++ b/target/arm/tcg/translate-a64.c | ||
11 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
12 | |||
13 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
14 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
15 | - a->rot, fn[a->esz]); | ||
16 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); | ||
17 | return true; | ||
18 | } | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, | ||
25 | uintptr_t opr_sz = simd_oprsz(desc); | ||
26 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
27 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
28 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
29 | - uint32_t neg_real = flip ^ neg_imag; | ||
30 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
31 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
32 | + uint32_t negf_real = flip ^ negf_imag; | ||
33 | + float16 negx_imag, negx_real; | ||
34 | uintptr_t i; | ||
35 | |||
36 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
37 | - neg_real <<= 15; | ||
38 | - neg_imag <<= 15; | ||
39 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
40 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
41 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
42 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
43 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
44 | |||
45 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
46 | float16 e2 = n[H2(i + flip)]; | ||
47 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
48 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
49 | float16 e4 = e2; | ||
50 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
51 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
52 | |||
53 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
54 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
55 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
56 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
57 | } | ||
58 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
59 | } | ||
60 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
61 | uintptr_t opr_sz = simd_oprsz(desc); | ||
62 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
63 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
64 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
65 | - uint32_t neg_real = flip ^ neg_imag; | ||
66 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
67 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
68 | + uint32_t negf_real = flip ^ negf_imag; | ||
69 | + float32 negx_imag, negx_real; | ||
70 | uintptr_t i; | ||
71 | |||
72 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
73 | - neg_real <<= 31; | ||
74 | - neg_imag <<= 31; | ||
75 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
76 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
77 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
78 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
79 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
80 | |||
81 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
82 | float32 e2 = n[H4(i + flip)]; | ||
83 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
84 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
85 | float32 e4 = e2; | ||
86 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
87 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
88 | |||
89 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
90 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
91 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
92 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
93 | } | ||
94 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
95 | } | ||
96 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
97 | uintptr_t opr_sz = simd_oprsz(desc); | ||
98 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
99 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | - uint64_t neg_real = flip ^ neg_imag; | ||
102 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
103 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
104 | + uint32_t negf_real = flip ^ negf_imag; | ||
105 | + float64 negx_real, negx_imag; | ||
106 | uintptr_t i; | ||
107 | |||
108 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
109 | - neg_real <<= 63; | ||
110 | - neg_imag <<= 63; | ||
111 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
112 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
113 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
114 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
115 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
116 | |||
117 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
118 | float64 e2 = n[i + flip]; | ||
119 | - float64 e1 = m[i + flip] ^ neg_real; | ||
120 | + float64 e1 = m[i + flip] ^ negx_real; | ||
121 | float64 e4 = e2; | ||
122 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
123 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
124 | |||
125 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
126 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
127 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
128 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
129 | } | ||
130 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
131 | } | ||
132 | -- | ||
133 | 2.43.0 | ||
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/translate-a64.c | 2 +- | ||
4 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- | ||
5 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/target/arm/tcg/translate-a64.c | ||
10 | +++ b/target/arm/tcg/translate-a64.c | ||
11 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
12 | if (fp_access_check(s)) { | ||
13 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
14 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
15 | - (a->idx << 2) | a->rot, fn); | ||
16 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); | ||
17 | } | ||
18 | return true; | ||
19 | } | ||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, | ||
25 | uintptr_t opr_sz = simd_oprsz(desc); | ||
26 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
27 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
28 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
29 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
30 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
31 | - uint32_t neg_real = flip ^ neg_imag; | ||
32 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
33 | + uint32_t negf_real = flip ^ negf_imag; | ||
34 | intptr_t elements = opr_sz / sizeof(float16); | ||
35 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
36 | + float16 negx_imag, negx_real; | ||
37 | intptr_t i, j; | ||
38 | |||
39 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
40 | - neg_real <<= 15; | ||
41 | - neg_imag <<= 15; | ||
42 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
43 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
44 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
45 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
46 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
47 | |||
48 | for (i = 0; i < elements; i += eltspersegment) { | ||
49 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
50 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
51 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
52 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
53 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
54 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
55 | |||
56 | for (j = i; j < i + eltspersegment; j += 2) { | ||
57 | float16 e2 = n[H2(j + flip)]; | ||
58 | float16 e4 = e2; | ||
59 | |||
60 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
61 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
62 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
63 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
64 | } | ||
65 | } | ||
66 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
67 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
68 | uintptr_t opr_sz = simd_oprsz(desc); | ||
69 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
70 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
73 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
74 | - uint32_t neg_real = flip ^ neg_imag; | ||
75 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
76 | + uint32_t negf_real = flip ^ negf_imag; | ||
77 | intptr_t elements = opr_sz / sizeof(float32); | ||
78 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
79 | + float32 negx_imag, negx_real; | ||
80 | intptr_t i, j; | ||
81 | |||
82 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
83 | - neg_real <<= 31; | ||
84 | - neg_imag <<= 31; | ||
85 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
86 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
87 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
88 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | |||
91 | for (i = 0; i < elements; i += eltspersegment) { | ||
92 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
93 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
94 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
95 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
96 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
97 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
98 | |||
99 | for (j = i; j < i + eltspersegment; j += 2) { | ||
100 | float32 e2 = n[H4(j + flip)]; | ||
101 | float32 e4 = e2; | ||
102 | |||
103 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
104 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
105 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
106 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
107 | } | ||
108 | } | ||
109 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
110 | -- | ||
111 | 2.43.0 | ||
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- | ||
4 | target/arm/tcg/translate-sve.c | 2 +- | ||
5 | 2 files changed, 43 insertions(+), 28 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/target/arm/tcg/sve_helper.c | ||
10 | +++ b/target/arm/tcg/sve_helper.c | ||
11 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
12 | void *vg, float_status *status, uint32_t desc) | ||
13 | { | ||
14 | intptr_t j, i = simd_oprsz(desc); | ||
15 | - unsigned rot = simd_data(desc); | ||
16 | - bool flip = rot & 1; | ||
17 | - float16 neg_imag, neg_real; | ||
18 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
19 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
20 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
21 | + uint32_t negf_real = flip ^ negf_imag; | ||
22 | + float16 negx_imag, negx_real; | ||
23 | uint64_t *g = vg; | ||
24 | |||
25 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
26 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
27 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
28 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
29 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
30 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
31 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
32 | |||
33 | do { | ||
34 | uint64_t pg = g[(i - 1) >> 6]; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
36 | mi = *(float16 *)(vm + H1_2(j)); | ||
37 | |||
38 | e2 = (flip ? ni : nr); | ||
39 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
40 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
41 | e4 = e2; | ||
42 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
43 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
44 | |||
45 | if (likely((pg >> (i & 63)) & 1)) { | ||
46 | d = *(float16 *)(va + H1_2(i)); | ||
47 | - d = float16_muladd(e2, e1, d, 0, status); | ||
48 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
49 | *(float16 *)(vd + H1_2(i)) = d; | ||
50 | } | ||
51 | if (likely((pg >> (j & 63)) & 1)) { | ||
52 | d = *(float16 *)(va + H1_2(j)); | ||
53 | - d = float16_muladd(e4, e3, d, 0, status); | ||
54 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
55 | *(float16 *)(vd + H1_2(j)) = d; | ||
56 | } | ||
57 | } while (i & 63); | ||
58 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
59 | void *vg, float_status *status, uint32_t desc) | ||
60 | { | ||
61 | intptr_t j, i = simd_oprsz(desc); | ||
62 | - unsigned rot = simd_data(desc); | ||
63 | - bool flip = rot & 1; | ||
64 | - float32 neg_imag, neg_real; | ||
65 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
66 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
67 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
68 | + uint32_t negf_real = flip ^ negf_imag; | ||
69 | + float32 negx_imag, negx_real; | ||
70 | uint64_t *g = vg; | ||
71 | |||
72 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
73 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
74 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
75 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
76 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
77 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
78 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
79 | |||
80 | do { | ||
81 | uint64_t pg = g[(i - 1) >> 6]; | ||
82 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
83 | mi = *(float32 *)(vm + H1_2(j)); | ||
84 | |||
85 | e2 = (flip ? ni : nr); | ||
86 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
87 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
88 | e4 = e2; | ||
89 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
90 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
91 | |||
92 | if (likely((pg >> (i & 63)) & 1)) { | ||
93 | d = *(float32 *)(va + H1_2(i)); | ||
94 | - d = float32_muladd(e2, e1, d, 0, status); | ||
95 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
96 | *(float32 *)(vd + H1_2(i)) = d; | ||
97 | } | ||
98 | if (likely((pg >> (j & 63)) & 1)) { | ||
99 | d = *(float32 *)(va + H1_2(j)); | ||
100 | - d = float32_muladd(e4, e3, d, 0, status); | ||
101 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
102 | *(float32 *)(vd + H1_2(j)) = d; | ||
103 | } | ||
104 | } while (i & 63); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
106 | void *vg, float_status *status, uint32_t desc) | ||
107 | { | ||
108 | intptr_t j, i = simd_oprsz(desc); | ||
109 | - unsigned rot = simd_data(desc); | ||
110 | - bool flip = rot & 1; | ||
111 | - float64 neg_imag, neg_real; | ||
112 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_imag, negx_real; | ||
117 | uint64_t *g = vg; | ||
118 | |||
119 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
120 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
121 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
122 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
123 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
124 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
125 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | |||
127 | do { | ||
128 | uint64_t pg = g[(i - 1) >> 6]; | ||
129 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
130 | mi = *(float64 *)(vm + H1_2(j)); | ||
131 | |||
132 | e2 = (flip ? ni : nr); | ||
133 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
134 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
135 | e4 = e2; | ||
136 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
137 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
138 | |||
139 | if (likely((pg >> (i & 63)) & 1)) { | ||
140 | d = *(float64 *)(va + H1_2(i)); | ||
141 | - d = float64_muladd(e2, e1, d, 0, status); | ||
142 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
143 | *(float64 *)(vd + H1_2(i)) = d; | ||
144 | } | ||
145 | if (likely((pg >> (j & 63)) & 1)) { | ||
146 | d = *(float64 *)(va + H1_2(j)); | ||
147 | - d = float64_muladd(e4, e3, d, 0, status); | ||
148 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
149 | *(float64 *)(vd + H1_2(j)) = d; | ||
150 | } | ||
151 | } while (i & 63); | ||
152 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/target/arm/tcg/translate-sve.c | ||
155 | +++ b/target/arm/tcg/translate-sve.c | ||
156 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | ||
157 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | ||
158 | }; | ||
159 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | ||
160 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | ||
161 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), | ||
162 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
163 | |||
164 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
165 | -- | ||
166 | 2.43.0 | ||
New patch | |||
---|---|---|---|
1 | Split negation cases out of gvec_fmla_idx, creating 6 new helpers. | ||
2 | We no longer pass 'neg' as a bit in simd_data. | ||
1 | 3 | ||
4 | Handle FPCR.AH=0 via xor and FPCR.AH=1 via muladd flags. | ||
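A summary sketch of the resulting helper selection (mirroring the translate-a64 and vec_helper hunks below):

    /*   no negation        -> gvec_fmla_idx_{h,s,d}     NEGX = 0,        NEGF = 0
     *   fmls, FPCR.AH == 0 -> gvec_fmls_idx_{h,s,d}     NEGX = sign bit, NEGF = 0
     *   fmls, FPCR.AH == 1 -> gvec_ah_fmls_idx_{h,s,d}  NEGX = 0,
     *                         NEGF = float_muladd_negate_product
     * so simd_data now carries only the index.
     */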
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
10 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
11 | target/arm/tcg/vec_helper.c | 29 +++++++++++++++-------------- | ||
12 | 4 files changed, 57 insertions(+), 34 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/helper.h | ||
17 | +++ b/target/arm/helper.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
19 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
20 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
21 | |||
22 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
28 | + | ||
29 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
34 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
35 | + | ||
36 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
37 | void, ptr, ptr, ptr, ptr, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
44 | |||
45 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
46 | { | ||
47 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
48 | - gen_helper_gvec_fmla_idx_h, | ||
49 | - gen_helper_gvec_fmla_idx_s, | ||
50 | - gen_helper_gvec_fmla_idx_d, | ||
51 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
52 | + { gen_helper_gvec_fmla_idx_h, | ||
53 | + gen_helper_gvec_fmla_idx_s, | ||
54 | + gen_helper_gvec_fmla_idx_d }, | ||
55 | + { gen_helper_gvec_fmls_idx_h, | ||
56 | + gen_helper_gvec_fmls_idx_s, | ||
57 | + gen_helper_gvec_fmls_idx_d }, | ||
58 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
59 | + gen_helper_gvec_ah_fmls_idx_s, | ||
60 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
61 | }; | ||
62 | MemOp esz = a->esz; | ||
63 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
64 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
65 | |||
66 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
67 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
68 | - (s->fpcr_ah << 5) | (a->idx << 1) | neg, | ||
69 | - fns[esz - 1]); | ||
70 | + a->idx, fns[esz - 1][neg ? 1 + s->fpcr_ah : 0]); | ||
71 | return true; | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
79 | *** SVE Floating Point Multiply-Add Indexed Group | ||
80 | */ | ||
81 | |||
82 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
83 | -{ | ||
84 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
85 | - NULL, | ||
86 | - gen_helper_gvec_fmla_idx_h, | ||
87 | - gen_helper_gvec_fmla_idx_s, | ||
88 | - gen_helper_gvec_fmla_idx_d, | ||
89 | - }; | ||
90 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
91 | - (s->fpcr_ah << 5) | (a->index << 1) | sub, | ||
92 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
93 | -} | ||
94 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
95 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
96 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
97 | +}; | ||
98 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
99 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
100 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
101 | |||
102 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
103 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
104 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
105 | + { NULL, NULL }, | ||
106 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
107 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
108 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
109 | +}; | ||
110 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
111 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
112 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
113 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
114 | |||
115 | /* | ||
116 | *** SVE Floating Point Multiply Indexed Group | ||
117 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/target/arm/tcg/vec_helper.c | ||
120 | +++ b/target/arm/tcg/vec_helper.c | ||
121 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
122 | |||
123 | #undef DO_FMUL_IDX | ||
124 | |||
125 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
126 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
127 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
128 | float_status *stat, uint32_t desc) \ | ||
129 | { \ | ||
130 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
131 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
132 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
133 | - intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 1, 3); \ | ||
134 | - bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1); \ | ||
135 | + intptr_t idx = simd_data(desc); \ | ||
136 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
137 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
138 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
139 | TYPE mm = m[H(i + idx)]; \ | ||
140 | for (j = 0; j < segment; j++) { \ | ||
141 | - TYPE nval = n[i + j]; \ | ||
142 | - if (!(fpcr_ah && TYPE ## _is_any_nan(nval))) { \ | ||
143 | - nval ^= op1_neg; \ | ||
144 | - } \ | ||
145 | - d[i + j] = TYPE##_muladd(nval, \ | ||
146 | - mm, a[i + j], 0, stat); \ | ||
147 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
148 | + a[i + j], NEGF, stat); \ | ||
149 | } \ | ||
150 | } \ | ||
151 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
152 | } | ||
153 | |||
154 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
155 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
156 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
158 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
159 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
160 | + | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
162 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
163 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
164 | + | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
166 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
167 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
168 | |||
169 | #undef DO_FMLA_IDX | ||
170 | |||
171 | -- | ||
172 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | This reverts commit c5eb0b62e603c1d391ee2199108f0eb34aadc8f5; the FPCR.AH handling for FMLSL is reinstated in the following patch via float_muladd_negate_product. | ||
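For context: with AH=0, FMLS can be implemented by flipping the sign bit of the multiplicand before the fused multiply-add; with AH=1 that is not allowed for NaN operands, so the negation has to be folded into the operation itself, which is what float_muladd_negate_product provides. Below is a minimal standalone sketch of the two formulations; toy_muladd, TOY_NEGATE_PRODUCT and xor_signbit are illustrative names, not QEMU's softfloat API.

/* Standalone sketch, not QEMU code: toy_muladd() stands in for float64_muladd(). */
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_NEGATE_PRODUCT 1            /* models float_muladd_negate_product */

static double toy_muladd(double n, double m, double a, int flags)
{
    if (flags & TOY_NEGATE_PRODUCT) {
        n = -n;                         /* negating one factor negates the product */
    }
    return fma(n, m, a);
}

static double xor_signbit(double n)     /* the AH=0 style: flip the operand's sign bit */
{
    uint64_t bits;
    memcpy(&bits, &n, sizeof(bits));
    bits ^= UINT64_C(1) << 63;
    memcpy(&n, &bits, sizeof(bits));
    return n;
}

int main(void)
{
    double n = 1.25, m = -3.0, a = 0.5;

    double r_xor  = toy_muladd(xor_signbit(n), m, a, 0);
    double r_flag = toy_muladd(n, m, a, TOY_NEGATE_PRODUCT);

    /* Identical for finite operands; only the xor form would rewrite
     * the sign bit of a NaN operand before it propagates. */
    assert(r_xor == r_flag);
    printf("fmls: %g\n", r_xor);        /* -> 4.25 */
    return 0;
}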
2 | --- | ||
3 | target/arm/tcg/translate-a64.c | 4 ++-- | ||
4 | target/arm/tcg/vec_helper.c | 28 ++++------------------------ | ||
5 | 2 files changed, 6 insertions(+), 26 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/target/arm/tcg/translate-a64.c | ||
10 | +++ b/target/arm/tcg/translate-a64.c | ||
11 | @@ -XXX,XX +XXX,XX @@ TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) | ||
12 | static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) | ||
13 | { | ||
14 | if (fp_access_check(s)) { | ||
15 | - int data = (s->fpcr_ah << 2) | (is_2 << 1) | is_s; | ||
16 | + int data = (is_2 << 1) | is_s; | ||
17 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), | ||
18 | vec_full_reg_offset(s, a->rn), | ||
19 | vec_full_reg_offset(s, a->rm), tcg_env, | ||
20 | @@ -XXX,XX +XXX,XX @@ TRANS(FMLS_vi, do_fmla_vector_idx, a, true) | ||
21 | static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) | ||
22 | { | ||
23 | if (fp_access_check(s)) { | ||
24 | - int data = (s->fpcr_ah << 5) | (a->idx << 2) | (is_2 << 1) | is_s; | ||
25 | + int data = (a->idx << 2) | (is_2 << 1) | is_s; | ||
26 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), | ||
27 | vec_full_reg_offset(s, a->rn), | ||
28 | vec_full_reg_offset(s, a->rm), tcg_env, | ||
29 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/tcg/vec_helper.c | ||
32 | +++ b/target/arm/tcg/vec_helper.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) | ||
34 | return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5); | ||
35 | } | ||
36 | |||
37 | -static uint64_t neg4_f16(uint64_t v, bool fpcr_ah) | ||
38 | -{ | ||
39 | - /* | ||
40 | - * Negate all inputs for FMLSL at once. This is slightly complicated | ||
41 | - * by the need to avoid flipping the sign of a NaN when FPCR.AH == 1 | ||
42 | - */ | ||
43 | - uint64_t mask = 0x8000800080008000ull; | ||
44 | - if (fpcr_ah) { | ||
45 | - uint64_t tmp = v, signbit = 0x8000; | ||
46 | - for (int i = 0; i < 4; i++) { | ||
47 | - if (float16_is_any_nan(extract64(tmp, 0, 16))) { | ||
48 | - mask ^= signbit; | ||
49 | - } | ||
50 | - tmp >>= 16; | ||
51 | - signbit <<= 16; | ||
52 | - } | ||
53 | - } | ||
54 | - return v ^ mask; | ||
55 | -} | ||
56 | - | ||
57 | /* | ||
58 | * Note that FMLAL requires oprsz == 8 or oprsz == 16, | ||
59 | * as there is not yet SVE versions that might use blocking. | ||
60 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
61 | intptr_t i, oprsz = simd_oprsz(desc); | ||
62 | int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
63 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
64 | - bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
65 | int is_q = oprsz == 16; | ||
66 | uint64_t n_4, m_4; | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
69 | n_4 = load4_f16(vn, is_q, is_2); | ||
70 | m_4 = load4_f16(vm, is_q, is_2); | ||
71 | |||
72 | + /* Negate all inputs for FMLSL at once. */ | ||
73 | if (is_s) { | ||
74 | - n_4 = neg4_f16(n_4, fpcr_ah); | ||
75 | + n_4 ^= 0x8000800080008000ull; | ||
76 | } | ||
77 | |||
78 | for (i = 0; i < oprsz / 4; i++) { | ||
79 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
80 | int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
81 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
82 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
83 | - bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1); | ||
84 | int is_q = oprsz == 16; | ||
85 | uint64_t n_4; | ||
86 | float32 m_1; | ||
87 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
88 | /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
89 | n_4 = load4_f16(vn, is_q, is_2); | ||
90 | |||
91 | + /* Negate all inputs for FMLSL at once. */ | ||
92 | if (is_s) { | ||
93 | - n_4 = neg4_f16(n_4, fpcr_ah); | ||
94 | + n_4 ^= 0x8000800080008000ull; | ||
95 | } | ||
96 | |||
97 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); | ||
98 | -- | ||
99 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- | ||
4 | 1 file changed, 46 insertions(+), 25 deletions(-) | ||
1 | 5 | ||
6 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
7 | index XXXXXXX..XXXXXXX 100644 | ||
8 | --- a/target/arm/tcg/vec_helper.c | ||
9 | +++ b/target/arm/tcg/vec_helper.c | ||
10 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) | ||
11 | */ | ||
12 | |||
13 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
14 | - uint32_t desc, bool fz16) | ||
15 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
16 | { | ||
17 | intptr_t i, oprsz = simd_oprsz(desc); | ||
18 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
19 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
20 | int is_q = oprsz == 16; | ||
21 | uint64_t n_4, m_4; | ||
22 | |||
23 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
24 | - n_4 = load4_f16(vn, is_q, is_2); | ||
25 | + /* | ||
26 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
27 | + * Negate all inputs for AH=0 FMLSL at once. | ||
28 | + */ | ||
29 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
30 | m_4 = load4_f16(vm, is_q, is_2); | ||
31 | |||
32 | - /* Negate all inputs for FMLSL at once. */ | ||
33 | - if (is_s) { | ||
34 | - n_4 ^= 0x8000800080008000ull; | ||
35 | - } | ||
36 | - | ||
37 | for (i = 0; i < oprsz / 4; i++) { | ||
38 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
39 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); | ||
40 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
41 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
42 | } | ||
43 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
44 | } | ||
45 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
46 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
47 | CPUARMState *env, uint32_t desc) | ||
48 | { | ||
49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, | ||
50 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
51 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
52 | + | ||
53 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
54 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
55 | } | ||
56 | |||
57 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
58 | CPUARMState *env, uint32_t desc) | ||
59 | { | ||
60 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc, | ||
61 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
62 | + uint64_t negx = 0; | ||
63 | + int negf = 0; | ||
64 | + | ||
65 | + if (is_s) { | ||
66 | + if (env->vfp.fpcr & FPCR_AH) { | ||
67 | + negf = float_muladd_negate_product; | ||
68 | + } else { | ||
69 | + negx = 0x8000800080008000ull; | ||
70 | + } | ||
71 | + } | ||
72 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
73 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
74 | } | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
77 | } | ||
78 | |||
79 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
80 | - uint32_t desc, bool fz16) | ||
81 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
82 | { | ||
83 | intptr_t i, oprsz = simd_oprsz(desc); | ||
84 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
85 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
86 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
87 | int is_q = oprsz == 16; | ||
88 | uint64_t n_4; | ||
89 | float32 m_1; | ||
90 | |||
91 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
92 | - n_4 = load4_f16(vn, is_q, is_2); | ||
93 | - | ||
94 | - /* Negate all inputs for FMLSL at once. */ | ||
95 | - if (is_s) { | ||
96 | - n_4 ^= 0x8000800080008000ull; | ||
97 | - } | ||
98 | - | ||
99 | + /* | ||
100 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
101 | + * Negate all inputs for AH=0 FMLSL at once. | ||
102 | + */ | ||
103 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
104 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); | ||
105 | |||
106 | for (i = 0; i < oprsz / 4; i++) { | ||
107 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
108 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
109 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
110 | } | ||
111 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
112 | } | ||
113 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
114 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
115 | CPUARMState *env, uint32_t desc) | ||
116 | { | ||
117 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc, | ||
118 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
119 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
120 | + | ||
121 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
122 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
123 | } | ||
124 | |||
125 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
126 | CPUARMState *env, uint32_t desc) | ||
127 | { | ||
128 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc, | ||
129 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
130 | + uint64_t negx = 0; | ||
131 | + int negf = 0; | ||
132 | + | ||
133 | + if (is_s) { | ||
134 | + if (env->vfp.fpcr & FPCR_AH) { | ||
135 | + negf = float_muladd_negate_product; | ||
136 | + } else { | ||
137 | + negx = 0x8000800080008000ull; | ||
138 | + } | ||
139 | + } | ||
140 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
141 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
142 | } | ||
143 | |||
144 | -- | ||
145 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
4 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
1 | 5 | ||
6 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
7 | index XXXXXXX..XXXXXXX 100644 | ||
8 | --- a/target/arm/tcg/vec_helper.c | ||
9 | +++ b/target/arm/tcg/vec_helper.c | ||
10 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
11 | CPUARMState *env, uint32_t desc) | ||
12 | { | ||
13 | intptr_t i, j, oprsz = simd_oprsz(desc); | ||
14 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
15 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
16 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
17 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
18 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
19 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
20 | + int negx = 0, negf = 0; | ||
21 | + | ||
22 | + if (is_s) { | ||
23 | + if (env->vfp.fpcr & FPCR_AH) { | ||
24 | + negf = float_muladd_negate_product; | ||
25 | + } else { | ||
26 | + negx = 0x8000; | ||
27 | + } | ||
28 | + } | ||
29 | |||
30 | for (i = 0; i < oprsz; i += 16) { | ||
31 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
32 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
33 | |||
34 | for (j = 0; j < 16; j += sizeof(float32)) { | ||
35 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; | ||
36 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; | ||
37 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
38 | float32 aa = *(float32 *)(va + H1_4(i + j)); | ||
39 | |||
40 | *(float32 *)(vd + H1_4(i + j)) = | ||
41 | - float32_muladd(nn, mm, aa, 0, status); | ||
42 | + float32_muladd(nn, mm, aa, negf, status); | ||
43 | } | ||
44 | } | ||
45 | } | ||
46 | -- | ||
47 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
4 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
1 | 5 | ||
6 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
7 | index XXXXXXX..XXXXXXX 100644 | ||
8 | --- a/target/arm/tcg/vec_helper.c | ||
9 | +++ b/target/arm/tcg/vec_helper.c | ||
10 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
11 | CPUARMState *env, uint32_t desc) | ||
12 | { | ||
13 | intptr_t i, oprsz = simd_oprsz(desc); | ||
14 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
15 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
16 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
17 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
18 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
19 | + int negx = 0, negf = 0; | ||
20 | + | ||
21 | + if (is_s) { | ||
22 | + if (env->vfp.fpcr & FPCR_AH) { | ||
23 | + negf = float_muladd_negate_product; | ||
24 | + } else { | ||
25 | + negx = 0x8000; | ||
26 | + } | ||
27 | + } | ||
28 | |||
29 | for (i = 0; i < oprsz; i += sizeof(float32)) { | ||
30 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; | ||
31 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
32 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
33 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
34 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
35 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
36 | |||
37 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
38 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | -- | ||
43 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | Read the FZ16 bit directly from FPCR, rather than via the | ||
2 | get_flush_inputs_to_zero proxy. This makes it clear that it does | ||
3 | not matter which of the float_status structures is used. | ||
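For reference, a minimal standalone model of the invariant this relies on: FPCR.FZ16 is the single source from which each half-precision float_status gets its flush_inputs_to_zero setting, so reading the bit from FPCR agrees with asking any one of the derived structures. The names below (toy_fp_status, toy_sync_fz16, TOY_FPCR_FZ16) are illustrative, not QEMU's.

/* Standalone model, not QEMU code. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TOY_FPCR_FZ16 (1u << 19)        /* FPCR.FZ16 is bit 19 */

struct toy_fp_status {
    bool flush_inputs_to_zero;
    /* rounding mode, exception flags, ... */
};

/* Mirrors what the FPCR write path does: every half-precision status
 * derives its flush_inputs_to_zero flag from the one architectural bit. */
static void toy_sync_fz16(uint32_t fpcr,
                          struct toy_fp_status *a32_f16,
                          struct toy_fp_status *a64_f16)
{
    bool fz16 = fpcr & TOY_FPCR_FZ16;
    a32_f16->flush_inputs_to_zero = fz16;
    a64_f16->flush_inputs_to_zero = fz16;
}

int main(void)
{
    struct toy_fp_status a32_f16 = { false }, a64_f16 = { false };
    uint32_t fpcr = TOY_FPCR_FZ16;

    toy_sync_fz16(fpcr, &a32_f16, &a64_f16);

    /* The source bit and either proxy agree, so the helpers may read
     * fpcr & FZ16 without caring which float_status they hold. */
    assert(!!(fpcr & TOY_FPCR_FZ16) == a32_f16.flush_inputs_to_zero);
    assert(!!(fpcr & TOY_FPCR_FZ16) == a64_f16.flush_inputs_to_zero);
    return 0;
}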
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/vec_helper.c | 12 ++++++------ | ||
8 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
9 | |||
10 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/arm/tcg/vec_helper.c | ||
13 | +++ b/target/arm/tcg/vec_helper.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
15 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
16 | |||
17 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
18 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
19 | + env->vfp.fpcr & FPCR_FZ16); | ||
20 | } | ||
21 | |||
22 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
23 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
24 | } | ||
25 | } | ||
26 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
27 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
28 | + env->vfp.fpcr & FPCR_FZ16); | ||
29 | } | ||
30 | |||
31 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
32 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
33 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
34 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
35 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
36 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
37 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
38 | int negx = 0, negf = 0; | ||
39 | |||
40 | if (is_s) { | ||
41 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
42 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
43 | |||
44 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
45 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
46 | + env->vfp.fpcr & FPCR_FZ16); | ||
47 | } | ||
48 | |||
49 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
50 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
51 | } | ||
52 | } | ||
53 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
54 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
55 | + env->vfp.fpcr & FPCR_FZ16); | ||
56 | } | ||
57 | |||
58 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
60 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
61 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
62 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
63 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
64 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
65 | int negx = 0, negf = 0; | ||
66 | |||
67 | if (is_s) { | ||
68 | -- | ||
69 | 2.43.0 |
New patch | |||
---|---|---|---|
1 | Sink common code from the callers into do_fmlal | ||
2 | and do_fmlal_idx. Reorder the arguments to minimize | ||
3 | the reshuffling needed at the call sites. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ | ||
8 | 1 file changed, 16 insertions(+), 12 deletions(-) | ||
9 | |||
10 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/arm/tcg/vec_helper.c | ||
13 | +++ b/target/arm/tcg/vec_helper.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) | ||
15 | * as there is not yet SVE versions that might use blocking. | ||
16 | */ | ||
17 | |||
18 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
19 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
20 | +static void do_fmlal(float32 *d, void *vn, void *vm, | ||
21 | + CPUARMState *env, uint32_t desc, | ||
22 | + ARMFPStatusFlavour fpst_idx, | ||
23 | + uint64_t negx, int negf) | ||
24 | { | ||
25 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
26 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
27 | intptr_t i, oprsz = simd_oprsz(desc); | ||
28 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
29 | int is_q = oprsz == 16; | ||
30 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
31 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
32 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
33 | |||
34 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
35 | - env->vfp.fpcr & FPCR_FZ16); | ||
36 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
37 | } | ||
38 | |||
39 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
40 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
41 | negx = 0x8000800080008000ull; | ||
42 | } | ||
43 | } | ||
44 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
45 | - env->vfp.fpcr & FPCR_FZ16); | ||
46 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
47 | } | ||
48 | |||
49 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
50 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
51 | } | ||
52 | } | ||
53 | |||
54 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
55 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
56 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, | ||
57 | + CPUARMState *env, uint32_t desc, | ||
58 | + ARMFPStatusFlavour fpst_idx, | ||
59 | + uint64_t negx, int negf) | ||
60 | { | ||
61 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
62 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
63 | intptr_t i, oprsz = simd_oprsz(desc); | ||
64 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
65 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
66 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
67 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
68 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
69 | |||
70 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
71 | - env->vfp.fpcr & FPCR_FZ16); | ||
72 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
73 | } | ||
74 | |||
75 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
76 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
77 | negx = 0x8000800080008000ull; | ||
78 | } | ||
79 | } | ||
80 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
81 | - env->vfp.fpcr & FPCR_FZ16); | ||
82 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | -- | ||
87 | 2.43.0 |