1 | Hi; here's the first arm pullreq for the 8.2 cycle. These are | 1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches |
---|---|---|---|
2 | pretty much all bug fixes (mostly for the experimental FEAT_RME), | 2 | (plus a fix for a target/alpha latent bug that would otherwise |
3 | rather than any major features. | 3 | be revealed by the fpu changes), because 68 patches is already |
4 | longer than I prefer to send in at one time... | ||
4 | 5 | ||
6 | thanks | ||
5 | -- PMM | 7 | -- PMM |
6 | 8 | ||
7 | The following changes since commit b0dd9a7d6dd15a6898e9c585b521e6bec79b25aa: | 9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: |
8 | 10 | ||
9 | Open 8.2 development tree (2023-08-22 07:14:07 -0700) | 11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) |
10 | 12 | ||
11 | are available in the Git repository at: | 13 | are available in the Git repository at: |
12 | 14 | ||
13 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20230824 | 15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 |
14 | 16 | ||
15 | for you to fetch changes up to cd1e4db73646006039f25879af3bff55b2295ff3: | 17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: |
16 | 18 | ||
17 | target/arm: Fix 64-bit SSRA (2023-08-22 17:31:14 +0100) | 19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) |
18 | 20 | ||
19 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
20 | target-arm queue: | 22 | target-arm queue: |
21 | * hw/gpio/nrf51: implement DETECT signal | 23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags |
22 | * accel/kvm: Specify default IPA size for arm64 | 24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES |
23 | * ptw: refactor, fix some FEAT_RME bugs | ||
24 | * target/arm: Adjust PAR_EL1.SH for Device and Normal-NC memory types | ||
25 | * target/arm/helper: Implement CNTHCTL_EL2.CNT[VP]MASK | ||
26 | * Fix SME ST1Q | ||
27 | * Fix 64-bit SSRA | ||
28 | 25 | ||
29 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
30 | Akihiko Odaki (6): | 27 | Peter Maydell (49): |
31 | kvm: Introduce kvm_arch_get_default_type hook | 28 | target/alpha: Don't corrupt error_code with unknown softfloat flags |
32 | accel/kvm: Specify default IPA size for arm64 | 29 | fpu: Add float_class_denormal |
33 | mips: Report an error when KVM_VM_MIPS_VZ is unavailable | 30 | fpu: Implement float_flag_input_denormal_used |
34 | accel/kvm: Use negative KVM type for error propagation | 31 | fpu: allow flushing of output denormals to be after rounding |
35 | accel/kvm: Free as when an error occurred | 32 | target/arm: Define FPCR AH, FIZ, NEP bits |
36 | accel/kvm: Make kvm_dirty_ring_reaper_init() void | 33 | target/arm: Implement FPCR.FIZ handling |
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
37 | 77 | ||
38 | Chris Laplante (6): | 78 | Richard Henderson (19): |
39 | hw/gpio/nrf51: implement DETECT signal | 79 | target/arm: Handle FPCR.AH in vector FCMLA |
40 | qtest: factor out qtest_install_gpio_out_intercept | 80 | target/arm: Handle FPCR.AH in FCMLA by index |
41 | qtest: implement named interception of out-GPIO | 81 | target/arm: Handle FPCR.AH in SVE FCMLA |
42 | qtest: bail from irq_intercept_in if name is specified | 82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) |
43 | qtest: irq_intercept_[out/in]: return FAIL if no intercepts are installed | 83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) |
44 | qtest: microbit-test: add tests for nRF51 DETECT | 84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) |
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
45 | 98 | ||
46 | Jean-Philippe Brucker (6): | 99 | docs/system/arm/emulation.rst | 2 + |
47 | target/arm/ptw: Load stage-2 tables from realm physical space | 100 | include/fpu/softfloat-helpers.h | 11 + |
48 | target/arm/helper: Fix tlbmask and tlbbits for TLBI VAE2* | 101 | include/fpu/softfloat-types.h | 25 ++ |
49 | target/arm: Skip granule protection checks for AT instructions | 102 | target/arm/cpu-features.h | 10 + |
50 | target/arm: Pass security space rather than flag for AT instructions | 103 | target/arm/cpu.h | 97 +++-- |
51 | target/arm/helper: Check SCR_EL3.{NSE, NS} encoding for AT instructions | 104 | target/arm/helper.h | 26 ++ |
52 | target/arm/helper: Implement CNTHCTL_EL2.CNT[VP]MASK | 105 | target/arm/internals.h | 6 + |
53 | 106 | target/arm/tcg/helper-a64.h | 13 + | |
54 | Peter Maydell (15): | 107 | target/arm/tcg/helper-sve.h | 120 ++++++ |
55 | target/arm/ptw: Don't set fi->s1ptw for UnsuppAtomicUpdate fault | 108 | target/arm/tcg/translate-a64.h | 13 + |
56 | target/arm/ptw: Don't report GPC faults on stage 1 ptw as stage2 faults | 109 | target/arm/tcg/translate.h | 54 +-- |
57 | target/arm/ptw: Set s1ns bit in fault info more consistently | 110 | target/arm/tcg/vec_internal.h | 35 ++ |
58 | target/arm/ptw: Pass ptw into get_phys_addr_pmsa*() and get_phys_addr_disabled() | 111 | target/mips/fpu_helper.h | 6 + |
59 | target/arm/ptw: Pass ARMSecurityState to regime_translation_disabled() | 112 | fpu/softfloat.c | 66 +++- |
60 | target/arm/ptw: Pass an ARMSecuritySpace to arm_hcr_el2_eff_secstate() | 113 | target/alpha/cpu.c | 7 + |
61 | target/arm: Pass an ARMSecuritySpace to arm_is_el2_enabled_secstate() | 114 | target/alpha/fpu_helper.c | 2 + |
62 | target/arm/ptw: Only fold in NSTable bit effects in Secure state | 115 | target/arm/cpu.c | 46 +-- |
63 | target/arm/ptw: Remove last uses of ptw->in_secure | 116 | target/arm/helper.c | 2 +- |
64 | target/arm/ptw: Remove S1Translate::in_secure | 117 | target/arm/tcg/cpu64.c | 2 + |
65 | target/arm/ptw: Drop S1Translate::out_secure | 118 | target/arm/tcg/helper-a64.c | 151 ++++---- |
66 | target/arm/ptw: Set attributes correctly for MMU disabled data accesses | 119 | target/arm/tcg/hflags.c | 13 + |
67 | target/arm/ptw: Check for block descriptors at invalid levels | 120 | target/arm/tcg/mve_helper.c | 44 +-- |
68 | target/arm/ptw: Report stage 2 fault level for stage 2 faults on stage 1 ptw | 121 | target/arm/tcg/sme_helper.c | 4 +- |
69 | target/arm: Adjust PAR_EL1.SH for Device and Normal-NC memory types | 122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- |
70 | 123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- | |
71 | Richard Henderson (2): | 124 | target/arm/tcg/translate-sve.c | 193 +++++++--- |
72 | target/arm: Fix SME ST1Q | 125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ |
73 | target/arm: Fix 64-bit SSRA | 126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- |
74 | 127 | target/hppa/fpu_helper.c | 11 + | |
75 | include/hw/gpio/nrf51_gpio.h | 1 + | 128 | target/i386/tcg/fpu_helper.c | 8 + |
76 | include/sysemu/kvm.h | 2 + | 129 | target/mips/msa.c | 9 + |
77 | target/arm/cpu.h | 19 ++-- | 130 | target/ppc/cpu_init.c | 3 + |
78 | target/arm/internals.h | 25 ++--- | 131 | target/rx/cpu.c | 8 + |
79 | target/mips/kvm_mips.h | 9 -- | 132 | target/sh4/cpu.c | 8 + |
80 | tests/qtest/libqtest.h | 11 +++ | 133 | target/tricore/helper.c | 1 + |
81 | accel/kvm/kvm-all.c | 19 ++-- | 134 | tests/fp/fp-bench.c | 1 + |
82 | hw/arm/virt.c | 2 +- | 135 | fpu/softfloat-parts.c.inc | 127 +++++-- |
83 | hw/gpio/nrf51_gpio.c | 14 ++- | 136 | 37 files changed, 2325 insertions(+), 709 deletions(-) |
84 | hw/mips/loongson3_virt.c | 2 - | ||
85 | hw/ppc/spapr.c | 2 +- | ||
86 | softmmu/qtest.c | 52 +++++++--- | ||
87 | target/arm/cpu.c | 6 ++ | ||
88 | target/arm/helper.c | 207 ++++++++++++++++++++++++++++---------- | ||
89 | target/arm/kvm.c | 7 ++ | ||
90 | target/arm/ptw.c | 231 ++++++++++++++++++++++++++----------------- | ||
91 | target/arm/tcg/sme_helper.c | 2 +- | ||
92 | target/arm/tcg/translate.c | 2 +- | ||
93 | target/i386/kvm/kvm.c | 5 + | ||
94 | target/mips/kvm.c | 3 +- | ||
95 | target/ppc/kvm.c | 5 + | ||
96 | target/riscv/kvm.c | 5 + | ||
97 | target/s390x/kvm/kvm.c | 5 + | ||
98 | tests/qtest/libqtest.c | 6 ++ | ||
99 | tests/qtest/microbit-test.c | 44 +++++++++ | ||
100 | target/arm/trace-events | 7 +- | ||
101 | 26 files changed, 494 insertions(+), 199 deletions(-) | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | In do_cvttq() we set env->error_code with what is supposed to be a | ||
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
1 | 9 | ||
10 | This is harmless now, but when we add new floating point exception | ||
11 | flags to softfloat it will cause problems. Add an else clause to the | ||
12 | if-ladder to make it ignore any float exception flags it doesn't care | ||
13 | about. | ||
14 | |||
15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha | ||
16 | when the commit adding float_flag_input_denormal_used lands. | ||
17 | |||
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
23 | --- | ||
24 | target/alpha/fpu_helper.c | 2 ++ | ||
25 | 1 file changed, 2 insertions(+) | ||
26 | |||
27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/alpha/fpu_helper.c | ||
30 | +++ b/target/alpha/fpu_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) | ||
32 | exc = FPCR_INV; | ||
33 | } else if (exc & float_flag_inexact) { | ||
34 | exc = FPCR_INE; | ||
35 | + } else { | ||
36 | + exc = 0; | ||
37 | } | ||
38 | } | ||
39 | env->error_code = exc; | ||
40 | -- | ||
41 | 2.34.1 | ||
42 | |||
43 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently in softfloat we canonicalize input denormals and so the | ||
2 | code that implements floating point operations does not need to care | ||
3 | whether the input value was originally normal or denormal. However, | ||
4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: | ||
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
1 | 9 | ||
10 | So we need to track that the input was a non-squashed denormal. To | ||
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
17 | |||
18 | There should be no behavioural change in this commit. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | --- | ||
23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- | ||
25 | 2 files changed, 54 insertions(+), 18 deletions(-) | ||
26 | |||
27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/fpu/softfloat.c | ||
30 | +++ b/fpu/softfloat.c | ||
31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, | ||
32 | /* | ||
33 | * Classify a floating point number. Everything above float_class_qnan | ||
34 | * is a NaN so cls >= float_class_qnan is any NaN. | ||
35 | + * | ||
36 | + * Note that we canonicalize denormals, so most code should treat | ||
37 | + * class_normal and class_denormal identically. | ||
38 | */ | ||
39 | |||
40 | typedef enum __attribute__ ((__packed__)) { | ||
41 | float_class_unclassified, | ||
42 | float_class_zero, | ||
43 | float_class_normal, | ||
44 | + float_class_denormal, /* input was a non-squashed denormal */ | ||
45 | float_class_inf, | ||
46 | float_class_qnan, /* all NaNs from here */ | ||
47 | float_class_snan, | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { | ||
49 | enum { | ||
50 | float_cmask_zero = float_cmask(float_class_zero), | ||
51 | float_cmask_normal = float_cmask(float_class_normal), | ||
52 | + float_cmask_denormal = float_cmask(float_class_denormal), | ||
53 | float_cmask_inf = float_cmask(float_class_inf), | ||
54 | float_cmask_qnan = float_cmask(float_class_qnan), | ||
55 | float_cmask_snan = float_cmask(float_class_snan), | ||
56 | |||
57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, | ||
58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, | ||
59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, | ||
60 | }; | ||
61 | |||
62 | /* Flags for parts_minmax. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) | ||
64 | return c == float_class_qnan; | ||
65 | } | ||
66 | |||
67 | +/* | ||
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
72 | +{ | ||
73 | + return !(cmask & ~float_cmask_anynorm); | ||
74 | +} | ||
75 | + | ||
76 | +static inline bool is_anynorm(FloatClass c) | ||
77 | +{ | ||
78 | + return float_cmask(c) & float_cmask_anynorm; | ||
79 | +} | ||
80 | + | ||
81 | /* | ||
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
169 | { | ||
170 | - if (likely(p->cls == float_class_normal)) { | ||
171 | + if (likely(is_anynorm(p->cls))) { | ||
172 | parts_uncanon_normal(p, s, fmt); | ||
173 | } else { | ||
174 | switch (p->cls) { | ||
175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
176 | |||
177 | if (a->sign != b_sign) { | ||
178 | /* Subtraction */ | ||
179 | - if (likely(ab_mask == float_cmask_normal)) { | ||
180 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
181 | if (parts_sub_normal(a, b)) { | ||
182 | return a; | ||
183 | } | ||
184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
185 | } | ||
186 | } else { | ||
187 | /* Addition */ | ||
188 | - if (likely(ab_mask == float_cmask_normal)) { | ||
189 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
190 | parts_add_normal(a, b); | ||
191 | return a; | ||
192 | } | ||
193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
194 | } | ||
195 | |||
196 | if (b->cls == float_class_zero) { | ||
197 | - g_assert(a->cls == float_class_normal); | ||
198 | + g_assert(is_anynorm(a->cls)); | ||
199 | return a; | ||
200 | } | ||
201 | |||
202 | g_assert(a->cls == float_class_zero); | ||
203 | - g_assert(b->cls == float_class_normal); | ||
204 | + g_assert(is_anynorm(b->cls)); | ||
205 | return_b: | ||
206 | b->sign = b_sign; | ||
207 | return b; | ||
208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
210 | bool sign = a->sign ^ b->sign; | ||
211 | |||
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
351 | -- | ||
352 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to |
---|---|---|---|
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
2 | 5 | ||
3 | Named interception of in-GPIOs is not supported yet. | 6 | Note that we only set this when an input denormal is actually used by |
7 | the operation: if the operation results in Invalid Operation or | ||
8 | Divide By Zero or the result is a NaN because some other input was a | ||
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | float_flag_input_denormal_used. | ||
4 | 11 | ||
5 | Signed-off-by: Chris Laplante <chris@laplante.io> | 12 | We mostly do not need to adjust the hardfloat codepaths to deal with |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | this flag, because almost all hardfloat operations are already gated |
7 | Message-id: 20230728160324.1159090-5-chris@laplante.io | 14 | on the input not being a denormal, and will fall back to softfloat |
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | 21 | --- |
10 | softmmu/qtest.c | 8 ++++++++ | 22 | include/fpu/softfloat-types.h | 7 ++++ |
11 | 1 file changed, 8 insertions(+) | 23 | fpu/softfloat.c | 38 +++++++++++++++++--- |
24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- | ||
25 | 3 files changed, 107 insertions(+), 6 deletions(-) | ||
12 | 26 | ||
13 | diff --git a/softmmu/qtest.c b/softmmu/qtest.c | 27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h |
14 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/softmmu/qtest.c | 29 | --- a/include/fpu/softfloat-types.h |
16 | +++ b/softmmu/qtest.c | 30 | +++ b/include/fpu/softfloat-types.h |
17 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 31 | @@ -XXX,XX +XXX,XX @@ enum { |
18 | || strcmp(words[0], "irq_intercept_in") == 0) { | 32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ |
19 | DeviceState *dev; | 33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ |
20 | NamedGPIOList *ngl; | 34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ |
21 | + bool is_named; | 35 | + /* |
22 | bool is_outbound; | 36 | + * An input was denormal and we used it (without flushing it to zero). |
23 | 37 | + * Not set if we do not actually use the denormal input (e.g. | |
24 | g_assert(words[1]); | 38 | + * because some other input was a NaN, or because the operation |
25 | + is_named = words[2] != NULL; | 39 | + * wasn't actually carried out (divide-by-zero; invalid)) |
26 | is_outbound = words[0][14] == 'o'; | 40 | + */ |
27 | dev = DEVICE(object_resolve_path(words[1], NULL)); | 41 | + float_flag_input_denormal_used = 0x4000, |
28 | if (!dev) { | 42 | }; |
29 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 43 | |
30 | return; | 44 | /* |
45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/fpu/softfloat.c | ||
48 | +++ b/fpu/softfloat.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
50 | float16_params_ahp.frac_size + 1); | ||
51 | break; | ||
52 | |||
53 | - case float_class_normal: | ||
54 | case float_class_denormal: | ||
55 | + float_raise(float_flag_input_denormal_used, s); | ||
56 | + break; | ||
57 | + case float_class_normal: | ||
58 | case float_class_zero: | ||
59 | break; | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) | ||
62 | if (is_nan(a->cls)) { | ||
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
68 | } | ||
69 | |||
70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
72 | if (is_nan(a->cls)) { | ||
73 | parts_return_nan(a, s); | ||
74 | } | ||
75 | + if (a->cls == float_class_denormal) { | ||
76 | + float_raise(float_flag_input_denormal_used, s); | ||
77 | + } | ||
78 | } | ||
79 | |||
80 | #define parts_float_to_float(P, S) \ | ||
81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/fpu/softfloat-parts.c.inc | ||
144 | +++ b/fpu/softfloat-parts.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
146 | bool b_sign = b->sign ^ subtract; | ||
147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
148 | |||
149 | + /* | ||
150 | + * For addition and subtraction, we will consume an | ||
151 | + * input denormal unless the other input is a NaN. | ||
152 | + */ | ||
153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == | ||
154 | + float_cmask_denormal) { | ||
155 | + float_raise(float_flag_input_denormal_used, s); | ||
156 | + } | ||
157 | + | ||
158 | if (a->sign != b_sign) { | ||
159 | /* Subtraction */ | ||
160 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
162 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
163 | FloatPartsW tmp; | ||
164 | |||
165 | + if (ab_mask & float_cmask_denormal) { | ||
166 | + float_raise(float_flag_input_denormal_used, s); | ||
167 | + } | ||
168 | + | ||
169 | frac_mulw(&tmp, a, b); | ||
170 | frac_truncjam(a, &tmp); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
173 | } | ||
174 | |||
175 | /* Multiply by 0 or Inf */ | ||
176 | + if (ab_mask & float_cmask_denormal) { | ||
177 | + float_raise(float_flag_input_denormal_used, s); | ||
178 | + } | ||
179 | + | ||
180 | if (ab_mask & float_cmask_inf) { | ||
181 | a->cls = float_class_inf; | ||
182 | a->sign = sign; | ||
183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
184 | if (flags & float_muladd_negate_result) { | ||
185 | a->sign ^= 1; | ||
186 | } | ||
187 | + | ||
188 | + /* | ||
189 | + * All result types except for "return the default NaN | ||
190 | + * because this is an Invalid Operation" go through here; | ||
191 | + * this matches the set of cases where we consumed a | ||
192 | + * denormal input. | ||
193 | + */ | ||
194 | + if (abc_mask & float_cmask_denormal) { | ||
195 | + float_raise(float_flag_input_denormal_used, s); | ||
196 | + } | ||
197 | return a; | ||
198 | |||
199 | return_sub_zero: | ||
200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
201 | bool sign = a->sign ^ b->sign; | ||
202 | |||
203 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
204 | + if (ab_mask & float_cmask_denormal) { | ||
205 | + float_raise(float_flag_input_denormal_used, s); | ||
206 | + } | ||
207 | a->sign = sign; | ||
208 | a->exp -= b->exp + frac_div(a, b); | ||
209 | return a; | ||
210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
211 | return parts_pick_nan(a, b, s); | ||
212 | } | ||
213 | |||
214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { | ||
215 | + float_raise(float_flag_input_denormal_used, s); | ||
216 | + } | ||
217 | + | ||
218 | a->sign = sign; | ||
219 | |||
220 | /* Inf / X */ | ||
221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
223 | |||
224 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
225 | + if (ab_mask & float_cmask_denormal) { | ||
226 | + float_raise(float_flag_input_denormal_used, s); | ||
227 | + } | ||
228 | frac_modrem(a, b, mod_quot); | ||
229 | return a; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
232 | return a; | ||
233 | } | ||
234 | |||
235 | + if (ab_mask & float_cmask_denormal) { | ||
236 | + float_raise(float_flag_input_denormal_used, s); | ||
237 | + } | ||
238 | + | ||
239 | /* N % Inf; 0 % N */ | ||
240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | ||
241 | return a; | ||
242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
243 | if (unlikely(a->cls != float_class_normal)) { | ||
244 | switch (a->cls) { | ||
245 | case float_class_denormal: | ||
246 | + if (!a->sign) { | ||
247 | + /* -ve denormal will be InvalidOperation */ | ||
248 | + float_raise(float_flag_input_denormal_used, status); | ||
249 | + } | ||
250 | break; | ||
251 | case float_class_snan: | ||
252 | case float_class_qnan: | ||
253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
254 | if ((flags & (minmax_isnum | minmax_isnumber)) | ||
255 | && !(ab_mask & float_cmask_snan) | ||
256 | && (ab_mask & ~float_cmask_qnan)) { | ||
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
259 | + } | ||
260 | return is_nan(a->cls) ? b : a; | ||
31 | } | 261 | } |
32 | 262 | ||
33 | + if (is_named && !is_outbound) { | 263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, |
34 | + qtest_send_prefix(chr); | 264 | return parts_pick_nan(a, b, s); |
35 | + qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n"); | 265 | } |
36 | + return; | 266 | |
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
37 | + } | 280 | + } |
38 | + | 281 | + |
39 | if (irq_intercept_dev) { | 282 | if (a->sign != b->sign) { |
40 | qtest_send_prefix(chr); | 283 | goto a_sign; |
41 | if (irq_intercept_dev != dev) { | 284 | } |
285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
286 | return float_relation_unordered; | ||
287 | } | ||
288 | |||
289 | + if (ab_mask & float_cmask_denormal) { | ||
290 | + float_raise(float_flag_input_denormal_used, s); | ||
291 | + } | ||
292 | + | ||
293 | if (ab_mask & float_cmask_zero) { | ||
294 | if (ab_mask == float_cmask_zero) { | ||
295 | return float_relation_equal; | ||
296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
297 | case float_class_zero: | ||
298 | case float_class_inf: | ||
299 | break; | ||
300 | - case float_class_normal: | ||
301 | case float_class_denormal: | ||
302 | + float_raise(float_flag_input_denormal_used, s); | ||
303 | + /* fall through */ | ||
304 | + case float_class_normal: | ||
305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
306 | break; | ||
307 | default: | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
309 | if (unlikely(a->cls != float_class_normal)) { | ||
310 | switch (a->cls) { | ||
311 | case float_class_denormal: | ||
312 | + if (!a->sign) { | ||
313 | + /* -ve denormal will be InvalidOperation */ | ||
314 | + float_raise(float_flag_input_denormal_used, s); | ||
315 | + } | ||
316 | break; | ||
317 | case float_class_snan: | ||
318 | case float_class_qnan: | ||
42 | -- | 319 | -- |
43 | 2.34.1 | 320 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Currently we handle flushing of output denormals in uncanon_normal | |
2 | always before we deal with rounding. This works for architectures | ||
3 | that detect tininess before rounding, but is usually not the right | ||
4 | place when the architecture detects tininess after rounding. For | ||
5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes | ||
6 | outputs to be flushed to zero "when it detects a floating-point | ||
7 | underflow condition". This means that we mustn't flush to zero if | ||
8 | the input is such that after rounding it is no longer tiny. | ||
9 | |||
10 | At least one of our guest architectures does underflow detection | ||
11 | after rounding but flushing of denormals before rounding (MIPS MSA); | ||
12 | this means we need to have a config knob for this that is separate | ||
13 | from our existing tininess_before_rounding setting. | ||
14 | |||
15 | Add an ftz_detection flag. For consistency with | ||
16 | tininess_before_rounding, we make it default to "detect ftz after | ||
17 | rounding"; this means that we need to explicitly set the flag to | ||
18 | "detect ftz before rounding" on every existing architecture that sets | ||
19 | flush_to_zero, so that this commit has no behaviour change. | ||
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
37 | |||
38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
40 | --- | ||
41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ | ||
42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ | ||
43 | target/mips/fpu_helper.h | 6 ++++++ | ||
44 | target/alpha/cpu.c | 7 +++++++ | ||
45 | target/arm/cpu.c | 1 + | ||
46 | target/hppa/fpu_helper.c | 11 +++++++++++ | ||
47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ | ||
48 | target/mips/msa.c | 9 +++++++++ | ||
49 | target/ppc/cpu_init.c | 3 +++ | ||
50 | target/rx/cpu.c | 8 ++++++++ | ||
51 | target/sh4/cpu.c | 8 ++++++++ | ||
52 | target/tricore/helper.c | 1 + | ||
53 | tests/fp/fp-bench.c | 1 + | ||
54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ | ||
55 | 14 files changed, 107 insertions(+), 6 deletions(-) | ||
56 | |||
57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/fpu/softfloat-helpers.h | ||
60 | +++ b/include/fpu/softfloat-helpers.h | ||
61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) | ||
62 | status->flush_inputs_to_zero = val; | ||
63 | } | ||
64 | |||
65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, | ||
66 | + float_status *status) | ||
67 | +{ | ||
68 | + status->ftz_detection = d; | ||
69 | +} | ||
70 | + | ||
71 | static inline void set_default_nan_mode(bool val, float_status *status) | ||
72 | { | ||
73 | status->default_nan_mode = val; | ||
74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | ||
75 | return status->default_nan_mode; | ||
76 | } | ||
77 | |||
78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) | ||
79 | +{ | ||
80 | + return status->ftz_detection; | ||
81 | +} | ||
82 | + | ||
83 | #endif /* SOFTFLOAT_HELPERS_H */ | ||
84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/fpu/softfloat-types.h | ||
87 | +++ b/include/fpu/softfloat-types.h | ||
88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
89 | float_infzeronan_suppress_invalid = (1 << 7), | ||
90 | } FloatInfZeroNaNRule; | ||
91 | |||
92 | +/* | ||
93 | + * When flush_to_zero is set, should we detect denormal results to | ||
94 | + * be flushed before or after rounding? For most architectures this | ||
95 | + * should be set to match the tininess_before_rounding setting, | ||
96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before | ||
97 | + * rounding but tininess after rounding. | ||
98 | + * | ||
99 | + * This enum is arranged so that the default if the target doesn't | ||
100 | + * configure it matches the default for tininess_before_rounding | ||
101 | + * (i.e. "after rounding"). | ||
102 | + */ | ||
103 | +typedef enum __attribute__((__packed__)) { | ||
104 | + float_ftz_after_rounding = 0, | ||
105 | + float_ftz_before_rounding = 1, | ||
106 | +} FloatFTZDetection; | ||
107 | + | ||
108 | /* | ||
109 | * Floating Point Status. Individual architectures may maintain | ||
110 | * several versions of float_status for different functions. The | ||
111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | ||
112 | bool tininess_before_rounding; | ||
113 | /* should denormalised results go to zero and set output_denormal_flushed? */ | ||
114 | bool flush_to_zero; | ||
115 | + /* do we detect and flush denormal results before or after rounding? */ | ||
116 | + FloatFTZDetection ftz_detection; | ||
117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ | ||
118 | bool flush_inputs_to_zero; | ||
119 | bool default_nan_mode; | ||
120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/mips/fpu_helper.h | ||
123 | +++ b/target/mips/fpu_helper.h | ||
124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) | ||
125 | */ | ||
126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
127 | &env->active_fpu.fp_status); | ||
128 | + /* | ||
129 | + * TODO: the spec doesn't say clearly whether FTZ happens before | ||
130 | + * or after rounding for normal FPU operations. | ||
131 | + */ | ||
132 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
133 | + &env->active_fpu.fp_status); | ||
134 | } | ||
135 | |||
136 | /* MSA */ | ||
137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/alpha/cpu.c | ||
140 | +++ b/target/alpha/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
143 | /* Default NaN: sign bit clear, msb frac bit set */ | ||
144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
145 | + /* | ||
146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so | ||
148 | + * this should be float_ftz_after_rounding to match the | ||
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
161 | { | ||
162 | set_float_detect_tininess(float_tininess_before_rounding, s); | ||
163 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/hppa/fpu_helper.c | ||
170 | +++ b/target/hppa/fpu_helper.c | ||
171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); | ||
173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ | ||
174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); | ||
175 | + /* | ||
176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing | ||
177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" | ||
178 | + * for consistency with tininess detection. | ||
179 | + */ | ||
180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
181 | + /* | ||
182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should | ||
183 | + * detect tininess before rounding, but we don't set that here so we | ||
184 | + * get the default tininess after rounding. | ||
185 | + */ | ||
186 | } | ||
187 | |||
188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/i386/tcg/fpu_helper.c | ||
192 | +++ b/target/i386/tcg/fpu_helper.c | ||
193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) | ||
194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); | ||
195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); | ||
196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); | ||
197 | + /* | ||
198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM | ||
199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush | ||
200 | + * when we detect underflow, which x86 does after rounding). | ||
201 | + */ | ||
202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); | ||
204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); | ||
205 | } | ||
206 | |||
207 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
208 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/mips/msa.c | ||
211 | +++ b/target/mips/msa.c | ||
212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
213 | /* tininess detected after rounding.*/ | ||
214 | set_float_detect_tininess(float_tininess_after_rounding, | ||
215 | &env->active_tc.msa_fp_status); | ||
216 | + /* | ||
217 | + * MSACSR.FS detects tiny results to flush to zero before rounding | ||
218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD | ||
219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it | ||
220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 | ||
221 | + * table 3.3). | ||
222 | + */ | ||
223 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
224 | + &env->active_tc.msa_fp_status); | ||
225 | |||
226 | /* | ||
227 | * According to MIPS specifications, if one of the two operands is | ||
228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
229 | index XXXXXXX..XXXXXXX 100644 | ||
230 | --- a/target/ppc/cpu_init.c | ||
231 | +++ b/target/ppc/cpu_init.c | ||
232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
233 | /* tininess for underflow is detected before rounding */ | ||
234 | set_float_detect_tininess(float_tininess_before_rounding, | ||
235 | &env->fp_status); | ||
236 | + /* Similarly for flush-to-zero */ | ||
237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
238 | + | ||
239 | /* | ||
240 | * PowerPC propagation rules: | ||
241 | * 1. A if it sNaN or qNaN | ||
242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/rx/cpu.c | ||
245 | +++ b/target/rx/cpu.c | ||
246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
248 | /* Default NaN value: sign bit clear, set frac msb */ | ||
249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
250 | + /* | ||
251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear | ||
252 | + * on whether flush-to-zero should happen before or after rounding, but | ||
253 | + * section 1.3.2 says that it happens when underflow is detected, and | ||
254 | + * implies that underflow is detected after rounding. So this may not | ||
255 | + * be the correct setting. | ||
256 | + */ | ||
257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
258 | } | ||
259 | |||
260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/target/sh4/cpu.c | ||
264 | +++ b/target/sh4/cpu.c | ||
265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) | ||
266 | set_default_nan_mode(1, &env->fp_status); | ||
267 | /* sign bit clear, set all frac bits other than msb */ | ||
268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); | ||
269 | + /* | ||
270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether | ||
271 | + * it detects tininess before or after rounding. Section 6.4 is clear | ||
272 | + * that flush-to-zero happens when the result underflows, though, so | ||
273 | + * either this should be "detect ftz after rounding" or else we should | ||
274 | + * be setting "detect tininess before rounding". | ||
275 | + */ | ||
276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
277 | } | ||
278 | |||
279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) | ||
280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/tricore/helper.c | ||
283 | +++ b/target/tricore/helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) | ||
285 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
286 | set_flush_to_zero(1, &env->fp_status); | ||
287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | ||
288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
289 | set_default_nan_mode(1, &env->fp_status); | ||
290 | /* Default NaN pattern: sign bit clear, frac msb set */ | ||
291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/fp/fp-bench.c | ||
295 | +++ b/tests/fp/fp-bench.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); | ||
298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); | ||
299 | set_float_default_nan_pattern(0b01000000, &soft_status); | ||
300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); | ||
301 | |||
302 | f = bench_funcs[operation][precision]; | ||
303 | g_assert(f); | ||
304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
305 | index XXXXXXX..XXXXXXX 100644 | ||
306 | --- a/fpu/softfloat-parts.c.inc | ||
307 | +++ b/fpu/softfloat-parts.c.inc | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
309 | p->frac_lo &= ~round_mask; | ||
310 | } | ||
311 | frac_shr(p, frac_shift); | ||
312 | - } else if (s->flush_to_zero) { | ||
313 | + } else if (s->flush_to_zero && | ||
314 | + s->ftz_detection == float_ftz_before_rounding) { | ||
315 | flags |= float_flag_output_denormal_flushed; | ||
316 | p->cls = float_class_zero; | ||
317 | exp = 0; | ||
318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; | ||
320 | frac_shr(p, frac_shift); | ||
321 | |||
322 | - if (is_tiny && (flags & float_flag_inexact)) { | ||
323 | - flags |= float_flag_underflow; | ||
324 | - } | ||
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
340 | } | ||
341 | } | ||
342 | p->exp = exp; | ||
343 | -- | ||
344 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The Armv8.7 FEAT_AFP feature defines three new control bits in | ||
2 | the FPCR: | ||
3 | * FPCR.AH: "alternate floating point mode"; this changes floating | ||
4 | point behaviour in a variety of ways, including: | ||
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, denormals detected after rounding | ||
7 | with an unbounded exponent has been applied are flushed to zero | ||
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
1 | 15 | ||
16 | This commit defines the new bits in the FPCR, and allows them to be | ||
17 | read or written when FEAT_AFP is implemented. Actual behaviour | ||
18 | changes will be implemented in subsequent commits. | ||
19 | |||
20 | Note that these are the first FPCR bits which don't appear in the | ||
21 | AArch32 FPSCR view of the register, and which share bit positions | ||
22 | with FPSR bits. | ||
23 | |||
24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | --- | ||
27 | target/arm/cpu-features.h | 5 +++++ | ||
28 | target/arm/cpu.h | 3 +++ | ||
29 | target/arm/vfp_helper.c | 11 ++++++++--- | ||
30 | 3 files changed, 16 insertions(+), 3 deletions(-) | ||
31 | |||
32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/cpu-features.h | ||
35 | +++ b/target/arm/cpu-features.h | ||
36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) | ||
37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; | ||
38 | } | ||
39 | |||
40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) | ||
41 | +{ | ||
42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; | ||
43 | +} | ||
44 | + | ||
45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | ||
46 | { | ||
47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | ||
48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/cpu.h | ||
51 | +++ b/target/arm/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
53 | */ | ||
54 | |||
55 | /* FPCR bits */ | ||
56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ | ||
57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ | ||
58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ | ||
59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ | ||
60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ | ||
61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
67 | if (!cpu_isar_feature(any_fp16, cpu)) { | ||
68 | val &= ~FPCR_FZ16; | ||
69 | } | ||
70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { | ||
71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); | ||
72 | + } | ||
73 | |||
74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { | ||
75 | val &= ~FPCR_EBF; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
77 | * We don't implement trapped exception handling, so the | ||
78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) | ||
79 | * | ||
80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF | ||
81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits | ||
82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, | ||
83 | + * FIZ, AH, and NEP. | ||
84 | + * Len, Stride and LTPSIZE we just handled. Store those bits | ||
85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI | ||
86 | * bits. | ||
87 | */ | ||
88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; | ||
89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | | ||
90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; | ||
91 | env->vfp.fpcr &= ~mask; | ||
92 | env->vfp.fpcr |= val; | ||
93 | } | ||
94 | -- | ||
95 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects |
---|---|---|---|
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
2 | 5 | ||
3 | The AT instruction is UNDEFINED if the {NSE,NS} configuration is | 6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal |
4 | invalid. Add a function to check this on all AT instructions that apply | 7 | only because of FPCR.FIZ then we should *not* set the cumulative |
5 | to an EL lower than 3. | 8 | exception bit FPSR.IDC. |
6 | 9 | ||
7 | Suggested-by: Peter Maydell <peter.maydell@linaro.org> | 10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only |
8 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | 11 | applies when FPCR.AH is 0. |
9 | Message-id: 20230809123706.1842548-6-jean-philippe@linaro.org | 12 | |
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal_flushed status flag into FPSR reads when it is the | ||
16 | result only of FPCR.FIZ. | ||
17 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
12 | --- | 20 | --- |
13 | target/arm/helper.c | 38 +++++++++++++++++++++++++++----------- | 21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- |
14 | 1 file changed, 27 insertions(+), 11 deletions(-) | 22 | 1 file changed, 50 insertions(+), 10 deletions(-) |
15 | 23 | ||
16 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/helper.c | 26 | --- a/target/arm/vfp_helper.c |
19 | +++ b/target/arm/helper.c | 27 | +++ b/target/arm/vfp_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, | 28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) |
21 | #endif /* CONFIG_TCG */ | 29 | |
30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
31 | { | ||
32 | - uint32_t i = 0; | ||
33 | + uint32_t a32_flags = 0, a64_flags = 0; | ||
34 | |||
35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
40 | /* FZ16 does not generate an input denormal exception. */ | ||
41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
22 | } | 65 | } |
23 | 66 | ||
24 | +static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, | 67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
25 | + bool isread) | 68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
70 | } | ||
71 | |||
72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
26 | +{ | 73 | +{ |
27 | + /* | 74 | + /* |
28 | + * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level | 75 | + * Synchronize any pending exception-flag information in the |
29 | + * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can | 76 | + * float_status values into env->vfp.fpsr, and then clear out |
30 | + * only happen when executing at EL3 because that combination also causes an | 77 | + * the float_status data. |
31 | + * illegal exception return. We don't need to check FEAT_RME either, because | ||
32 | + * scr_write() ensures that the NSE bit is not set otherwise. | ||
33 | + */ | 78 | + */ |
34 | + if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { | 79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); |
35 | + return CP_ACCESS_TRAP; | 80 | + vfp_clear_float_status_exc_flags(env); |
36 | + } | ||
37 | + return CP_ACCESS_OK; | ||
38 | +} | 81 | +} |
39 | + | 82 | + |
40 | static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, | 83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
41 | bool isread) | ||
42 | { | 84 | { |
43 | @@ -XXX,XX +XXX,XX @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, | 85 | uint64_t changed = env->vfp.fpcr; |
44 | !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { | 86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
45 | return CP_ACCESS_TRAP; | 87 | if (changed & FPCR_FZ) { |
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
46 | } | 104 | } |
47 | - return CP_ACCESS_OK; | 105 | if (changed & FPCR_DN) { |
48 | + return at_e012_access(env, ri, isread); | 106 | bool dnan_enabled = val & FPCR_DN; |
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
49 | } | 119 | } |
50 | 120 | ||
51 | static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, | 121 | #else |
52 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { | ||
53 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, | ||
54 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
55 | .fgt = FGT_ATS1E1R, | ||
56 | - .writefn = ats_write64 }, | ||
57 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
58 | { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, | ||
59 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, | ||
60 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
61 | .fgt = FGT_ATS1E1W, | ||
62 | - .writefn = ats_write64 }, | ||
63 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
64 | { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, | ||
65 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, | ||
66 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
67 | .fgt = FGT_ATS1E0R, | ||
68 | - .writefn = ats_write64 }, | ||
69 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
70 | { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, | ||
71 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, | ||
72 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
73 | .fgt = FGT_ATS1E0W, | ||
74 | - .writefn = ats_write64 }, | ||
75 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
76 | { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, | ||
77 | .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, | ||
78 | .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
79 | - .writefn = ats_write64 }, | ||
80 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
81 | { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, | ||
82 | .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, | ||
83 | .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
84 | - .writefn = ats_write64 }, | ||
85 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
86 | { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, | ||
87 | .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, | ||
88 | .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
89 | - .writefn = ats_write64 }, | ||
90 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
91 | { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, | ||
92 | .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7, | ||
93 | .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
94 | - .writefn = ats_write64 }, | ||
95 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
96 | /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ | ||
97 | { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, | ||
98 | .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, | ||
99 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo ats1e1_reginfo[] = { | ||
100 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, | ||
101 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
102 | .fgt = FGT_ATS1E1RP, | ||
103 | - .writefn = ats_write64 }, | ||
104 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
105 | { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, | ||
106 | .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, | ||
107 | .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, | ||
108 | .fgt = FGT_ATS1E1WP, | ||
109 | - .writefn = ats_write64 }, | ||
110 | + .accessfn = at_e012_access, .writefn = ats_write64 }, | ||
111 | }; | ||
112 | |||
113 | static const ARMCPRegInfo ats1cp_reginfo[] = { | ||
114 | -- | 122 | -- |
115 | 2.34.1 | 123 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | When FPCR.AH is set, various behaviours of AArch64 floating point |
---|---|---|---|
2 | operations which are controlled by softfloat config settings change: | ||
3 | * tininess and ftz detection before/after rounding | ||
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
2 | 7 | ||
3 | At the moment we only handle Secure and Nonsecure security spaces for | 8 | When the guest changes the value of the AH bit, switch these config |
4 | the AT instructions. Add support for Realm and Root. | 9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status |
10 | fields. | ||
5 | 11 | ||
6 | For AArch64, arm_security_space() gives the desired space. ARM DDI0487J | 12 | This requires us to make the arm_set_default_fp_behaviours() function |
7 | says (R_NYXTL): | 13 | global, since we now need to call it from cpu.c and vfp_helper.c; we |
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
8 | 16 | ||
9 | If EL3 is implemented, then when an address translation instruction | ||
10 | that applies to an Exception level lower than EL3 is executed, the | ||
11 | Effective value of SCR_EL3.{NSE, NS} determines the target Security | ||
12 | state that the instruction applies to. | ||
13 | |||
14 | For AArch32, some instructions can access NonSecure space from Secure, | ||
15 | so we still need to pass the state explicitly to do_ats_write(). | ||
16 | |||
17 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
18 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Message-id: 20230809123706.1842548-5-jean-philippe@linaro.org | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | 19 | --- |
22 | target/arm/internals.h | 18 +++++++++--------- | 20 | target/arm/internals.h | 4 +++ |
23 | target/arm/helper.c | 27 ++++++++++++--------------- | 21 | target/arm/cpu.c | 23 ---------------- |
24 | target/arm/ptw.c | 12 ++++++------ | 22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- |
25 | 3 files changed, 27 insertions(+), 30 deletions(-) | 23 | 3 files changed, 61 insertions(+), 24 deletions(-) |
26 | 24 | ||
27 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 25 | diff --git a/target/arm/internals.h b/target/arm/internals.h |
28 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/internals.h | 27 | --- a/target/arm/internals.h |
30 | +++ b/target/arm/internals.h | 28 | +++ b/target/arm/internals.h |
31 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address, | 29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); |
32 | __attribute__((nonnull)); | 30 | * all EL1" scope; this covers stage 1 and stage 2. |
33 | |||
34 | /** | ||
35 | - * get_phys_addr_with_secure_nogpc: get the physical address for a virtual | ||
36 | - * address | ||
37 | + * get_phys_addr_with_space_nogpc: get the physical address for a virtual | ||
38 | + * address | ||
39 | * @env: CPUARMState | ||
40 | * @address: virtual address to get physical address for | ||
41 | * @access_type: 0 for read, 1 for write, 2 for execute | ||
42 | * @mmu_idx: MMU index indicating required translation regime | ||
43 | - * @is_secure: security state for the access | ||
44 | + * @space: security space for the access | ||
45 | * @result: set on translation success. | ||
46 | * @fi: set to fault info if the translation fails | ||
47 | * | ||
48 | - * Similar to get_phys_addr, but use the given security regime and don't perform | ||
49 | + * Similar to get_phys_addr, but use the given security space and don't perform | ||
50 | * a Granule Protection Check on the resulting address. | ||
51 | */ | 31 | */ |
52 | -bool get_phys_addr_with_secure_nogpc(CPUARMState *env, target_ulong address, | 32 | int alle1_tlbmask(CPUARMState *env); |
53 | - MMUAccessType access_type, | 33 | + |
54 | - ARMMMUIdx mmu_idx, bool is_secure, | 34 | +/* Set the float_status behaviour to match the Arm defaults */ |
55 | - GetPhysAddrResult *result, | 35 | +void arm_set_default_fp_behaviours(float_status *s); |
56 | - ARMMMUFaultInfo *fi) | 36 | + |
57 | +bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, | 37 | #endif |
58 | + MMUAccessType access_type, | 38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
59 | + ARMMMUIdx mmu_idx, ARMSecuritySpace space, | ||
60 | + GetPhysAddrResult *result, | ||
61 | + ARMMMUFaultInfo *fi) | ||
62 | __attribute__((nonnull)); | ||
63 | |||
64 | bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | ||
65 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
67 | --- a/target/arm/helper.c | 40 | --- a/target/arm/cpu.c |
68 | +++ b/target/arm/helper.c | 41 | +++ b/target/arm/cpu.c |
69 | @@ -XXX,XX +XXX,XX @@ static int par_el1_shareability(GetPhysAddrResult *res) | 42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, |
70 | 43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); | |
71 | static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | 44 | } |
72 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 45 | |
73 | - bool is_secure) | 46 | -/* |
74 | + ARMSecuritySpace ss) | 47 | - * Set the float_status behaviour to match the Arm defaults: |
48 | - * * tininess-before-rounding | ||
49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
58 | - */ | ||
59 | -static void arm_set_default_fp_behaviours(float_status *s) | ||
60 | -{ | ||
61 | - set_float_detect_tininess(float_tininess_before_rounding, s); | ||
62 | - set_float_ftz_detection(float_ftz_before_rounding, s); | ||
63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
66 | - set_float_default_nan_pattern(0b01000000, s); | ||
67 | -} | ||
68 | - | ||
69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
75 | { | 70 | { |
76 | bool ret; | 71 | /* Reset a single ARMCPRegInfo register */ |
77 | uint64_t par64; | 72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
78 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | 73 | index XXXXXXX..XXXXXXX 100644 |
79 | * I_MXTJT: Granule protection checks are not performed on the final address | 74 | --- a/target/arm/vfp_helper.c |
80 | * of a successful translation. | 75 | +++ b/target/arm/vfp_helper.c |
81 | */ | 76 | @@ -XXX,XX +XXX,XX @@ |
82 | - ret = get_phys_addr_with_secure_nogpc(env, value, access_type, mmu_idx, | 77 | #include "exec/helper-proto.h" |
83 | - is_secure, &res, &fi); | 78 | #include "internals.h" |
84 | + ret = get_phys_addr_with_space_nogpc(env, value, access_type, mmu_idx, ss, | 79 | #include "cpu-features.h" |
85 | + &res, &fi); | 80 | +#include "fpu/softfloat.h" |
86 | 81 | #ifdef CONFIG_TCG | |
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
89 | |||
90 | +/* | ||
91 | + * Set the float_status behaviour to match the Arm defaults: | ||
92 | + * * tininess-before-rounding | ||
93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b) | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
102 | + */ | ||
103 | +void arm_set_default_fp_behaviours(float_status *s) | ||
104 | +{ | ||
105 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
106 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
110 | + set_float_default_nan_pattern(0b01000000, s); | ||
111 | +} | ||
112 | + | ||
113 | +/* | ||
114 | + * Set the float_status behaviour to match the FEAT_AFP | ||
115 | + * FPCR.AH=1 requirements: | ||
116 | + * * tininess-after-rounding | ||
117 | + * * 2-input NaN propagation prefers the first NaN | ||
118 | + * * 3-input NaN propagation prefers a over b over c | ||
119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't | ||
120 | + * set Invalid for a QNaN | ||
121 | + * * default NaN has sign bit set, msb frac bit set | ||
122 | + */ | ||
123 | +static void arm_set_ah_fp_behaviours(float_status *s) | ||
124 | +{ | ||
125 | + set_float_detect_tininess(float_tininess_after_rounding, s); | ||
126 | + set_float_ftz_detection(float_ftz_after_rounding, s); | ||
127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); | ||
128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); | ||
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
133 | + | ||
134 | #ifdef CONFIG_TCG | ||
135 | |||
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
140 | } | ||
141 | + if (changed & FPCR_AH) { | ||
142 | + bool ah_enabled = val & FPCR_AH; | ||
143 | + | ||
144 | + if (ah_enabled) { | ||
145 | + /* Change behaviours for A64 FP operations */ | ||
146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
148 | + } else { | ||
149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
151 | + } | ||
152 | + } | ||
87 | /* | 153 | /* |
88 | * ATS operations only do S1 or S1+S2 translations, so we never | 154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), |
89 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | 155 | * we must sync the float_status flags into vfp.fpsr now (under the |
90 | uint64_t par64; | ||
91 | ARMMMUIdx mmu_idx; | ||
92 | int el = arm_current_el(env); | ||
93 | - bool secure = arm_is_secure_below_el3(env); | ||
94 | + ARMSecuritySpace ss = arm_security_space(env); | ||
95 | |||
96 | switch (ri->opc2 & 6) { | ||
97 | case 0: | ||
98 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | ||
99 | switch (el) { | ||
100 | case 3: | ||
101 | mmu_idx = ARMMMUIdx_E3; | ||
102 | - secure = true; | ||
103 | break; | ||
104 | case 2: | ||
105 | - g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
106 | + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
107 | /* fall through */ | ||
108 | case 1: | ||
109 | if (ri->crm == 9 && (env->uncached_cpsr & CPSR_PAN)) { | ||
110 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | ||
111 | switch (el) { | ||
112 | case 3: | ||
113 | mmu_idx = ARMMMUIdx_E10_0; | ||
114 | - secure = true; | ||
115 | break; | ||
116 | case 2: | ||
117 | - g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
118 | + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
119 | mmu_idx = ARMMMUIdx_Stage1_E0; | ||
120 | break; | ||
121 | case 1: | ||
122 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | ||
123 | case 4: | ||
124 | /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ | ||
125 | mmu_idx = ARMMMUIdx_E10_1; | ||
126 | - secure = false; | ||
127 | + ss = ARMSS_NonSecure; | ||
128 | break; | ||
129 | case 6: | ||
130 | /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ | ||
131 | mmu_idx = ARMMMUIdx_E10_0; | ||
132 | - secure = false; | ||
133 | + ss = ARMSS_NonSecure; | ||
134 | break; | ||
135 | default: | ||
136 | g_assert_not_reached(); | ||
137 | } | ||
138 | |||
139 | - par64 = do_ats_write(env, value, access_type, mmu_idx, secure); | ||
140 | + par64 = do_ats_write(env, value, access_type, mmu_idx, ss); | ||
141 | |||
142 | A32_BANKED_CURRENT_REG_SET(env, par, par64); | ||
143 | #else | ||
144 | @@ -XXX,XX +XXX,XX @@ static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
145 | uint64_t par64; | ||
146 | |||
147 | /* There is no SecureEL2 for AArch32. */ | ||
148 | - par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, false); | ||
149 | + par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, | ||
150 | + ARMSS_NonSecure); | ||
151 | |||
152 | A32_BANKED_CURRENT_REG_SET(env, par, par64); | ||
153 | #else | ||
154 | @@ -XXX,XX +XXX,XX @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, | ||
155 | #ifdef CONFIG_TCG | ||
156 | MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; | ||
157 | ARMMMUIdx mmu_idx; | ||
158 | - int secure = arm_is_secure_below_el3(env); | ||
159 | uint64_t hcr_el2 = arm_hcr_el2_eff(env); | ||
160 | bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); | ||
161 | |||
162 | @@ -XXX,XX +XXX,XX @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, | ||
163 | break; | ||
164 | case 6: /* AT S1E3R, AT S1E3W */ | ||
165 | mmu_idx = ARMMMUIdx_E3; | ||
166 | - secure = true; | ||
167 | break; | ||
168 | default: | ||
169 | g_assert_not_reached(); | ||
170 | @@ -XXX,XX +XXX,XX @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, | ||
171 | } | ||
172 | |||
173 | env->cp15.par_el[1] = do_ats_write(env, value, access_type, | ||
174 | - mmu_idx, secure); | ||
175 | + mmu_idx, arm_security_space(env)); | ||
176 | #else | ||
177 | /* Handled by hardware accelerator. */ | ||
178 | g_assert_not_reached(); | ||
179 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
180 | index XXXXXXX..XXXXXXX 100644 | ||
181 | --- a/target/arm/ptw.c | ||
182 | +++ b/target/arm/ptw.c | ||
183 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, | ||
184 | return false; | ||
185 | } | ||
186 | |||
187 | -bool get_phys_addr_with_secure_nogpc(CPUARMState *env, target_ulong address, | ||
188 | - MMUAccessType access_type, | ||
189 | - ARMMMUIdx mmu_idx, bool is_secure, | ||
190 | - GetPhysAddrResult *result, | ||
191 | - ARMMMUFaultInfo *fi) | ||
192 | +bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, | ||
193 | + MMUAccessType access_type, | ||
194 | + ARMMMUIdx mmu_idx, ARMSecuritySpace space, | ||
195 | + GetPhysAddrResult *result, | ||
196 | + ARMMMUFaultInfo *fi) | ||
197 | { | ||
198 | S1Translate ptw = { | ||
199 | .in_mmu_idx = mmu_idx, | ||
200 | - .in_space = arm_secure_to_space(is_secure), | ||
201 | + .in_space = space, | ||
202 | }; | ||
203 | return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi); | ||
204 | } | ||
205 | -- | 156 | -- |
206 | 2.34.1 | 157 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
1 | 5 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
1 | arm_hcr_el2_eff_secstate() takes a bool secure, which it uses to | 1 | We are going to need to generate different code in some cases when |
---|---|---|---|
2 | determine whether EL2 is enabled in the current security state. | 2 | FPCR.AH is 1. For example: |
3 | With the advent of FEAT_RME this is no longer sufficient, because | 3 | * Floating point neg and abs must not flip the sign bit of NaNs |
4 | EL2 can be enabled for Secure state but not for Root, and both | 4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various |
5 | of those will pass 'secure == true' in the callsites in ptw.c. | 5 | BFCVT and BFM bfloat16 ops) need to use a different float_status |
6 | to the usual one | ||
6 | 7 | ||
7 | As it happens in all of our callsites in ptw.c we either avoid making | 8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at |
8 | the call or else avoid using the returned value if we're doing a | 9 | translate time. |
9 | translation for Root, so this is not a behaviour change even if the | ||
10 | experimental FEAT_RME is enabled. But it is less confusing in the | ||
11 | ptw.c code if we avoid the use of a bool secure that duplicates some | ||
12 | of the information in the ArmSecuritySpace argument. | ||
13 | 10 | ||
14 | Make arm_hcr_el2_eff_secstate() take an ARMSecuritySpace argument | 11 | Because we now have a bit in FPCR that affects codegen, we can't mark |
15 | instead. Because we always want to know the HCR_EL2 for the | 12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes |
16 | security state defined by the current effective value of | 13 | to it will now end the TB and trigger a regeneration of hflags. |
17 | SCR_EL3.{NSE,NS}, it makes no sense to pass ARMSS_Root here, | ||
18 | and we assert that callers don't do that. | ||
19 | |||
20 | To avoid the assert(), we thus push the call to | ||
21 | arm_hcr_el2_eff_secstate() down into the cases in | ||
22 | regime_translation_disabled() that need it, rather than calling the | ||
23 | function and ignoring the result for the Root space translations. | ||
24 | All other calls to this function in ptw.c are already in places | ||
25 | where we have confirmed that the mmu_idx is a stage 2 translation | ||
26 | or that the regime EL is not 3. | ||
27 | 14 | ||
28 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
29 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
30 | Message-id: 20230807141514.19075-7-peter.maydell@linaro.org | ||
31 | --- | 17 | --- |
32 | target/arm/cpu.h | 2 +- | 18 | target/arm/cpu.h | 1 + |
33 | target/arm/helper.c | 8 +++++--- | 19 | target/arm/tcg/translate.h | 2 ++ |
34 | target/arm/ptw.c | 15 +++++++-------- | 20 | target/arm/helper.c | 2 +- |
35 | 3 files changed, 13 insertions(+), 12 deletions(-) | 21 | target/arm/tcg/hflags.c | 4 ++++ |
22 | target/arm/tcg/translate-a64.c | 1 + | ||
23 | 5 files changed, 9 insertions(+), 1 deletion(-) | ||
36 | 24 | ||
37 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
38 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/target/arm/cpu.h | 27 | --- a/target/arm/cpu.h |
40 | +++ b/target/arm/cpu.h | 28 | +++ b/target/arm/cpu.h |
41 | @@ -XXX,XX +XXX,XX @@ static inline bool arm_is_el2_enabled(CPUARMState *env) | 29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) |
42 | * "for all purposes other than a direct read or write access of HCR_EL2." | 30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) |
43 | * Not included here is HCR_RW. | 31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ |
44 | */ | 32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) |
45 | -uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, bool secure); | 33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ |
46 | +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space); | 34 | |
47 | uint64_t arm_hcr_el2_eff(CPUARMState *env); | 35 | /* |
48 | uint64_t arm_hcrx_el2_eff(CPUARMState *env); | 36 | * Helpers for using the above. Note that only the A64 accessors use |
49 | 37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | |
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate.h | ||
40 | +++ b/target/arm/tcg/translate.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
42 | bool nv2_mem_e20; | ||
43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ | ||
44 | bool nv2_mem_be; | ||
45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
46 | + bool fpcr_ah; | ||
47 | /* | ||
48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
49 | * < 0, set by the current instruction. | ||
50 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 50 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
51 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/target/arm/helper.c | 52 | --- a/target/arm/helper.c |
53 | +++ b/target/arm/helper.c | 53 | +++ b/target/arm/helper.c |
54 | @@ -XXX,XX +XXX,XX @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, | 54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { |
55 | * Bits that are not included here: | 55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, |
56 | * RW (read from SCR_EL3.RW as needed) | 56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, |
57 | */ | 57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, |
58 | -uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, bool secure) | 58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, |
59 | +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space) | 59 | + .access = PL0_RW, .type = ARM_CP_FPU, |
60 | { | 60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, |
61 | uint64_t ret = env->cp15.hcr_el2; | 61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, |
62 | 62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, | |
63 | - if (!arm_is_el2_enabled_secstate(env, secure)) { | 63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c |
64 | + assert(space != ARMSS_Root); | 64 | index XXXXXXX..XXXXXXX 100644 |
65 | --- a/target/arm/tcg/hflags.c | ||
66 | +++ b/target/arm/tcg/hflags.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); | ||
69 | } | ||
70 | |||
71 | + if (env->vfp.fpcr & FPCR_AH) { | ||
72 | + DP_TBFLAG_A64(flags, AH, 1); | ||
73 | + } | ||
65 | + | 74 | + |
66 | + if (!arm_is_el2_enabled_secstate(env, arm_space_is_secure(space))) { | 75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); |
67 | /* | ||
68 | * "This register has no effect if EL2 is not enabled in the | ||
69 | * current Security state". This is ARMv8.4-SecEL2 speak for | ||
70 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) | ||
71 | if (arm_feature(env, ARM_FEATURE_M)) { | ||
72 | return 0; | ||
73 | } | ||
74 | - return arm_hcr_el2_eff_secstate(env, arm_is_secure_below_el3(env)); | ||
75 | + return arm_hcr_el2_eff_secstate(env, arm_security_space_below_el3(env)); | ||
76 | } | 76 | } |
77 | 77 | ||
78 | /* | 78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
79 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | 79 | index XXXXXXX..XXXXXXX 100644 |
81 | --- a/target/arm/ptw.c | 80 | --- a/target/arm/tcg/translate-a64.c |
82 | +++ b/target/arm/ptw.c | 81 | +++ b/target/arm/tcg/translate-a64.c |
83 | @@ -XXX,XX +XXX,XX @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, | 82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, |
84 | ARMSecuritySpace space) | 83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); |
85 | { | 84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); |
86 | uint64_t hcr_el2; | 85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); |
87 | - bool is_secure = arm_space_is_secure(space); | 86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); |
88 | 87 | dc->vec_len = 0; | |
89 | if (arm_feature(env, ARM_FEATURE_M)) { | 88 | dc->vec_stride = 0; |
90 | + bool is_secure = arm_space_is_secure(space); | 89 | dc->cp_regs = arm_cpu->cp_regs; |
91 | switch (env->v7m.mpu_ctrl[is_secure] & | ||
92 | (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { | ||
93 | case R_V7M_MPU_CTRL_ENABLE_MASK: | ||
94 | @@ -XXX,XX +XXX,XX @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
95 | } | ||
96 | } | ||
97 | |||
98 | - hcr_el2 = arm_hcr_el2_eff_secstate(env, is_secure); | ||
99 | |||
100 | switch (mmu_idx) { | ||
101 | case ARMMMUIdx_Stage2: | ||
102 | case ARMMMUIdx_Stage2_S: | ||
103 | /* HCR.DC means HCR.VM behaves as 1 */ | ||
104 | + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); | ||
105 | return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; | ||
106 | |||
107 | case ARMMMUIdx_E10_0: | ||
108 | case ARMMMUIdx_E10_1: | ||
109 | case ARMMMUIdx_E10_1_PAN: | ||
110 | /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */ | ||
111 | + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); | ||
112 | if (hcr_el2 & HCR_TGE) { | ||
113 | return true; | ||
114 | } | ||
115 | @@ -XXX,XX +XXX,XX @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
116 | case ARMMMUIdx_Stage1_E1: | ||
117 | case ARMMMUIdx_Stage1_E1_PAN: | ||
118 | /* HCR.DC means SCTLR_EL1.M behaves as 0 */ | ||
119 | + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); | ||
120 | if (hcr_el2 & HCR_DC) { | ||
121 | return true; | ||
122 | } | ||
123 | @@ -XXX,XX +XXX,XX @@ static bool fault_s1ns(ARMSecuritySpace space, ARMMMUIdx s2_mmu_idx) | ||
124 | static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | ||
125 | hwaddr addr, ARMMMUFaultInfo *fi) | ||
126 | { | ||
127 | - bool is_secure = ptw->in_secure; | ||
128 | ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | ||
129 | ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx; | ||
130 | uint8_t pte_attrs; | ||
131 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | ||
132 | } | ||
133 | |||
134 | if (regime_is_stage2(s2_mmu_idx)) { | ||
135 | - uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure); | ||
136 | + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); | ||
137 | |||
138 | if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { | ||
139 | /* | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_disabled(CPUARMState *env, | ||
141 | ARMMMUFaultInfo *fi) | ||
142 | { | ||
143 | ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | ||
144 | - bool is_secure = arm_space_is_secure(ptw->in_space); | ||
145 | uint8_t memattr = 0x00; /* Device nGnRnE */ | ||
146 | uint8_t shareability = 0; /* non-shareable */ | ||
147 | int r_el; | ||
148 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_disabled(CPUARMState *env, | ||
149 | |||
150 | /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ | ||
151 | if (r_el == 1) { | ||
152 | - uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure); | ||
153 | + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); | ||
154 | if (hcr & HCR_DC) { | ||
155 | if (hcr & HCR_DCT) { | ||
156 | memattr = 0xf0; /* Tagged, Normal, WB, RWA */ | ||
157 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, | ||
158 | { | ||
159 | hwaddr ipa; | ||
160 | int s1_prot, s1_lgpgsz; | ||
161 | - bool is_secure = ptw->in_secure; | ||
162 | ARMSecuritySpace in_space = ptw->in_space; | ||
163 | bool ret, ipa_secure; | ||
164 | ARMCacheAttrs cacheattrs1; | ||
165 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, | ||
166 | } | ||
167 | |||
168 | /* Combine the S1 and S2 cache attributes. */ | ||
169 | - hcr = arm_hcr_el2_eff_secstate(env, is_secure); | ||
170 | + hcr = arm_hcr_el2_eff_secstate(env, in_space); | ||
171 | if (hcr & HCR_DC) { | ||
172 | /* | ||
173 | * HCR.DC forces the first stage attributes to | ||
174 | -- | 90 | -- |
175 | 2.34.1 | 91 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | When FPCR.AH is 1, the behaviour of some instructions changes: |
---|---|---|---|
2 | 2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | |
3 | GPC checks are not performed on the output address for AT instructions, | 3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT |
4 | as stated by ARM DDI 0487J in D8.12.2: | 4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which |
5 | 5 | QEMU does not yet implement) | |
6 | When populating PAR_EL1 with the result of an address translation | 6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS |
7 | instruction, granule protection checks are not performed on the final | 7 | |
8 | output address of a successful translation. | 8 | The behaviour change is: |
9 | 9 | * the instructions do not update the FPSR cumulative exception flags | |
10 | Rename get_phys_addr_with_secure(), since it's only used to handle AT | 10 | * trapped floating point exceptions are disabled (a no-op for QEMU, |
11 | instructions. | 11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) |
12 | 12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode | |
13 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | 13 | * denormalized inputs and outputs are always flushed to zero, as if |
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | FPCR.{FZ,FIZ} is {1,1} |
15 | Message-id: 20230809123706.1842548-4-jean-philippe@linaro.org | 15 | * FPCR.FZ16 is still honoured for half-precision inputs |
16 | |||
17 | (See the Arm ARM DDI0487L.a section A1.5.9.) | ||
18 | |||
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPSR is updated. In subsequent commits | ||
28 | we will arrange to use this new fp_status field for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | --- | 33 | --- |
18 | target/arm/internals.h | 25 ++++++++++++++----------- | 34 | target/arm/cpu.h | 15 +++++++++++++++ |
19 | target/arm/helper.c | 8 ++++++-- | 35 | target/arm/internals.h | 2 ++ |
20 | target/arm/ptw.c | 11 ++++++----- | 36 | target/arm/tcg/translate.h | 14 ++++++++++++++ |
21 | 3 files changed, 26 insertions(+), 18 deletions(-) | 37 | target/arm/cpu.c | 4 ++++ |
22 | 38 | target/arm/vfp_helper.c | 13 ++++++++++++- | |
39 | 5 files changed, 47 insertions(+), 1 deletion(-) | ||
40 | |||
41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/cpu.h | ||
44 | +++ b/target/arm/cpu.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
46 | * standard_fp_status : the ARM "Standard FPSCR Value" | ||
47 | * standard_fp_status_fp16 : used for half-precision | ||
48 | * calculations with the ARM "Standard FPSCR Value" | ||
49 | + * ah_fp_status: used for the A64 insns which change behaviour | ||
50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
51 | + * and the reciprocal and square root estimate/step insns) | ||
52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
54 | + * and the reciprocal and square root estimate/step insns); | ||
55 | + * for half-precision | ||
56 | * | ||
57 | * Half-precision operations are governed by a separate | ||
58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
23 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 81 | diff --git a/target/arm/internals.h b/target/arm/internals.h |
24 | index XXXXXXX..XXXXXXX 100644 | 82 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/arm/internals.h | 83 | --- a/target/arm/internals.h |
26 | +++ b/target/arm/internals.h | 84 | +++ b/target/arm/internals.h |
27 | @@ -XXX,XX +XXX,XX @@ typedef struct GetPhysAddrResult { | 85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); |
28 | } GetPhysAddrResult; | 86 | |
29 | 87 | /* Set the float_status behaviour to match the Arm defaults */ | |
30 | /** | 88 | void arm_set_default_fp_behaviours(float_status *s); |
31 | - * get_phys_addr_with_secure: get the physical address for a virtual address | 89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ |
32 | + * get_phys_addr: get the physical address for a virtual address | 90 | +void arm_set_ah_fp_behaviours(float_status *s); |
33 | * @env: CPUARMState | 91 | |
34 | * @address: virtual address to get physical address for | 92 | #endif |
35 | * @access_type: 0 for read, 1 for write, 2 for execute | 93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
36 | * @mmu_idx: MMU index indicating required translation regime | 94 | index XXXXXXX..XXXXXXX 100644 |
37 | - * @is_secure: security state for the access | 95 | --- a/target/arm/tcg/translate.h |
38 | * @result: set on translation success. | 96 | +++ b/target/arm/tcg/translate.h |
39 | * @fi: set to fault info if the translation fails | 97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
40 | * | 98 | FPST_A64, |
41 | @@ -XXX,XX +XXX,XX @@ typedef struct GetPhysAddrResult { | 99 | FPST_A32_F16, |
42 | * * for PSMAv5 based systems we don't bother to return a full FSR format | 100 | FPST_A64_F16, |
43 | * value. | 101 | + FPST_AH, |
102 | + FPST_AH_F16, | ||
103 | FPST_STD, | ||
104 | FPST_STD_F16, | ||
105 | } ARMFPStatusFlavour; | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
108 | * FPST_A64_F16 | ||
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
44 | */ | 154 | */ |
45 | -bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, | 155 | -static void arm_set_ah_fp_behaviours(float_status *s) |
46 | - MMUAccessType access_type, | 156 | +void arm_set_ah_fp_behaviours(float_status *s) |
47 | - ARMMMUIdx mmu_idx, bool is_secure, | 157 | { |
48 | - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) | 158 | set_float_detect_tininess(float_tininess_after_rounding, s); |
49 | +bool get_phys_addr(CPUARMState *env, target_ulong address, | 159 | set_float_ftz_detection(float_ftz_after_rounding, s); |
50 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | 160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
51 | + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) | 161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); |
52 | __attribute__((nonnull)); | 162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) |
53 | 163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | |
54 | /** | ||
55 | - * get_phys_addr: get the physical address for a virtual address | ||
56 | + * get_phys_addr_with_secure_nogpc: get the physical address for a virtual | ||
57 | + * address | ||
58 | * @env: CPUARMState | ||
59 | * @address: virtual address to get physical address for | ||
60 | * @access_type: 0 for read, 1 for write, 2 for execute | ||
61 | * @mmu_idx: MMU index indicating required translation regime | ||
62 | + * @is_secure: security state for the access | ||
63 | * @result: set on translation success. | ||
64 | * @fi: set to fault info if the translation fails | ||
65 | * | ||
66 | - * Similarly, but use the security regime of @mmu_idx. | ||
67 | + * Similar to get_phys_addr, but use the given security regime and don't perform | ||
68 | + * a Granule Protection Check on the resulting address. | ||
69 | */ | ||
70 | -bool get_phys_addr(CPUARMState *env, target_ulong address, | ||
71 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
72 | - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) | ||
73 | +bool get_phys_addr_with_secure_nogpc(CPUARMState *env, target_ulong address, | ||
74 | + MMUAccessType access_type, | ||
75 | + ARMMMUIdx mmu_idx, bool is_secure, | ||
76 | + GetPhysAddrResult *result, | ||
77 | + ARMMMUFaultInfo *fi) | ||
78 | __attribute__((nonnull)); | ||
79 | |||
80 | bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | ||
81 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/arm/helper.c | ||
84 | +++ b/target/arm/helper.c | ||
85 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | ||
86 | ARMMMUFaultInfo fi = {}; | ||
87 | GetPhysAddrResult res = {}; | ||
88 | |||
89 | - ret = get_phys_addr_with_secure(env, value, access_type, mmu_idx, | ||
90 | - is_secure, &res, &fi); | ||
91 | + /* | 164 | + /* |
92 | + * I_MXTJT: Granule protection checks are not performed on the final address | 165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because |
93 | + * of a successful translation. | 166 | + * they are used for insns that must not set the cumulative exception bits. |
94 | + */ | 167 | + */ |
95 | + ret = get_phys_addr_with_secure_nogpc(env, value, access_type, mmu_idx, | 168 | + |
96 | + is_secure, &res, &fi); | ||
97 | |||
98 | /* | 169 | /* |
99 | * ATS operations only do S1 or S1+S2 translations, so we never | 170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does |
100 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes |
101 | index XXXXXXX..XXXXXXX 100644 | 172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
102 | --- a/target/arm/ptw.c | 173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); |
103 | +++ b/target/arm/ptw.c | 174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); |
104 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, | 175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); |
105 | return false; | 176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); |
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
106 | } | 178 | } |
107 | 179 | ||
108 | -bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, | 180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
109 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
110 | - bool is_secure, GetPhysAddrResult *result, | 182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
111 | - ARMMMUFaultInfo *fi) | 183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
112 | +bool get_phys_addr_with_secure_nogpc(CPUARMState *env, target_ulong address, | 184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
113 | + MMUAccessType access_type, | 185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
114 | + ARMMMUIdx mmu_idx, bool is_secure, | 186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
115 | + GetPhysAddrResult *result, | 187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
116 | + ARMMMUFaultInfo *fi) | 188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
117 | { | 189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
118 | S1Translate ptw = { | 190 | } |
119 | .in_mmu_idx = mmu_idx, | 191 | if (changed & FPCR_FZ) { |
120 | .in_space = arm_secure_to_space(is_secure), | 192 | bool ftz_enabled = val & FPCR_FZ; |
121 | }; | 193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
122 | - return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); | 194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
123 | + return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi); | 195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
124 | } | 196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
125 | 197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | |
126 | bool get_phys_addr(CPUARMState *env, target_ulong address, | 198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); |
199 | } | ||
200 | if (changed & FPCR_AH) { | ||
201 | bool ah_enabled = val & FPCR_AH; | ||
127 | -- | 202 | -- |
128 | 2.34.1 | 203 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use | ||
2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get | ||
3 | the required behaviour changes. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.h | 13 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- | ||
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/translate-a64.h | ||
16 | +++ b/target/arm/tcg/translate-a64.h | ||
17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) | ||
18 | return ret; | ||
19 | } | ||
20 | |||
21 | +/* | ||
22 | + * Return the ARMFPStatusFlavour to use based on element size and | ||
23 | + * whether FPCR.AH is set. | ||
24 | + */ | ||
25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) | ||
26 | +{ | ||
27 | + if (s->fpcr_ah) { | ||
28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; | ||
29 | + } else { | ||
30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
43 | */ | ||
44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
45 | - int rm, bool is_fp16, int data, | ||
46 | + int rm, ARMFPStatusFlavour fpsttype, int data, | ||
47 | gen_helper_gvec_3_ptr *fn) | ||
48 | { | ||
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
71 | } | ||
72 | break; | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
74 | if (fp_access_check(s)) { | ||
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
79 | write_fp_sreg(s, a->rd, t0); | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
83 | if (fp_access_check(s)) { | ||
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
142 | } | ||
143 | |||
144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
145 | - esz == MO_16, data, fns[esz - 1]); | ||
146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, | ||
147 | + data, fns[esz - 1]); | ||
148 | return true; | ||
149 | } | ||
150 | |||
151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
152 | + gen_helper_gvec_3_ptr * const fns[3]) | ||
153 | +{ | ||
154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, | ||
155 | + a->esz == MO_16 ? | ||
156 | + FPST_A64_F16 : FPST_A64); | ||
157 | +} | ||
158 | + | ||
159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
160 | + gen_helper_gvec_3_ptr * const f[3]) | ||
161 | +{ | ||
162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
163 | + select_ah_fpst(s, a->esz)); | ||
164 | +} | ||
165 | + | ||
166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { | ||
167 | gen_helper_gvec_fadd_h, | ||
168 | gen_helper_gvec_fadd_s, | ||
169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
170 | gen_helper_gvec_recps_s, | ||
171 | gen_helper_gvec_recps_d, | ||
172 | }; | ||
173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) | ||
174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
175 | |||
176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
177 | gen_helper_gvec_rsqrts_h, | ||
178 | gen_helper_gvec_rsqrts_s, | ||
179 | gen_helper_gvec_rsqrts_d, | ||
180 | }; | ||
181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) | ||
182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
183 | |||
184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
185 | gen_helper_gvec_faddp_h, | ||
186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | ||
187 | } | ||
188 | |||
189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
190 | - esz == MO_16, a->idx, fns[esz - 1]); | ||
191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
192 | + a->idx, fns[esz - 1]); | ||
193 | return true; | ||
194 | } | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { | ||
197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); | ||
198 | } FPScalar1; | ||
199 | |||
200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
201 | - const FPScalar1 *f, int rmode) | ||
202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
203 | + const FPScalar1 *f, int rmode, | ||
204 | + ARMFPStatusFlavour fpsttype) | ||
205 | { | ||
206 | TCGv_i32 tcg_rmode = NULL; | ||
207 | TCGv_ptr fpst; | ||
208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
209 | return check == 0; | ||
210 | } | ||
211 | |||
212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
406 | -- | ||
407 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
1 | In commit 6d2654ffacea813916176 we created the S1Translate struct and | 1 | When FPCR.AH is 1, use FPST_AH for: |
---|---|---|---|
2 | used it to plumb through various arguments that we were previously | 2 | * AdvSIMD BFMLALB, BFMLALT |
3 | passing one-at-a-time to get_phys_addr_v5(), get_phys_addr_v6(), and | 3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT |
4 | get_phys_addr_lpae(). Extend that pattern to get_phys_addr_pmsav5(), | ||
5 | get_phys_addr_pmsav7(), get_phys_addr_pmsav8() and | ||
6 | get_phys_addr_disabled(), so that all the get_phys_addr_* functions | ||
7 | we call from get_phys_addr_nogpc() take the S1Translate struct rather | ||
8 | than the mmu_idx and is_secure bool. | ||
9 | 4 | ||
10 | (This refactoring is a prelude to having the called functions look | 5 | so that they get the required behaviour changes. |
11 | at ptw->in_space rather than using an is_secure boolean.) | 6 | |
7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour | ||
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | ||
10 | the boolean. | ||
12 | 11 | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | Message-id: 20230807141514.19075-5-peter.maydell@linaro.org | ||
16 | --- | 14 | --- |
17 | target/arm/ptw.c | 57 ++++++++++++++++++++++++++++++------------------ | 15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- |
18 | 1 file changed, 36 insertions(+), 21 deletions(-) | 16 | target/arm/tcg/translate-sve.c | 6 ++++-- |
17 | 2 files changed, 17 insertions(+), 9 deletions(-) | ||
19 | 18 | ||
20 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
21 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/arm/ptw.c | 21 | --- a/target/arm/tcg/translate-a64.c |
23 | +++ b/target/arm/ptw.c | 22 | +++ b/target/arm/tcg/translate-a64.c |
24 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, |
24 | * an out-of-line helper. | ||
25 | */ | ||
26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
27 | - int rm, int ra, bool is_fp16, int data, | ||
28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, | ||
29 | + int data, | ||
30 | gen_helper_gvec_4_ptr *fn) | ||
31 | { | ||
32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), | ||
35 | vec_full_reg_offset(s, rn), | ||
36 | vec_full_reg_offset(s, rm), | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | ||
38 | } | ||
39 | if (fp_access_check(s)) { | ||
40 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, | ||
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
25 | return true; | 54 | return true; |
26 | } | 55 | } |
27 | 56 | ||
28 | -static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | 57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) |
29 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 58 | } |
30 | - bool is_secure, GetPhysAddrResult *result, | 59 | |
31 | +static bool get_phys_addr_pmsav5(CPUARMState *env, | 60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
32 | + S1Translate *ptw, | 61 | - esz == MO_16, (a->idx << 1) | neg, |
33 | + uint32_t address, | 62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
34 | + MMUAccessType access_type, | 63 | + (a->idx << 1) | neg, |
35 | + GetPhysAddrResult *result, | 64 | fns[esz - 1]); |
36 | ARMMMUFaultInfo *fi) | 65 | return true; |
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/translate-sve.c | ||
90 | +++ b/target/arm/tcg/translate-sve.c | ||
91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, | ||
92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
37 | { | 93 | { |
38 | int n; | 94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, |
39 | uint32_t mask; | 95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); |
40 | uint32_t base; | 96 | + a->rd, a->rn, a->rm, a->ra, sel, |
41 | + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | 97 | + s->fpcr_ah ? FPST_AH : FPST_A64); |
42 | bool is_user = regime_is_user(env, mmu_idx); | ||
43 | + bool is_secure = arm_space_is_secure(ptw->in_space); | ||
44 | |||
45 | if (regime_translation_disabled(env, mmu_idx, is_secure)) { | ||
46 | /* MPU disabled. */ | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, | ||
48 | return regime_sctlr(env, mmu_idx) & SCTLR_BR; | ||
49 | } | 98 | } |
50 | 99 | ||
51 | -static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) |
52 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) |
53 | - bool secure, GetPhysAddrResult *result, | ||
54 | +static bool get_phys_addr_pmsav7(CPUARMState *env, | ||
55 | + S1Translate *ptw, | ||
56 | + uint32_t address, | ||
57 | + MMUAccessType access_type, | ||
58 | + GetPhysAddrResult *result, | ||
59 | ARMMMUFaultInfo *fi) | ||
60 | { | 102 | { |
61 | ARMCPU *cpu = env_archcpu(env); | 103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, |
62 | int n; | 104 | a->rd, a->rn, a->rm, a->ra, |
63 | + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | 105 | - (a->index << 1) | sel, FPST_A64); |
64 | bool is_user = regime_is_user(env, mmu_idx); | 106 | + (a->index << 1) | sel, |
65 | + bool secure = arm_space_is_secure(ptw->in_space); | 107 | + s->fpcr_ah ? FPST_AH : FPST_A64); |
66 | |||
67 | result->f.phys_addr = address; | ||
68 | result->f.lg_page_size = TARGET_PAGE_BITS; | ||
69 | @@ -XXX,XX +XXX,XX @@ void v8m_security_lookup(CPUARMState *env, uint32_t address, | ||
70 | } | ||
71 | } | 108 | } |
72 | 109 | ||
73 | -static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) |
74 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
75 | - bool secure, GetPhysAddrResult *result, | ||
76 | +static bool get_phys_addr_pmsav8(CPUARMState *env, | ||
77 | + S1Translate *ptw, | ||
78 | + uint32_t address, | ||
79 | + MMUAccessType access_type, | ||
80 | + GetPhysAddrResult *result, | ||
81 | ARMMMUFaultInfo *fi) | ||
82 | { | ||
83 | V8M_SAttributes sattrs = {}; | ||
84 | + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | ||
85 | + bool secure = arm_space_is_secure(ptw->in_space); | ||
86 | bool ret; | ||
87 | |||
88 | if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { | ||
89 | @@ -XXX,XX +XXX,XX @@ static ARMCacheAttrs combine_cacheattrs(uint64_t hcr, | ||
90 | * MMU disabled. S1 addresses within aa64 translation regimes are | ||
91 | * still checked for bounds -- see AArch64.S1DisabledOutput(). | ||
92 | */ | ||
93 | -static bool get_phys_addr_disabled(CPUARMState *env, target_ulong address, | ||
94 | +static bool get_phys_addr_disabled(CPUARMState *env, | ||
95 | + S1Translate *ptw, | ||
96 | + target_ulong address, | ||
97 | MMUAccessType access_type, | ||
98 | - ARMMMUIdx mmu_idx, bool is_secure, | ||
99 | GetPhysAddrResult *result, | ||
100 | ARMMMUFaultInfo *fi) | ||
101 | { | ||
102 | + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | ||
103 | + bool is_secure = arm_space_is_secure(ptw->in_space); | ||
104 | uint8_t memattr = 0x00; /* Device nGnRnE */ | ||
105 | uint8_t shareability = 0; /* non-shareable */ | ||
106 | int r_el; | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | ||
108 | case ARMMMUIdx_Phys_Root: | ||
109 | case ARMMMUIdx_Phys_Realm: | ||
110 | /* Checking Phys early avoids special casing later vs regime_el. */ | ||
111 | - return get_phys_addr_disabled(env, address, access_type, mmu_idx, | ||
112 | - is_secure, result, fi); | ||
113 | + return get_phys_addr_disabled(env, ptw, address, access_type, | ||
114 | + result, fi); | ||
115 | |||
116 | case ARMMMUIdx_Stage1_E0: | ||
117 | case ARMMMUIdx_Stage1_E1: | ||
118 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | ||
119 | |||
120 | if (arm_feature(env, ARM_FEATURE_V8)) { | ||
121 | /* PMSAv8 */ | ||
122 | - ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx, | ||
123 | - is_secure, result, fi); | ||
124 | + ret = get_phys_addr_pmsav8(env, ptw, address, access_type, | ||
125 | + result, fi); | ||
126 | } else if (arm_feature(env, ARM_FEATURE_V7)) { | ||
127 | /* PMSAv7 */ | ||
128 | - ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx, | ||
129 | - is_secure, result, fi); | ||
130 | + ret = get_phys_addr_pmsav7(env, ptw, address, access_type, | ||
131 | + result, fi); | ||
132 | } else { | ||
133 | /* Pre-v7 MPU */ | ||
134 | - ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx, | ||
135 | - is_secure, result, fi); | ||
136 | + ret = get_phys_addr_pmsav5(env, ptw, address, access_type, | ||
137 | + result, fi); | ||
138 | } | ||
139 | qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32 | ||
140 | " mmu_idx %u -> %s (prot %c%c%c)\n", | ||
141 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | ||
142 | /* Definitely a real MMU, not an MPU */ | ||
143 | |||
144 | if (regime_translation_disabled(env, mmu_idx, is_secure)) { | ||
145 | - return get_phys_addr_disabled(env, address, access_type, mmu_idx, | ||
146 | - is_secure, result, fi); | ||
147 | + return get_phys_addr_disabled(env, ptw, address, access_type, | ||
148 | + result, fi); | ||
149 | } | ||
150 | |||
151 | if (regime_using_lpae_format(env, mmu_idx)) { | ||
152 | -- | 111 | -- |
153 | 2.34.1 | 112 | 2.34.1 | diff view generated by jsdifflib |
1 | Pass an ARMSecuritySpace instead of a bool secure to | 1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so |
---|---|---|---|
2 | arm_is_el2_enabled_secstate(). This doesn't change behaviour. | 2 | that instead of zeroing the high elements of a vector register when |
3 | we write the output of a scalar operation to it, we instead merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
6 | |||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
3 | 10 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230807141514.19075-8-peter.maydell@linaro.org | ||
7 | --- | 13 | --- |
8 | target/arm/cpu.h | 13 ++++++++----- | 14 | target/arm/cpu.h | 1 + |
9 | target/arm/helper.c | 2 +- | 15 | target/arm/tcg/translate.h | 2 ++ |
10 | 2 files changed, 9 insertions(+), 6 deletions(-) | 16 | target/arm/tcg/hflags.c | 9 +++++++++ |
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
11 | 19 | ||
12 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
13 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/arm/cpu.h | 22 | --- a/target/arm/cpu.h |
15 | +++ b/target/arm/cpu.h | 23 | +++ b/target/arm/cpu.h |
16 | @@ -XXX,XX +XXX,XX @@ static inline bool arm_is_secure(CPUARMState *env) | 24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) |
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
17 | 29 | ||
18 | /* | 30 | /* |
19 | * Return true if the current security state has AArch64 EL2 or AArch32 Hyp. | 31 | * Helpers for using the above. Note that only the A64 accessors use |
20 | - * This corresponds to the pseudocode EL2Enabled() | 32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
21 | + * This corresponds to the pseudocode EL2Enabled(). | 33 | index XXXXXXX..XXXXXXX 100644 |
22 | */ | 34 | --- a/target/arm/tcg/translate.h |
23 | -static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, bool secure) | 35 | +++ b/target/arm/tcg/translate.h |
24 | +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, | 36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { |
25 | + ARMSecuritySpace space) | 37 | bool nv2_mem_be; |
26 | { | 38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ |
27 | + assert(space != ARMSS_Root); | 39 | bool fpcr_ah; |
28 | return arm_feature(env, ARM_FEATURE_EL2) | 40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ |
29 | - && (!secure || (env->cp15.scr_el3 & SCR_EEL2)); | 41 | + bool fpcr_nep; |
30 | + && (space != ARMSS_Secure || (env->cp15.scr_el3 & SCR_EEL2)); | 42 | /* |
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
31 | } | 64 | } |
32 | 65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | |
33 | static inline bool arm_is_el2_enabled(CPUARMState *env) | ||
34 | { | ||
35 | - return arm_is_el2_enabled_secstate(env, arm_is_secure_below_el3(env)); | ||
36 | + return arm_is_el2_enabled_secstate(env, arm_security_space_below_el3(env)); | ||
37 | } | ||
38 | |||
39 | #else | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline bool arm_is_secure(CPUARMState *env) | ||
41 | return false; | ||
42 | } | ||
43 | |||
44 | -static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, bool secure) | ||
45 | +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, | ||
46 | + ARMSecuritySpace space) | ||
47 | { | ||
48 | return false; | ||
49 | } | ||
50 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | 66 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/target/arm/helper.c | 67 | --- a/target/arm/tcg/translate-a64.c |
53 | +++ b/target/arm/helper.c | 68 | +++ b/target/arm/tcg/translate-a64.c |
54 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space) | 69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, |
55 | 70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | |
56 | assert(space != ARMSS_Root); | 71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); |
57 | 72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | |
58 | - if (!arm_is_el2_enabled_secstate(env, arm_space_is_secure(space))) { | 73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); |
59 | + if (!arm_is_el2_enabled_secstate(env, space)) { | 74 | dc->vec_len = 0; |
60 | /* | 75 | dc->vec_stride = 0; |
61 | * "This register has no effect if EL2 is not enabled in the | 76 | dc->cp_regs = arm_cpu->cp_regs; |
62 | * current Security state". This is ARMv8.4-SecEL2 speak for | ||
63 | -- | 77 | -- |
64 | 2.34.1 | 78 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the |
---|---|---|---|
2 | 2 | behaviour of the writeback of the result for most SIMD scalar | |
3 | Exercise the DETECT mechanism of the GPIO peripheral. | 3 | operations, so that instead of zeroing the upper part of the result |
4 | 4 | register it merges the upper elements from one of the input | |
5 | Signed-off-by: Chris Laplante <chris@laplante.io> | 5 | registers. |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | |
7 | Message-id: 20230728160324.1159090-7-chris@laplante.io | 7 | Provide new functions write_fp_*reg_merging() which can be used |
8 | [PMM: fixed coding style nits] | 8 | instead of the existing write_fp_*reg() functions when we want this |
9 | "merge the result with one of the input registers if FPCR.NEP is | ||
10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). | ||
11 | |||
12 | Note that (as documented in the description of the FPCR.NEP bit) | ||
13 | which input register to use as the merge source varies by | ||
14 | instruction: for these 2-input scalar operations, the comparison | ||
15 | instructions take from Rm, not Rn. | ||
16 | |||
17 | We'll extend this to also provide the merging behaviour for | ||
18 | the remaining scalar insns in subsequent commits. | ||
19 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 22 | --- |
11 | tests/qtest/microbit-test.c | 44 +++++++++++++++++++++++++++++++++++++ | 23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- |
12 | 1 file changed, 44 insertions(+) | 24 | 1 file changed, 91 insertions(+), 26 deletions(-) |
13 | 25 | ||
14 | diff --git a/tests/qtest/microbit-test.c b/tests/qtest/microbit-test.c | 26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
15 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tests/qtest/microbit-test.c | 28 | --- a/target/arm/tcg/translate-a64.c |
17 | +++ b/tests/qtest/microbit-test.c | 29 | +++ b/target/arm/tcg/translate-a64.c |
18 | @@ -XXX,XX +XXX,XX @@ static void test_nrf51_gpio(void) | 30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) |
19 | qtest_quit(qts); | 31 | write_fp_dreg(s, reg, tmp); |
20 | } | 32 | } |
21 | 33 | ||
22 | +static void test_nrf51_gpio_detect(void) | 34 | +/* |
35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: | ||
36 | + * - if FPCR.NEP == 0, clear the high elements of reg | ||
37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg | ||
38 | + * (i.e. merge the result with those high elements) | ||
39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). | ||
40 | + */ | ||
41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, | ||
42 | + TCGv_i64 v) | ||
23 | +{ | 43 | +{ |
24 | + QTestState *qts = qtest_init("-M microbit"); | 44 | + if (!s->fpcr_nep) { |
25 | + int i; | 45 | + write_fp_dreg(s, reg, v); |
26 | + | 46 | + return; |
27 | + /* Connect input buffer on pins 1-7, configure SENSE for high level */ | ||
28 | + for (i = 1; i <= 7; i++) { | ||
29 | + qtest_writel(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_CNF_START + i * 4, | ||
30 | + deposit32(0, 16, 2, 2)); | ||
31 | + } | 47 | + } |
32 | + | 48 | + |
33 | + qtest_irq_intercept_out_named(qts, "/machine/nrf51/gpio", "detect"); | 49 | + /* |
34 | + | 50 | + * Move from mergereg to reg; this sets the high elements and |
35 | + for (i = 1; i <= 7; i++) { | 51 | + * clears the bits above 128 as a side effect. |
36 | + /* Set pin high */ | 52 | + */ |
37 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 1); | 53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
38 | + uint32_t actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_IN); | 54 | + vec_full_reg_offset(s, mergereg), |
39 | + g_assert_cmpuint(actual, ==, 1 << i); | 55 | + 16, vec_full_reg_size(s)); |
40 | + | 56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); |
41 | + /* Check that DETECT is high */ | 57 | +} |
42 | + g_assert_true(qtest_get_irq(qts, 0)); | 58 | + |
43 | + | 59 | +/* |
44 | + /* Set pin low, check that DETECT goes low. */ | 60 | + * Write a single-prec result: clear the higher elements of the |
45 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 0); | 61 | + * destination register if FPCR.NEP is 0; otherwise set them from mergereg. |
46 | + actual = qtest_readl(qts, NRF51_GPIO_BASE + NRF51_GPIO_REG_IN); | 62 | + */ |
47 | + g_assert_cmpuint(actual, ==, 0x0); | 63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, |
48 | + g_assert_false(qtest_get_irq(qts, 0)); | 64 | + TCGv_i32 v) |
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
68 | + return; | ||
49 | + } | 69 | + } |
50 | + | 70 | + |
51 | + /* Set pin 0 high, check that DETECT doesn't fire */ | 71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
52 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 0, 1); | 72 | + vec_full_reg_offset(s, mergereg), |
53 | + g_assert_false(qtest_get_irq(qts, 0)); | 73 | + 16, vec_full_reg_size(s)); |
54 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 0, 0); | 74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); |
55 | + | 75 | +} |
56 | + /* Set pins 1, 2, and 3 high, then set 3 low. Check DETECT is still high */ | 76 | + |
57 | + for (i = 1; i <= 3; i++) { | 77 | +/* |
58 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", i, 1); | 78 | + * Write a half-prec result: clear the higher elements of the |
79 | + * destination register if FPCR.NEP is 0; otherwise set them from mergereg. | ||
80 | + * The caller must ensure that the top 16 bits of v are zero. | ||
81 | + */ | ||
82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, | ||
83 | + TCGv_i32 v) | ||
84 | +{ | ||
85 | + if (!s->fpcr_nep) { | ||
86 | + write_fp_sreg(s, reg, v); | ||
87 | + return; | ||
59 | + } | 88 | + } |
60 | + g_assert_true(qtest_get_irq(qts, 0)); | 89 | + |
61 | + qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 3, 0); | 90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
62 | + g_assert_true(qtest_get_irq(qts, 0)); | 91 | + vec_full_reg_offset(s, mergereg), |
92 | + 16, vec_full_reg_size(s)); | ||
93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); | ||
63 | +} | 94 | +} |
64 | + | 95 | + |
65 | static void timer_task(QTestState *qts, hwaddr task) | 96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ |
66 | { | 97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, |
67 | qtest_writel(qts, NRF51_TIMER_BASE + task, NRF51_TRIGGER_TASK); | 98 | GVecGen2Fn *gvec_fn, int vece) |
68 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { |
69 | 100 | } FPScalar; | |
70 | qtest_add_func("/microbit/nrf51/uart", test_nrf51_uart); | 101 | |
71 | qtest_add_func("/microbit/nrf51/gpio", test_nrf51_gpio); | 102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
72 | + qtest_add_func("/microbit/nrf51/gpio_detect", test_nrf51_gpio_detect); | 103 | - const FPScalar *f, |
73 | qtest_add_func("/microbit/nrf51/nvmc", test_nrf51_nvmc); | 104 | + const FPScalar *f, int mergereg, |
74 | qtest_add_func("/microbit/nrf51/timer", test_nrf51_timer); | 105 | ARMFPStatusFlavour fpsttype) |
75 | qtest_add_func("/microbit/microbit/i2c", test_microbit_i2c); | 106 | { |
107 | switch (a->esz) { | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
112 | - write_fp_dreg(s, a->rd, t0); | ||
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
114 | } | ||
115 | break; | ||
116 | case MO_32: | ||
117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
121 | - write_fp_sreg(s, a->rd, t0); | ||
122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); | ||
123 | } | ||
124 | break; | ||
125 | case MO_16: | ||
126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
132 | } | ||
133 | break; | ||
134 | default: | ||
135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
136 | return true; | ||
137 | } | ||
138 | |||
139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
141 | + int mergereg) | ||
142 | { | ||
143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | ||
144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
145 | a->esz == MO_16 ? | ||
146 | FPST_A64_F16 : FPST_A64); | ||
147 | } | ||
148 | |||
149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
151 | + int mergereg) | ||
152 | { | ||
153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
155 | + select_ah_fpst(s, a->esz)); | ||
156 | } | ||
157 | |||
158 | static const FPScalar f_scalar_fadd = { | ||
159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { | ||
160 | gen_helper_vfp_adds, | ||
161 | gen_helper_vfp_addd, | ||
162 | }; | ||
163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) | ||
164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) | ||
165 | |||
166 | static const FPScalar f_scalar_fsub = { | ||
167 | gen_helper_vfp_subh, | ||
168 | gen_helper_vfp_subs, | ||
169 | gen_helper_vfp_subd, | ||
170 | }; | ||
171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) | ||
172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) | ||
173 | |||
174 | static const FPScalar f_scalar_fdiv = { | ||
175 | gen_helper_vfp_divh, | ||
176 | gen_helper_vfp_divs, | ||
177 | gen_helper_vfp_divd, | ||
178 | }; | ||
179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) | ||
180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) | ||
181 | |||
182 | static const FPScalar f_scalar_fmul = { | ||
183 | gen_helper_vfp_mulh, | ||
184 | gen_helper_vfp_muls, | ||
185 | gen_helper_vfp_muld, | ||
186 | }; | ||
187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) | ||
188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) | ||
189 | |||
190 | static const FPScalar f_scalar_fmax = { | ||
191 | gen_helper_vfp_maxh, | ||
192 | gen_helper_vfp_maxs, | ||
193 | gen_helper_vfp_maxd, | ||
194 | }; | ||
195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) | ||
196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
197 | |||
198 | static const FPScalar f_scalar_fmin = { | ||
199 | gen_helper_vfp_minh, | ||
200 | gen_helper_vfp_mins, | ||
201 | gen_helper_vfp_mind, | ||
202 | }; | ||
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
76 | -- | 306 | -- |
77 | 2.34.1 | 307 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works | ||
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
1 | 8 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
13 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/translate-a64.c | ||
18 | +++ b/target/arm/tcg/translate-a64.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { | ||
20 | }; | ||
21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
22 | |||
23 | -static const FPScalar1 f_scalar_bfcvt = { | ||
24 | - .gen_s = gen_helper_bfcvt, | ||
25 | -}; | ||
26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | ||
28 | +{ | ||
29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; | ||
30 | + TCGv_i32 t32; | ||
31 | + int check; | ||
32 | + | ||
33 | + if (!dc_isar_feature(aa64_bf16, s)) { | ||
34 | + return false; | ||
35 | + } | ||
36 | + | ||
37 | + check = fp_access_check_scalar_hsd(s, a->esz); | ||
38 | + | ||
39 | + if (check <= 0) { | ||
40 | + return check == 0; | ||
41 | + } | ||
42 | + | ||
43 | + t32 = read_fp_sreg(s, a->rn); | ||
44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); | ||
45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
46 | + return true; | ||
47 | +} | ||
48 | |||
49 | static const FPScalar1 f_scalar_frint32 = { | ||
50 | NULL, | ||
51 | -- | ||
52 | 2.34.1 | diff view generated by jsdifflib |
1 | When we report faults due to stage 2 faults during a stage 1 | 1 | Handle FPCR.NEP for the 1-input scalar operations. |
---|---|---|---|
2 | page table walk, the 'level' parameter should be the level | ||
3 | of the walk in stage 2 that faulted, not the level of the | ||
4 | walk in stage 1. Correct the reporting of these faults. | ||
5 | 2 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20230807141514.19075-15-peter.maydell@linaro.org | ||
9 | --- | 5 | --- |
10 | target/arm/ptw.c | 10 +++++++--- | 6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ |
11 | 1 file changed, 7 insertions(+), 3 deletions(-) | 7 | 1 file changed, 14 insertions(+), 12 deletions(-) |
12 | 8 | ||
13 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.c | 11 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.c | 12 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, |
18 | do_translation_fault: | 14 | case MO_64: |
19 | fi->type = ARMFault_Translation; | 15 | t64 = read_fp_dreg(s, a->rn); |
20 | do_fault: | 16 | f->gen_d(t64, t64, fpst); |
21 | - fi->level = level; | 17 | - write_fp_dreg(s, a->rd, t64); |
22 | - /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ | 18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); |
23 | - fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx); | 19 | break; |
24 | + if (fi->s1ptw) { | 20 | case MO_32: |
25 | + /* Retain the existing stage 2 fi->level */ | 21 | t32 = read_fp_sreg(s, a->rn); |
26 | + assert(fi->stage2); | 22 | f->gen_s(t32, t32, fpst); |
27 | + } else { | 23 | - write_fp_sreg(s, a->rd, t32); |
28 | + fi->level = level; | 24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); |
29 | + fi->stage2 = regime_is_stage2(mmu_idx); | 25 | break; |
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
30 | + } | 99 | + } |
31 | fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx); | 100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); |
32 | return true; | 101 | return true; |
33 | } | 102 | } |
34 | -- | 103 | -- |
35 | 2.34.1 | 104 | 2.34.1 | diff view generated by jsdifflib |
1 | Replace the last uses of ptw->in_secure with appropriate | 1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). |
---|---|---|---|
2 | checks on ptw->in_space. | ||
3 | 2 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230807141514.19075-10-peter.maydell@linaro.org | ||
7 | --- | 5 | --- |
8 | target/arm/ptw.c | 11 +++++++---- | 6 | target/arm/tcg/translate-a64.c | 6 +++--- |
9 | 1 file changed, 7 insertions(+), 4 deletions(-) | 7 | 1 file changed, 3 insertions(+), 3 deletions(-) |
10 | 8 | ||
11 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
12 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/target/arm/ptw.c | 11 | --- a/target/arm/tcg/translate-a64.c |
14 | +++ b/target/arm/ptw.c | 12 | +++ b/target/arm/tcg/translate-a64.c |
15 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | 13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, |
16 | ARMMMUFaultInfo *fi) | 14 | } else { |
17 | { | 15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); |
18 | ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | 16 | } |
19 | - bool is_secure = ptw->in_secure; | 17 | - write_fp_dreg(s, rd, tcg_double); |
20 | ARMMMUIdx s1_mmu_idx; | 18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); |
21 | |||
22 | /* | ||
23 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | ||
24 | * cannot upgrade a NonSecure translation regime's attributes | ||
25 | * to Secure or Realm. | ||
26 | */ | ||
27 | - result->f.attrs.secure = is_secure; | ||
28 | result->f.attrs.space = ptw->in_space; | ||
29 | + result->f.attrs.secure = arm_space_is_secure(ptw->in_space); | ||
30 | |||
31 | switch (mmu_idx) { | ||
32 | case ARMMMUIdx_Phys_S: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | ||
34 | case ARMMMUIdx_Stage1_E0: | ||
35 | case ARMMMUIdx_Stage1_E1: | ||
36 | case ARMMMUIdx_Stage1_E1_PAN: | ||
37 | - /* First stage lookup uses second stage for ptw. */ | ||
38 | - ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; | ||
39 | + /* | ||
40 | + * First stage lookup uses second stage for ptw; only | ||
41 | + * Secure has both S and NS IPA and starts with Stage2_S. | ||
42 | + */ | ||
43 | + ptw->in_ptw_idx = (ptw->in_space == ARMSS_Secure) ? | ||
44 | + ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; | ||
45 | break; | 19 | break; |
46 | 20 | ||
47 | case ARMMMUIdx_Stage2: | 21 | case MO_32: |
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
48 | -- | 40 | -- |
49 | 2.34.1 | 41 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | The PAR_EL1.SH field documents that for the cases of: | 1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) |
---|---|---|---|
2 | * Device memory | 2 | is always double-to-single and must honour FPCR.NEP. Implement this |
3 | * Normal memory with both Inner and Outer Non-Cacheable | 3 | directly in a trans function rather than using |
4 | the field should be 0b10 rather than whatever was in the | 4 | do_2misc_narrow_scalar(). |
5 | translation table descriptor field. (In the pseudocode this | 5 | |
6 | is handled by PAREncodeShareability().) Perform this | 6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for |
7 | adjustment when assembling a PAR value. | 7 | the FCVTXN (vector) insn, so we move those down in the file to |
8 | where they are used. | ||
8 | 9 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | Message-id: 20230807141514.19075-16-peter.maydell@linaro.org | ||
12 | --- | 12 | --- |
13 | target/arm/helper.c | 15 ++++++++++++++- | 13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ |
14 | 1 file changed, 14 insertions(+), 1 deletion(-) | 14 | 1 file changed, 28 insertions(+), 15 deletions(-) |
15 | 15 | ||
16 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/helper.c | 18 | --- a/target/arm/tcg/translate-a64.c |
19 | +++ b/target/arm/helper.c | 19 | +++ b/target/arm/tcg/translate-a64.c |
20 | @@ -XXX,XX +XXX,XX @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, | 20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { |
21 | }; | ||
22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) | ||
23 | |||
24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | ||
26 | { | ||
27 | - /* | ||
28 | - * 64 bit to 32 bit float conversion | ||
29 | - * with von Neumann rounding (round to odd) | ||
30 | - */ | ||
31 | - TCGv_i32 tmp = tcg_temp_new_i32(); | ||
32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
33 | - tcg_gen_extu_i32_i64(d, tmp); | ||
34 | + if (fp_access_check(s)) { | ||
35 | + /* | ||
36 | + * 64 bit to 32 bit float conversion | ||
37 | + * with von Neumann rounding (round to odd) | ||
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
21 | } | 45 | } |
22 | 46 | ||
23 | #ifdef CONFIG_TCG | 47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { |
24 | +static int par_el1_shareability(GetPhysAddrResult *res) | 48 | - NULL, |
49 | - NULL, | ||
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
53 | - | ||
54 | #undef WRAP_ENV | ||
55 | |||
56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) | ||
57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | ||
58 | tcg_gen_extu_i32_i64(d, tmp); | ||
59 | } | ||
60 | |||
61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +{ | 62 | +{ |
26 | + /* | 63 | + /* |
27 | + * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC | 64 | + * 64 bit to 32 bit float conversion |
28 | + * memory -- see pseudocode PAREncodeShareability(). | 65 | + * with von Neumann rounding (round to odd) |
29 | + */ | 66 | + */ |
30 | + if (((res->cacheattrs.attrs & 0xf0) == 0) || | 67 | + TCGv_i32 tmp = tcg_temp_new_i32(); |
31 | + res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { | 68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); |
32 | + return 2; | 69 | + tcg_gen_extu_i32_i64(d, tmp); |
33 | + } | ||
34 | + return res->cacheattrs.shareability; | ||
35 | +} | 70 | +} |
36 | + | 71 | + |
37 | static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | 72 | static ArithOneOp * const f_vector_fcvtn[] = { |
38 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 73 | NULL, |
39 | bool is_secure) | 74 | gen_fcvtn_hs, |
40 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | 75 | gen_fcvtn_sd, |
41 | par64 |= (1 << 9); /* NS */ | 76 | }; |
42 | } | 77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { |
43 | par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ | 78 | + NULL, |
44 | - par64 |= res.cacheattrs.shareability << 7; /* SH */ | 79 | + NULL, |
45 | + par64 |= par_el1_shareability(&res) << 7; /* SH */ | 80 | + gen_fcvtxn_sd, |
46 | } else { | 81 | +}; |
47 | uint32_t fsr = arm_fi_to_lfsc(&fi); | 82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) |
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
48 | 84 | ||
49 | -- | 85 | -- |
50 | 2.34.1 | 86 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by | ||
2 | element instructions; these both need to merge the result with the Rn | ||
3 | register when FPCR.NEP is set. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
16 | |||
17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | ||
18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
25 | |||
26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); | ||
27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
34 | |||
35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | ||
36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | -- | ||
43 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special | ||
2 | cases: | ||
1 | 3 | ||
4 | * comparing two zeroes (even of different sign) or comparing a NaN | ||
5 | with anything always returns the second argument (possibly | ||
6 | squashed to zero) | ||
7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 | ||
8 | |||
9 | Implement these semantics in new helper functions and select them at | ||
10 | translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. | ||
11 | (We will convert the other FMAX and FMIN insns in subsequent | ||
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
15 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | target/arm/tcg/helper-a64.h | 7 +++++++ | ||
20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ | ||
21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- | ||
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/tcg/helper-a64.h | ||
27 | +++ b/target/arm/tcg/helper-a64.h | ||
28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) | ||
29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) | ||
30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) | ||
31 | |||
32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) | ||
33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) | ||
34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) | ||
35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) | ||
36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) | ||
37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | ||
38 | + | ||
39 | DEF_HELPER_2(exception_return, void, env, i64) | ||
40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) | ||
41 | |||
42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/helper-a64.c | ||
45 | +++ b/target/arm/tcg/helper-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) | ||
47 | return r; | ||
48 | } | ||
49 | |||
50 | +/* | ||
51 | + * AH=1 min/max have some odd special cases: | ||
52 | + * comparing two zeroes (regardless of sign), (NaN, anything), | ||
53 | + * or (anything, NaN) should return the second argument (possibly | ||
54 | + * squashed to zero). | ||
55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. | ||
56 | + */ | ||
57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ | ||
58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
59 | + { \ | ||
60 | + bool save; \ | ||
61 | + CTYPE r; \ | ||
62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ | ||
65 | + return b; \ | ||
66 | + } \ | ||
67 | + if (FLOATTYPE ## _is_any_nan(a) || \ | ||
68 | + FLOATTYPE ## _is_any_nan(b)) { \ | ||
69 | + float_raise(float_flag_invalid, fpst); \ | ||
70 | + return b; \ | ||
71 | + } \ | ||
72 | + save = get_flush_to_zero(fpst); \ | ||
73 | + set_flush_to_zero(false, fpst); \ | ||
74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ | ||
75 | + set_flush_to_zero(save, fpst); \ | ||
76 | + return r; \ | ||
77 | + } | ||
78 | + | ||
79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) | ||
80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) | ||
81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) | ||
82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) | ||
83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) | ||
84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) | ||
85 | + | ||
86 | /* 64-bit versions of the CRC helpers. Note that although the operation | ||
87 | * (and the prototypes of crc32c() and crc32() mean that only the bottom | ||
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
94 | select_ah_fpst(s, a->esz)); | ||
95 | } | ||
96 | |||
97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, | ||
99 | + const FPScalar *fnormal, | ||
100 | + const FPScalar *fah, | ||
101 | + int mergereg) | ||
102 | +{ | ||
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
104 | +} | ||
105 | + | ||
106 | static const FPScalar f_scalar_fadd = { | ||
107 | gen_helper_vfp_addh, | ||
108 | gen_helper_vfp_adds, | ||
109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { | ||
110 | gen_helper_vfp_maxs, | ||
111 | gen_helper_vfp_maxd, | ||
112 | }; | ||
113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
114 | +static const FPScalar f_scalar_fmax_ah = { | ||
115 | + gen_helper_vfp_ah_maxh, | ||
116 | + gen_helper_vfp_ah_maxs, | ||
117 | + gen_helper_vfp_ah_maxd, | ||
118 | +}; | ||
119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) | ||
120 | |||
121 | static const FPScalar f_scalar_fmin = { | ||
122 | gen_helper_vfp_minh, | ||
123 | gen_helper_vfp_mins, | ||
124 | gen_helper_vfp_mind, | ||
125 | }; | ||
126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
127 | +static const FPScalar f_scalar_fmin_ah = { | ||
128 | + gen_helper_vfp_ah_minh, | ||
129 | + gen_helper_vfp_ah_mins, | ||
130 | + gen_helper_vfp_ah_mind, | ||
131 | +}; | ||
132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fmaxnm = { | ||
135 | gen_helper_vfp_maxnumh, | ||
136 | -- | ||
137 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by | ||
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar vfp_ah_min* and vfp_ah_max* helpers on each element. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- | ||
10 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
11 | 3 files changed, 41 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/helper-sve.h | ||
16 | +++ b/target/arm/tcg/helper-sve.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
19 | void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
22 | + void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
24 | + void, ptr, ptr, ptr, fpst, i32) | ||
25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, fpst, i32) | ||
27 | + | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, | ||
29 | + void, ptr, ptr, ptr, fpst, i32) | ||
30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
31 | + void, ptr, ptr, ptr, fpst, i32) | ||
32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, fpst, i32) | ||
34 | + | ||
35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
36 | i64, ptr, ptr, fpst, i32) | ||
37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/tcg/translate-a64.c | ||
41 | +++ b/target/arm/tcg/translate-a64.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
43 | FPST_A64_F16 : FPST_A64); | ||
44 | } | ||
45 | |||
46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
47 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
48 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
49 | +{ | ||
50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
51 | +} | ||
52 | + | ||
53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
54 | gen_helper_gvec_3_ptr * const f[3]) | ||
55 | { | ||
56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { | ||
57 | gen_helper_gvec_fmax_s, | ||
58 | gen_helper_gvec_fmax_d, | ||
59 | }; | ||
60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) | ||
61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { | ||
62 | + gen_helper_gvec_ah_fmax_h, | ||
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
101 | |||
102 | -- | ||
103 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for FMAXV and FMINV. These are the | ||
2 | "recursively reduce all lanes of a vector to a scalar result" insns; | ||
3 | we just need to use the _ah_ helper for the reduction step when | ||
4 | FPCR.AH == 1. | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- | ||
10 | 1 file changed, 18 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/translate-a64.c | ||
15 | +++ b/target/arm/tcg/translate-a64.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, | ||
17 | } | ||
18 | |||
19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | ||
20 | - NeonGenTwoSingleOpFn *fn) | ||
21 | + NeonGenTwoSingleOpFn *fnormal, | ||
22 | + NeonGenTwoSingleOpFn *fah) | ||
23 | { | ||
24 | if (fp_access_check(s)) { | ||
25 | MemOp esz = a->esz; | ||
26 | int elts = (a->q ? 16 : 8) >> esz; | ||
27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); | ||
29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | ||
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) | ||
37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) | ||
38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) | ||
39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) | ||
40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, | ||
41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) | ||
42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, | ||
43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) | ||
44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, | ||
45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) | ||
46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, | ||
47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) | ||
48 | |||
49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) | ||
50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) | ||
51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) | ||
52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) | ||
53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, | ||
54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) | ||
55 | +TRANS(FMINNMV_s, do_fp_reduction, a, | ||
56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) | ||
57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) | ||
58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) | ||
59 | |||
60 | /* | ||
61 | * Floating-point Immediate | ||
62 | -- | ||
63 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the pairwise floating | ||
2 | point minimum/maximum insns FMINP and FMAXP. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ | ||
10 | 3 files changed, 45 insertions(+), 4 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { | ||
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, | ||
73 | + const FPScalar *fnormal, | ||
74 | + const FPScalar *fah) | ||
75 | +{ | ||
76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); | ||
77 | +} | ||
78 | + | ||
79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) | ||
80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) | ||
81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) | ||
82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) | ||
83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) | ||
84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) | ||
85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) | ||
86 | |||
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
99 | + | ||
100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) | ||
101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) | ||
102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) | ||
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
108 | -- | ||
109 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV | ||
2 | vector-reduction-to-scalar max/min operations. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, | ||
18 | i64, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, | ||
21 | + i64, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, | ||
23 | + i64, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, | ||
25 | + i64, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, | ||
28 | + i64, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | ||
30 | + i64, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, | ||
32 | + i64, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ | ||
42 | uintptr_t half = n / 2; \ | ||
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
52 | } | ||
53 | |||
54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) | ||
58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) | ||
59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) | ||
60 | |||
61 | /* Identity is floatN_default_nan, without the function call. */ | ||
62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) | ||
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
133 | -- | ||
134 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip | |
2 | its sign bit. This means we can no longer use gen_vfp_neg*() | ||
3 | everywhere but must instead generate slightly more complex code when | ||
4 | FPCR.AH is set. | ||
5 | |||
6 | Make this change for the scalar FNEG and for those places in | ||
7 | translate-a64.c which were previously directly calling | ||
8 | gen_vfp_neg*(). | ||
9 | |||
10 | This change in semantics also affects any other instruction whose | ||
11 | pseudocode calls FPNeg(); in following commits we extend this | ||
12 | change to the other affected instructions. | ||
13 | |||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- | ||
18 | 1 file changed, 114 insertions(+), 11 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); | ||
26 | } | ||
27 | |||
28 | +/* | ||
29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. | ||
30 | + * These functions implement | ||
31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) | ||
32 | + * which for float32 is | ||
33 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) | ||
34 | + * and similarly for the other float sizes. | ||
35 | + */ | ||
36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) | ||
37 | +{ | ||
38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
39 | + | ||
40 | + gen_vfp_negh(chs_s, s); | ||
41 | + gen_vfp_absh(abs_s, s); | ||
42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
43 | + abs_s, tcg_constant_i32(0x7c00), | ||
44 | + s, chs_s); | ||
45 | +} | ||
46 | + | ||
47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) | ||
48 | +{ | ||
49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
50 | + | ||
51 | + gen_vfp_negs(chs_s, s); | ||
52 | + gen_vfp_abss(abs_s, s); | ||
53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
54 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
55 | + s, chs_s); | ||
56 | +} | ||
57 | + | ||
58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
59 | +{ | ||
60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); | ||
61 | + | ||
62 | + gen_vfp_negd(chs_s, s); | ||
63 | + gen_vfp_absd(abs_s, s); | ||
64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
66 | + s, chs_s); | ||
67 | +} | ||
68 | + | ||
69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
70 | +{ | ||
71 | + if (dc->fpcr_ah) { | ||
72 | + gen_vfp_ah_negh(d, s); | ||
73 | + } else { | ||
74 | + gen_vfp_negh(d, s); | ||
75 | + } | ||
76 | +} | ||
77 | + | ||
78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
79 | +{ | ||
80 | + if (dc->fpcr_ah) { | ||
81 | + gen_vfp_ah_negs(d, s); | ||
82 | + } else { | ||
83 | + gen_vfp_negs(d, s); | ||
84 | + } | ||
85 | +} | ||
86 | + | ||
87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) | ||
88 | +{ | ||
89 | + if (dc->fpcr_ah) { | ||
90 | + gen_vfp_ah_negd(d, s); | ||
91 | + } else { | ||
92 | + gen_vfp_negd(d, s); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier | ||
97 | * than the 32 bit equivalent. | ||
98 | */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
100 | gen_vfp_negd(d, d); | ||
101 | } | ||
102 | |||
103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
104 | +{ | ||
105 | + gen_helper_vfp_mulh(d, n, m, s); | ||
106 | + gen_vfp_ah_negh(d, d); | ||
107 | +} | ||
108 | + | ||
109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
110 | +{ | ||
111 | + gen_helper_vfp_muls(d, n, m, s); | ||
112 | + gen_vfp_ah_negs(d, d); | ||
113 | +} | ||
114 | + | ||
115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
116 | +{ | ||
117 | + gen_helper_vfp_muld(d, n, m, s); | ||
118 | + gen_vfp_ah_negd(d, d); | ||
119 | +} | ||
120 | + | ||
121 | static const FPScalar f_scalar_fnmul = { | ||
122 | gen_fnmul_h, | ||
123 | gen_fnmul_s, | ||
124 | gen_fnmul_d, | ||
125 | }; | ||
126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
127 | +static const FPScalar f_scalar_ah_fnmul = { | ||
128 | + gen_fnmul_ah_h, | ||
129 | + gen_fnmul_ah_s, | ||
130 | + gen_fnmul_ah_d, | ||
131 | +}; | ||
132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fcmeq = { | ||
135 | gen_helper_advsimd_ceq_f16, | ||
136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
137 | |||
138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); | ||
139 | if (neg) { | ||
140 | - gen_vfp_negd(t1, t1); | ||
141 | + gen_vfp_maybe_ah_negd(s, t1, t1); | ||
142 | } | ||
143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
146 | |||
147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); | ||
148 | if (neg) { | ||
149 | - gen_vfp_negs(t1, t1); | ||
150 | + gen_vfp_maybe_ah_negs(s, t1, t1); | ||
151 | } | ||
152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
155 | |||
156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); | ||
157 | if (neg) { | ||
158 | - gen_vfp_negh(t1, t1); | ||
159 | + gen_vfp_maybe_ah_negh(s, t1, t1); | ||
160 | } | ||
161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
162 | fpstatus_ptr(FPST_A64_F16)); | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); | ||
165 | |||
166 | if (neg_a) { | ||
167 | - gen_vfp_negd(ta, ta); | ||
168 | + gen_vfp_maybe_ah_negd(s, ta, ta); | ||
169 | } | ||
170 | if (neg_n) { | ||
171 | - gen_vfp_negd(tn, tn); | ||
172 | + gen_vfp_maybe_ah_negd(s, tn, tn); | ||
173 | } | ||
174 | fpst = fpstatus_ptr(FPST_A64); | ||
175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); | ||
178 | |||
179 | if (neg_a) { | ||
180 | - gen_vfp_negs(ta, ta); | ||
181 | + gen_vfp_maybe_ah_negs(s, ta, ta); | ||
182 | } | ||
183 | if (neg_n) { | ||
184 | - gen_vfp_negs(tn, tn); | ||
185 | + gen_vfp_maybe_ah_negs(s, tn, tn); | ||
186 | } | ||
187 | fpst = fpstatus_ptr(FPST_A64); | ||
188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); | ||
191 | |||
192 | if (neg_a) { | ||
193 | - gen_vfp_negh(ta, ta); | ||
194 | + gen_vfp_maybe_ah_negh(s, ta, ta); | ||
195 | } | ||
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
203 | return true; | ||
204 | } | ||
205 | |||
206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, | ||
207 | + const FPScalar1Int *fnormal, | ||
208 | + const FPScalar1Int *fah) | ||
209 | +{ | ||
210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); | ||
211 | +} | ||
212 | + | ||
213 | static const FPScalar1Int f_scalar_fmov = { | ||
214 | tcg_gen_mov_i32, | ||
215 | tcg_gen_mov_i32, | ||
216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { | ||
217 | gen_vfp_negs, | ||
218 | gen_vfp_negd, | ||
219 | }; | ||
220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
221 | +static const FPScalar1Int f_scalar_ah_fneg = { | ||
222 | + gen_vfp_ah_negh, | ||
223 | + gen_vfp_ah_negs, | ||
224 | + gen_vfp_ah_negd, | ||
225 | +}; | ||
226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) | ||
227 | |||
228 | typedef struct FPScalar1 { | ||
229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
230 | -- | ||
231 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should |
---|---|---|---|
2 | not change its sign bit. This means we can no longer use | ||
3 | gen_vfp_abs*() everywhere but must instead generate slightly more | ||
4 | complex code when FPCR.AH is set. | ||
2 | 5 | ||
3 | kvm_arch_get_default_type() returns the default KVM type. This hook is | 6 | Implement these semantics for scalar FABS and FABD. This change also |
4 | particularly useful to derive a KVM type that is valid for "none" | 7 | affects all other instructions whose pseudocode calls FPAbs(); we |
5 | machine model, which is used by libvirt to probe the availability of | 8 | will extend the change to those instructions in following commits. |
6 | KVM. | ||
7 | 9 | ||
8 | For MIPS, the existing mips_kvm_type() is reused. This function ensures | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | the availability of VZ which is mandatory to use KVM on the current | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | QEMU. | 12 | --- |
13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- | ||
14 | 1 file changed, 67 insertions(+), 2 deletions(-) | ||
11 | 15 | ||
12 | Cc: qemu-stable@nongnu.org | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
13 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
14 | Message-id: 20230727073134.134102-2-akihiko.odaki@daynix.com | ||
15 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | [PMM: added doc comment for new function] | ||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
19 | --- | ||
20 | include/sysemu/kvm.h | 2 ++ | ||
21 | target/mips/kvm_mips.h | 9 --------- | ||
22 | accel/kvm/kvm-all.c | 4 +++- | ||
23 | hw/mips/loongson3_virt.c | 2 -- | ||
24 | target/arm/kvm.c | 5 +++++ | ||
25 | target/i386/kvm/kvm.c | 5 +++++ | ||
26 | target/mips/kvm.c | 2 +- | ||
27 | target/ppc/kvm.c | 5 +++++ | ||
28 | target/riscv/kvm.c | 5 +++++ | ||
29 | target/s390x/kvm/kvm.c | 5 +++++ | ||
30 | 10 files changed, 31 insertions(+), 13 deletions(-) | ||
31 | |||
32 | diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/include/sysemu/kvm.h | 18 | --- a/target/arm/tcg/translate-a64.c |
35 | +++ b/include/sysemu/kvm.h | 19 | +++ b/target/arm/tcg/translate-a64.c |
36 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_get_registers(CPUState *cpu); | 20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) |
37 | 21 | s, chs_s); | |
38 | int kvm_arch_put_registers(CPUState *cpu, int level); | 22 | } |
39 | 23 | ||
40 | +int kvm_arch_get_default_type(MachineState *ms); | 24 | +/* |
25 | + * These functions implement | ||
26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) | ||
27 | + * which for float32 is | ||
28 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) | ||
29 | + * and similarly for the other float sizes. | ||
30 | + */ | ||
31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) | ||
32 | +{ | ||
33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
41 | + | 34 | + |
42 | int kvm_arch_init(MachineState *ms, KVMState *s); | 35 | + gen_vfp_absh(abs_s, s); |
43 | 36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | |
44 | int kvm_arch_init_vcpu(CPUState *cpu); | 37 | + abs_s, tcg_constant_i32(0x7c00), |
45 | diff --git a/target/mips/kvm_mips.h b/target/mips/kvm_mips.h | 38 | + s, abs_s); |
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/mips/kvm_mips.h | ||
48 | +++ b/target/mips/kvm_mips.h | ||
49 | @@ -XXX,XX +XXX,XX @@ void kvm_mips_reset_vcpu(MIPSCPU *cpu); | ||
50 | int kvm_mips_set_interrupt(MIPSCPU *cpu, int irq, int level); | ||
51 | int kvm_mips_set_ipi_interrupt(MIPSCPU *cpu, int irq, int level); | ||
52 | |||
53 | -#ifdef CONFIG_KVM | ||
54 | -int mips_kvm_type(MachineState *machine, const char *vm_type); | ||
55 | -#else | ||
56 | -static inline int mips_kvm_type(MachineState *machine, const char *vm_type) | ||
57 | -{ | ||
58 | - return 0; | ||
59 | -} | ||
60 | -#endif | ||
61 | - | ||
62 | #endif /* KVM_MIPS_H */ | ||
63 | diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/accel/kvm/kvm-all.c | ||
66 | +++ b/accel/kvm/kvm-all.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms) | ||
68 | KVMState *s; | ||
69 | const KVMCapabilityInfo *missing_cap; | ||
70 | int ret; | ||
71 | - int type = 0; | ||
72 | + int type; | ||
73 | uint64_t dirty_log_manual_caps; | ||
74 | |||
75 | qemu_mutex_init(&kml_slots_lock); | ||
76 | @@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms) | ||
77 | type = mc->kvm_type(ms, kvm_type); | ||
78 | } else if (mc->kvm_type) { | ||
79 | type = mc->kvm_type(ms, NULL); | ||
80 | + } else { | ||
81 | + type = kvm_arch_get_default_type(ms); | ||
82 | } | ||
83 | |||
84 | do { | ||
85 | diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/hw/mips/loongson3_virt.c | ||
88 | +++ b/hw/mips/loongson3_virt.c | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | #include "qemu/datadir.h" | ||
91 | #include "qapi/error.h" | ||
92 | #include "elf.h" | ||
93 | -#include "kvm_mips.h" | ||
94 | #include "hw/char/serial.h" | ||
95 | #include "hw/intc/loongson_liointc.h" | ||
96 | #include "hw/mips/mips.h" | ||
97 | @@ -XXX,XX +XXX,XX @@ static void loongson3v_machine_class_init(ObjectClass *oc, void *data) | ||
98 | mc->max_cpus = LOONGSON_MAX_VCPUS; | ||
99 | mc->default_ram_id = "loongson3.highram"; | ||
100 | mc->default_ram_size = 1600 * MiB; | ||
101 | - mc->kvm_type = mips_kvm_type; | ||
102 | mc->minimum_page_bits = 14; | ||
103 | mc->default_nic = "virtio-net-pci"; | ||
104 | } | ||
105 | diff --git a/target/arm/kvm.c b/target/arm/kvm.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/target/arm/kvm.c | ||
108 | +++ b/target/arm/kvm.c | ||
109 | @@ -XXX,XX +XXX,XX @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) | ||
110 | return ret > 0 ? ret : 40; | ||
111 | } | ||
112 | |||
113 | +int kvm_arch_get_default_type(MachineState *ms) | ||
114 | +{ | ||
115 | + return 0; | ||
116 | +} | 39 | +} |
117 | + | 40 | + |
118 | int kvm_arch_init(MachineState *ms, KVMState *s) | 41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) |
119 | { | ||
120 | int ret = 0; | ||
121 | diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/target/i386/kvm/kvm.c | ||
124 | +++ b/target/i386/kvm/kvm.c | ||
125 | @@ -XXX,XX +XXX,XX @@ static void register_smram_listener(Notifier *n, void *unused) | ||
126 | &smram_address_space, 1, "kvm-smram"); | ||
127 | } | ||
128 | |||
129 | +int kvm_arch_get_default_type(MachineState *ms) | ||
130 | +{ | 42 | +{ |
131 | + return 0; | 43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); |
44 | + | ||
45 | + gen_vfp_abss(abs_s, s); | ||
46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
47 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
48 | + s, abs_s); | ||
132 | +} | 49 | +} |
133 | + | 50 | + |
134 | int kvm_arch_init(MachineState *ms, KVMState *s) | 51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) |
135 | { | ||
136 | uint64_t identity_base = 0xfffbc000; | ||
137 | diff --git a/target/mips/kvm.c b/target/mips/kvm.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/mips/kvm.c | ||
140 | +++ b/target/mips/kvm.c | ||
141 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_msi_data_to_gsi(uint32_t data) | ||
142 | abort(); | ||
143 | } | ||
144 | |||
145 | -int mips_kvm_type(MachineState *machine, const char *vm_type) | ||
146 | +int kvm_arch_get_default_type(MachineState *machine) | ||
147 | { | ||
148 | #if defined(KVM_CAP_MIPS_VZ) | ||
149 | int r; | ||
150 | diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c | ||
151 | index XXXXXXX..XXXXXXX 100644 | ||
152 | --- a/target/ppc/kvm.c | ||
153 | +++ b/target/ppc/kvm.c | ||
154 | @@ -XXX,XX +XXX,XX @@ static int kvm_ppc_register_host_cpu_type(void); | ||
155 | static void kvmppc_get_cpu_characteristics(KVMState *s); | ||
156 | static int kvmppc_get_dec_bits(void); | ||
157 | |||
158 | +int kvm_arch_get_default_type(MachineState *ms) | ||
159 | +{ | 52 | +{ |
160 | + return 0; | 53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); |
54 | + | ||
55 | + gen_vfp_absd(abs_s, s); | ||
56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
58 | + s, abs_s); | ||
161 | +} | 59 | +} |
162 | + | 60 | + |
163 | int kvm_arch_init(MachineState *ms, KVMState *s) | 61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
164 | { | 62 | { |
165 | cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); | 63 | if (dc->fpcr_ah) { |
166 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c | 64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
167 | index XXXXXXX..XXXXXXX 100644 | 65 | gen_vfp_absd(d, d); |
168 | --- a/target/riscv/kvm.c | ||
169 | +++ b/target/riscv/kvm.c | ||
170 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, | ||
171 | return 0; | ||
172 | } | 66 | } |
173 | 67 | ||
174 | +int kvm_arch_get_default_type(MachineState *ms) | 68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
175 | +{ | 69 | +{ |
176 | + return 0; | 70 | + gen_helper_vfp_subh(d, n, m, s); |
71 | + gen_vfp_ah_absh(d, d); | ||
177 | +} | 72 | +} |
178 | + | 73 | + |
179 | int kvm_arch_init(MachineState *ms, KVMState *s) | 74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
180 | { | ||
181 | return 0; | ||
182 | diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/s390x/kvm/kvm.c | ||
185 | +++ b/target/s390x/kvm/kvm.c | ||
186 | @@ -XXX,XX +XXX,XX @@ static void ccw_machine_class_foreach(ObjectClass *oc, void *opaque) | ||
187 | mc->default_cpu_type = S390_CPU_TYPE_NAME("host"); | ||
188 | } | ||
189 | |||
190 | +int kvm_arch_get_default_type(MachineState *ms) | ||
191 | +{ | 75 | +{ |
192 | + return 0; | 76 | + gen_helper_vfp_subs(d, n, m, s); |
77 | + gen_vfp_ah_abss(d, d); | ||
193 | +} | 78 | +} |
194 | + | 79 | + |
195 | int kvm_arch_init(MachineState *ms, KVMState *s) | 80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
196 | { | 81 | +{ |
197 | object_class_foreach(ccw_machine_class_foreach, TYPE_S390_CCW_MACHINE, | 82 | + gen_helper_vfp_subd(d, n, m, s); |
83 | + gen_vfp_ah_absd(d, d); | ||
84 | +} | ||
85 | + | ||
86 | static const FPScalar f_scalar_fabd = { | ||
87 | gen_fabd_h, | ||
88 | gen_fabd_s, | ||
89 | gen_fabd_d, | ||
90 | }; | ||
91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
92 | +static const FPScalar f_scalar_ah_fabd = { | ||
93 | + gen_fabd_ah_h, | ||
94 | + gen_fabd_ah_s, | ||
95 | + gen_fabd_ah_d, | ||
96 | +}; | ||
97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) | ||
98 | |||
99 | static const FPScalar f_scalar_frecps = { | ||
100 | gen_helper_recpsf_f16, | ||
101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { | ||
102 | gen_vfp_abss, | ||
103 | gen_vfp_absd, | ||
104 | }; | ||
105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
106 | +static const FPScalar1Int f_scalar_ah_fabs = { | ||
107 | + gen_vfp_ah_absh, | ||
108 | + gen_vfp_ah_abss, | ||
109 | + gen_vfp_ah_absd, | ||
110 | +}; | ||
111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) | ||
112 | |||
113 | static const FPScalar1Int f_scalar_fneg = { | ||
114 | gen_vfp_negh, | ||
198 | -- | 115 | -- |
199 | 2.34.1 | 116 | 2.34.1 |
200 | |||
201 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Split the handling of vector FABD so that it calls a different set | ||
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 4 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ | ||
11 | 3 files changed, 33 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/helper.h | ||
16 | +++ b/target/arm/helper.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
52 | } | ||
53 | |||
54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) | ||
56 | +{ | ||
57 | + float16 r = float16_sub(op1, op2, stat); | ||
58 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
59 | +} | ||
60 | + | ||
61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) | ||
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
66 | + | ||
67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) | ||
68 | +{ | ||
69 | + float64 r = float64_sub(op1, op2, stat); | ||
70 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
71 | +} | ||
72 | + | ||
73 | /* | ||
74 | * Reciprocal step. These are the AArch32 version which uses a | ||
75 | * non-fused multiply-and-subtract. | ||
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign | ||
2 | of a NaN" semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 7 +++++++ | ||
8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 2 +- | ||
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
36 | } | ||
37 | |||
38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) | ||
40 | +{ | ||
41 | + float16 r = float16_sub(op1, op2, stat); | ||
42 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
43 | +} | ||
44 | + | ||
45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) | ||
46 | +{ | ||
47 | + float32 r = float32_sub(op1, op2, stat); | ||
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
49 | +} | ||
50 | + | ||
51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) | ||
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
65 | { | ||
66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
79 | -- | ||
80 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
1 | 6 | ||
7 | The construction of neg_imag and neg_real was done to make it easy | |
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
13 | |||
14 | Note that there was a naming issue with neg_imag and neg_real. | ||
15 | They were named backward, with neg_imag being non-zero for rot=1, | ||
16 | and vice versa. This was combined with reversed usage within the | ||
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ | ||
23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- | ||
24 | target/arm/tcg/translate-sve.c | 2 +- | ||
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
26 | |||
27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/vec_internal.h | ||
30 | +++ b/target/arm/tcg/vec_internal.h | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
41 | */ | ||
42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
43 | |||
44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
45 | +{ | ||
46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
47 | +} | ||
48 | + | ||
49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | ||
50 | +{ | ||
51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | ||
52 | +} | ||
53 | + | ||
54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | ||
55 | +{ | ||
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
58 | + | ||
59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | ||
60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/sve_helper.c | ||
63 | +++ b/target/arm/tcg/sve_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
65 | { | ||
66 | intptr_t j, i = simd_oprsz(desc); | ||
67 | uint64_t *g = vg; | ||
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
164 | -- | ||
165 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into | ||
3 | the SIMD data field passed to the helper and using that to decide | ||
4 | whether to negate the values. | ||
1 | 5 | ||
6 | The construction of neg_imag and neg_real was done to make it easy | |
7 | to apply both in parallel with two simple logical operations. This | ||
8 | changed with FPCR.AH, which is more complex than that. Switch to | ||
9 | an approach closer to the pseudocode, where we extract the rot | ||
10 | parameter from the SIMD data word and negate the appropriate | ||
11 | input value. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | target/arm/tcg/translate-a64.c | 10 +++++-- | ||
17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- | ||
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { | ||
25 | gen_helper_gvec_fcadds, | ||
26 | gen_helper_gvec_fcaddd, | ||
27 | }; | ||
28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) | ||
29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) | ||
30 | +/* | ||
31 | + * Encode FPCR.AH into the data so the helper knows whether the | ||
32 | + * negations it does should avoid flipping the sign bit on a NaN | ||
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
135 | -- | ||
136 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH semantics that we do not change the sign of an | ||
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
1 | 5 | ||
6 | Since the extra helper functions would be very repetitive if written | ||
7 | out longhand, we condense them and the existing non-AH helpers into | ||
8 | being emitted via macros. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/helper-a64.h | 6 ++ | ||
14 | target/arm/tcg/vec_internal.h | 18 ++++++ | ||
15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- | ||
16 | target/arm/tcg/translate-a64.c | 25 +++++-- | ||
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/helper-a64.h | ||
22 | +++ b/target/arm/tcg/helper-a64.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) | ||
24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/vec_internal.h | ||
42 | +++ b/target/arm/tcg/vec_internal.h | ||
43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
44 | */ | ||
45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
46 | |||
47 | +/* | ||
48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. | ||
49 | + */ | ||
50 | +static inline float16 float16_ah_chs(float16 a) | ||
51 | +{ | ||
52 | + return float16_is_any_nan(a) ? a : float16_chs(a); | ||
53 | +} | ||
54 | + | ||
55 | +static inline float32 float32_ah_chs(float32 a) | ||
56 | +{ | ||
57 | + return float32_is_any_nan(a) ? a : float32_chs(a); | ||
58 | +} | ||
59 | + | ||
60 | +static inline float64 float64_ah_chs(float64 a) | ||
61 | +{ | ||
62 | + return float64_is_any_nan(a) ? a : float64_chs(a); | ||
63 | +} | ||
64 | + | ||
65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
66 | { | ||
67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/helper-a64.c | ||
71 | +++ b/target/arm/tcg/helper-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #ifdef CONFIG_USER_ONLY | ||
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
82 | } | ||
83 | |||
84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
85 | +/* | ||
86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
87 | * versions, these do a fully fused multiply-add or | ||
88 | * multiply-add-and-halve. | ||
89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. | ||
90 | */ | ||
91 | - | ||
92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
93 | -{ | ||
94 | - a = float16_squash_input_denormal(a, fpst); | ||
95 | - b = float16_squash_input_denormal(b, fpst); | ||
96 | - | ||
97 | - a = float16_chs(a); | ||
98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
112 | } | ||
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
232 | }; | ||
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
258 | -- | ||
259 | 2.34.1 | diff view generated by jsdifflib |
1 | When the MMU is disabled, data accesses should be Device nGnRnE, | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics |
---|---|---|---|
2 | Outer Shareable, Untagged. We handle the other cases from | 2 | in the vector versions of FRECPS and FRSQRTS, by implementing |
3 | AArch64.S1DisabledOutput() correctly but missed this one. | 3 | new vector wrappers that call the _ah_ scalar helpers. |
4 | Device nGnRnE is memattr == 0, so the only part we were missing | ||
5 | was that shareability should be set to 2 for both insn fetches | ||
6 | and data accesses. | ||
7 | 4 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20230807141514.19075-13-peter.maydell@linaro.org | ||
11 | --- | 7 | --- |
12 | target/arm/ptw.c | 12 +++++++----- | 8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ |
13 | 1 file changed, 7 insertions(+), 5 deletions(-) | 9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- |
10 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
11 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
14 | 13 | ||
15 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
16 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/ptw.c | 16 | --- a/target/arm/tcg/helper-sve.h |
18 | +++ b/target/arm/ptw.c | 17 | +++ b/target/arm/tcg/helper-sve.h |
19 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_disabled(CPUARMState *env, | 18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, |
20 | } | 19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, |
21 | } | 20 | void, ptr, ptr, ptr, fpst, i32) |
22 | } | 21 | |
23 | - if (memattr == 0 && access_type == MMU_INST_FETCH) { | 22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, |
24 | - if (regime_sctlr(env, mmu_idx) & SCTLR_I) { | 23 | + void, ptr, ptr, ptr, fpst, i32) |
25 | - memattr = 0xee; /* Normal, WT, RA, NT */ | 24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, |
26 | - } else { | 25 | + void, ptr, ptr, ptr, fpst, i32) |
27 | - memattr = 0x44; /* Normal, NC, No */ | 26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, |
28 | + if (memattr == 0) { | 27 | + void, ptr, ptr, ptr, fpst, i32) |
29 | + if (access_type == MMU_INST_FETCH) { | 28 | + |
30 | + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { | 29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, |
31 | + memattr = 0xee; /* Normal, WT, RA, NT */ | 30 | + void, ptr, ptr, ptr, fpst, i32) |
32 | + } else { | 31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, |
33 | + memattr = 0x44; /* Normal, NC, No */ | 32 | + void, ptr, ptr, ptr, fpst, i32) |
34 | + } | 33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, |
35 | } | 34 | + void, ptr, ptr, ptr, fpst, i32) |
36 | shareability = 2; /* outer shareable */ | 35 | + |
37 | } | 36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, |
37 | void, ptr, ptr, ptr, fpst, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
45 | } | ||
46 | |||
47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
48 | - gen_helper_gvec_3_ptr * const f[3]) | ||
49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
50 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
52 | { | ||
53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, | ||
55 | select_ah_fpst(s, a->esz)); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
59 | gen_helper_gvec_recps_s, | ||
60 | gen_helper_gvec_recps_d, | ||
61 | }; | ||
62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { | ||
64 | + gen_helper_gvec_ah_recps_h, | ||
65 | + gen_helper_gvec_ah_recps_s, | ||
66 | + gen_helper_gvec_ah_recps_d, | ||
67 | +}; | ||
68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) | ||
69 | |||
70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
71 | gen_helper_gvec_rsqrts_h, | ||
72 | gen_helper_gvec_rsqrts_s, | ||
73 | gen_helper_gvec_rsqrts_d, | ||
74 | }; | ||
75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { | ||
77 | + gen_helper_gvec_ah_rsqrts_h, | ||
78 | + gen_helper_gvec_ah_rsqrts_s, | ||
79 | + gen_helper_gvec_ah_rsqrts_d, | ||
80 | +}; | ||
81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) | ||
82 | |||
83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
84 | gen_helper_gvec_faddp_h, | ||
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/tcg/translate-sve.c | ||
88 | +++ b/target/arm/tcg/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
90 | NULL, gen_helper_gvec_##name##_h, \ | ||
91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
92 | }; \ | ||
93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ | ||
95 | + NULL, gen_helper_gvec_ah_##name##_h, \ | ||
96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ | ||
97 | + }; \ | ||
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
114 | + | ||
115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) | ||
116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) | ||
117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) | ||
118 | + | ||
119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
38 | -- | 122 | -- |
39 | 2.34.1 | 123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS | ||
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] | ||
8 | fixed] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/helper.h | ||
20 | +++ b/target/arm/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
23 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | + | ||
32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | + | ||
39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
40 | void, ptr, ptr, ptr, ptr, i32) | ||
41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/translate-a64.c | ||
45 | +++ b/target/arm/tcg/translate-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
47 | |||
48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
49 | { | ||
50 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
51 | - gen_helper_gvec_fmla_idx_h, | ||
52 | - gen_helper_gvec_fmla_idx_s, | ||
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/arm/tcg/translate-sve.c | ||
80 | +++ b/target/arm/tcg/translate-sve.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
82 | *** SVE Floating Point Multiply-Add Indexed Group | ||
83 | */ | ||
84 | |||
85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
86 | -{ | ||
87 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
88 | - NULL, | ||
89 | - gen_helper_gvec_fmla_idx_h, | ||
90 | - gen_helper_gvec_fmla_idx_s, | ||
91 | - gen_helper_gvec_fmla_idx_d, | ||
92 | - }; | ||
93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
94 | - (a->index << 1) | sub, | ||
95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
96 | -} | ||
97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
98 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
100 | +}; | ||
101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
104 | |||
105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
108 | + { NULL, NULL }, | ||
109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
112 | +}; | ||
113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
114 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
115 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | *** SVE Floating Point Multiply Indexed Group | ||
120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/arm/tcg/vec_helper.c | ||
123 | +++ b/target/arm/tcg/vec_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
125 | |||
126 | #undef DO_FMUL_IDX | ||
127 | |||
128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
131 | float_status *stat, uint32_t desc) \ | ||
132 | { \ | ||
133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ | ||
137 | + intptr_t idx = simd_data(desc); \ | ||
138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
141 | TYPE mm = m[H(i + idx)]; \ | ||
142 | for (j = 0; j < segment; j++) { \ | ||
143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ | ||
144 | - mm, a[i + j], 0, stat); \ | ||
145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
150 | } | ||
151 | |||
152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
158 | + | ||
159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
162 | + | ||
163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
166 | |||
167 | #undef DO_FMLA_IDX | ||
168 | |||
169 | -- | ||
170 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics |
---|---|---|---|
2 | in FMLS (vector), by implementing a new set of helpers for | ||
3 | the AH=1 case. | ||
2 | 4 | ||
3 | Signed-off-by: Chris Laplante <chris@laplante.io> | 5 | The float_muladd_negate_product flag produces the same result |
4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | as negating either of the multiplication operands, assuming |
5 | Message-id: 20230728160324.1159090-3-chris@laplante.io | 7 | neither of the operands are NaNs. But since FEAT_AFP does not |
8 | negate NaNs, this behaviour is exactly what we need. | ||
9 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 12 | --- |
8 | softmmu/qtest.c | 16 ++++++++++------ | 13 | target/arm/helper.h | 4 ++++ |
9 | 1 file changed, 10 insertions(+), 6 deletions(-) | 14 | target/arm/tcg/translate-a64.c | 7 ++++++- |
15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ | ||
16 | 3 files changed, 32 insertions(+), 1 deletion(-) | ||
10 | 17 | ||
11 | diff --git a/softmmu/qtest.c b/softmmu/qtest.c | 18 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
12 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/softmmu/qtest.c | 20 | --- a/target/arm/helper.h |
14 | +++ b/softmmu/qtest.c | 21 | +++ b/target/arm/helper.h |
15 | @@ -XXX,XX +XXX,XX @@ void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)) | 22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
16 | process_command_cb = pc_cb; | 23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
25 | |||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
29 | + | ||
30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, | ||
31 | void, ptr, ptr, ptr, fpst, i32) | ||
32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, | ||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/translate-a64.c | ||
36 | +++ b/target/arm/tcg/translate-a64.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { | ||
38 | gen_helper_gvec_vfms_s, | ||
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
17 | } | 57 | } |
18 | 58 | ||
19 | +static void qtest_install_gpio_out_intercept(DeviceState *dev, const char *name, int n) | 59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, |
60 | + float_status *stat) | ||
20 | +{ | 61 | +{ |
21 | + qemu_irq *disconnected = g_new0(qemu_irq, 1); | 62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
22 | + qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler, | ||
23 | + disconnected, n); | ||
24 | + | ||
25 | + *disconnected = qdev_intercept_gpio_out(dev, icpt, name, n); | ||
26 | +} | 63 | +} |
27 | + | 64 | + |
28 | static void qtest_process_command(CharBackend *chr, gchar **words) | 65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, |
29 | { | 66 | + float_status *stat) |
30 | const gchar *command; | 67 | +{ |
31 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
32 | if (words[0][14] == 'o') { | 69 | +} |
33 | int i; | 70 | + |
34 | for (i = 0; i < ngl->num_out; ++i) { | 71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, |
35 | - qemu_irq *disconnected = g_new0(qemu_irq, 1); | 72 | + float_status *stat) |
36 | - qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler, | 73 | +{ |
37 | - disconnected, i); | 74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
38 | - | 75 | +} |
39 | - *disconnected = qdev_intercept_gpio_out(dev, icpt, | 76 | + |
40 | - ngl->name, i); | 77 | #define DO_MULADD(NAME, FUNC, TYPE) \ |
41 | + qtest_install_gpio_out_intercept(dev, ngl->name, i); | 78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ |
42 | } | 79 | float_status *stat, uint32_t desc) \ |
43 | } else { | 80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) |
44 | qemu_irq_intercept_in(ngl->in, qtest_irq_handler, | 81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) |
82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) | ||
83 | |||
84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) | ||
85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) | ||
86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) | ||
87 | + | ||
88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. | ||
89 | * For AdvSIMD, there is of course only one such vector segment. | ||
90 | */ | ||
45 | -- | 91 | -- |
46 | 2.34.1 | 92 | 2.34.1 | diff view generated by jsdifflib |
1 | For an Unsupported Atomic Update fault where the stage 1 translation | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the |
---|---|---|---|
2 | table descriptor update can't be done because it's to an unsupported | 2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case |
3 | memory type, this is a stage 1 abort (per the Arm ARM R_VSXXT). This | 3 | which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions |
4 | means we should not set fi->s1ptw, because this will cause the code | 4 | that do the work. |
5 | in the get_phys_addr_lpae() error-exit path to mark it as stage 2. | 5 | |
6 | The float*_muladd functions have a flags argument that can | ||
7 | perform optional negation of various operands. We don't use | ||
8 | that for "normal" arm fmla, because the muladd flags are not | ||
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
11 | |||
12 | The non-AH helpers pass in a zero flags argument and control the | ||
13 | negation via the neg1 and neg3 arguments; the AH helpers always pass | ||
14 | in neg1 and neg3 as zero and control the negation via the flags | ||
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
6 | 17 | ||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20230807141514.19075-2-peter.maydell@linaro.org | ||
10 | --- | 20 | --- |
11 | target/arm/ptw.c | 1 - | 21 | target/arm/tcg/helper-sve.h | 21 ++++++++ |
12 | 1 file changed, 1 deletion(-) | 22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- |
23 | target/arm/tcg/translate-sve.c | 18 ++++--- | ||
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
13 | 25 | ||
14 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
15 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/ptw.c | 28 | --- a/target/arm/tcg/helper-sve.h |
17 | +++ b/target/arm/ptw.c | 29 | +++ b/target/arm/tcg/helper-sve.h |
18 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, | 30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, |
19 | 31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | |
20 | if (unlikely(!host)) { | 32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
21 | fi->type = ARMFault_UnsuppAtomicUpdate; | 33 | |
22 | - fi->s1ptw = true; | 34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, |
23 | return 0; | 35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
24 | } | 36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, |
37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, | ||
46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
47 | + | ||
48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/tcg/sve_helper.c | ||
61 | +++ b/target/arm/tcg/sve_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
63 | |||
64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
65 | float_status *status, uint32_t desc, | ||
66 | - uint16_t neg1, uint16_t neg3) | ||
67 | + uint16_t neg1, uint16_t neg3, int flags) | ||
68 | { | ||
69 | intptr_t i = simd_oprsz(desc); | ||
70 | uint64_t *g = vg; | ||
71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | ||
73 | e2 = *(uint16_t *)(vm + H1_2(i)); | ||
74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | ||
75 | - r = float16_muladd(e1, e2, e3, 0, status); | ||
76 | + r = float16_muladd(e1, e2, e3, flags, status); | ||
77 | *(uint16_t *)(vd + H1_2(i)) = r; | ||
78 | } | ||
79 | } while (i & 63); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
82 | void *vg, float_status *status, uint32_t desc) | ||
83 | { | ||
84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
86 | } | ||
87 | |||
88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
89 | void *vg, float_status *status, uint32_t desc) | ||
90 | { | ||
91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); | ||
92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | ||
93 | } | ||
94 | |||
95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
96 | void *vg, float_status *status, uint32_t desc) | ||
97 | { | ||
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); | ||
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | ||
100 | } | ||
101 | |||
102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
103 | void *vg, float_status *status, uint32_t desc) | ||
104 | { | ||
105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); | ||
106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | ||
107 | +} | ||
108 | + | ||
109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
110 | + void *vg, float_status *status, uint32_t desc) | ||
111 | +{ | ||
112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
113 | + float_muladd_negate_product); | ||
114 | +} | ||
115 | + | ||
116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
117 | + void *vg, float_status *status, uint32_t desc) | ||
118 | +{ | ||
119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
120 | + float_muladd_negate_product | float_muladd_negate_c); | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
124 | + void *vg, float_status *status, uint32_t desc) | ||
125 | +{ | ||
126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
25 | 298 | ||
26 | -- | 299 | -- |
27 | 2.34.1 | 300 | 2.34.1 | diff view generated by jsdifflib |
1 | We only use S1Translate::out_secure in two places, where we are | 1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when |
---|---|---|---|
2 | setting up MemTxAttrs for a page table load. We can use | 2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field |
3 | arm_space_is_secure(ptw->out_space) instead, which guarantees | 3 | and use that to determine whether to do the negation. |
4 | that we're setting the MemTxAttrs secure and space fields | ||
5 | consistently, and allows us to drop the out_secure field in | ||
6 | S1Translate entirely. | ||
7 | 4 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20230807141514.19075-12-peter.maydell@linaro.org | ||
11 | --- | 7 | --- |
12 | target/arm/ptw.c | 7 ++----- | 8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- |
13 | 1 file changed, 2 insertions(+), 5 deletions(-) | 9 | target/arm/tcg/translate-sve.c | 4 ++-- |
10 | 2 files changed, 17 insertions(+), 5 deletions(-) | ||
14 | 11 | ||
15 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/ptw.c | 14 | --- a/target/arm/tcg/sve_helper.c |
18 | +++ b/target/arm/ptw.c | 15 | +++ b/target/arm/tcg/sve_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ typedef struct S1Translate { | 16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) |
20 | * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}. | 17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
21 | */ | 18 | { |
22 | bool in_s1_is_el0; | 19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; |
23 | - bool out_secure; | 20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
24 | bool out_rw; | 21 | uint16_t *d = vd, *n = vn, *m = vm; |
25 | bool out_be; | 22 | for (i = 0; i < opr_sz; i += 1) { |
26 | ARMSecuritySpace out_space; | 23 | uint16_t nn = n[i]; |
27 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
28 | pte_attrs = s2.cacheattrs.attrs; | 25 | if (mm & 1) { |
29 | ptw->out_host = NULL; | 26 | nn = float16_one; |
30 | ptw->out_rw = false; | 27 | } |
31 | - ptw->out_secure = s2.f.attrs.secure; | 28 | - d[i] = nn ^ (mm & 2) << 14; |
32 | ptw->out_space = s2.f.attrs.space; | 29 | + if (mm & 2) { |
33 | } else { | 30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); |
34 | #ifdef CONFIG_TCG | 31 | + } |
35 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 32 | + d[i] = nn; |
36 | ptw->out_phys = full->phys_addr | (addr & ~TARGET_PAGE_MASK); | 33 | } |
37 | ptw->out_rw = full->prot & PAGE_WRITE; | 34 | } |
38 | pte_attrs = full->pte_attrs; | 35 | |
39 | - ptw->out_secure = full->attrs.secure; | 36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) |
40 | ptw->out_space = full->attrs.space; | 37 | { |
41 | #else | 38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; |
42 | g_assert_not_reached(); | 39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
43 | @@ -XXX,XX +XXX,XX @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, | 40 | uint32_t *d = vd, *n = vn, *m = vm; |
44 | } else { | 41 | for (i = 0; i < opr_sz; i += 1) { |
45 | /* Page tables are in MMIO. */ | 42 | uint32_t nn = n[i]; |
46 | MemTxAttrs attrs = { | 43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) |
47 | - .secure = ptw->out_secure, | 44 | if (mm & 1) { |
48 | .space = ptw->out_space, | 45 | nn = float32_one; |
49 | + .secure = arm_space_is_secure(ptw->out_space), | 46 | } |
50 | }; | 47 | - d[i] = nn ^ (mm & 2) << 30; |
51 | AddressSpace *as = arm_addressspace(cs, attrs); | 48 | + if (mm & 2) { |
52 | MemTxResult result = MEMTX_OK; | 49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); |
53 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, | 50 | + } |
54 | } else { | 51 | + d[i] = nn; |
55 | /* Page tables are in MMIO. */ | 52 | } |
56 | MemTxAttrs attrs = { | 53 | } |
57 | - .secure = ptw->out_secure, | 54 | |
58 | .space = ptw->out_space, | 55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) |
59 | + .secure = arm_space_is_secure(ptw->out_space), | 56 | { |
60 | }; | 57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; |
61 | AddressSpace *as = arm_addressspace(cs, attrs); | 58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
62 | MemTxResult result = MEMTX_OK; | 59 | uint64_t *d = vd, *n = vn, *m = vm; |
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
92 | |||
93 | /* | ||
94 | *** SVE Predicate Logical Operations Group | ||
63 | -- | 95 | -- |
64 | 2.34.1 | 96 | 2.34.1 | diff view generated by jsdifflib |
1 | The s1ns bit in ARMMMUFaultInfo is documented as "true if | 1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when |
---|---|---|---|
2 | we faulted on a non-secure IPA while in secure state". Both the | 2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, |
3 | places which look at this bit only do so after having confirmed | 3 | so we can select the correct behaviour. |
4 | that this is a stage 2 fault and we're dealing with Secure EL2, | ||
5 | which leaves the ptw.c code free to set the bit to any random | ||
6 | value in the other cases. | ||
7 | 4 | ||
8 | Instead of taking advantage of that freedom, consistently | 5 | Because the operand is known to be negative, negating the operand |
9 | make the bit be set to false for the "not a stage 2 fault | 6 | is the same as taking the absolute value. Defer this to the muladd |
10 | for Secure EL2" cases. This removes some cases where we | 7 | operation via flags, so that it happens after NaN detection, which |
11 | were using an 'is_secure' boolean and leaving the reader | 8 | is correct for FPCR.AH. |
12 | guessing about whether that was the right thing for Realm | ||
13 | and Root cases. | ||
14 | 9 | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
17 | Message-id: 20230807141514.19075-4-peter.maydell@linaro.org | ||
18 | --- | 12 | --- |
19 | target/arm/ptw.c | 19 +++++++++++++++---- | 13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- |
20 | 1 file changed, 15 insertions(+), 4 deletions(-) | 14 | target/arm/tcg/translate-sve.c | 3 ++- |
15 | 2 files changed, 35 insertions(+), 10 deletions(-) | ||
21 | 16 | ||
22 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
23 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/target/arm/ptw.c | 19 | --- a/target/arm/tcg/sve_helper.c |
25 | +++ b/target/arm/ptw.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
26 | @@ -XXX,XX +XXX,XX @@ static ARMSecuritySpace S2_security_space(ARMSecuritySpace s1_space, | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, |
22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
23 | }; | ||
24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); | ||
25 | - intptr_t x = simd_data(desc); | ||
26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
28 | float16 *d = vd, *n = vn, *m = vm; | ||
29 | + | ||
30 | for (i = 0; i < opr_sz; i++) { | ||
31 | float16 mm = m[i]; | ||
32 | intptr_t xx = x; | ||
33 | + int flags = 0; | ||
34 | + | ||
35 | if (float16_is_neg(mm)) { | ||
36 | - mm = float16_abs(mm); | ||
37 | + if (fpcr_ah) { | ||
38 | + flags = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + mm = float16_abs(mm); | ||
41 | + } | ||
42 | xx += 8; | ||
43 | } | ||
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
27 | } | 46 | } |
28 | } | 47 | } |
29 | 48 | ||
30 | +static bool fault_s1ns(ARMSecuritySpace space, ARMMMUIdx s2_mmu_idx) | 49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, |
31 | +{ | 50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, |
32 | + /* | 51 | }; |
33 | + * For stage 2 faults in Secure EL2, S1NS indicates | 52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); |
34 | + * whether the faulting IPA is in the Secure or NonSecure | 53 | - intptr_t x = simd_data(desc); |
35 | + * IPA space. For all other kinds of fault, it is false. | 54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
36 | + */ | 55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
37 | + return space == ARMSS_Secure && regime_is_stage2(s2_mmu_idx) | 56 | float32 *d = vd, *n = vn, *m = vm; |
38 | + && s2_mmu_idx == ARMMMUIdx_Stage2_S; | ||
39 | +} | ||
40 | + | 57 | + |
41 | /* Translate a S1 pagetable walk through S2 if needed. */ | 58 | for (i = 0; i < opr_sz; i++) { |
42 | static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 59 | float32 mm = m[i]; |
43 | hwaddr addr, ARMMMUFaultInfo *fi) | 60 | intptr_t xx = x; |
44 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 61 | + int flags = 0; |
45 | fi->s2addr = addr; | 62 | + |
46 | fi->stage2 = true; | 63 | if (float32_is_neg(mm)) { |
47 | fi->s1ptw = true; | 64 | - mm = float32_abs(mm); |
48 | - fi->s1ns = !is_secure; | 65 | + if (fpcr_ah) { |
49 | + fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); | 66 | + flags = float_muladd_negate_product; |
50 | return false; | 67 | + } else { |
68 | + mm = float32_abs(mm); | ||
69 | + } | ||
70 | xx += 8; | ||
51 | } | 71 | } |
72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); | ||
73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); | ||
52 | } | 74 | } |
53 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | ||
54 | fi->s2addr = addr; | ||
55 | fi->stage2 = regime_is_stage2(s2_mmu_idx); | ||
56 | fi->s1ptw = fi->stage2; | ||
57 | - fi->s1ns = !is_secure; | ||
58 | + fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); | ||
59 | return false; | ||
60 | } | 75 | } |
61 | 76 | ||
62 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, |
63 | fi->s2addr = ptw->out_virt; | 78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, |
64 | fi->stage2 = true; | 79 | }; |
65 | fi->s1ptw = true; | 80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); |
66 | - fi->s1ns = !ptw->in_secure; | 81 | - intptr_t x = simd_data(desc); |
67 | + fi->s1ns = fault_s1ns(ptw->in_space, ptw->in_ptw_idx); | 82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
68 | return 0; | 83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
84 | float64 *d = vd, *n = vn, *m = vm; | ||
85 | + | ||
86 | for (i = 0; i < opr_sz; i++) { | ||
87 | float64 mm = m[i]; | ||
88 | intptr_t xx = x; | ||
89 | + int flags = 0; | ||
90 | + | ||
91 | if (float64_is_neg(mm)) { | ||
92 | - mm = float64_abs(mm); | ||
93 | + if (fpcr_ah) { | ||
94 | + flags = float_muladd_negate_product; | ||
95 | + } else { | ||
96 | + mm = float64_abs(mm); | ||
97 | + } | ||
98 | xx += 8; | ||
69 | } | 99 | } |
70 | 100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | |
71 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); |
72 | fi->level = level; | 102 | } |
73 | /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ | ||
74 | fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx); | ||
75 | - fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; | ||
76 | + fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx); | ||
77 | return true; | ||
78 | } | 103 | } |
79 | 104 | ||
105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/target/arm/tcg/translate-sve.c | ||
108 | +++ b/target/arm/tcg/translate-sve.c | ||
109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | ||
110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, | ||
111 | }; | ||
112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | ||
113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, | ||
114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, | ||
115 | + a->imm | (s->fpcr_ah << 3), | ||
116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
80 | -- | 119 | -- |
81 | 2.34.1 | 120 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Before this change, the default KVM type, which is used for non-virt | 3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH |
4 | machine models, was 0. | 4 | is set. Handle this by passing FPCR.AH to the helper via the |
5 | SIMD data field, and use this to select whether to do the | ||
6 | negation via XOR or via the muladd negate_product flag. | ||
5 | 7 | ||
6 | The kernel documentation says: | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | > On arm64, the physical address size for a VM (IPA Size limit) is | 9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org |
8 | > limited to 40bits by default. The limit can be configured if the host | 10 | [PMM: Expanded commit message] |
9 | > supports the extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use | ||
10 | > KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type | ||
11 | > identifier, where IPA_Bits is the maximum width of any physical | ||
12 | > address used by the VM. The IPA_Bits is encoded in bits[7-0] of the | ||
13 | > machine type identifier. | ||
14 | > | ||
15 | > e.g, to configure a guest to use 48bit physical address size:: | ||
16 | > | ||
17 | > vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48)); | ||
18 | > | ||
19 | > The requested size (IPA_Bits) must be: | ||
20 | > | ||
21 | > == ========================================================= | ||
22 | > 0 Implies default size, 40bits (for backward compatibility) | ||
23 | > N Implies N bits, where N is a positive integer such that, | ||
24 | > 32 <= N <= Host_IPA_Limit | ||
25 | > == ========================================================= | ||
26 | |||
27 | > Host_IPA_Limit is the maximum possible value for IPA_Bits on the host | ||
28 | > and is dependent on the CPU capability and the kernel configuration. | ||
29 | > The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the | ||
30 | > KVM_CHECK_EXTENSION ioctl() at run-time. | ||
31 | > | ||
32 | > Creation of the VM will fail if the requested IPA size (whether it is | ||
33 | > implicit or explicit) is unsupported on the host. | ||
34 | https://docs.kernel.org/virt/kvm/api.html#kvm-create-vm | ||
35 | |||
36 | So if Host_IPA_Limit < 40, specifying 0 as the type will fail. This | ||
37 | actually confused libvirt, which uses "none" machine model to probe the | ||
38 | KVM availability, on M2 MacBook Air. | ||
39 | |||
40 | Fix this by using Host_IPA_Limit as the default type when | ||
41 | KVM_CAP_ARM_VM_IPA_SIZE is available. | ||
42 | |||
43 | Cc: qemu-stable@nongnu.org | ||
44 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
45 | Message-id: 20230727073134.134102-3-akihiko.odaki@daynix.com | ||
46 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
47 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
48 | --- | 13 | --- |
49 | target/arm/kvm.c | 4 +++- | 14 | target/arm/tcg/translate-a64.c | 2 +- |
50 | 1 file changed, 3 insertions(+), 1 deletion(-) | 15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- |
16 | 2 files changed, 40 insertions(+), 28 deletions(-) | ||
51 | 17 | ||
52 | diff --git a/target/arm/kvm.c b/target/arm/kvm.c | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
53 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
54 | --- a/target/arm/kvm.c | 20 | --- a/target/arm/tcg/translate-a64.c |
55 | +++ b/target/arm/kvm.c | 21 | +++ b/target/arm/tcg/translate-a64.c |
56 | @@ -XXX,XX +XXX,XX @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) | 22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) |
57 | 23 | ||
58 | int kvm_arch_get_default_type(MachineState *ms) | 24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
59 | { | 25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
60 | - return 0; | 26 | - a->rot, fn[a->esz]); |
61 | + bool fixed_ipa; | 27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); |
62 | + int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); | 28 | return true; |
63 | + return fixed_ipa ? 0 : size; | ||
64 | } | 29 | } |
65 | 30 | ||
66 | int kvm_arch_init(MachineState *ms, KVMState *s) | 31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | - uint32_t neg_real = flip ^ neg_imag; | ||
41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
43 | + uint32_t negf_real = flip ^ negf_imag; | ||
44 | + float16 negx_imag, negx_real; | ||
45 | uintptr_t i; | ||
46 | |||
47 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
48 | - neg_real <<= 15; | ||
49 | - neg_imag <<= 15; | ||
50 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
51 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
55 | |||
56 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
57 | float16 e2 = n[H2(i + flip)]; | ||
58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
60 | float16 e4 = e2; | ||
61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
63 | |||
64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
68 | } | ||
69 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
72 | uintptr_t opr_sz = simd_oprsz(desc); | ||
73 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
76 | - uint32_t neg_real = flip ^ neg_imag; | ||
77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
79 | + uint32_t negf_real = flip ^ negf_imag; | ||
80 | + float32 negx_imag, negx_real; | ||
81 | uintptr_t i; | ||
82 | |||
83 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
84 | - neg_real <<= 31; | ||
85 | - neg_imag <<= 31; | ||
86 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
87 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
91 | |||
92 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
93 | float32 e2 = n[H4(i + flip)]; | ||
94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
96 | float32 e4 = e2; | ||
97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
99 | |||
100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
104 | } | ||
105 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
108 | uintptr_t opr_sz = simd_oprsz(desc); | ||
109 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
112 | - uint64_t neg_real = flip ^ neg_imag; | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_real, negx_imag; | ||
117 | uintptr_t i; | ||
118 | |||
119 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
120 | - neg_real <<= 63; | ||
121 | - neg_imag <<= 63; | ||
122 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
127 | |||
128 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
129 | float64 e2 = n[i + flip]; | ||
130 | - float64 e1 = m[i + flip] ^ neg_real; | ||
131 | + float64 e1 = m[i + flip] ^ negx_real; | ||
132 | float64 e4 = e2; | ||
133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
135 | |||
136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
140 | } | ||
141 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
142 | } | ||
67 | -- | 143 | -- |
68 | 2.34.1 | 144 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in FCMLA by index mustn't negate a NaN when | ||
4 | FPCR.AH is set. Use the same approach as vector FCMLA of | ||
5 | passing in FPCR.AH and using it to select whether to negate | ||
6 | by XOR or by the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
23 | if (fp_access_check(s)) { | ||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - (a->idx << 2) | a->rot, fn); | ||
27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); | ||
28 | } | ||
29 | return true; | ||
30 | } | ||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
42 | - uint32_t neg_real = flip ^ neg_imag; | ||
43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
44 | + uint32_t negf_real = flip ^ negf_imag; | ||
45 | intptr_t elements = opr_sz / sizeof(float16); | ||
46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
47 | + float16 negx_imag, negx_real; | ||
48 | intptr_t i, j; | ||
49 | |||
50 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
51 | - neg_real <<= 15; | ||
52 | - neg_imag <<= 15; | ||
53 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
54 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
58 | |||
59 | for (i = 0; i < elements; i += eltspersegment) { | ||
60 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
61 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
62 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
64 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
66 | |||
67 | for (j = i; j < i + eltspersegment; j += 2) { | ||
68 | float16 e2 = n[H2(j + flip)]; | ||
69 | float16 e4 = e2; | ||
70 | |||
71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
75 | } | ||
76 | } | ||
77 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
79 | uintptr_t opr_sz = simd_oprsz(desc); | ||
80 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
85 | - uint32_t neg_real = flip ^ neg_imag; | ||
86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
87 | + uint32_t negf_real = flip ^ negf_imag; | ||
88 | intptr_t elements = opr_sz / sizeof(float32); | ||
89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
90 | + float32 negx_imag, negx_real; | ||
91 | intptr_t i, j; | ||
92 | |||
93 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
94 | - neg_real <<= 31; | ||
95 | - neg_imag <<= 31; | ||
96 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
97 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
101 | |||
102 | for (i = 0; i < elements; i += eltspersegment) { | ||
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | An error may occur after s->as is allocated, for example if the | 3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is |
4 | KVM_CREATE_VM ioctl call fails. | 4 | set. Use the same approach as we did for A64 FCMLA of passing in |
5 | FPCR.AH and using it to select whether to negate by XOR or by the | ||
6 | muladd negate_product flag. | ||
5 | 7 | ||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20230727073134.134102-6-akihiko.odaki@daynix.com | 9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org |
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
9 | [PMM: tweaked commit message] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 12 | --- |
12 | accel/kvm/kvm-all.c | 1 + | 13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- |
13 | 1 file changed, 1 insertion(+) | 14 | target/arm/tcg/translate-sve.c | 2 +- |
15 | 2 files changed, 43 insertions(+), 28 deletions(-) | ||
14 | 16 | ||
15 | diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/accel/kvm/kvm-all.c | 19 | --- a/target/arm/tcg/sve_helper.c |
18 | +++ b/accel/kvm/kvm-all.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ err: | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
20 | if (s->fd != -1) { | 22 | void *vg, float_status *status, uint32_t desc) |
21 | close(s->fd); | 23 | { |
22 | } | 24 | intptr_t j, i = simd_oprsz(desc); |
23 | + g_free(s->as); | 25 | - unsigned rot = simd_data(desc); |
24 | g_free(s->memory_listener.slots); | 26 | - bool flip = rot & 1; |
25 | 27 | - float16 neg_imag, neg_real; | |
26 | return ret; | 28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/arm/tcg/translate-sve.c | ||
165 | +++ b/target/arm/tcg/translate-sve.c | ||
166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | ||
167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | ||
168 | }; | ||
169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | ||
170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | ||
171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), | ||
172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
173 | |||
174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
27 | -- | 175 | -- |
28 | 2.34.1 | 176 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | In realm state, stage-2 translation tables are fetched from the realm | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN |
4 | physical address space (R_PGRQD). | 4 | in FMLSL by element and vector, using the usual trick of |
5 | negating by XOR when AH=0 and by muladd flags when AH=1. | ||
5 | 6 | ||
6 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
8 | look directly at env->vfp.fpcr and don't need to pass in the |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Message-id: 20230809123706.1842548-2-jean-philippe@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 16 | --- |
11 | target/arm/ptw.c | 26 ++++++++++++++++++-------- | 17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- |
12 | 1 file changed, 18 insertions(+), 8 deletions(-) | 18 | 1 file changed, 46 insertions(+), 25 deletions(-) |
13 | 19 | ||
14 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/ptw.c | 22 | --- a/target/arm/tcg/vec_helper.c |
17 | +++ b/target/arm/ptw.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx) | 24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
19 | 25 | */ | |
20 | /* | 26 | |
21 | * We're OK to check the current state of the CPU here because | 27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
22 | - * (1) we always invalidate all TLBs when the SCR_EL3.NS bit changes | 28 | - uint32_t desc, bool fz16) |
23 | + * (1) we always invalidate all TLBs when the SCR_EL3.NS or SCR_EL3.NSE bit | 29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) |
24 | + * changes. | 30 | { |
25 | * (2) there's no way to do a lookup that cares about Stage 2 for a | 31 | intptr_t i, oprsz = simd_oprsz(desc); |
26 | * different security state to the current one for AArch64, and AArch32 | 32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
27 | * never has a secure EL2. (AArch32 ATS12NSO[UP][RW] allow EL3 to do | 33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
28 | * an NS stage 1+2 lookup while the NS bit is 0.) | 34 | int is_q = oprsz == 16; |
29 | */ | 35 | uint64_t n_4, m_4; |
30 | - if (!arm_is_secure_below_el3(env) || !arm_el_is_aa64(env, 3)) { | 36 | |
31 | + if (!arm_el_is_aa64(env, 3)) { | 37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ |
32 | return ARMMMUIdx_Phys_NS; | 38 | - n_4 = load4_f16(vn, is_q, is_2); |
39 | + /* | ||
40 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
41 | + * Negate all inputs for AH=0 FMLSL at once. | ||
42 | + */ | ||
43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
44 | m_4 = load4_f16(vm, is_q, is_2); | ||
45 | |||
46 | - /* Negate all inputs for FMLSL at once. */ | ||
47 | - if (is_s) { | ||
48 | - n_4 ^= 0x8000800080008000ull; | ||
49 | - } | ||
50 | - | ||
51 | for (i = 0; i < oprsz / 4; i++) { | ||
52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); | ||
54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
33 | } | 56 | } |
34 | - if (stage2idx == ARMMMUIdx_Stage2_S) { | 57 | clear_tail(d, oprsz, simd_maxsz(desc)); |
35 | - s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); | 58 | } |
36 | - } else { | 59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
37 | - s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); | 60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
61 | CPUARMState *env, uint32_t desc) | ||
62 | { | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
66 | + | ||
67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
69 | } | ||
70 | |||
71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
72 | CPUARMState *env, uint32_t desc) | ||
73 | { | ||
74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint64_t negx = 0; | ||
77 | + int negf = 0; | ||
78 | + | ||
79 | + if (is_s) { | ||
80 | + if (env->vfp.fpcr & FPCR_AH) { | ||
81 | + negf = float_muladd_negate_product; | ||
82 | + } else { | ||
83 | + negx = 0x8000800080008000ull; | ||
84 | + } | ||
85 | + } | ||
86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | } | ||
92 | |||
93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
94 | - uint32_t desc, bool fz16) | ||
95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
96 | { | ||
97 | intptr_t i, oprsz = simd_oprsz(desc); | ||
98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
101 | int is_q = oprsz == 16; | ||
102 | uint64_t n_4; | ||
103 | float32 m_1; | ||
104 | |||
105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
106 | - n_4 = load4_f16(vn, is_q, is_2); | ||
107 | - | ||
108 | - /* Negate all inputs for FMLSL at once. */ | ||
109 | - if (is_s) { | ||
110 | - n_4 ^= 0x8000800080008000ull; | ||
38 | - } | 111 | - } |
39 | - return s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; | 112 | - |
40 | 113 | + /* | |
41 | + switch (arm_security_space_below_el3(env)) { | 114 | + * Pre-load all of the f16 data, avoiding overlap issues. |
42 | + case ARMSS_NonSecure: | 115 | + * Negate all inputs for AH=0 FMLSL at once. |
43 | + return ARMMMUIdx_Phys_NS; | 116 | + */ |
44 | + case ARMSS_Realm: | 117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
45 | + return ARMMMUIdx_Phys_Realm; | 118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); |
46 | + case ARMSS_Secure: | 119 | |
47 | + if (stage2idx == ARMMMUIdx_Stage2_S) { | 120 | for (i = 0; i < oprsz / 4; i++) { |
48 | + s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); | 121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); |
122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
124 | } | ||
125 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
126 | } | ||
127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
129 | CPUARMState *env, uint32_t desc) | ||
130 | { | ||
131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
134 | + | ||
135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
137 | } | ||
138 | |||
139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
140 | CPUARMState *env, uint32_t desc) | ||
141 | { | ||
142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
144 | + uint64_t negx = 0; | ||
145 | + int negf = 0; | ||
146 | + | ||
147 | + if (is_s) { | ||
148 | + if (env->vfp.fpcr & FPCR_AH) { | ||
149 | + negf = float_muladd_negate_product; | ||
49 | + } else { | 150 | + } else { |
50 | + s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); | 151 | + negx = 0x8000800080008000ull; |
51 | + } | 152 | + } |
52 | + return s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; | ||
53 | + default: | ||
54 | + g_assert_not_reached(); | ||
55 | + } | 153 | + } |
154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
56 | } | 156 | } |
57 | 157 | ||
58 | static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
59 | -- | 158 | -- |
60 | 2.34.1 | 159 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This is much better than just silently failing with OK. | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 | ||
5 | and by muladd flags when AH=1. | ||
4 | 6 | ||
5 | Signed-off-by: Chris Laplante <chris@laplante.io> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
6 | Message-id: 20230728160324.1159090-6-chris@laplante.io | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | --- | 16 | --- |
10 | softmmu/qtest.c | 12 ++++++++++-- | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
11 | 1 file changed, 10 insertions(+), 2 deletions(-) | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
12 | 19 | ||
13 | diff --git a/softmmu/qtest.c b/softmmu/qtest.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/softmmu/qtest.c | 22 | --- a/target/arm/tcg/vec_helper.c |
16 | +++ b/softmmu/qtest.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
18 | NamedGPIOList *ngl; | 25 | CPUARMState *env, uint32_t desc) |
19 | bool is_named; | 26 | { |
20 | bool is_outbound; | 27 | intptr_t i, j, oprsz = simd_oprsz(desc); |
21 | + bool interception_succeeded = false; | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
22 | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | |
23 | g_assert(words[1]); | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
24 | is_named = words[2] != NULL; | 31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); |
25 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 32 | float_status *status = &env->vfp.fp_status_a64; |
26 | for (i = 0; i < ngl->num_out; ++i) { | 33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
27 | qtest_install_gpio_out_intercept(dev, ngl->name, i); | 34 | + int negx = 0, negf = 0; |
28 | } | 35 | + |
29 | + interception_succeeded = true; | 36 | + if (is_s) { |
30 | } | 37 | + if (env->vfp.fpcr & FPCR_AH) { |
31 | } else { | 38 | + negf = float_muladd_negate_product; |
32 | qemu_irq_intercept_in(ngl->in, qtest_irq_handler, | 39 | + } else { |
33 | ngl->num_in); | 40 | + negx = 0x8000; |
34 | + interception_succeeded = true; | 41 | + } |
35 | } | 42 | + } |
43 | |||
44 | for (i = 0; i < oprsz; i += 16) { | ||
45 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
47 | |||
48 | for (j = 0; j < 16; j += sizeof(float32)) { | ||
49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; | ||
50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; | ||
51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
52 | float32 aa = *(float32 *)(va + H1_4(i + j)); | ||
53 | |||
54 | *(float32 *)(vd + H1_4(i + j)) = | ||
55 | - float32_muladd(nn, mm, aa, 0, status); | ||
56 | + float32_muladd(nn, mm, aa, negf, status); | ||
36 | } | 57 | } |
37 | - irq_intercept_dev = dev; | 58 | } |
38 | + | 59 | } |
39 | qtest_send_prefix(chr); | ||
40 | - qtest_send(chr, "OK\n"); | ||
41 | + if (interception_succeeded) { | ||
42 | + irq_intercept_dev = dev; | ||
43 | + qtest_send(chr, "OK\n"); | ||
44 | + } else { | ||
45 | + qtest_send(chr, "FAIL No intercepts installed\n"); | ||
46 | + } | ||
47 | } else if (strcmp(words[0], "set_irq_in") == 0) { | ||
48 | DeviceState *dev; | ||
49 | qemu_irq irq; | ||
50 | -- | 60 | -- |
51 | 2.34.1 | 61 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Adds qtest_irq_intercept_out_named method, which utilizes a new optional | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | name parameter to the irq_intercept_out qtest command. | 4 | FMLSL (vectors), using the usual trick of negating by XOR when AH=0 |
5 | and by muladd flags when AH=1. | ||
5 | 6 | ||
6 | Signed-off-by: Chris Laplante <chris@laplante.io> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
7 | Message-id: 20230728160324.1159090-4-chris@laplante.io | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org | ||
13 | [PMM: tweaked commit message] | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 16 | --- |
11 | tests/qtest/libqtest.h | 11 +++++++++++ | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
12 | softmmu/qtest.c | 18 ++++++++++-------- | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
13 | tests/qtest/libqtest.c | 6 ++++++ | ||
14 | 3 files changed, 27 insertions(+), 8 deletions(-) | ||
15 | 19 | ||
16 | diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/tests/qtest/libqtest.h | 22 | --- a/target/arm/tcg/vec_helper.c |
19 | +++ b/tests/qtest/libqtest.h | 23 | +++ b/target/arm/tcg/vec_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ void qtest_irq_intercept_in(QTestState *s, const char *string); | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
21 | */ | 25 | CPUARMState *env, uint32_t desc) |
22 | void qtest_irq_intercept_out(QTestState *s, const char *string); | 26 | { |
23 | 27 | intptr_t i, oprsz = simd_oprsz(desc); | |
24 | +/** | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
25 | + * qtest_irq_intercept_out_named: | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
26 | + * @s: #QTestState instance to operate on. | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
27 | + * @qom_path: QOM path of a device. | 31 | float_status *status = &env->vfp.fp_status_a64; |
28 | + * @name: Name of the GPIO out pin | 32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
29 | + * | 33 | + int negx = 0, negf = 0; |
30 | + * Associate a qtest irq with the named GPIO-out pin of the device | ||
31 | + * whose path is specified by @string and whose name is @name. | ||
32 | + */ | ||
33 | +void qtest_irq_intercept_out_named(QTestState *s, const char *qom_path, const char *name); | ||
34 | + | 34 | + |
35 | /** | 35 | + if (is_s) { |
36 | * qtest_set_irq_in: | 36 | + if (env->vfp.fpcr & FPCR_AH) { |
37 | * @s: QTestState instance to operate on. | 37 | + negf = float_muladd_negate_product; |
38 | diff --git a/softmmu/qtest.c b/softmmu/qtest.c | 38 | + } else { |
39 | index XXXXXXX..XXXXXXX 100644 | 39 | + negx = 0x8000; |
40 | --- a/softmmu/qtest.c | 40 | + } |
41 | +++ b/softmmu/qtest.c | 41 | + } |
42 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 42 | |
43 | || strcmp(words[0], "irq_intercept_in") == 0) { | 43 | for (i = 0; i < oprsz; i += sizeof(float32)) { |
44 | DeviceState *dev; | 44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; |
45 | NamedGPIOList *ngl; | 45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; |
46 | + bool is_outbound; | 46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); |
47 | 47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | |
48 | g_assert(words[1]); | 48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); |
49 | + is_outbound = words[0][14] == 'o'; | 49 | float32 aa = *(float32 *)(va + H1_4(i)); |
50 | dev = DEVICE(object_resolve_path(words[1], NULL)); | 50 | |
51 | if (!dev) { | 51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); |
52 | qtest_send_prefix(chr); | 52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); |
53 | @@ -XXX,XX +XXX,XX @@ static void qtest_process_command(CharBackend *chr, gchar **words) | 53 | } |
54 | } | ||
55 | |||
56 | QLIST_FOREACH(ngl, &dev->gpios, node) { | ||
57 | - /* We don't support intercept of named GPIOs yet */ | ||
58 | - if (ngl->name) { | ||
59 | - continue; | ||
60 | - } | ||
61 | - if (words[0][14] == 'o') { | ||
62 | - int i; | ||
63 | - for (i = 0; i < ngl->num_out; ++i) { | ||
64 | - qtest_install_gpio_out_intercept(dev, ngl->name, i); | ||
65 | + /* We don't support inbound interception of named GPIOs yet */ | ||
66 | + if (is_outbound) { | ||
67 | + /* NULL is valid and matchable, for "unnamed GPIO" */ | ||
68 | + if (g_strcmp0(ngl->name, words[2]) == 0) { | ||
69 | + int i; | ||
70 | + for (i = 0; i < ngl->num_out; ++i) { | ||
71 | + qtest_install_gpio_out_intercept(dev, ngl->name, i); | ||
72 | + } | ||
73 | } | ||
74 | } else { | ||
75 | qemu_irq_intercept_in(ngl->in, qtest_irq_handler, | ||
76 | diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/tests/qtest/libqtest.c | ||
79 | +++ b/tests/qtest/libqtest.c | ||
80 | @@ -XXX,XX +XXX,XX @@ void qtest_irq_intercept_out(QTestState *s, const char *qom_path) | ||
81 | qtest_rsp(s); | ||
82 | } | 54 | } |
83 | 55 | ||
84 | +void qtest_irq_intercept_out_named(QTestState *s, const char *qom_path, const char *name) | ||
85 | +{ | ||
86 | + qtest_sendf(s, "irq_intercept_out %s %s\n", qom_path, name); | ||
87 | + qtest_rsp(s); | ||
88 | +} | ||
89 | + | ||
90 | void qtest_irq_intercept_in(QTestState *s, const char *qom_path) | ||
91 | { | ||
92 | qtest_sendf(s, "irq_intercept_in %s\n", qom_path); | ||
93 | -- | 56 | -- |
94 | 2.34.1 | 57 | 2.34.1 | diff view generated by jsdifflib |
1 | When we do a translation in Secure state, the NSTable bits in table | 1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we |
---|---|---|---|
2 | descriptors may downgrade us to NonSecure; we update ptw->in_secure | 2 | can enable FEAT_AFP for '-cpu max', and document that we support it. |
3 | and ptw->in_space accordingly. We guard that check correctly with a | ||
4 | conditional that means it's only applied for Secure stage 1 | ||
5 | translations. However, later on in get_phys_addr_lpae() we fold the | ||
6 | effects of the NSTable bits into the final descriptor attributes | ||
7 | bits, and there we do it unconditionally regardless of the CPU state. | ||
8 | That means that in Realm state (where in_secure is false) we will set | ||
9 | bit 5 in attrs, and later use it to decide to output to non-secure | ||
10 | space. | ||
11 | |||
12 | We don't in fact need to do this folding in at all any more (since | ||
13 | commit 2f1ff4e7b9f30c): if an NSTable bit was set then we have | ||
14 | already set ptw->in_space to ARMSS_NonSecure, and in that situation | ||
15 | we don't look at attrs bit 5. The only thing we still need to deal | ||
16 | with is the real NS bit in the final descriptor word, so we can just | ||
17 | drop the code that ORed in the NSTable bit. | ||
18 | 3 | ||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
21 | Message-id: 20230807141514.19075-9-peter.maydell@linaro.org | ||
22 | --- | 6 | --- |
23 | target/arm/ptw.c | 3 +-- | 7 | docs/system/arm/emulation.rst | 1 + |
24 | 1 file changed, 1 insertion(+), 2 deletions(-) | 8 | target/arm/tcg/cpu64.c | 1 + |
9 | 2 files changed, 2 insertions(+) | ||
25 | 10 | ||
26 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
27 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/arm/ptw.c | 13 | --- a/docs/system/arm/emulation.rst |
29 | +++ b/target/arm/ptw.c | 14 | +++ b/docs/system/arm/emulation.rst |
30 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
31 | * Extract attributes from the (modified) descriptor, and apply | 16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) |
32 | * table descriptors. Stage 2 table descriptors do not include | 17 | - FEAT_AdvSIMD (Advanced SIMD Extension) |
33 | * any attribute fields. HPD disables all the table attributes | 18 | - FEAT_AES (AESD and AESE instructions) |
34 | - * except NSTable. | 19 | +- FEAT_AFP (Alternate floating-point behavior) |
35 | + * except NSTable (which we have already handled). | 20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) |
36 | */ | 21 | - FEAT_ASID16 (16 bit ASID) |
37 | attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); | 22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) |
38 | if (!regime_is_stage2(mmu_idx)) { | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
39 | - attrs |= !ptw->in_secure << 5; /* NS */ | 24 | index XXXXXXX..XXXXXXX 100644 |
40 | if (!param.hpd) { | 25 | --- a/target/arm/tcg/cpu64.c |
41 | attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ | 26 | +++ b/target/arm/tcg/cpu64.c |
42 | /* | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ | ||
29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ | ||
30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ | ||
31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ | ||
32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ | ||
33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ | ||
34 | cpu->isar.id_aa64mmfr1 = t; | ||
43 | -- | 35 | -- |
44 | 2.34.1 | 36 | 2.34.1 | diff view generated by jsdifflib |
1 | Plumb the ARMSecurityState through to regime_translation_disabled() | 1 | FEAT_RPRES implements an "increased precision" variant of the single |
---|---|---|---|
2 | rather than just a bool is_secure. | 2 | precision FRECPE and FRSQRTE instructions from an 8 bit to a 12 |
3 | bit mantissa. This applies only when FPCR.AH == 1. Note that the | ||
4 | halfprec and double versions of these insns retain the 8 bit | ||
5 | precision regardless. | ||
6 | |||
7 | In this commit we add all the plumbing to make these instructions | ||
8 | call a new helper function when the increased-precision is in | ||
9 | effect. In the following commit we will provide the actual change | ||
10 | in behaviour in the helpers. | ||
3 | 11 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230807141514.19075-6-peter.maydell@linaro.org | ||
7 | --- | 14 | --- |
8 | target/arm/ptw.c | 15 ++++++++------- | 15 | target/arm/cpu-features.h | 5 +++++ |
9 | 1 file changed, 8 insertions(+), 7 deletions(-) | 16 | target/arm/helper.h | 4 ++++ |
17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- | ||
18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- | ||
19 | target/arm/tcg/vec_helper.c | 2 ++ | ||
20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- | ||
21 | 6 files changed, 85 insertions(+), 8 deletions(-) | ||
10 | 22 | ||
11 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
12 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/target/arm/ptw.c | 25 | --- a/target/arm/cpu-features.h |
14 | +++ b/target/arm/ptw.c | 26 | +++ b/target/arm/cpu-features.h |
15 | @@ -XXX,XX +XXX,XX @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | 27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) |
16 | 28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); | |
17 | /* Return true if the specified stage of address translation is disabled */ | 29 | } |
18 | static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, | 30 | |
19 | - bool is_secure) | 31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) |
20 | + ARMSecuritySpace space) | 32 | +{ |
21 | { | 33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); |
22 | uint64_t hcr_el2; | 34 | +} |
23 | + bool is_secure = arm_space_is_secure(space); | 35 | + |
24 | 36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) | |
25 | if (arm_feature(env, ARM_FEATURE_M)) { | 37 | { |
26 | switch (env->v7m.mpu_ctrl[is_secure] & | 38 | /* We always set the AdvSIMD and FP fields identically. */ |
27 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav5(CPUARMState *env, | 39 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
28 | uint32_t base; | 40 | index XXXXXXX..XXXXXXX 100644 |
29 | ARMMMUIdx mmu_idx = ptw->in_mmu_idx; | 41 | --- a/target/arm/helper.h |
30 | bool is_user = regime_is_user(env, mmu_idx); | 42 | +++ b/target/arm/helper.h |
31 | - bool is_secure = arm_space_is_secure(ptw->in_space); | 43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) |
32 | 44 | ||
33 | - if (regime_translation_disabled(env, mmu_idx, is_secure)) { | 45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
34 | + if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { | 46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
35 | /* MPU disabled. */ | 47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
36 | result->f.phys_addr = address; | 48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
37 | result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | 49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
38 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav7(CPUARMState *env, | 50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
39 | result->f.lg_page_size = TARGET_PAGE_BITS; | 51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
40 | result->f.prot = 0; | 52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
41 | 53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) | |
42 | - if (regime_translation_disabled(env, mmu_idx, secure) || | 54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) |
43 | + if (regime_translation_disabled(env, mmu_idx, ptw->in_space) || | 55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
44 | m_is_ppb_region(env, address)) { | 56 | |
45 | /* | 57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
46 | * MPU disabled or M profile PPB access: use default memory map. | 58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
47 | @@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | 59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
48 | * are done in arm_v7m_load_vector(), which always does a direct | 60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
49 | * read using address_space_ldl(), rather than going via this function. | 61 | |
50 | */ | 62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
51 | - if (regime_translation_disabled(env, mmu_idx, secure)) { /* MPU disabled */ | 63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
52 | + if (regime_translation_disabled(env, mmu_idx, arm_secure_to_space(secure))) { | 64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
53 | + /* MPU disabled */ | 65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
54 | hit = true; | 66 | |
55 | } else if (m_is_ppb_region(env, address)) { | 67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
56 | hit = true; | 68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
57 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | 69 | index XXXXXXX..XXXXXXX 100644 |
58 | */ | 70 | --- a/target/arm/tcg/translate-a64.c |
59 | ptw->in_mmu_idx = mmu_idx = s1_mmu_idx; | 71 | +++ b/target/arm/tcg/translate-a64.c |
60 | if (arm_feature(env, ARM_FEATURE_EL2) && | 72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { |
61 | - !regime_translation_disabled(env, ARMMMUIdx_Stage2, is_secure)) { | 73 | gen_helper_recpe_f32, |
62 | + !regime_translation_disabled(env, ARMMMUIdx_Stage2, ptw->in_space)) { | 74 | gen_helper_recpe_f64, |
63 | return get_phys_addr_twostage(env, ptw, address, access_type, | 75 | }; |
64 | result, fi); | 76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) |
65 | } | 77 | +static const FPScalar1 f_scalar_frecpe_rpres = { |
66 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, | 78 | + gen_helper_recpe_f16, |
67 | 79 | + gen_helper_recpe_rpres_f32, | |
68 | /* Definitely a real MMU, not an MPU */ | 80 | + gen_helper_recpe_f64, |
69 | 81 | +}; | |
70 | - if (regime_translation_disabled(env, mmu_idx, is_secure)) { | 82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, |
71 | + if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { | 83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? |
72 | return get_phys_addr_disabled(env, ptw, address, access_type, | 84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) |
73 | result, fi); | 85 | |
74 | } | 86 | static const FPScalar1 f_scalar_frecpx = { |
87 | gen_helper_frecpx_f16, | ||
88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { | ||
89 | gen_helper_rsqrte_f32, | ||
90 | gen_helper_rsqrte_f64, | ||
91 | }; | ||
92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { | ||
94 | + gen_helper_rsqrte_f16, | ||
95 | + gen_helper_rsqrte_rpres_f32, | ||
96 | + gen_helper_rsqrte_f64, | ||
97 | +}; | ||
98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, | ||
99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) | ||
101 | |||
102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
103 | { | ||
104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
105 | gen_helper_gvec_frecpe_s, | ||
106 | gen_helper_gvec_frecpe_d, | ||
107 | }; | ||
108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { | ||
110 | + gen_helper_gvec_frecpe_h, | ||
111 | + gen_helper_gvec_frecpe_rpres_s, | ||
112 | + gen_helper_gvec_frecpe_d, | ||
113 | +}; | ||
114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) | ||
116 | |||
117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
118 | gen_helper_gvec_frsqrte_h, | ||
119 | gen_helper_gvec_frsqrte_s, | ||
120 | gen_helper_gvec_frsqrte_d, | ||
121 | }; | ||
122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { | ||
124 | + gen_helper_gvec_frsqrte_h, | ||
125 | + gen_helper_gvec_frsqrte_rpres_s, | ||
126 | + gen_helper_gvec_frsqrte_d, | ||
127 | +}; | ||
128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) | ||
130 | |||
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
162 | |||
163 | /* | ||
164 | *** SVE Floating Point Compare with Zero Group | ||
165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/arm/tcg/vec_helper.c | ||
168 | +++ b/target/arm/tcg/vec_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ | ||
170 | |||
171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) | ||
172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) | ||
173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) | ||
174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | ||
175 | |||
176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) | ||
177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) | ||
178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) | ||
179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) | ||
180 | |||
181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) | ||
182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/vfp_helper.c | ||
185 | +++ b/target/arm/vfp_helper.c | ||
186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
187 | return make_float16(f16_val); | ||
188 | } | ||
189 | |||
190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
191 | +/* | ||
192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant | ||
193 | + * which is used when FPCR.AH == 1. | ||
194 | + */ | ||
195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
196 | { | ||
197 | float32 f32 = float32_squash_input_denormal(input, fpst); | ||
198 | uint32_t f32_val = float32_val(f32); | ||
199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
200 | return make_float32(f32_val); | ||
201 | } | ||
202 | |||
203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
204 | +{ | ||
205 | + return do_recpe_f32(input, fpst, false); | ||
206 | +} | ||
207 | + | ||
208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) | ||
209 | +{ | ||
210 | + return do_recpe_f32(input, fpst, true); | ||
211 | +} | ||
212 | + | ||
213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
214 | { | ||
215 | float64 f64 = float64_squash_input_denormal(input, fpst); | ||
216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
217 | return make_float16(val); | ||
218 | } | ||
219 | |||
220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
221 | +/* | ||
222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant | ||
223 | + * which is used when FPCR.AH == 1. | ||
224 | + */ | ||
225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
226 | { | ||
227 | float32 f32 = float32_squash_input_denormal(input, s); | ||
228 | uint32_t val = float32_val(f32); | ||
229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
230 | return make_float32(val); | ||
231 | } | ||
232 | |||
233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
234 | +{ | ||
235 | + return do_rsqrte_f32(input, s, false); | ||
236 | +} | ||
237 | + | ||
238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | ||
239 | +{ | ||
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
75 | -- | 246 | -- |
76 | 2.34.1 | 247 | 2.34.1 | diff view generated by jsdifflib |
1 | We no longer look at the in_secure field of the S1Translate struct | 1 | Implement the increased precision variation of FRECPE. In the |
---|---|---|---|
2 | anyway, so we can remove it and all the code which sets it. | 2 | pseudocode this corresponds to the handling of the |
3 | "increasedprecision" boolean in the FPRecipEstimate() and | ||
4 | RecipEstimate() functions. | ||
3 | 5 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230807141514.19075-11-peter.maydell@linaro.org | ||
7 | --- | 8 | --- |
8 | target/arm/ptw.c | 13 ------------- | 9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ |
9 | 1 file changed, 13 deletions(-) | 10 | 1 file changed, 46 insertions(+), 8 deletions(-) |
10 | 11 | ||
11 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
12 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/target/arm/ptw.c | 14 | --- a/target/arm/vfp_helper.c |
14 | +++ b/target/arm/ptw.c | 15 | +++ b/target/arm/vfp_helper.c |
15 | @@ -XXX,XX +XXX,XX @@ typedef struct S1Translate { | 16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
16 | * value being Stage2 vs Stage2_S distinguishes those. | 17 | return r; |
17 | */ | 18 | } |
18 | ARMSecuritySpace in_space; | 19 | |
19 | - /* | 20 | +/* |
20 | - * in_secure: whether the translation regime is a Secure one. | 21 | + * Increased precision version: |
21 | - * This is always equal to arm_space_is_secure(in_space). | 22 | + * input is a 13 bit fixed point number |
22 | - * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit, | 23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. |
23 | - * this field is updated accordingly. | 24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 |
24 | - */ | 25 | + */ |
25 | - bool in_secure; | 26 | +static int recip_estimate_incprec(int input) |
26 | /* | 27 | +{ |
27 | * in_debug: is this a QEMU debug access (gdbstub, etc)? Debug | 28 | + int a, b, r; |
28 | * accesses will not update the guest page table access flags | 29 | + assert(2048 <= input && input < 4096); |
29 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 30 | + a = (input * 2) + 1; |
30 | S1Translate s2ptw = { | 31 | + /* |
31 | .in_mmu_idx = s2_mmu_idx, | 32 | + * The pseudocode expresses this as an operation on infinite |
32 | .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), | 33 | + * precision reals where it calculates 2^25 / a and then looks |
33 | - .in_secure = arm_space_is_secure(s2_space), | 34 | + * at the error between that and the rounded-down-to-integer |
34 | .in_space = s2_space, | 35 | + * value to see if it should instead round up. We instead |
35 | .in_debug = true, | 36 | + * follow the same approach as the pseudocode for the 8-bit |
36 | }; | 37 | + * precision version, and calculate (2 * (2^25 / a)) as an |
37 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 38 | + * integer so we can do the "add one and halve" to round it. |
38 | QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS); | 39 | + * So the 1 << 26 here is correct. |
39 | QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2); | 40 | + */ |
40 | ptw->in_ptw_idx += 1; | 41 | + b = (1 << 26) / a; |
41 | - ptw->in_secure = false; | 42 | + r = (b + 1) >> 1; |
42 | ptw->in_space = ARMSS_NonSecure; | 43 | + assert(4096 <= r && r < 8192); |
44 | + return r; | ||
45 | +} | ||
46 | + | ||
47 | /* | ||
48 | * Common wrapper to call recip_estimate | ||
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
51 | * callee. | ||
52 | */ | ||
53 | |||
54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, | ||
56 | + bool increasedprecision) | ||
57 | { | ||
58 | uint32_t scaled, estimate; | ||
59 | uint64_t result_frac; | ||
60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
61 | } | ||
43 | } | 62 | } |
44 | 63 | ||
45 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, | 64 | - /* scaled = UInt('1':fraction<51:44>) */ |
46 | 65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | |
47 | ptw->in_s1_is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; | 66 | - estimate = recip_estimate(scaled); |
48 | ptw->in_mmu_idx = ipa_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; | 67 | + if (increasedprecision) { |
49 | - ptw->in_secure = ipa_secure; | 68 | + /* scaled = UInt('1':fraction<51:41>) */ |
50 | ptw->in_space = ipa_space; | 69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); |
51 | ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx); | 70 | + estimate = recip_estimate_incprec(scaled); |
52 | 71 | + } else { | |
53 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, | 72 | + /* scaled = UInt('1':fraction<51:44>) */ |
54 | { | 73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); |
55 | S1Translate ptw = { | 74 | + estimate = recip_estimate(scaled); |
56 | .in_mmu_idx = mmu_idx, | 75 | + } |
57 | - .in_secure = is_secure, | 76 | |
58 | .in_space = arm_secure_to_space(is_secure), | 77 | result_exp = exp_off - *exp; |
59 | }; | 78 | - result_frac = deposit64(0, 44, 8, estimate); |
60 | return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); | 79 | + if (increasedprecision) { |
61 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address, | 80 | + result_frac = deposit64(0, 40, 12, estimate); |
81 | + } else { | ||
82 | + result_frac = deposit64(0, 44, 8, estimate); | ||
83 | + } | ||
84 | if (result_exp == 0) { | ||
85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); | ||
86 | } else if (result_exp == -1) { | ||
87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
62 | } | 88 | } |
63 | 89 | ||
64 | ptw.in_space = ss; | 90 | f64_frac = call_recip_estimate(&f16_exp, 29, |
65 | - ptw.in_secure = arm_space_is_secure(ss); | 91 | - ((uint64_t) f16_frac) << (52 - 10)); |
66 | return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); | 92 | + ((uint64_t) f16_frac) << (52 - 10), false); |
67 | } | 93 | |
68 | 94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ | |
69 | @@ -XXX,XX +XXX,XX @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | 95 | f16_val = deposit32(0, 15, 1, f16_sign); |
70 | S1Translate ptw = { | 96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) |
71 | .in_mmu_idx = mmu_idx, | 97 | } |
72 | .in_space = ss, | 98 | |
73 | - .in_secure = arm_space_is_secure(ss), | 99 | f64_frac = call_recip_estimate(&f32_exp, 253, |
74 | .in_debug = true, | 100 | - ((uint64_t) f32_frac) << (52 - 23)); |
75 | }; | 101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); |
76 | GetPhysAddrResult res = {}; | 102 | |
103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ | ||
104 | f32_val = deposit32(0, 31, 1, f32_sign); | ||
105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); | ||
107 | } | ||
108 | |||
109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); | ||
110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); | ||
111 | |||
112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ | ||
113 | f64_val = deposit64(0, 63, 1, f64_sign); | ||
77 | -- | 114 | -- |
78 | 2.34.1 | 115 | 2.34.1 | diff view generated by jsdifflib |
1 | The architecture doesn't permit block descriptors at any arbitrary | 1 | Implement the increased precision variation of FRSQRTE. In the |
---|---|---|---|
2 | level of the page table walk; it depends on the granule size which | 2 | pseudocode this corresponds to the handling of the |
3 | levels are permitted. We implemented only a partial version of this | 3 | "increasedprecision" boolean in the FPRSqrtEstimate() and |
4 | check which assumes that block descriptors are valid at all levels | 4 | RecipSqrtEstimate() functions. |
5 | except level 3, which meant that we wouldn't deliver the Translation | ||
6 | fault for all cases of this sort of guest page table error. | ||
7 | |||
8 | Implement the logic corresponding to the pseudocode | ||
9 | AArch64.DecodeDescriptorType() and AArch64.BlockDescSupported(). | ||
10 | 5 | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
13 | Message-id: 20230807141514.19075-14-peter.maydell@linaro.org | ||
14 | --- | 8 | --- |
15 | target/arm/ptw.c | 25 +++++++++++++++++++++++-- | 9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- |
16 | 1 file changed, 23 insertions(+), 2 deletions(-) | 10 | 1 file changed, 64 insertions(+), 13 deletions(-) |
17 | 11 | ||
18 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/arm/ptw.c | 14 | --- a/target/arm/vfp_helper.c |
21 | +++ b/target/arm/ptw.c | 15 | +++ b/target/arm/vfp_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ static int check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, uint64_t tcr, | 16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) |
23 | return INT_MIN; | 17 | return estimate; |
24 | } | 18 | } |
25 | 19 | ||
26 | +static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds, | 20 | +static int do_recip_sqrt_estimate_incprec(int a) |
27 | + ARMGranuleSize gran, int level) | ||
28 | +{ | 21 | +{ |
29 | + /* | 22 | + /* |
30 | + * See pseudocode AArch64.BlockDescSupported(): block descriptors | 23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate |
31 | + * are not valid at all levels, depending on the page size. | 24 | + * in terms of an infinite-precision floating point calculation of a |
25 | + * square root. We implement this using the same kind of pure integer | ||
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
32 | + */ | 27 | + */ |
33 | + switch (gran) { | 28 | + int64_t b, estimate; |
34 | + case Gran4K: | 29 | |
35 | + return (level == 0 && ds) || level == 1 || level == 2; | 30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) |
36 | + case Gran16K: | 31 | + assert(1024 <= a && a < 4096); |
37 | + return (level == 1 && ds) || level == 2; | 32 | + if (a < 2048) { |
38 | + case Gran64K: | 33 | + a = a * 2 + 1; |
39 | + return (level == 1 && arm_pamax(cpu) == 52) || level == 2; | 34 | + } else { |
40 | + default: | 35 | + a = (a >> 1) << 1; |
41 | + g_assert_not_reached(); | 36 | + a = (a + 1) * 2; |
42 | + } | 37 | + } |
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
43 | + | ||
44 | + assert(4096 <= estimate && estimate < 8192); | ||
45 | + | ||
46 | + return estimate; | ||
43 | +} | 47 | +} |
44 | + | 48 | + |
45 | /** | 49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, |
46 | * get_phys_addr_lpae: perform one stage of page table walk, LPAE format | 50 | + bool increasedprecision) |
47 | * | 51 | { |
48 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, | 52 | int estimate; |
49 | new_descriptor = descriptor; | 53 | uint32_t scaled; |
50 | 54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | |
51 | restart_atomic_update: | 55 | frac = extract64(frac, 0, 51) << 1; |
52 | - if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { | ||
53 | - /* Invalid, or the Reserved level 3 encoding */ | ||
54 | + if (!(descriptor & 1) || | ||
55 | + (!(descriptor & 2) && | ||
56 | + !lpae_block_desc_valid(cpu, param.ds, param.gran, level))) { | ||
57 | + /* Invalid, or a block descriptor at an invalid level */ | ||
58 | goto do_translation_fault; | ||
59 | } | 56 | } |
60 | 57 | ||
58 | - if (*exp & 1) { | ||
59 | - /* scaled = UInt('01':fraction<51:45>) */ | ||
60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
61 | + if (increasedprecision) { | ||
62 | + if (*exp & 1) { | ||
63 | + /* scaled = UInt('01':fraction<51:42>) */ | ||
64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); | ||
65 | + } else { | ||
66 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
90 | + } | ||
91 | } | ||
92 | |||
93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
95 | |||
96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); | ||
97 | |||
98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); | ||
99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); | ||
100 | |||
101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ | ||
102 | val = deposit32(0, 15, 1, f16_sign); | ||
103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
104 | |||
105 | f64_frac = ((uint64_t) f32_frac) << 29; | ||
106 | |||
107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); | ||
108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); | ||
109 | |||
110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ | ||
111 | + /* | ||
112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) | ||
113 | + * or for increased precision | ||
114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) | ||
115 | + */ | ||
116 | val = deposit32(0, 31, 1, f32_sign); | ||
117 | val = deposit32(val, 23, 8, f32_exp); | ||
118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
119 | + if (rpres) { | ||
120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); | ||
121 | + } else { | ||
122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
123 | + } | ||
124 | return make_float32(val); | ||
125 | } | ||
126 | |||
127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
128 | return float64_zero; | ||
129 | } | ||
130 | |||
131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); | ||
132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | ||
133 | |||
134 | /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */ | ||
135 | val = deposit64(0, 61, 1, f64_sign); | ||
61 | -- | 136 | -- |
62 | 2.34.1 | 137 | 2.34.1 | diff view generated by jsdifflib |
1 | In S1_ptw_translate() we set up the ARMMMUFaultInfo if the attempt to | 1 | Now the emulation is complete, we can enable FEAT_RPRES for the 'max' |
---|---|---|---|
2 | translate the page descriptor address into a physical address fails. | 2 | CPU type. |
3 | This used to only be possible if we are doing a stage 2 ptw for that | ||
4 | descriptor address, and so the code always sets fi->stage2 and | ||
5 | fi->s1ptw to true. However, with FEAT_RME it is also possible for | ||
6 | the lookup of the page descriptor address to fail because of a | ||
7 | Granule Protection Check fault. These should not be reported as | ||
8 | stage 2, otherwise arm_deliver_fault() will incorrectly set | ||
9 | HPFAR_EL2. Similarly the s1ptw bit should only be set for stage 2 | ||
10 | faults on stage 1 translation table walks, i.e. not for GPC faults. | ||
11 | |||
12 | Add a comment to the other place where we might detect a | ||
13 | stage2-fault-on-stage-1-ptw, in arm_casq_ptw(), noting why we know in | ||
14 | that case that it must really be a stage 2 fault and not a GPC fault. | ||
15 | 3 | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
18 | Message-id: 20230807141514.19075-3-peter.maydell@linaro.org | ||
19 | --- | 6 | --- |
20 | target/arm/ptw.c | 10 ++++++++-- | 7 | docs/system/arm/emulation.rst | 1 + |
21 | 1 file changed, 8 insertions(+), 2 deletions(-) | 8 | target/arm/tcg/cpu64.c | 1 + |
9 | 2 files changed, 2 insertions(+) | ||
22 | 10 | ||
23 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
24 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/arm/ptw.c | 13 | --- a/docs/system/arm/emulation.rst |
26 | +++ b/target/arm/ptw.c | 14 | +++ b/docs/system/arm/emulation.rst |
27 | @@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
28 | fi->type = ARMFault_GPCFOnWalk; | 16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) |
29 | } | 17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) |
30 | fi->s2addr = addr; | 18 | - FEAT_RNG (Random number generator) |
31 | - fi->stage2 = true; | 19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) |
32 | - fi->s1ptw = true; | 20 | - FEAT_S2FWB (Stage 2 forced Write-Back) |
33 | + fi->stage2 = regime_is_stage2(s2_mmu_idx); | 21 | - FEAT_SB (Speculation Barrier) |
34 | + fi->s1ptw = fi->stage2; | 22 | - FEAT_SEL2 (Secure EL2) |
35 | fi->s1ns = !is_secure; | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
36 | return false; | 24 | index XXXXXXX..XXXXXXX 100644 |
37 | } | 25 | --- a/target/arm/tcg/cpu64.c |
38 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, | 26 | +++ b/target/arm/tcg/cpu64.c |
39 | env->tlb_fi = NULL; | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
40 | 28 | cpu->isar.id_aa64isar1 = t; | |
41 | if (unlikely(flags & TLB_INVALID_MASK)) { | 29 | |
42 | + /* | 30 | t = cpu->isar.id_aa64isar2; |
43 | + * We know this must be a stage 2 fault because the granule | 31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ |
44 | + * protection table does not separately track read and write | 32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ |
45 | + * permission, so all GPC faults are caught in S1_ptw_translate(): | 33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ |
46 | + * we only get here for "readable but not writeable". | 34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ |
47 | + */ | ||
48 | assert(fi->type != ARMFault_None); | ||
49 | fi->s2addr = ptw->out_virt; | ||
50 | fi->stage2 = true; | ||
51 | -- | 35 | -- |
52 | 2.34.1 | 36 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Richard Henderson <richard.henderson@linaro.org> | |
2 | |||
3 | Move ARMFPStatusFlavour to cpu.h with which to index | ||
4 | this array. For now, place the array in an anonymous | ||
5 | union with the existing structures. Adjust the order | ||
6 | of the existing structures to match the enum. | ||
7 | |||
8 | Simplify fpstatus_ptr() using the new array. | ||
9 | |||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- | ||
16 | target/arm/tcg/translate.h | 64 +------------------- | ||
17 | 2 files changed, 70 insertions(+), 113 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/cpu.h | ||
22 | +++ b/target/arm/cpu.h | ||
23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; | ||
24 | |||
25 | typedef struct NVICState NVICState; | ||
26 | |||
27 | +/* | ||
28 | + * Enum for indexing vfp.fp_status[]. | ||
29 | + * | ||
30 | + * FPST_A32: is the "normal" fp status for AArch32 insns | ||
31 | + * FPST_A64: is the "normal" fp status for AArch64 insns | ||
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest, is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/tcg/translate.h | ||
159 | +++ b/target/arm/tcg/translate.h | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | ||
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
162 | } | ||
163 | |||
164 | -/* | ||
165 | - * Enum for argument to fpstatus_ptr(). | ||
166 | - */ | ||
167 | -typedef enum ARMFPStatusFlavour { | ||
168 | - FPST_A32, | ||
169 | - FPST_A64, | ||
170 | - FPST_A32_F16, | ||
171 | - FPST_A64_F16, | ||
172 | - FPST_AH, | ||
173 | - FPST_AH_F16, | ||
174 | - FPST_STD, | ||
175 | - FPST_STD_F16, | ||
176 | -} ARMFPStatusFlavour; | ||
177 | - | ||
178 | /** | ||
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
180 | * | ||
181 | * We have multiple softfloat float_status fields in the Arm CPU state struct | ||
182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | ||
183 | * been set up to point to the requested field in the CPU state struct. | ||
184 | - * The options are: | ||
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
204 | */ | ||
205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
206 | { | ||
207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | ||
208 | - int offset; | ||
209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | ||
210 | |||
211 | - switch (flavour) { | ||
212 | - case FPST_A32: | ||
213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | ||
214 | - break; | ||
215 | - case FPST_A64: | ||
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
236 | - default: | ||
237 | - g_assert_not_reached(); | ||
238 | - } | ||
239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | ||
240 | return statusptr; | ||
241 | } | ||
242 | -- | ||
243 | 2.34.1 | ||
244 | |||
245 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Replace with fp_status[FPST_STD_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 1 - | ||
11 | target/arm/cpu.c | 4 ++-- | ||
12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ | ||
13 | target/arm/vfp_helper.c | 8 ++++---- | ||
14 | 4 files changed, 18 insertions(+), 19 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/cpu.h | ||
19 | +++ b/target/arm/cpu.h | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
21 | float_status ah_fp_status; | ||
22 | float_status ah_fp_status_f16; | ||
23 | float_status standard_fp_status; | ||
24 | - float_status standard_fp_status_f16; | ||
25 | }; | ||
26 | }; | ||
27 | |||
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); | ||
34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | ||
35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); | ||
36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); | ||
37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/tcg/mve_helper.c | ||
51 | +++ b/target/arm/tcg/mve_helper.c | ||
52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
54 | continue; \ | ||
55 | } \ | ||
56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
58 | &env->vfp.standard_fp_status; \ | ||
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
194 | } | ||
195 | if (changed & FPCR_FZ) { | ||
196 | -- | ||
197 | 2.34.1 | ||
198 | |||
199 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | When FEAT_RME is implemented, these bits override the value of | 3 | Replace with fp_status[FPST_STD]. |
4 | CNT[VP]_CTL_EL0.IMASK in Realm and Root state. Move the IRQ state update | 4 | |
5 | into a new gt_update_irq() function and test those bits every time we | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | recompute the IRQ state. | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | 7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org | |
8 | Since we're removing the IRQ state from some trace events, add a new | ||
9 | trace event for gt_update_irq(). | ||
10 | |||
11 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
12 | Message-id: 20230809123706.1842548-7-jean-philippe@linaro.org | ||
13 | [PMM: only register change hook if not USER_ONLY and if TCG] | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
15 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | 9 | --- |
17 | target/arm/cpu.h | 4 +++ | 10 | target/arm/cpu.h | 1 - |
18 | target/arm/cpu.c | 6 ++++ | 11 | target/arm/cpu.c | 8 ++++---- |
19 | target/arm/helper.c | 65 ++++++++++++++++++++++++++++++++++------- | 12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- |
20 | target/arm/trace-events | 7 +++-- | 13 | target/arm/tcg/vec_helper.c | 4 ++-- |
21 | 4 files changed, 68 insertions(+), 14 deletions(-) | 14 | target/arm/vfp_helper.c | 4 ++-- |
15 | 5 files changed, 22 insertions(+), 23 deletions(-) | ||
22 | 16 | ||
23 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
24 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/arm/cpu.h | 19 | --- a/target/arm/cpu.h |
26 | +++ b/target/arm/cpu.h | 20 | +++ b/target/arm/cpu.h |
27 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
28 | }; | 22 | float_status fp_status_f16_a64; |
29 | 23 | float_status ah_fp_status; | |
30 | unsigned int gt_cntfrq_period_ns(ARMCPU *cpu); | 24 | float_status ah_fp_status_f16; |
31 | +void gt_rme_post_el_change(ARMCPU *cpu, void *opaque); | 25 | - float_status standard_fp_status; |
32 | 26 | }; | |
33 | void arm_cpu_post_init(Object *obj); | 27 | }; |
34 | 28 | ||
35 | @@ -XXX,XX +XXX,XX @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) | ||
36 | #define HSTR_TTEE (1 << 16) | ||
37 | #define HSTR_TJDBX (1 << 17) | ||
38 | |||
39 | +#define CNTHCTL_CNTVMASK (1 << 18) | ||
40 | +#define CNTHCTL_CNTPMASK (1 << 19) | ||
41 | + | ||
42 | /* Return the current FPSCR value. */ | ||
43 | uint32_t vfp_get_fpscr(CPUARMState *env); | ||
44 | void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
45 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
46 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
47 | --- a/target/arm/cpu.c | 31 | --- a/target/arm/cpu.c |
48 | +++ b/target/arm/cpu.c | 32 | +++ b/target/arm/cpu.c |
49 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
50 | set_feature(env, ARM_FEATURE_VBAR); | 34 | env->sau.ctrl = 0; |
51 | } | 35 | } |
52 | 36 | ||
53 | +#ifndef CONFIG_USER_ONLY | 37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); |
54 | + if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) { | 38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
55 | + arm_register_el_change_hook(cpu, &gt_rme_post_el_change, 0); | 39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
56 | + } | 40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
57 | +#endif | 41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
58 | + | 42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
59 | register_cp_regs_for_features(cpu); | 43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
60 | arm_cpu_register_gdb_regs_for_features(cpu); | 44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
61 | 45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | |
62 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
63 | index XXXXXXX..XXXXXXX 100644 | 47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
64 | --- a/target/arm/helper.c | 48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
65 | +++ b/target/arm/helper.c | 49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
66 | @@ -XXX,XX +XXX,XX @@ static uint64_t gt_get_countervalue(CPUARMState *env) | 50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
67 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / gt_cntfrq_period_ns(cpu); | 51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/mve_helper.c | ||
54 | +++ b/target/arm/tcg/mve_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
56 | continue; \ | ||
57 | } \ | ||
58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
59 | - &env->vfp.standard_fp_status; \ | ||
60 | + &env->vfp.fp_status[FPST_STD]; \ | ||
61 | if (!(mask & 1)) { \ | ||
62 | /* We need the result but without updating flags */ \ | ||
63 | scratch_fpst = *fpst; \ | ||
64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
65 | continue; \ | ||
66 | } \ | ||
67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
68 | } | 192 | } |
69 | 193 | ||
70 | +static void gt_update_irq(ARMCPU *cpu, int timeridx) | 194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
71 | +{ | 195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
72 | + CPUARMState *env = &cpu->env; | 196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
73 | + uint64_t cnthctl = env->cp15.cnthctl_el2; | 197 | |
74 | + ARMSecuritySpace ss = arm_security_space(env); | 198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
75 | + /* ISTATUS && !IMASK */ | 199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
76 | + int irqstate = (env->cp15.c14_timer[timeridx].ctl & 6) == 4; | 200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
77 | + | ||
78 | + /* | ||
79 | + * If bit CNTHCTL_EL2.CNT[VP]MASK is set, it overrides IMASK. | ||
80 | + * It is RES0 in Secure and NonSecure state. | ||
81 | + */ | ||
82 | + if ((ss == ARMSS_Root || ss == ARMSS_Realm) && | ||
83 | + ((timeridx == GTIMER_VIRT && (cnthctl & CNTHCTL_CNTVMASK)) || | ||
84 | + (timeridx == GTIMER_PHYS && (cnthctl & CNTHCTL_CNTPMASK)))) { | ||
85 | + irqstate = 0; | ||
86 | + } | ||
87 | + | ||
88 | + qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); | ||
89 | + trace_arm_gt_update_irq(timeridx, irqstate); | ||
90 | +} | ||
91 | + | ||
92 | +void gt_rme_post_el_change(ARMCPU *cpu, void *ignored) | ||
93 | +{ | ||
94 | + /* | ||
95 | + * Changing security state between Root and Secure/NonSecure, which may | ||
96 | + * happen when switching EL, can change the effective value of CNTHCTL_EL2 | ||
97 | + * mask bits. Update the IRQ state accordingly. | ||
98 | + */ | ||
99 | + gt_update_irq(cpu, GTIMER_VIRT); | ||
100 | + gt_update_irq(cpu, GTIMER_PHYS); | ||
101 | +} | ||
102 | + | ||
103 | static void gt_recalc_timer(ARMCPU *cpu, int timeridx) | ||
104 | { | ||
105 | ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx]; | ||
106 | @@ -XXX,XX +XXX,XX @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) | ||
107 | /* Note that this must be unsigned 64 bit arithmetic: */ | ||
108 | int istatus = count - offset >= gt->cval; | ||
109 | uint64_t nexttick; | ||
110 | - int irqstate; | ||
111 | |||
112 | gt->ctl = deposit32(gt->ctl, 2, 1, istatus); | ||
113 | |||
114 | - irqstate = (istatus && !(gt->ctl & 2)); | ||
115 | - qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); | ||
116 | - | ||
117 | if (istatus) { | ||
118 | /* Next transition is when count rolls back over to zero */ | ||
119 | nexttick = UINT64_MAX; | ||
120 | @@ -XXX,XX +XXX,XX @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) | ||
121 | } else { | ||
122 | timer_mod(cpu->gt_timer[timeridx], nexttick); | ||
123 | } | ||
124 | - trace_arm_gt_recalc(timeridx, irqstate, nexttick); | ||
125 | + trace_arm_gt_recalc(timeridx, nexttick); | ||
126 | } else { | ||
127 | /* Timer disabled: ISTATUS and timer output always clear */ | ||
128 | gt->ctl &= ~4; | ||
129 | - qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0); | ||
130 | timer_del(cpu->gt_timer[timeridx]); | ||
131 | trace_arm_gt_recalc_disabled(timeridx); | ||
132 | } | ||
133 | + gt_update_irq(cpu, timeridx); | ||
134 | } | 201 | } |
135 | 202 | ||
136 | static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri, | 203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
137 | @@ -XXX,XX +XXX,XX @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, | 204 | index XXXXXXX..XXXXXXX 100644 |
138 | * IMASK toggled: don't need to recalculate, | 205 | --- a/target/arm/vfp_helper.c |
139 | * just set the interrupt line based on ISTATUS | 206 | +++ b/target/arm/vfp_helper.c |
140 | */ | 207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
141 | - int irqstate = (oldval & 4) && !(value & 2); | 208 | uint32_t a32_flags = 0, a64_flags = 0; |
142 | - | 209 | |
143 | - trace_arm_gt_imask_toggle(timeridx, irqstate); | 210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
144 | - qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); | 211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); |
145 | + trace_arm_gt_imask_toggle(timeridx); | 212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
146 | + gt_update_irq(cpu, timeridx); | 213 | /* FZ16 does not generate an input denormal exception. */ |
147 | } | 214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
148 | } | 215 | & ~float_flag_input_denormal_flushed); |
149 | 216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | |
150 | @@ -XXX,XX +XXX,XX @@ static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, | 217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); |
151 | gt_ctl_write(env, ri, GTIMER_VIRT, value); | 218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
152 | } | 219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); |
153 | 220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | |
154 | +static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri, | 221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); |
155 | + uint64_t value) | 222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); |
156 | +{ | 223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); |
157 | + ARMCPU *cpu = env_archcpu(env); | 224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); |
158 | + uint32_t oldval = env->cp15.cnthctl_el2; | ||
159 | + | ||
160 | + raw_write(env, ri, value); | ||
161 | + | ||
162 | + if ((oldval ^ value) & CNTHCTL_CNTVMASK) { | ||
163 | + gt_update_irq(cpu, GTIMER_VIRT); | ||
164 | + } else if ((oldval ^ value) & CNTHCTL_CNTPMASK) { | ||
165 | + gt_update_irq(cpu, GTIMER_PHYS); | ||
166 | + } | ||
167 | +} | ||
168 | + | ||
169 | static void gt_cntvoff_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
170 | uint64_t value) | ||
171 | { | ||
172 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo el2_cp_reginfo[] = { | ||
173 | * reset values as IMPDEF. We choose to reset to 3 to comply with | ||
174 | * both ARMv7 and ARMv8. | ||
175 | */ | ||
176 | - .access = PL2_RW, .resetvalue = 3, | ||
177 | + .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 3, | ||
178 | + .writefn = gt_cnthctl_write, .raw_writefn = raw_write, | ||
179 | .fieldoffset = offsetof(CPUARMState, cp15.cnthctl_el2) }, | ||
180 | { .name = "CNTVOFF_EL2", .state = ARM_CP_STATE_AA64, | ||
181 | .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3, | ||
182 | diff --git a/target/arm/trace-events b/target/arm/trace-events | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/trace-events | ||
185 | +++ b/target/arm/trace-events | ||
186 | @@ -XXX,XX +XXX,XX @@ | ||
187 | # See docs/devel/tracing.rst for syntax documentation. | ||
188 | |||
189 | # helper.c | ||
190 | -arm_gt_recalc(int timer, int irqstate, uint64_t nexttick) "gt recalc: timer %d irqstate %d next tick 0x%" PRIx64 | ||
191 | -arm_gt_recalc_disabled(int timer) "gt recalc: timer %d irqstate 0 timer disabled" | ||
192 | +arm_gt_recalc(int timer, uint64_t nexttick) "gt recalc: timer %d next tick 0x%" PRIx64 | ||
193 | +arm_gt_recalc_disabled(int timer) "gt recalc: timer %d timer disabled" | ||
194 | arm_gt_cval_write(int timer, uint64_t value) "gt_cval_write: timer %d value 0x%" PRIx64 | ||
195 | arm_gt_tval_write(int timer, uint64_t value) "gt_tval_write: timer %d value 0x%" PRIx64 | ||
196 | arm_gt_ctl_write(int timer, uint64_t value) "gt_ctl_write: timer %d value 0x%" PRIx64 | ||
197 | -arm_gt_imask_toggle(int timer, int irqstate) "gt_ctl_write: timer %d IMASK toggle, new irqstate %d" | ||
198 | +arm_gt_imask_toggle(int timer) "gt_ctl_write: timer %d IMASK toggle" | ||
199 | arm_gt_cntvoff_write(uint64_t value) "gt_cntvoff_write: value 0x%" PRIx64 | ||
200 | +arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d" | ||
201 | |||
202 | # kvm.c | ||
203 | kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64 | ||
204 | -- | 225 | -- |
205 | 2.34.1 | 226 | 2.34.1 |
227 | |||
228 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Replace with fp_status[FPST_AH_F16]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 3 +-- | ||
11 | target/arm/cpu.c | 2 +- | ||
12 | target/arm/vfp_helper.c | 10 +++++----- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/cpu.h | ||
18 | +++ b/target/arm/cpu.h | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; | ||
20 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
23 | - * which means we need an ah_fp_status_f16 as well. | ||
24 | + * which means we need an FPST_AH_F16 as well. | ||
25 | * | ||
26 | * To avoid having to transfer exception bits around, we simply | ||
27 | * say that the FPSCR cumulative exception flags are the logical | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_f16_a32; | ||
30 | float_status fp_status_f16_a64; | ||
31 | float_status ah_fp_status; | ||
32 | - float_status ah_fp_status_f16; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
46 | |||
47 | #ifndef CONFIG_USER_ONLY | ||
48 | if (kvm_enabled()) { | ||
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/vfp_helper.c | ||
52 | +++ b/target/arm/vfp_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
56 | /* | ||
57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
59 | * they are used for insns that must not set the cumulative exception bits. | ||
60 | */ | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
68 | } | ||
69 | |||
70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_FZ) { | ||
84 | bool ftz_enabled = val & FPCR_FZ; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
91 | } | ||
92 | if (changed & FPCR_AH) { | ||
93 | bool ah_enabled = val & FPCR_AH; | ||
94 | -- | ||
95 | 2.34.1 | ||
96 | |||
97 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Replace with fp_status[FPST_AH]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.h | 3 +-- | ||
11 | target/arm/cpu.c | 6 +++--- | ||
12 | target/arm/vfp_helper.c | 6 +++--- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/cpu.h | ||
18 | +++ b/target/arm/cpu.h | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; | ||
20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
21 | * using a fixed value for it. | ||
22 | * | ||
23 | - * The ah_fp_status is needed because some insns have different | ||
24 | + * FPST_AH is needed because some insns have different | ||
25 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_a64; | ||
30 | float_status fp_status_f16_a32; | ||
31 | float_status fp_status_f16_a64; | ||
32 | - float_status ah_fp_status; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
73 | } | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_AH) { | ||
84 | -- | ||
85 | 2.34.1 | ||
86 | |||
87 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | On MIPS, kvm_arch_get_default_type() returns a negative value when an | 3 | Replace with fp_status[FPST_A64_F16]. |
4 | error occurred so handle the case. Also, let other machines return | ||
5 | negative values when errors occur and declare returning a negative | ||
6 | value as the correct way to propagate an error that happened when | ||
7 | determining KVM type. | ||
8 | 4 | ||
9 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20230727073134.134102-5-akihiko.odaki@daynix.com | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org |
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
14 | --- | 9 | --- |
15 | accel/kvm/kvm-all.c | 5 +++++ | 10 | target/arm/cpu.h | 1 - |
16 | hw/arm/virt.c | 2 +- | 11 | target/arm/cpu.c | 2 +- |
17 | hw/ppc/spapr.c | 2 +- | 12 | target/arm/tcg/sme_helper.c | 2 +- |
18 | 3 files changed, 7 insertions(+), 2 deletions(-) | 13 | target/arm/tcg/vec_helper.c | 9 ++++----- |
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 14 insertions(+), 16 deletions(-) | ||
19 | 16 | ||
20 | diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
21 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/accel/kvm/kvm-all.c | 19 | --- a/target/arm/cpu.h |
23 | +++ b/accel/kvm/kvm-all.c | 20 | +++ b/target/arm/cpu.h |
24 | @@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms) | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
25 | type = kvm_arch_get_default_type(ms); | 22 | float_status fp_status_a32; |
23 | float_status fp_status_a64; | ||
24 | float_status fp_status_f16_a32; | ||
25 | - float_status fp_status_f16_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | } | ||
26 | } | 61 | } |
27 | 62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | |
28 | + if (type < 0) { | 63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
29 | + ret = -EINVAL; | 64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
30 | + goto err; | 65 | } |
31 | + } | 66 | |
32 | + | 67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
33 | do { | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
34 | ret = kvm_ioctl(s, KVM_CREATE_VM, type); | 69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
35 | } while (ret == -EINTR); | 70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
36 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 71 | float_status *status = &env->vfp.fp_status_a64; |
72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
74 | int negx = 0, negf = 0; | ||
75 | |||
76 | if (is_s) { | ||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | } | ||
79 | } | ||
80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
99 | - | ||
100 | for (i = 0; i < oprsz; i += 16) { | ||
101 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | 104 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/hw/arm/virt.c | 105 | --- a/target/arm/vfp_helper.c |
39 | +++ b/hw/arm/virt.c | 106 | +++ b/target/arm/vfp_helper.c |
40 | @@ -XXX,XX +XXX,XX @@ static int virt_kvm_type(MachineState *ms, const char *type_str) | 107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
41 | "require an IPA range (%d bits) larger than " | 108 | & ~float_flag_input_denormal_flushed); |
42 | "the one supported by the host (%d bits)", | 109 | |
43 | requested_pa_size, max_vm_pa_size); | 110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); |
44 | - exit(1); | 111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) |
45 | + return -1; | 112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) |
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
131 | } | ||
132 | if (changed & FPCR_FZ16) { | ||
133 | bool ftz_enabled = val & FPCR_FZ16; | ||
134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
144 | } | ||
145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
46 | } | 165 | } |
47 | /* | 166 | /* |
48 | * We return the requested PA log size, unless KVM only supports | ||
49 | diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/hw/ppc/spapr.c | ||
52 | +++ b/hw/ppc/spapr.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static int spapr_kvm_type(MachineState *machine, const char *vm_type) | ||
54 | } | ||
55 | |||
56 | error_report("Unknown kvm-type specified '%s'", vm_type); | ||
57 | - exit(1); | ||
58 | + return -1; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | -- | 167 | -- |
63 | 2.34.1 | 168 | 2.34.1 |
64 | 169 | ||
65 | 170 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The returned value was always zero and had no meaning. | 3 | Replace with fp_status[FPST_A32_F16]. |
4 | 4 | ||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230727073134.134102-7-akihiko.odaki@daynix.com | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org |
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | --- | 9 | --- |
11 | accel/kvm/kvm-all.c | 9 ++------- | 10 | target/arm/cpu.h | 1 - |
12 | 1 file changed, 2 insertions(+), 7 deletions(-) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/vec_helper.c | 4 ++-- | ||
13 | target/arm/vfp_helper.c | 14 +++++++------- | ||
14 | 4 files changed, 10 insertions(+), 11 deletions(-) | ||
13 | 15 | ||
14 | diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
15 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/accel/kvm/kvm-all.c | 18 | --- a/target/arm/cpu.h |
17 | +++ b/accel/kvm/kvm-all.c | 19 | +++ b/target/arm/cpu.h |
18 | @@ -XXX,XX +XXX,XX @@ static void *kvm_dirty_ring_reaper_thread(void *data) | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
19 | return NULL; | 21 | struct { |
22 | float_status fp_status_a32; | ||
23 | float_status fp_status_a64; | ||
24 | - float_status fp_status_f16_a32; | ||
25 | }; | ||
26 | }; | ||
27 | |||
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
20 | } | 51 | } |
21 | 52 | ||
22 | -static int kvm_dirty_ring_reaper_init(KVMState *s) | 53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
23 | +static void kvm_dirty_ring_reaper_init(KVMState *s) | 54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
24 | { | 55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
25 | struct KVMDirtyRingReaper *r = &s->reaper; | 56 | |
26 | 57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | |
27 | qemu_thread_create(&r->reaper_thr, "kvm-reaper", | 58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
28 | kvm_dirty_ring_reaper_thread, | 59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
29 | s, QEMU_THREAD_JOINABLE); | ||
30 | - | ||
31 | - return 0; | ||
32 | } | 60 | } |
33 | 61 | ||
34 | static int kvm_dirty_ring_init(KVMState *s) | 62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
35 | @@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms) | 63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/vfp_helper.c | ||
66 | +++ b/target/arm/vfp_helper.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
70 | /* FZ16 does not generate an input denormal exception. */ | ||
71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
73 | & ~float_flag_input_denormal_flushed); | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
75 | & ~float_flag_input_denormal_flushed); | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
77 | */ | ||
78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | } | ||
87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
36 | } | 92 | } |
37 | 93 | if (changed & FPCR_FZ16) { | |
38 | if (s->kvm_dirty_ring_size) { | 94 | bool ftz_enabled = val & FPCR_FZ16; |
39 | - ret = kvm_dirty_ring_reaper_init(s); | 95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
40 | - if (ret) { | 96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
41 | - goto err; | 97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
42 | - } | 98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
43 | + kvm_dirty_ring_reaper_init(s); | 99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
44 | } | 100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
45 | 101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | |
46 | if (kvm_check_extension(kvm_state, KVM_CAP_BINARY_STATS_FD)) { | 102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
115 | softfloat_to_vfp_compare(env, \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) | ||
119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
122 | #undef DO_VFP_cmp | ||
47 | -- | 123 | -- |
48 | 2.34.1 | 124 | 2.34.1 |
49 | 125 | ||
50 | 126 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | A typo, noted in the bug report, resulted in an | 3 | Replace with fp_status[FPST_A64]. |
4 | incorrect write offset. | ||
5 | 4 | ||
6 | Cc: qemu-stable@nongnu.org | ||
7 | Fixes: 7390e0e9ab8 ("target/arm: Implement SME LD1, ST1") | ||
8 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1833 | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
11 | Message-id: 20230818214255.146905-1-richard.henderson@linaro.org | 7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org |
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 9 | --- |
14 | target/arm/tcg/sme_helper.c | 2 +- | 10 | target/arm/cpu.h | 1 - |
15 | 1 file changed, 1 insertion(+), 1 deletion(-) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 10 +++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 15 insertions(+), 16 deletions(-) | ||
16 | 16 | ||
17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/cpu.h | ||
20 | +++ b/target/arm/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
22 | float_status fp_status[FPST_COUNT]; | ||
23 | struct { | ||
24 | float_status fp_status_a32; | ||
25 | - float_status fp_status_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
17 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | 42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/tcg/sme_helper.c | 44 | --- a/target/arm/tcg/sme_helper.c |
20 | +++ b/target/arm/tcg/sme_helper.c | 45 | +++ b/target/arm/tcg/sme_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ static inline void HNAME##_host(void *za, intptr_t off, void *host) \ | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, |
22 | { \ | 47 | * round-to-odd -- see above. |
23 | uint64_t *ptr = za + off; \ | 48 | */ |
24 | HOST(host, ptr[BE]); \ | 49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; |
25 | - HOST(host + 1, ptr[!BE]); \ | 50 | - fpst_std = env->vfp.fp_status_a64; |
26 | + HOST(host + 8, ptr[!BE]); \ | 51 | + fpst_std = env->vfp.fp_status[FPST_A64]; |
27 | } \ | 52 | set_default_nan_mode(true, &fpst_std); |
28 | static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ | 53 | set_default_nan_mode(true, &fpst_f16); |
29 | { \ | 54 | fpst_odd = fpst_std; |
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
62 | } | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
66 | } | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | intptr_t i, oprsz = simd_oprsz(desc); | ||
70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
72 | - float_status *status = &env->vfp.fp_status_a64; | ||
73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
75 | int negx = 0, negf = 0; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | negx = 0x8000800080008000ull; | ||
79 | } | ||
80 | } | ||
81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
96 | */ | ||
97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
98 | |||
99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; | ||
100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
101 | set_default_nan_mode(true, statusp); | ||
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/arm/vfp_helper.c | ||
107 | +++ b/target/arm/vfp_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
110 | & ~float_flag_input_denormal_flushed); | ||
111 | |||
112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); | ||
114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
116 | /* | ||
117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
118 | * be the architecturally up-to-date exception flag information first. | ||
119 | */ | ||
120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
127 | break; | ||
128 | } | ||
129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
134 | } | ||
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
166 | } else { | ||
167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
170 | } | ||
171 | } | ||
30 | -- | 172 | -- |
31 | 2.34.1 | 173 | 2.34.1 |
32 | 174 | ||
33 | 175 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | On MIPS, QEMU requires the KVM_VM_MIPS_VZ type for KVM. Report an error | 3 | Replace with fp_status[FPST_A32]. As this was the last of the |
4 | when that type is unavailable, as other architectures do when an error | 4 | old structures, we can remove the anonymous union and struct. |
5 | occurs during KVM type selection. | ||
6 | 5 | ||
7 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20230727073134.134102-4-akihiko.odaki@daynix.com | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org |
9 | [PMM: tweak to account for change to is_ebf()] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | --- | 11 | --- |
13 | target/mips/kvm.c | 1 + | 12 | target/arm/cpu.h | 7 +------ |
14 | 1 file changed, 1 insertion(+) | 13 | target/arm/cpu.c | 2 +- |
14 | target/arm/tcg/vec_helper.c | 2 +- | ||
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
15 | 17 | ||
16 | diff --git a/target/mips/kvm.c b/target/mips/kvm.c | 18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
17 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/mips/kvm.c | 20 | --- a/target/arm/cpu.h |
19 | +++ b/target/mips/kvm.c | 21 | +++ b/target/arm/cpu.h |
20 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_get_default_type(MachineState *machine) | 22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
23 | uint32_t scratch[8]; | ||
24 | |||
25 | /* There are a number of distinct float control structures. */ | ||
26 | - union { | ||
27 | - float_status fp_status[FPST_COUNT]; | ||
28 | - struct { | ||
29 | - float_status fp_status_a32; | ||
30 | - }; | ||
31 | - }; | ||
32 | + float_status fp_status[FPST_COUNT]; | ||
33 | |||
34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | ||
46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
54 | */ | ||
55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
56 | |||
57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; | ||
59 | set_default_nan_mode(true, statusp); | ||
60 | |||
61 | if (ebf) { | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
87 | } | ||
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
21 | } | 94 | } |
22 | #endif | 95 | if (changed & FPCR_FZ) { |
23 | 96 | bool ftz_enabled = val & FPCR_FZ; | |
24 | + error_report("KVM_VM_MIPS_VZ type is not available"); | 97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); |
25 | return -1; | 98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); |
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
105 | /* | ||
106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
26 | } | 117 | } |
118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
27 | 134 | ||
28 | -- | 135 | -- |
29 | 2.34.1 | 136 | 2.34.1 |
30 | 137 | ||
31 | 138 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Select on index instead of pointer. | ||
4 | No functional change. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ | ||
12 | 1 file changed, 14 insertions(+), 26 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/mve_helper.c | ||
17 | +++ b/target/arm/tcg/mve_helper.c | ||
18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
20 | continue; \ | ||
21 | } \ | ||
22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
23 | - &env->vfp.fp_status[FPST_STD]; \ | ||
24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
25 | if (!(mask & 1)) { \ | ||
26 | /* We need the result but without updating flags */ \ | ||
27 | scratch_fpst = *fpst; \ | ||
28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
29 | r[e] = 0; \ | ||
30 | continue; \ | ||
31 | } \ | ||
32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
33 | - &env->vfp.fp_status[FPST_STD]; \ | ||
34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
35 | if (!(tm & 1)) { \ | ||
36 | /* We need the result but without updating flags */ \ | ||
37 | scratch_fpst = *fpst; \ | ||
38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
40 | continue; \ | ||
41 | } \ | ||
42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
43 | - &env->vfp.fp_status[FPST_STD]; \ | ||
44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
45 | if (!(mask & 1)) { \ | ||
46 | /* We need the result but without updating flags */ \ | ||
47 | scratch_fpst = *fpst; \ | ||
48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
50 | continue; \ | ||
51 | } \ | ||
52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
53 | - &env->vfp.fp_status[FPST_STD]; \ | ||
54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
55 | fpst1 = fpst0; \ | ||
56 | if (!(mask & 1)) { \ | ||
57 | scratch_fpst = *fpst0; \ | ||
58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
60 | continue; \ | ||
61 | } \ | ||
62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
63 | - &env->vfp.fp_status[FPST_STD]; \ | ||
64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
65 | if (!(mask & 1)) { \ | ||
66 | /* We need the result but without updating flags */ \ | ||
67 | scratch_fpst = *fpst; \ | ||
68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
70 | continue; \ | ||
71 | } \ | ||
72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
73 | - &env->vfp.fp_status[FPST_STD]; \ | ||
74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
75 | if (!(mask & 1)) { \ | ||
76 | /* We need the result but without updating flags */ \ | ||
77 | scratch_fpst = *fpst; \ | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
79 | unsigned e; \ | ||
80 | TYPE *m = vm; \ | ||
81 | TYPE ra = (TYPE)ra_in; \ | ||
82 | - float_status *fpst = (ESIZE == 2) ? \ | ||
83 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
84 | - &env->vfp.fp_status[FPST_STD]; \ | ||
85 | + float_status *fpst = \ | ||
86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
88 | if (mask & 1) { \ | ||
89 | TYPE v = m[H##ESIZE(e)]; \ | ||
90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
91 | if ((mask & emask) == 0) { \ | ||
92 | continue; \ | ||
93 | } \ | ||
94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.fp_status[FPST_STD]; \ | ||
96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
97 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
142 | -- | ||
143 | 2.34.1 | ||
144 | |||
145 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Typo applied byte-wise shift instead of double-word shift. | 3 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. |
4 | 4 | ||
5 | Cc: qemu-stable@nongnu.org | ||
6 | Fixes: 631e565450c ("target/arm: Create gen_gvec_[us]sra") | ||
7 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1737 | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Message-id: 20230821022025.397682-1-richard.henderson@linaro.org | 7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 9 | --- |
13 | target/arm/tcg/translate.c | 2 +- | 10 | target/arm/vfp_helper.c | 10 +++++----- |
14 | 1 file changed, 1 insertion(+), 1 deletion(-) | 11 | 1 file changed, 5 insertions(+), 5 deletions(-) |
15 | 12 | ||
16 | diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c | 13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/tcg/translate.c | 15 | --- a/target/arm/vfp_helper.c |
19 | +++ b/target/arm/tcg/translate.c | 16 | +++ b/target/arm/vfp_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, | 17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
21 | .vece = MO_32 }, | 18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
22 | { .fni8 = gen_ssra64_i64, | 19 | { \ |
23 | .fniv = gen_ssra_vec, | 20 | softfloat_to_vfp_compare(env, \ |
24 | - .fno = gen_helper_gvec_ssra_b, | 21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
25 | + .fno = gen_helper_gvec_ssra_d, | 22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
26 | .prefer_i64 = TCG_TARGET_REG_BITS == 64, | 23 | } \ |
27 | .opt_opc = vecop_list, | 24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
28 | .load_dest = true, | 25 | { \ |
26 | softfloat_to_vfp_compare(env, \ | ||
27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | ||
29 | } | ||
30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) | ||
34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) | ||
35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) | ||
36 | #undef DO_VFP_cmp | ||
37 | |||
38 | /* Integer to float and float to integer conversions */ | ||
29 | -- | 39 | -- |
30 | 2.34.1 | 40 | 2.34.1 |
31 | 41 | ||
32 | 42 | diff view generated by jsdifflib |
1 | From: Chris Laplante <chris@laplante.io> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Implement nRF51 DETECT signal in the GPIO peripheral. | 3 | Read the bit from the source, rather than from the proxy via |
4 | get_flush_inputs_to_zero. This makes it clear that it does | ||
5 | not matter which of the float_status structures is used. | ||
4 | 6 | ||
5 | The reference manual makes mention of a per-pin DETECT signal, but these | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | are not exposed to the user. See https://devzone.nordicsemi.com/f/nordic-q-a/39858/gpio-per-pin-detect-signal-available | 8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org |
7 | for more information. Currently, I don't see a reason to model these. | ||
8 | |||
9 | Signed-off-by: Chris Laplante <chris@laplante.io> | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Message-id: 20230728160324.1159090-2-chris@laplante.io | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 11 | --- |
14 | include/hw/gpio/nrf51_gpio.h | 1 + | 12 | target/arm/tcg/vec_helper.c | 12 ++++++------ |
15 | hw/gpio/nrf51_gpio.c | 14 +++++++++++++- | 13 | 1 file changed, 6 insertions(+), 6 deletions(-) |
16 | 2 files changed, 14 insertions(+), 1 deletion(-) | ||
17 | 14 | ||
18 | diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/hw/gpio/nrf51_gpio.h | 17 | --- a/target/arm/tcg/vec_helper.c |
21 | +++ b/include/hw/gpio/nrf51_gpio.h | 18 | +++ b/target/arm/tcg/vec_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ struct NRF51GPIOState { | 19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
23 | uint32_t old_out_connected; | 20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
24 | 21 | ||
25 | qemu_irq output[NRF51_GPIO_PINS]; | 22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
26 | + qemu_irq detect; | 23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
27 | }; | 24 | + env->vfp.fpcr & FPCR_FZ16); |
28 | 25 | } | |
29 | 26 | ||
30 | diff --git a/hw/gpio/nrf51_gpio.c b/hw/gpio/nrf51_gpio.c | 27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
31 | index XXXXXXX..XXXXXXX 100644 | 28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
32 | --- a/hw/gpio/nrf51_gpio.c | ||
33 | +++ b/hw/gpio/nrf51_gpio.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static void update_state(NRF51GPIOState *s) | ||
35 | int pull; | ||
36 | size_t i; | ||
37 | bool connected_out, dir, connected_in, out, in, input; | ||
38 | + bool assert_detect = false; | ||
39 | |||
40 | for (i = 0; i < NRF51_GPIO_PINS; i++) { | ||
41 | pull = pull_value(s->cnf[i]); | ||
42 | @@ -XXX,XX +XXX,XX @@ static void update_state(NRF51GPIOState *s) | ||
43 | qemu_log_mask(LOG_GUEST_ERROR, | ||
44 | "GPIO pin %zu short circuited\n", i); | ||
45 | } | ||
46 | - if (!connected_in) { | ||
47 | + if (connected_in) { | ||
48 | + uint32_t detect_config = extract32(s->cnf[i], 16, 2); | ||
49 | + if ((detect_config == 2) && (in == 1)) { | ||
50 | + assert_detect = true; | ||
51 | + } | ||
52 | + if ((detect_config == 3) && (in == 0)) { | ||
53 | + assert_detect = true; | ||
54 | + } | ||
55 | + } else { | ||
56 | /* | ||
57 | * Floating input: the output stimulates IN if connected, | ||
58 | * otherwise pull-up/pull-down resistors put a value on both | ||
59 | @@ -XXX,XX +XXX,XX @@ static void update_state(NRF51GPIOState *s) | ||
60 | } | 29 | } |
61 | update_output_irq(s, i, connected_out, out); | ||
62 | } | 30 | } |
63 | + | 31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
64 | + qemu_set_irq(s->detect, assert_detect); | 32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
33 | + env->vfp.fpcr & FPCR_FZ16); | ||
65 | } | 34 | } |
66 | 35 | ||
67 | /* | 36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
68 | @@ -XXX,XX +XXX,XX @@ static void nrf51_gpio_init(Object *obj) | 37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
69 | 38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | |
70 | qdev_init_gpio_in(DEVICE(s), nrf51_gpio_set, NRF51_GPIO_PINS); | 39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
71 | qdev_init_gpio_out(DEVICE(s), s->output, NRF51_GPIO_PINS); | 40 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
72 | + qdev_init_gpio_out_named(DEVICE(s), &s->detect, "detect", 1); | 41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
43 | int negx = 0, negf = 0; | ||
44 | |||
45 | if (is_s) { | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
48 | |||
49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
51 | + env->vfp.fpcr & FPCR_FZ16); | ||
73 | } | 52 | } |
74 | 53 | ||
75 | static void nrf51_gpio_class_init(ObjectClass *klass, void *data) | 54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
56 | } | ||
57 | } | ||
58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
60 | + env->vfp.fpcr & FPCR_FZ16); | ||
61 | } | ||
62 | |||
63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
67 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
70 | int negx = 0, negf = 0; | ||
71 | |||
72 | if (is_s) { | ||
76 | -- | 73 | -- |
77 | 2.34.1 | 74 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Jean-Philippe Brucker <jean-philippe@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | When HCR_EL2.E2H is enabled, TLB entries are formed using the EL2&0 | 3 | Sink common code from the callers into do_fmlal |
4 | translation regime, instead of the EL2 translation regime. The TLB VAE2* | 4 | and do_fmlal_idx. Reorder the arguments to minimize |
5 | instructions invalidate the regime that corresponds to the current value | 5 | the re-sorting from the caller's arguments. |
6 | of HCR_EL2.E2H. | ||
7 | 6 | ||
8 | At the moment we only invalidate the EL2 translation regime. This causes | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | problems with RMM, which issues TLBI VAE2IS instructions with | 8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org |
10 | HCR_EL2.E2H enabled. Update vae2_tlbmask() to take HCR_EL2.E2H into | ||
11 | account. | ||
12 | |||
13 | Add vae2_tlbbits() as well, since the top-byte-ignore configuration is | ||
14 | different between the EL2&0 and EL2 regime. | ||
15 | |||
16 | Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
17 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Message-id: 20230809123706.1842548-3-jean-philippe@linaro.org | ||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | --- | 11 | --- |
21 | target/arm/helper.c | 50 ++++++++++++++++++++++++++++++++++++--------- | 12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ |
22 | 1 file changed, 40 insertions(+), 10 deletions(-) | 13 | 1 file changed, 16 insertions(+), 12 deletions(-) |
23 | 14 | ||
24 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
25 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/target/arm/helper.c | 17 | --- a/target/arm/tcg/vec_helper.c |
27 | +++ b/target/arm/helper.c | 18 | +++ b/target/arm/tcg/vec_helper.c |
28 | @@ -XXX,XX +XXX,XX @@ static int vae1_tlbmask(CPUARMState *env) | 19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
29 | return mask; | 20 | * as there is not yet SVE versions that might use blocking. |
21 | */ | ||
22 | |||
23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
25 | +static void do_fmlal(float32 *d, void *vn, void *vm, | ||
26 | + CPUARMState *env, uint32_t desc, | ||
27 | + ARMFPStatusFlavour fpst_idx, | ||
28 | + uint64_t negx, int negf) | ||
29 | { | ||
30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
30 | } | 42 | } |
31 | 43 | ||
32 | +static int vae2_tlbmask(CPUARMState *env) | 44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
33 | +{ | 45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
34 | + uint64_t hcr = arm_hcr_el2_eff(env); | 46 | negx = 0x8000800080008000ull; |
35 | + uint16_t mask; | 47 | } |
36 | + | 48 | } |
37 | + if (hcr & HCR_E2H) { | 49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
38 | + mask = ARMMMUIdxBit_E20_2 | | 50 | - env->vfp.fpcr & FPCR_FZ16); |
39 | + ARMMMUIdxBit_E20_2_PAN | | 51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); |
40 | + ARMMMUIdxBit_E20_0; | ||
41 | + } else { | ||
42 | + mask = ARMMMUIdxBit_E2; | ||
43 | + } | ||
44 | + return mask; | ||
45 | +} | ||
46 | + | ||
47 | /* Return 56 if TBI is enabled, 64 otherwise. */ | ||
48 | static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
49 | uint64_t addr) | ||
50 | @@ -XXX,XX +XXX,XX @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr) | ||
51 | return tlbbits_for_regime(env, mmu_idx, addr); | ||
52 | } | 52 | } |
53 | 53 | ||
54 | +static int vae2_tlbbits(CPUARMState *env, uint64_t addr) | 54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
55 | +{ | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
56 | + uint64_t hcr = arm_hcr_el2_eff(env); | 56 | } |
57 | + ARMMMUIdx mmu_idx; | 57 | } |
58 | + | 58 | |
59 | + /* | 59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
60 | + * Only the regime of the mmu_idx below is significant. | 60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
61 | + * Regime EL2&0 has two ranges with separate TBI configuration, while EL2 | 61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, |
62 | + * only has one. | 62 | + CPUARMState *env, uint32_t desc, |
63 | + */ | 63 | + ARMFPStatusFlavour fpst_idx, |
64 | + if (hcr & HCR_E2H) { | 64 | + uint64_t negx, int negf) |
65 | + mmu_idx = ARMMMUIdx_E20_2; | ||
66 | + } else { | ||
67 | + mmu_idx = ARMMMUIdx_E2; | ||
68 | + } | ||
69 | + | ||
70 | + return tlbbits_for_regime(env, mmu_idx, addr); | ||
71 | +} | ||
72 | + | ||
73 | static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
74 | uint64_t value) | ||
75 | { | 65 | { |
76 | @@ -XXX,XX +XXX,XX @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, | 66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; |
77 | * flush-last-level-only. | 67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
78 | */ | 68 | intptr_t i, oprsz = simd_oprsz(desc); |
79 | CPUState *cs = env_cpu(env); | 69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
80 | - int mask = e2_tlbmask(env); | 70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); |
81 | + int mask = vae2_tlbmask(env); | 71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
82 | uint64_t pageaddr = sextract64(value << 12, 0, 56); | 72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
83 | + int bits = vae2_tlbbits(env, pageaddr); | 73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
84 | 74 | ||
85 | - tlb_flush_page_by_mmuidx(cs, pageaddr, mask); | 75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
86 | + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); | 76 | - env->vfp.fpcr & FPCR_FZ16); |
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
87 | } | 78 | } |
88 | 79 | ||
89 | static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, | 80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
90 | @@ -XXX,XX +XXX,XX @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, | 81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
91 | uint64_t value) | 82 | negx = 0x8000800080008000ull; |
92 | { | 83 | } |
93 | CPUState *cs = env_cpu(env); | 84 | } |
94 | + int mask = vae2_tlbmask(env); | 85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
95 | uint64_t pageaddr = sextract64(value << 12, 0, 56); | 86 | - env->vfp.fpcr & FPCR_FZ16); |
96 | - int bits = tlbbits_for_regime(env, ARMMMUIdx_E2, pageaddr); | 87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); |
97 | + int bits = vae2_tlbbits(env, pageaddr); | ||
98 | |||
99 | - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, | ||
100 | - ARMMMUIdxBit_E2, bits); | ||
101 | + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); | ||
102 | } | 88 | } |
103 | 89 | ||
104 | static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, | 90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
105 | @@ -XXX,XX +XXX,XX @@ static void tlbi_aa64_rvae1is_write(CPUARMState *env, | ||
106 | do_rvae_write(env, value, vae1_tlbmask(env), true); | ||
107 | } | ||
108 | |||
109 | -static int vae2_tlbmask(CPUARMState *env) | ||
110 | -{ | ||
111 | - return ARMMMUIdxBit_E2; | ||
112 | -} | ||
113 | - | ||
114 | static void tlbi_aa64_rvae2_write(CPUARMState *env, | ||
115 | const ARMCPRegInfo *ri, | ||
116 | uint64_t value) | ||
117 | -- | 91 | -- |
118 | 2.34.1 | 92 | 2.34.1 | diff view generated by jsdifflib |