1 | The following changes since commit 6d940eff4734bcb40b1a25f62d7cec5a396f994a: | 1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches |
---|---|---|---|
2 | (plus a fix for a target/alpha latent bug that would otherwise | ||
3 | be revealed by the fpu changes), because 68 patches is already | ||
4 | longer than I prefer to send in at one time... | ||
2 | 5 | ||
3 | Merge tag 'pull-tpm-2022-06-07-1' of https://github.com/stefanberger/qemu-tpm into staging (2022-06-07 19:22:18 -0700) | 6 | thanks |
7 | -- PMM | ||
8 | |||
9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: | ||
10 | |||
11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) | ||
4 | 12 | ||
5 | are available in the Git repository at: | 13 | are available in the Git repository at: |
6 | 14 | ||
7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220609 | 15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 |
8 | 16 | ||
9 | for you to fetch changes up to 414c54d515dba16bfaef643a8acec200c05f229a: | 17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: |
10 | 18 | ||
11 | target/arm: Add ID_AA64SMFR0_EL1 (2022-06-08 19:38:59 +0100) | 19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) |
12 | 20 | ||
13 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
14 | target-arm queue: | 22 | target-arm queue: |
15 | * target/arm: Declare support for FEAT_RASv1p1 | 23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags |
16 | * target/arm: Implement FEAT_DoubleFault | 24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES |
17 | * Fix 'writeable' typos | ||
18 | * xlnx_dp: Implement vblank interrupt | ||
19 | * target/arm: Move page-table-walk code to ptw.c | ||
20 | * target/arm: Preparatory patches for SME support | ||
21 | 25 | ||
22 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
23 | Frederic Konrad (2): | 27 | Peter Maydell (49): |
24 | xlnx_dp: fix the wrong register size | 28 | target/alpha: Don't corrupt error_code with unknown softfloat flags |
25 | xlnx-zynqmp: fix the irq mapping for the display port and its dma | 29 | fpu: Add float_class_denormal |
30 | fpu: Implement float_flag_input_denormal_used | ||
31 | fpu: allow flushing of output denormals to be after rounding | ||
32 | target/arm: Define FPCR AH, FIZ, NEP bits | ||
33 | target/arm: Implement FPCR.FIZ handling | ||
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
26 | 77 | ||
27 | Peter Maydell (3): | 78 | Richard Henderson (19): |
28 | target/arm: Declare support for FEAT_RASv1p1 | 79 | target/arm: Handle FPCR.AH in vector FCMLA |
29 | target/arm: Implement FEAT_DoubleFault | 80 | target/arm: Handle FPCR.AH in FCMLA by index |
30 | Fix 'writeable' typos | 81 | target/arm: Handle FPCR.AH in SVE FCMLA |
82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) | ||
83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) | ||
84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) | ||
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
31 | 98 | ||
32 | Richard Henderson (48): | 99 | docs/system/arm/emulation.rst | 2 + |
33 | target/arm: Move stage_1_mmu_idx decl to internals.h | 100 | include/fpu/softfloat-helpers.h | 11 + |
34 | target/arm: Move get_phys_addr to ptw.c | 101 | include/fpu/softfloat-types.h | 25 ++ |
35 | target/arm: Move get_phys_addr_v5 to ptw.c | 102 | target/arm/cpu-features.h | 10 + |
36 | target/arm: Move get_phys_addr_v6 to ptw.c | 103 | target/arm/cpu.h | 97 +++-- |
37 | target/arm: Move get_phys_addr_pmsav5 to ptw.c | 104 | target/arm/helper.h | 26 ++ |
38 | target/arm: Move get_phys_addr_pmsav7_default to ptw.c | 105 | target/arm/internals.h | 6 + |
39 | target/arm: Move get_phys_addr_pmsav7 to ptw.c | 106 | target/arm/tcg/helper-a64.h | 13 + |
40 | target/arm: Move get_phys_addr_pmsav8 to ptw.c | 107 | target/arm/tcg/helper-sve.h | 120 ++++++ |
41 | target/arm: Move pmsav8_mpu_lookup to ptw.c | 108 | target/arm/tcg/translate-a64.h | 13 + |
42 | target/arm: Move pmsav7_use_background_region to ptw.c | 109 | target/arm/tcg/translate.h | 54 +-- |
43 | target/arm: Move v8m_security_lookup to ptw.c | 110 | target/arm/tcg/vec_internal.h | 35 ++ |
44 | target/arm: Move m_is_{ppb,system}_region to ptw.c | 111 | target/mips/fpu_helper.h | 6 + |
45 | target/arm: Move get_level1_table_address to ptw.c | 112 | fpu/softfloat.c | 66 +++- |
46 | target/arm: Move combine_cacheattrs and subroutines to ptw.c | 113 | target/alpha/cpu.c | 7 + |
47 | target/arm: Move get_phys_addr_lpae to ptw.c | 114 | target/alpha/fpu_helper.c | 2 + |
48 | target/arm: Move arm_{ldl,ldq}_ptw to ptw.c | 115 | target/arm/cpu.c | 46 +-- |
49 | target/arm: Move {arm_s1_, }regime_using_lpae_format to tlb_helper.c | 116 | target/arm/helper.c | 2 +- |
50 | target/arm: Move arm_pamax, pamax_map into ptw.c | 117 | target/arm/tcg/cpu64.c | 2 + |
51 | target/arm: Move get_S1prot, get_S2prot to ptw.c | 118 | target/arm/tcg/helper-a64.c | 151 ++++---- |
52 | target/arm: Move check_s2_mmu_setup to ptw.c | 119 | target/arm/tcg/hflags.c | 13 + |
53 | target/arm: Move aa32_va_parameters to ptw.c | 120 | target/arm/tcg/mve_helper.c | 44 +-- |
54 | target/arm: Move ap_to_tw_prot etc to ptw.c | 121 | target/arm/tcg/sme_helper.c | 4 +- |
55 | target/arm: Move regime_is_user to ptw.c | 122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- |
56 | target/arm: Move regime_ttbr to ptw.c | 123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- |
57 | target/arm: Move regime_translation_disabled to ptw.c | 124 | target/arm/tcg/translate-sve.c | 193 +++++++--- |
58 | target/arm: Move arm_cpu_get_phys_page_attrs_debug to ptw.c | 125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ |
59 | target/arm: Move stage_1_mmu_idx, arm_stage1_mmu_idx to ptw.c | 126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- |
60 | target/arm: Pass CPUARMState to arm_ld[lq]_ptw | 127 | target/hppa/fpu_helper.c | 11 + |
61 | target/arm: Rename TBFLAG_A64 ZCR_LEN to VL | 128 | target/i386/tcg/fpu_helper.c | 8 + |
62 | linux-user/aarch64: Introduce sve_vq | 129 | target/mips/msa.c | 9 + |
63 | target/arm: Remove route_to_el2 check from sve_exception_el | 130 | target/ppc/cpu_init.c | 3 + |
64 | target/arm: Remove fp checks from sve_exception_el | 131 | target/rx/cpu.c | 8 + |
65 | target/arm: Add el_is_in_host | 132 | target/sh4/cpu.c | 8 + |
66 | target/arm: Use el_is_in_host for sve_zcr_len_for_el | 133 | target/tricore/helper.c | 1 + |
67 | target/arm: Use el_is_in_host for sve_exception_el | 134 | tests/fp/fp-bench.c | 1 + |
68 | target/arm: Hoist arm_is_el2_enabled check in sve_exception_el | 135 | fpu/softfloat-parts.c.inc | 127 +++++-- |
69 | target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset | 136 | 37 files changed, 2325 insertions(+), 709 deletions(-) |
70 | target/arm: Merge aarch64_sve_zcr_get_valid_len into caller | ||
71 | target/arm: Use uint32_t instead of bitmap for sve vq's | ||
72 | target/arm: Rename sve_zcr_len_for_el to sve_vqm1_for_el | ||
73 | target/arm: Split out load/store primitives to sve_ldst_internal.h | ||
74 | target/arm: Export sve contiguous ldst support functions | ||
75 | target/arm: Move expand_pred_b to vec_internal.h | ||
76 | target/arm: Use expand_pred_b in mve_helper.c | ||
77 | target/arm: Move expand_pred_h to vec_internal.h | ||
78 | target/arm: Export bfdotadd from vec_helper.c | ||
79 | target/arm: Add isar_feature_aa64_sme | ||
80 | target/arm: Add ID_AA64SMFR0_EL1 | ||
81 | |||
82 | Sai Pavan Boddu (2): | ||
83 | xlnx_dp: Introduce a vblank signal | ||
84 | xlnx_dp: Fix the interrupt disable logic | ||
85 | |||
86 | docs/interop/vhost-user.rst | 2 +- | ||
87 | docs/specs/vmgenid.txt | 4 +- | ||
88 | docs/system/arm/emulation.rst | 2 + | ||
89 | hw/scsi/mfi.h | 2 +- | ||
90 | include/hw/display/xlnx_dp.h | 12 +- | ||
91 | linux-user/aarch64/target_prctl.h | 20 +- | ||
92 | target/arm/cpu.h | 66 +- | ||
93 | target/arm/internals.h | 45 +- | ||
94 | target/arm/kvm_arm.h | 7 +- | ||
95 | target/arm/sve_ldst_internal.h | 221 +++ | ||
96 | target/arm/translate-a64.h | 2 +- | ||
97 | target/arm/translate.h | 2 +- | ||
98 | target/arm/vec_internal.h | 28 +- | ||
99 | target/i386/hvf/vmcs.h | 2 +- | ||
100 | target/i386/hvf/vmx.h | 2 +- | ||
101 | accel/hvf/hvf-accel-ops.c | 4 +- | ||
102 | accel/kvm/kvm-all.c | 4 +- | ||
103 | accel/tcg/user-exec.c | 6 +- | ||
104 | hw/acpi/ghes.c | 2 +- | ||
105 | hw/arm/xlnx-zynqmp.c | 4 +- | ||
106 | hw/display/xlnx_dp.c | 49 +- | ||
107 | hw/intc/arm_gicv3_cpuif.c | 2 +- | ||
108 | hw/intc/arm_gicv3_dist.c | 2 +- | ||
109 | hw/intc/arm_gicv3_redist.c | 4 +- | ||
110 | hw/intc/riscv_aclint.c | 2 +- | ||
111 | hw/intc/riscv_aplic.c | 2 +- | ||
112 | hw/pci/shpc.c | 2 +- | ||
113 | hw/sparc64/sun4u_iommu.c | 2 +- | ||
114 | hw/timer/sse-timer.c | 2 +- | ||
115 | linux-user/aarch64/signal.c | 4 +- | ||
116 | target/arm/arch_dump.c | 2 +- | ||
117 | target/arm/cpu.c | 5 +- | ||
118 | target/arm/cpu64.c | 120 +- | ||
119 | target/arm/gdbstub.c | 2 +- | ||
120 | target/arm/gdbstub64.c | 2 +- | ||
121 | target/arm/helper.c | 2742 ++----------------------------------- | ||
122 | target/arm/hvf/hvf.c | 4 +- | ||
123 | target/arm/kvm64.c | 47 +- | ||
124 | target/arm/mve_helper.c | 6 +- | ||
125 | target/arm/ptw.c | 2540 ++++++++++++++++++++++++++++++++++ | ||
126 | target/arm/sve_helper.c | 232 +--- | ||
127 | target/arm/tlb_helper.c | 26 + | ||
128 | target/arm/translate-a64.c | 2 +- | ||
129 | target/arm/translate-sve.c | 2 +- | ||
130 | target/arm/vec_helper.c | 28 +- | ||
131 | target/i386/cpu-sysemu.c | 2 +- | ||
132 | target/s390x/ioinst.c | 2 +- | ||
133 | python/qemu/machine/machine.py | 2 +- | ||
134 | target/arm/meson.build | 1 + | ||
135 | tests/tcg/x86_64/system/boot.S | 2 +- | ||
136 | 50 files changed, 3240 insertions(+), 3037 deletions(-) | ||
137 | create mode 100644 target/arm/sve_ldst_internal.h | ||
138 | create mode 100644 target/arm/ptw.c | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | In do_cvttq() we set env->error_code with what is supposed to be a | ||
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
1 | 9 | ||
10 | This is harmless now, but when we add new floating point exception | ||
11 | flags to softfloat it will cause problems. Add an else clause to the | ||
12 | if-ladder to make it ignore any float exception flags it doesn't care | ||
13 | about. | ||
14 | |||
15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha | ||
16 | when the commit adding float_flag_input_denormal_used lands. | ||
17 | |||
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
23 | --- | ||
24 | target/alpha/fpu_helper.c | 2 ++ | ||
25 | 1 file changed, 2 insertions(+) | ||
26 | |||
27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/alpha/fpu_helper.c | ||
30 | +++ b/target/alpha/fpu_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) | ||
32 | exc = FPCR_INV; | ||
33 | } else if (exc & float_flag_inexact) { | ||
34 | exc = FPCR_INE; | ||
35 | + } else { | ||
36 | + exc = 0; | ||
37 | } | ||
38 | } | ||
39 | env->error_code = exc; | ||
40 | -- | ||
41 | 2.34.1 | ||
42 | |||
43 | diff view generated by jsdifflib |
1 | From: Sai Pavan Boddu <sai.pavan.boddu@xilinx.com> | 1 | Currently in softfloat we canonicalize input denormals and so the |
---|---|---|---|
2 | code that implements floating point operations does not need to care | ||
3 | whether the input value was originally normal or denormal. However, | ||
4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: | ||
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
2 | 9 | ||
3 | Add a periodic timer which raises vblank at a frequency of 30Hz. | 10 | So we need to track that the input was a non-squashed denormal. To |
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
4 | 17 | ||
5 | Note that this is a migration compatibility break for the | 18 | There should be no behavioural change in this commit. |
6 | xlnx-zcu102 board type. | ||
7 | 19 | ||
8 | Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com> | ||
9 | Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
10 | Signed-off-by: Frederic Konrad <fkonrad@amd.com> | ||
11 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
12 | Message-id: 20220601172353.3220232-3-fkonrad@xilinx.com | ||
13 | Changes by fkonrad: | ||
14 | - Switched to transaction-based ptimer API. | ||
15 | - Added the DP_INT_VBLNK_START macro. | ||
16 | Signed-off-by: Frederic Konrad <fkonrad@amd.com> | ||
17 | [PMM: bump vmstate version, add commit message note about | ||
18 | compat break] | ||
19 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | 22 | --- |
22 | include/hw/display/xlnx_dp.h | 3 +++ | 23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- |
23 | hw/display/xlnx_dp.c | 30 ++++++++++++++++++++++++++---- | 24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- |
24 | 2 files changed, 29 insertions(+), 4 deletions(-) | 25 | 2 files changed, 54 insertions(+), 18 deletions(-) |
25 | 26 | ||
26 | diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h | 27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c |
27 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/include/hw/display/xlnx_dp.h | 29 | --- a/fpu/softfloat.c |
29 | +++ b/include/hw/display/xlnx_dp.h | 30 | +++ b/fpu/softfloat.c |
30 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, |
31 | #include "hw/dma/xlnx_dpdma.h" | 32 | /* |
32 | #include "audio/audio.h" | 33 | * Classify a floating point number. Everything above float_class_qnan |
33 | #include "qom/object.h" | 34 | * is a NaN so cls >= float_class_qnan is any NaN. |
34 | +#include "hw/ptimer.h" | 35 | + * |
35 | 36 | + * Note that we canonicalize denormals, so most code should treat | |
36 | #define AUD_CHBUF_MAX_DEPTH (32 * KiB) | 37 | + * class_normal and class_denormal identically. |
37 | #define MAX_QEMU_BUFFER_SIZE (4 * KiB) | 38 | */ |
38 | @@ -XXX,XX +XXX,XX @@ struct XlnxDPState { | 39 | |
39 | */ | 40 | typedef enum __attribute__ ((__packed__)) { |
40 | DPCDState *dpcd; | 41 | float_class_unclassified, |
41 | I2CDDCState *edid; | 42 | float_class_zero, |
42 | + | 43 | float_class_normal, |
43 | + ptimer_state *vblank; | 44 | + float_class_denormal, /* input was a non-squashed denormal */ |
45 | float_class_inf, | ||
46 | float_class_qnan, /* all NaNs from here */ | ||
47 | float_class_snan, | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { | ||
49 | enum { | ||
50 | float_cmask_zero = float_cmask(float_class_zero), | ||
51 | float_cmask_normal = float_cmask(float_class_normal), | ||
52 | + float_cmask_denormal = float_cmask(float_class_denormal), | ||
53 | float_cmask_inf = float_cmask(float_class_inf), | ||
54 | float_cmask_qnan = float_cmask(float_class_qnan), | ||
55 | float_cmask_snan = float_cmask(float_class_snan), | ||
56 | |||
57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, | ||
58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, | ||
59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, | ||
44 | }; | 60 | }; |
45 | 61 | ||
46 | #define TYPE_XLNX_DP "xlnx.v-dp" | 62 | /* Flags for parts_minmax. */ |
47 | diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c | 63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) |
48 | index XXXXXXX..XXXXXXX 100644 | 64 | return c == float_class_qnan; |
49 | --- a/hw/display/xlnx_dp.c | ||
50 | +++ b/hw/display/xlnx_dp.c | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | #define DP_TX_N_AUD (0x032C >> 2) | ||
53 | #define DP_TX_AUDIO_EXT_DATA(n) ((0x0330 + 4 * n) >> 2) | ||
54 | #define DP_INT_STATUS (0x03A0 >> 2) | ||
55 | +#define DP_INT_VBLNK_START (1 << 13) | ||
56 | #define DP_INT_MASK (0x03A4 >> 2) | ||
57 | #define DP_INT_EN (0x03A8 >> 2) | ||
58 | #define DP_INT_DS (0x03AC >> 2) | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef enum DPVideoFmt DPVideoFmt; | ||
60 | |||
61 | static const VMStateDescription vmstate_dp = { | ||
62 | .name = TYPE_XLNX_DP, | ||
63 | - .version_id = 1, | ||
64 | + .version_id = 2, | ||
65 | .fields = (VMStateField[]){ | ||
66 | VMSTATE_UINT32_ARRAY(core_registers, XlnxDPState, | ||
67 | DP_CORE_REG_ARRAY_SIZE), | ||
68 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_dp = { | ||
69 | DP_VBLEND_REG_ARRAY_SIZE), | ||
70 | VMSTATE_UINT32_ARRAY(audio_registers, XlnxDPState, | ||
71 | DP_AUDIO_REG_ARRAY_SIZE), | ||
72 | + VMSTATE_PTIMER(vblank, XlnxDPState), | ||
73 | VMSTATE_END_OF_LIST() | ||
74 | } | ||
75 | }; | ||
76 | |||
77 | +#define DP_VBLANK_PTIMER_POLICY (PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD | \ | ||
78 | + PTIMER_POLICY_CONTINUOUS_TRIGGER | \ | ||
79 | + PTIMER_POLICY_NO_IMMEDIATE_TRIGGER) | ||
80 | + | ||
81 | static void xlnx_dp_update_irq(XlnxDPState *s); | ||
82 | |||
83 | static uint64_t xlnx_dp_audio_read(void *opaque, hwaddr offset, unsigned size) | ||
84 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_write(void *opaque, hwaddr offset, uint64_t value, | ||
85 | break; | ||
86 | case DP_TRANSMITTER_ENABLE: | ||
87 | s->core_registers[offset] = value & 0x01; | ||
88 | + ptimer_transaction_begin(s->vblank); | ||
89 | + if (value & 0x1) { | ||
90 | + ptimer_run(s->vblank, 0); | ||
91 | + } else { | ||
92 | + ptimer_stop(s->vblank); | ||
93 | + } | ||
94 | + ptimer_transaction_commit(s->vblank); | ||
95 | break; | ||
96 | case DP_FORCE_SCRAMBLER_RESET: | ||
97 | /* | ||
98 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_update_display(void *opaque) | ||
99 | return; | ||
100 | } | ||
101 | |||
102 | - s->core_registers[DP_INT_STATUS] |= (1 << 13); | ||
103 | - xlnx_dp_update_irq(s); | ||
104 | - | ||
105 | xlnx_dpdma_trigger_vsync_irq(s->dpdma); | ||
106 | |||
107 | /* | ||
108 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_finalize(Object *obj) | ||
109 | fifo8_destroy(&s->rx_fifo); | ||
110 | } | 65 | } |
111 | 66 | ||
112 | +static void vblank_hit(void *opaque) | 67 | +/* |
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
113 | +{ | 72 | +{ |
114 | + XlnxDPState *s = XLNX_DP(opaque); | 73 | + return !(cmask & ~float_cmask_anynorm); |
115 | + | ||
116 | + s->core_registers[DP_INT_STATUS] |= DP_INT_VBLNK_START; | ||
117 | + xlnx_dp_update_irq(s); | ||
118 | +} | 74 | +} |
119 | + | 75 | + |
120 | static void xlnx_dp_realize(DeviceState *dev, Error **errp) | 76 | +static inline bool is_anynorm(FloatClass c) |
77 | +{ | ||
78 | + return float_cmask(c) & float_cmask_anynorm; | ||
79 | +} | ||
80 | + | ||
81 | /* | ||
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
121 | { | 169 | { |
122 | XlnxDPState *s = XLNX_DP(dev); | 170 | - if (likely(p->cls == float_class_normal)) { |
123 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_realize(DeviceState *dev, Error **errp) | 171 | + if (likely(is_anynorm(p->cls))) { |
124 | &as); | 172 | parts_uncanon_normal(p, s, fmt); |
125 | AUD_set_volume_out(s->amixer_output_stream, 0, 255, 255); | 173 | } else { |
126 | xlnx_dp_audio_activate(s); | 174 | switch (p->cls) { |
127 | + s->vblank = ptimer_init(vblank_hit, s, DP_VBLANK_PTIMER_POLICY); | 175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
128 | + ptimer_transaction_begin(s->vblank); | 176 | |
129 | + ptimer_set_freq(s->vblank, 30); | 177 | if (a->sign != b_sign) { |
130 | + ptimer_transaction_commit(s->vblank); | 178 | /* Subtraction */ |
131 | } | 179 | - if (likely(ab_mask == float_cmask_normal)) { |
132 | 180 | + if (likely(cmask_is_only_normals(ab_mask))) { | |
133 | static void xlnx_dp_reset(DeviceState *dev) | 181 | if (parts_sub_normal(a, b)) { |
182 | return a; | ||
183 | } | ||
184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
185 | } | ||
186 | } else { | ||
187 | /* Addition */ | ||
188 | - if (likely(ab_mask == float_cmask_normal)) { | ||
189 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
190 | parts_add_normal(a, b); | ||
191 | return a; | ||
192 | } | ||
193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
194 | } | ||
195 | |||
196 | if (b->cls == float_class_zero) { | ||
197 | - g_assert(a->cls == float_class_normal); | ||
198 | + g_assert(is_anynorm(a->cls)); | ||
199 | return a; | ||
200 | } | ||
201 | |||
202 | g_assert(a->cls == float_class_zero); | ||
203 | - g_assert(b->cls == float_class_normal); | ||
204 | + g_assert(is_anynorm(b->cls)); | ||
205 | return_b: | ||
206 | b->sign = b_sign; | ||
207 | return b; | ||
208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
210 | bool sign = a->sign ^ b->sign; | ||
211 | |||
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
134 | -- | 351 | -- |
135 | 2.25.1 | 352 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to |
---|---|---|---|
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
2 | 5 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Note that we only set this when an input denormal is actually used by |
4 | Message-id: 20220604040607.269301-16-richard.henderson@linaro.org | 7 | the operation: if the operation results in Invalid Operation or |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Divide By Zero or the result is a NaN because some other input was a |
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | input_denormal_used. | ||
11 | |||
12 | We mostly do not need to adjust the hardfloat codepaths to deal with | ||
13 | this flag, because almost all hardfloat operations are already gated | ||
14 | on the input not being a denormal, and will fall back to softfloat | ||
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 21 | --- |
8 | target/arm/ptw.h | 10 ++ | 22 | include/fpu/softfloat-types.h | 7 ++++ |
9 | target/arm/helper.c | 416 +------------------------------------------- | 23 | fpu/softfloat.c | 38 +++++++++++++++++--- |
10 | target/arm/ptw.c | 411 +++++++++++++++++++++++++++++++++++++++++++ | 24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- |
11 | 3 files changed, 429 insertions(+), 408 deletions(-) | 25 | 3 files changed, 107 insertions(+), 6 deletions(-) |
12 | 26 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h |
14 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 29 | --- a/include/fpu/softfloat-types.h |
16 | +++ b/target/arm/ptw.h | 30 | +++ b/include/fpu/softfloat-types.h |
17 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ enum { |
18 | 32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ | |
19 | #ifndef CONFIG_USER_ONLY | 33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ |
20 | 34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ | |
21 | +extern const uint8_t pamax_map[7]; | 35 | + /* |
22 | + | 36 | + * An input was denormal and we used it (without flushing it to zero). |
23 | uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 37 | + * Not set if we do not actually use the denormal input (e.g. |
24 | ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi); | 38 | + * because some other input was a NaN, or because the operation |
25 | uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 39 | + * wasn't actually carried out (divide-by-zero; invalid)) |
26 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 40 | + */ |
27 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 41 | + float_flag_input_denormal_used = 0x4000, |
42 | }; | ||
43 | |||
44 | /* | ||
45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/fpu/softfloat.c | ||
48 | +++ b/fpu/softfloat.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
50 | float16_params_ahp.frac_size + 1); | ||
51 | break; | ||
52 | |||
53 | - case float_class_normal: | ||
54 | case float_class_denormal: | ||
55 | + float_raise(float_flag_input_denormal_used, s); | ||
56 | + break; | ||
57 | + case float_class_normal: | ||
58 | case float_class_zero: | ||
59 | break; | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) | ||
62 | if (is_nan(a->cls)) { | ||
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
28 | } | 68 | } |
29 | 69 | ||
30 | +ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | 70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) |
31 | + ARMMMUIdx mmu_idx); | 71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) |
32 | +bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | 72 | if (is_nan(a->cls)) { |
33 | + int inputsize, int stride, int outputsize); | 73 | parts_return_nan(a, s); |
34 | +int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0); | 74 | } |
35 | +int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 75 | + if (a->cls == float_class_denormal) { |
36 | + int ap, int ns, int xn, int pxn); | 76 | + float_raise(float_flag_input_denormal_used, s); |
37 | + | 77 | + } |
38 | bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 78 | } |
39 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 79 | |
40 | bool s1_is_el0, | 80 | #define parts_float_to_float(P, S) \ |
41 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, |
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
42 | index XXXXXXX..XXXXXXX 100644 | 142 | index XXXXXXX..XXXXXXX 100644 |
43 | --- a/target/arm/helper.c | 143 | --- a/fpu/softfloat-parts.c.inc |
44 | +++ b/target/arm/helper.c | 144 | +++ b/fpu/softfloat-parts.c.inc |
45 | @@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | 145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
46 | * @xn: XN (execute-never) bits | 146 | bool b_sign = b->sign ^ subtract; |
47 | * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 | 147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
48 | */ | 148 | |
49 | -static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) | 149 | + /* |
50 | +int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) | 150 | + * For addition and subtraction, we will consume an |
51 | { | 151 | + * input denormal unless the other input is a NaN. |
52 | int prot = 0; | 152 | + */ |
53 | 153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == | |
54 | @@ -XXX,XX +XXX,XX @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) | 154 | + float_cmask_denormal) { |
55 | * @xn: XN (execute-never) bit | 155 | + float_raise(float_flag_input_denormal_used, s); |
56 | * @pxn: PXN (privileged execute-never) bit | 156 | + } |
57 | */ | 157 | + |
58 | -static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 158 | if (a->sign != b_sign) { |
59 | - int ap, int ns, int xn, int pxn) | 159 | /* Subtraction */ |
60 | +int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 160 | if (likely(cmask_is_only_normals(ab_mask))) { |
61 | + int ap, int ns, int xn, int pxn) | 161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, |
62 | { | 162 | if (likely(cmask_is_only_normals(ab_mask))) { |
63 | bool is_user = regime_is_user(env, mmu_idx); | 163 | FloatPartsW tmp; |
64 | int prot_rw, user_rw; | 164 | |
65 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 165 | + if (ab_mask & float_cmask_denormal) { |
66 | * Returns true if the suggested S2 translation parameters are OK and | 166 | + float_raise(float_flag_input_denormal_used, s); |
67 | * false otherwise. | ||
68 | */ | ||
69 | -static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
70 | - int inputsize, int stride, int outputsize) | ||
71 | +bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
72 | + int inputsize, int stride, int outputsize) | ||
73 | { | ||
74 | const int grainsize = stride + 3; | ||
75 | int startsizecheck; | ||
76 | @@ -XXX,XX +XXX,XX @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
77 | #endif /* !CONFIG_USER_ONLY */ | ||
78 | |||
79 | /* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ | ||
80 | -static const uint8_t pamax_map[] = { | ||
81 | +const uint8_t pamax_map[] = { | ||
82 | [0] = 32, | ||
83 | [1] = 36, | ||
84 | [2] = 40, | ||
85 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | ||
86 | } | ||
87 | |||
88 | #ifndef CONFIG_USER_ONLY | ||
89 | -static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | ||
90 | - ARMMMUIdx mmu_idx) | ||
91 | +ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | ||
92 | + ARMMMUIdx mmu_idx) | ||
93 | { | ||
94 | uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; | ||
95 | uint32_t el = regime_el(env, mmu_idx); | ||
96 | @@ -XXX,XX +XXX,XX @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | ||
97 | }; | ||
98 | } | ||
99 | |||
100 | -/** | ||
101 | - * get_phys_addr_lpae: perform one stage of page table walk, LPAE format | ||
102 | - * | ||
103 | - * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, | ||
104 | - * prot and page_size may not be filled in, and the populated fsr value provides | ||
105 | - * information on why the translation aborted, in the format of a long-format | ||
106 | - * DFSR/IFSR fault register, with the following caveats: | ||
107 | - * * the WnR bit is never set (the caller must do this). | ||
108 | - * | ||
109 | - * @env: CPUARMState | ||
110 | - * @address: virtual address to get physical address for | ||
111 | - * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH | ||
112 | - * @mmu_idx: MMU index indicating required translation regime | ||
113 | - * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table | ||
114 | - * walk), must be true if this is stage 2 of a stage 1+2 walk for an | ||
115 | - * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. | ||
116 | - * @phys_ptr: set to the physical address corresponding to the virtual address | ||
117 | - * @attrs: set to the memory transaction attributes to use | ||
118 | - * @prot: set to the permissions for the page containing phys_ptr | ||
119 | - * @page_size_ptr: set to the size of the page containing phys_ptr | ||
120 | - * @fi: set to fault info if the translation fails | ||
121 | - * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | ||
122 | - */ | ||
123 | -bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
124 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
125 | - bool s1_is_el0, | ||
126 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
127 | - target_ulong *page_size_ptr, | ||
128 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
129 | -{ | ||
130 | - ARMCPU *cpu = env_archcpu(env); | ||
131 | - CPUState *cs = CPU(cpu); | ||
132 | - /* Read an LPAE long-descriptor translation table. */ | ||
133 | - ARMFaultType fault_type = ARMFault_Translation; | ||
134 | - uint32_t level; | ||
135 | - ARMVAParameters param; | ||
136 | - uint64_t ttbr; | ||
137 | - hwaddr descaddr, indexmask, indexmask_grainsize; | ||
138 | - uint32_t tableattrs; | ||
139 | - target_ulong page_size; | ||
140 | - uint32_t attrs; | ||
141 | - int32_t stride; | ||
142 | - int addrsize, inputsize, outputsize; | ||
143 | - TCR *tcr = regime_tcr(env, mmu_idx); | ||
144 | - int ap, ns, xn, pxn; | ||
145 | - uint32_t el = regime_el(env, mmu_idx); | ||
146 | - uint64_t descaddrmask; | ||
147 | - bool aarch64 = arm_el_is_aa64(env, el); | ||
148 | - bool guarded = false; | ||
149 | - | ||
150 | - /* TODO: This code does not support shareability levels. */ | ||
151 | - if (aarch64) { | ||
152 | - int ps; | ||
153 | - | ||
154 | - param = aa64_va_parameters(env, address, mmu_idx, | ||
155 | - access_type != MMU_INST_FETCH); | ||
156 | - level = 0; | ||
157 | - | ||
158 | - /* | ||
159 | - * If TxSZ is programmed to a value larger than the maximum, | ||
160 | - * or smaller than the effective minimum, it is IMPLEMENTATION | ||
161 | - * DEFINED whether we behave as if the field were programmed | ||
162 | - * within bounds, or if a level 0 Translation fault is generated. | ||
163 | - * | ||
164 | - * With FEAT_LVA, fault on less than minimum becomes required, | ||
165 | - * so our choice is to always raise the fault. | ||
166 | - */ | ||
167 | - if (param.tsz_oob) { | ||
168 | - fault_type = ARMFault_Translation; | ||
169 | - goto do_fault; | ||
170 | - } | ||
171 | - | ||
172 | - addrsize = 64 - 8 * param.tbi; | ||
173 | - inputsize = 64 - param.tsz; | ||
174 | - | ||
175 | - /* | ||
176 | - * Bound PS by PARANGE to find the effective output address size. | ||
177 | - * ID_AA64MMFR0 is a read-only register so values outside of the | ||
178 | - * supported mappings can be considered an implementation error. | ||
179 | - */ | ||
180 | - ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); | ||
181 | - ps = MIN(ps, param.ps); | ||
182 | - assert(ps < ARRAY_SIZE(pamax_map)); | ||
183 | - outputsize = pamax_map[ps]; | ||
184 | - } else { | ||
185 | - param = aa32_va_parameters(env, address, mmu_idx); | ||
186 | - level = 1; | ||
187 | - addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32); | ||
188 | - inputsize = addrsize - param.tsz; | ||
189 | - outputsize = 40; | ||
190 | - } | ||
191 | - | ||
192 | - /* | ||
193 | - * We determined the region when collecting the parameters, but we | ||
194 | - * have not yet validated that the address is valid for the region. | ||
195 | - * Extract the top bits and verify that they all match select. | ||
196 | - * | ||
197 | - * For aa32, if inputsize == addrsize, then we have selected the | ||
198 | - * region by exclusion in aa32_va_parameters and there is no more | ||
199 | - * validation to do here. | ||
200 | - */ | ||
201 | - if (inputsize < addrsize) { | ||
202 | - target_ulong top_bits = sextract64(address, inputsize, | ||
203 | - addrsize - inputsize); | ||
204 | - if (-top_bits != param.select) { | ||
205 | - /* The gap between the two regions is a Translation fault */ | ||
206 | - fault_type = ARMFault_Translation; | ||
207 | - goto do_fault; | ||
208 | - } | ||
209 | - } | ||
210 | - | ||
211 | - if (param.using64k) { | ||
212 | - stride = 13; | ||
213 | - } else if (param.using16k) { | ||
214 | - stride = 11; | ||
215 | - } else { | ||
216 | - stride = 9; | ||
217 | - } | ||
218 | - | ||
219 | - /* Note that QEMU ignores shareability and cacheability attributes, | ||
220 | - * so we don't need to do anything with the SH, ORGN, IRGN fields | ||
221 | - * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the | ||
222 | - * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently | ||
223 | - * implement any ASID-like capability so we can ignore it (instead | ||
224 | - * we will always flush the TLB any time the ASID is changed). | ||
225 | - */ | ||
226 | - ttbr = regime_ttbr(env, mmu_idx, param.select); | ||
227 | - | ||
228 | - /* Here we should have set up all the parameters for the translation: | ||
229 | - * inputsize, ttbr, epd, stride, tbi | ||
230 | - */ | ||
231 | - | ||
232 | - if (param.epd) { | ||
233 | - /* Translation table walk disabled => Translation fault on TLB miss | ||
234 | - * Note: This is always 0 on 64-bit EL2 and EL3. | ||
235 | - */ | ||
236 | - goto do_fault; | ||
237 | - } | ||
238 | - | ||
239 | - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { | ||
240 | - /* The starting level depends on the virtual address size (which can | ||
241 | - * be up to 48 bits) and the translation granule size. It indicates | ||
242 | - * the number of strides (stride bits at a time) needed to | ||
243 | - * consume the bits of the input address. In the pseudocode this is: | ||
244 | - * level = 4 - RoundUp((inputsize - grainsize) / stride) | ||
245 | - * where their 'inputsize' is our 'inputsize', 'grainsize' is | ||
246 | - * our 'stride + 3' and 'stride' is our 'stride'. | ||
247 | - * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying: | ||
248 | - * = 4 - (inputsize - stride - 3 + stride - 1) / stride | ||
249 | - * = 4 - (inputsize - 4) / stride; | ||
250 | - */ | ||
251 | - level = 4 - (inputsize - 4) / stride; | ||
252 | - } else { | ||
253 | - /* For stage 2 translations the starting level is specified by the | ||
254 | - * VTCR_EL2.SL0 field (whose interpretation depends on the page size) | ||
255 | - */ | ||
256 | - uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2); | ||
257 | - uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1); | ||
258 | - uint32_t startlevel; | ||
259 | - bool ok; | ||
260 | - | ||
261 | - /* SL2 is RES0 unless DS=1 & 4kb granule. */ | ||
262 | - if (param.ds && stride == 9 && sl2) { | ||
263 | - if (sl0 != 0) { | ||
264 | - level = 0; | ||
265 | - fault_type = ARMFault_Translation; | ||
266 | - goto do_fault; | ||
267 | - } | ||
268 | - startlevel = -1; | ||
269 | - } else if (!aarch64 || stride == 9) { | ||
270 | - /* AArch32 or 4KB pages */ | ||
271 | - startlevel = 2 - sl0; | ||
272 | - | ||
273 | - if (cpu_isar_feature(aa64_st, cpu)) { | ||
274 | - startlevel &= 3; | ||
275 | - } | ||
276 | - } else { | ||
277 | - /* 16KB or 64KB pages */ | ||
278 | - startlevel = 3 - sl0; | ||
279 | - } | ||
280 | - | ||
281 | - /* Check that the starting level is valid. */ | ||
282 | - ok = check_s2_mmu_setup(cpu, aarch64, startlevel, | ||
283 | - inputsize, stride, outputsize); | ||
284 | - if (!ok) { | ||
285 | - fault_type = ARMFault_Translation; | ||
286 | - goto do_fault; | ||
287 | - } | ||
288 | - level = startlevel; | ||
289 | - } | ||
290 | - | ||
291 | - indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3); | ||
292 | - indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level))); | ||
293 | - | ||
294 | - /* Now we can extract the actual base address from the TTBR */ | ||
295 | - descaddr = extract64(ttbr, 0, 48); | ||
296 | - | ||
297 | - /* | ||
298 | - * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR. | ||
299 | - * | ||
300 | - * Otherwise, if the base address is out of range, raise AddressSizeFault. | ||
301 | - * In the pseudocode, this is !IsZero(baseregister<47:outputsize>), | ||
302 | - * but we've just cleared the bits above 47, so simplify the test. | ||
303 | - */ | ||
304 | - if (outputsize > 48) { | ||
305 | - descaddr |= extract64(ttbr, 2, 4) << 48; | ||
306 | - } else if (descaddr >> outputsize) { | ||
307 | - level = 0; | ||
308 | - fault_type = ARMFault_AddressSize; | ||
309 | - goto do_fault; | ||
310 | - } | ||
311 | - | ||
312 | - /* | ||
313 | - * We rely on this masking to clear the RES0 bits at the bottom of the TTBR | ||
314 | - * and also to mask out CnP (bit 0) which could validly be non-zero. | ||
315 | - */ | ||
316 | - descaddr &= ~indexmask; | ||
317 | - | ||
318 | - /* | ||
319 | - * For AArch32, the address field in the descriptor goes up to bit 39 | ||
320 | - * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0 | ||
321 | - * or an AddressSize fault is raised. So for v8 we extract those SBZ | ||
322 | - * bits as part of the address, which will be checked via outputsize. | ||
323 | - * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2; | ||
324 | - * the highest bits of a 52-bit output are placed elsewhere. | ||
325 | - */ | ||
326 | - if (param.ds) { | ||
327 | - descaddrmask = MAKE_64BIT_MASK(0, 50); | ||
328 | - } else if (arm_feature(env, ARM_FEATURE_V8)) { | ||
329 | - descaddrmask = MAKE_64BIT_MASK(0, 48); | ||
330 | - } else { | ||
331 | - descaddrmask = MAKE_64BIT_MASK(0, 40); | ||
332 | - } | ||
333 | - descaddrmask &= ~indexmask_grainsize; | ||
334 | - | ||
335 | - /* Secure accesses start with the page table in secure memory and | ||
336 | - * can be downgraded to non-secure at any step. Non-secure accesses | ||
337 | - * remain non-secure. We implement this by just ORing in the NSTable/NS | ||
338 | - * bits at each step. | ||
339 | - */ | ||
340 | - tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4); | ||
341 | - for (;;) { | ||
342 | - uint64_t descriptor; | ||
343 | - bool nstable; | ||
344 | - | ||
345 | - descaddr |= (address >> (stride * (4 - level))) & indexmask; | ||
346 | - descaddr &= ~7ULL; | ||
347 | - nstable = extract32(tableattrs, 4, 1); | ||
348 | - descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi); | ||
349 | - if (fi->type != ARMFault_None) { | ||
350 | - goto do_fault; | ||
351 | - } | ||
352 | - | ||
353 | - if (!(descriptor & 1) || | ||
354 | - (!(descriptor & 2) && (level == 3))) { | ||
355 | - /* Invalid, or the Reserved level 3 encoding */ | ||
356 | - goto do_fault; | ||
357 | - } | ||
358 | - | ||
359 | - descaddr = descriptor & descaddrmask; | ||
360 | - | ||
361 | - /* | ||
362 | - * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] | ||
363 | - * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of | ||
364 | - * descaddr are in [9:8]. Otherwise, if descaddr is out of range, | ||
365 | - * raise AddressSizeFault. | ||
366 | - */ | ||
367 | - if (outputsize > 48) { | ||
368 | - if (param.ds) { | ||
369 | - descaddr |= extract64(descriptor, 8, 2) << 50; | ||
370 | - } else { | ||
371 | - descaddr |= extract64(descriptor, 12, 4) << 48; | ||
372 | - } | ||
373 | - } else if (descaddr >> outputsize) { | ||
374 | - fault_type = ARMFault_AddressSize; | ||
375 | - goto do_fault; | ||
376 | - } | ||
377 | - | ||
378 | - if ((descriptor & 2) && (level < 3)) { | ||
379 | - /* Table entry. The top five bits are attributes which may | ||
380 | - * propagate down through lower levels of the table (and | ||
381 | - * which are all arranged so that 0 means "no effect", so | ||
382 | - * we can gather them up by ORing in the bits at each level). | ||
383 | - */ | ||
384 | - tableattrs |= extract64(descriptor, 59, 5); | ||
385 | - level++; | ||
386 | - indexmask = indexmask_grainsize; | ||
387 | - continue; | ||
388 | - } | ||
389 | - /* | ||
390 | - * Block entry at level 1 or 2, or page entry at level 3. | ||
391 | - * These are basically the same thing, although the number | ||
392 | - * of bits we pull in from the vaddr varies. Note that although | ||
393 | - * descaddrmask masks enough of the low bits of the descriptor | ||
394 | - * to give a correct page or table address, the address field | ||
395 | - * in a block descriptor is smaller; so we need to explicitly | ||
396 | - * clear the lower bits here before ORing in the low vaddr bits. | ||
397 | - */ | ||
398 | - page_size = (1ULL << ((stride * (4 - level)) + 3)); | ||
399 | - descaddr &= ~(page_size - 1); | ||
400 | - descaddr |= (address & (page_size - 1)); | ||
401 | - /* Extract attributes from the descriptor */ | ||
402 | - attrs = extract64(descriptor, 2, 10) | ||
403 | - | (extract64(descriptor, 52, 12) << 10); | ||
404 | - | ||
405 | - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
406 | - /* Stage 2 table descriptors do not include any attribute fields */ | ||
407 | - break; | ||
408 | - } | ||
409 | - /* Merge in attributes from table descriptors */ | ||
410 | - attrs |= nstable << 3; /* NS */ | ||
411 | - guarded = extract64(descriptor, 50, 1); /* GP */ | ||
412 | - if (param.hpd) { | ||
413 | - /* HPD disables all the table attributes except NSTable. */ | ||
414 | - break; | ||
415 | - } | ||
416 | - attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ | ||
417 | - /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 | ||
418 | - * means "force PL1 access only", which means forcing AP[1] to 0. | ||
419 | - */ | ||
420 | - attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ | ||
421 | - attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ | ||
422 | - break; | ||
423 | - } | ||
424 | - /* Here descaddr is the final physical address, and attributes | ||
425 | - * are all in attrs. | ||
426 | - */ | ||
427 | - fault_type = ARMFault_AccessFlag; | ||
428 | - if ((attrs & (1 << 8)) == 0) { | ||
429 | - /* Access flag */ | ||
430 | - goto do_fault; | ||
431 | - } | ||
432 | - | ||
433 | - ap = extract32(attrs, 4, 2); | ||
434 | - | ||
435 | - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
436 | - ns = mmu_idx == ARMMMUIdx_Stage2; | ||
437 | - xn = extract32(attrs, 11, 2); | ||
438 | - *prot = get_S2prot(env, ap, xn, s1_is_el0); | ||
439 | - } else { | ||
440 | - ns = extract32(attrs, 3, 1); | ||
441 | - xn = extract32(attrs, 12, 1); | ||
442 | - pxn = extract32(attrs, 11, 1); | ||
443 | - *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); | ||
444 | - } | ||
445 | - | ||
446 | - fault_type = ARMFault_Permission; | ||
447 | - if (!(*prot & (1 << access_type))) { | ||
448 | - goto do_fault; | ||
449 | - } | ||
450 | - | ||
451 | - if (ns) { | ||
452 | - /* The NS bit will (as required by the architecture) have no effect if | ||
453 | - * the CPU doesn't support TZ or this is a non-secure translation | ||
454 | - * regime, because the attribute will already be non-secure. | ||
455 | - */ | ||
456 | - txattrs->secure = false; | ||
457 | - } | ||
458 | - /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */ | ||
459 | - if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { | ||
460 | - arm_tlb_bti_gp(txattrs) = true; | ||
461 | - } | ||
462 | - | ||
463 | - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
464 | - cacheattrs->is_s2_format = true; | ||
465 | - cacheattrs->attrs = extract32(attrs, 0, 4); | ||
466 | - } else { | ||
467 | - /* Index into MAIR registers for cache attributes */ | ||
468 | - uint8_t attrindx = extract32(attrs, 0, 3); | ||
469 | - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; | ||
470 | - assert(attrindx <= 7); | ||
471 | - cacheattrs->is_s2_format = false; | ||
472 | - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); | ||
473 | - } | ||
474 | - | ||
475 | - /* | ||
476 | - * For FEAT_LPA2 and effective DS, the SH field in the attributes | ||
477 | - * was re-purposed for output address bits. The SH attribute in | ||
478 | - * that case comes from TCR_ELx, which we extracted earlier. | ||
479 | - */ | ||
480 | - if (param.ds) { | ||
481 | - cacheattrs->shareability = param.sh; | ||
482 | - } else { | ||
483 | - cacheattrs->shareability = extract32(attrs, 6, 2); | ||
484 | - } | ||
485 | - | ||
486 | - *phys_ptr = descaddr; | ||
487 | - *page_size_ptr = page_size; | ||
488 | - return false; | ||
489 | - | ||
490 | -do_fault: | ||
491 | - fi->type = fault_type; | ||
492 | - fi->level = level; | ||
493 | - /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ | ||
494 | - fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 || | ||
495 | - mmu_idx == ARMMMUIdx_Stage2_S); | ||
496 | - fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; | ||
497 | - return true; | ||
498 | -} | ||
499 | - | ||
500 | hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
501 | MemTxAttrs *attrs) | ||
502 | { | ||
503 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
504 | index XXXXXXX..XXXXXXX 100644 | ||
505 | --- a/target/arm/ptw.c | ||
506 | +++ b/target/arm/ptw.c | ||
507 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
508 | return true; | ||
509 | } | ||
510 | |||
511 | +/** | ||
512 | + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format | ||
513 | + * | ||
514 | + * Returns false if the translation was successful. Otherwise, phys_ptr, | ||
515 | + * attrs, prot and page_size may not be filled in, and the populated fsr | ||
516 | + * value provides information on why the translation aborted, in the format | ||
517 | + * of a long-format DFSR/IFSR fault register, with the following caveat: | ||
518 | + * the WnR bit is never set (the caller must do this). | ||
519 | + * | ||
520 | + * @env: CPUARMState | ||
521 | + * @address: virtual address to get physical address for | ||
522 | + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH | ||
523 | + * @mmu_idx: MMU index indicating required translation regime | ||
524 | + * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page | ||
525 | + * table walk), must be true if this is stage 2 of a stage 1+2 | ||
526 | + * walk for an EL0 access. If @mmu_idx is anything else, | ||
527 | + * @s1_is_el0 is ignored. | ||
528 | + * @phys_ptr: set to the physical address corresponding to the virtual address | ||
529 | + * @attrs: set to the memory transaction attributes to use | ||
530 | + * @prot: set to the permissions for the page containing phys_ptr | ||
531 | + * @page_size_ptr: set to the size of the page containing phys_ptr | ||
532 | + * @fi: set to fault info if the translation fails | ||
533 | + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | ||
534 | + */ | ||
535 | +bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
536 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
537 | + bool s1_is_el0, | ||
538 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
539 | + target_ulong *page_size_ptr, | ||
540 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
541 | +{ | ||
542 | + ARMCPU *cpu = env_archcpu(env); | ||
543 | + CPUState *cs = CPU(cpu); | ||
544 | + /* Read an LPAE long-descriptor translation table. */ | ||
545 | + ARMFaultType fault_type = ARMFault_Translation; | ||
546 | + uint32_t level; | ||
547 | + ARMVAParameters param; | ||
548 | + uint64_t ttbr; | ||
549 | + hwaddr descaddr, indexmask, indexmask_grainsize; | ||
550 | + uint32_t tableattrs; | ||
551 | + target_ulong page_size; | ||
552 | + uint32_t attrs; | ||
553 | + int32_t stride; | ||
554 | + int addrsize, inputsize, outputsize; | ||
555 | + TCR *tcr = regime_tcr(env, mmu_idx); | ||
556 | + int ap, ns, xn, pxn; | ||
557 | + uint32_t el = regime_el(env, mmu_idx); | ||
558 | + uint64_t descaddrmask; | ||
559 | + bool aarch64 = arm_el_is_aa64(env, el); | ||
560 | + bool guarded = false; | ||
561 | + | ||
562 | + /* TODO: This code does not support shareability levels. */ | ||
563 | + if (aarch64) { | ||
564 | + int ps; | ||
565 | + | ||
566 | + param = aa64_va_parameters(env, address, mmu_idx, | ||
567 | + access_type != MMU_INST_FETCH); | ||
568 | + level = 0; | ||
569 | + | ||
570 | + /* | ||
571 | + * If TxSZ is programmed to a value larger than the maximum, | ||
572 | + * or smaller than the effective minimum, it is IMPLEMENTATION | ||
573 | + * DEFINED whether we behave as if the field were programmed | ||
574 | + * within bounds, or if a level 0 Translation fault is generated. | ||
575 | + * | ||
576 | + * With FEAT_LVA, fault on less than minimum becomes required, | ||
577 | + * so our choice is to always raise the fault. | ||
578 | + */ | ||
579 | + if (param.tsz_oob) { | ||
580 | + fault_type = ARMFault_Translation; | ||
581 | + goto do_fault; | ||
582 | + } | 167 | + } |
583 | + | 168 | + |
584 | + addrsize = 64 - 8 * param.tbi; | 169 | frac_mulw(&tmp, a, b); |
585 | + inputsize = 64 - param.tsz; | 170 | frac_truncjam(a, &tmp); |
586 | + | 171 | |
587 | + /* | 172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, |
588 | + * Bound PS by PARANGE to find the effective output address size. | 173 | } |
589 | + * ID_AA64MMFR0 is a read-only register so values outside of the | 174 | |
590 | + * supported mappings can be considered an implementation error. | 175 | /* Multiply by 0 or Inf */ |
591 | + */ | 176 | + if (ab_mask & float_cmask_denormal) { |
592 | + ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); | 177 | + float_raise(float_flag_input_denormal_used, s); |
593 | + ps = MIN(ps, param.ps); | 178 | + } |
594 | + assert(ps < ARRAY_SIZE(pamax_map)); | 179 | + |
595 | + outputsize = pamax_map[ps]; | 180 | if (ab_mask & float_cmask_inf) { |
596 | + } else { | 181 | a->cls = float_class_inf; |
597 | + param = aa32_va_parameters(env, address, mmu_idx); | 182 | a->sign = sign; |
598 | + level = 1; | 183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, |
599 | + addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32); | 184 | if (flags & float_muladd_negate_result) { |
600 | + inputsize = addrsize - param.tsz; | 185 | a->sign ^= 1; |
601 | + outputsize = 40; | 186 | } |
602 | + } | ||
603 | + | 187 | + |
604 | + /* | 188 | + /* |
605 | + * We determined the region when collecting the parameters, but we | 189 | + * All result types except for "return the default NaN |
606 | + * have not yet validated that the address is valid for the region. | 190 | + * because this is an Invalid Operation" go through here; |
607 | + * Extract the top bits and verify that they all match select. | 191 | + * this matches the set of cases where we consumed a |
608 | + * | 192 | + * denormal input. |
609 | + * For aa32, if inputsize == addrsize, then we have selected the | ||
610 | + * region by exclusion in aa32_va_parameters and there is no more | ||
611 | + * validation to do here. | ||
612 | + */ | 193 | + */ |
613 | + if (inputsize < addrsize) { | 194 | + if (abc_mask & float_cmask_denormal) { |
614 | + target_ulong top_bits = sextract64(address, inputsize, | 195 | + float_raise(float_flag_input_denormal_used, s); |
615 | + addrsize - inputsize); | 196 | + } |
616 | + if (-top_bits != param.select) { | 197 | return a; |
617 | + /* The gap between the two regions is a Translation fault */ | 198 | |
618 | + fault_type = ARMFault_Translation; | 199 | return_sub_zero: |
619 | + goto do_fault; | 200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, |
201 | bool sign = a->sign ^ b->sign; | ||
202 | |||
203 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
204 | + if (ab_mask & float_cmask_denormal) { | ||
205 | + float_raise(float_flag_input_denormal_used, s); | ||
620 | + } | 206 | + } |
621 | + } | 207 | a->sign = sign; |
622 | + | 208 | a->exp -= b->exp + frac_div(a, b); |
623 | + if (param.using64k) { | 209 | return a; |
624 | + stride = 13; | 210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, |
625 | + } else if (param.using16k) { | 211 | return parts_pick_nan(a, b, s); |
626 | + stride = 11; | 212 | } |
627 | + } else { | 213 | |
628 | + stride = 9; | 214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { |
629 | + } | 215 | + float_raise(float_flag_input_denormal_used, s); |
630 | + | 216 | + } |
631 | + /* | 217 | + |
632 | + * Note that QEMU ignores shareability and cacheability attributes, | 218 | a->sign = sign; |
633 | + * so we don't need to do anything with the SH, ORGN, IRGN fields | 219 | |
634 | + * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the | 220 | /* Inf / X */ |
635 | + * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently | 221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, |
636 | + * implement any ASID-like capability so we can ignore it (instead | 222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
637 | + * we will always flush the TLB any time the ASID is changed). | 223 | |
638 | + */ | 224 | if (likely(cmask_is_only_normals(ab_mask))) { |
639 | + ttbr = regime_ttbr(env, mmu_idx, param.select); | 225 | + if (ab_mask & float_cmask_denormal) { |
640 | + | 226 | + float_raise(float_flag_input_denormal_used, s); |
641 | + /* | 227 | + } |
642 | + * Here we should have set up all the parameters for the translation: | 228 | frac_modrem(a, b, mod_quot); |
643 | + * inputsize, ttbr, epd, stride, tbi | 229 | return a; |
644 | + */ | 230 | } |
645 | + | 231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, |
646 | + if (param.epd) { | 232 | return a; |
647 | + /* | 233 | } |
648 | + * Translation table walk disabled => Translation fault on TLB miss | 234 | |
649 | + * Note: This is always 0 on 64-bit EL2 and EL3. | 235 | + if (ab_mask & float_cmask_denormal) { |
650 | + */ | 236 | + float_raise(float_flag_input_denormal_used, s); |
651 | + goto do_fault; | 237 | + } |
652 | + } | 238 | + |
653 | + | 239 | /* N % Inf; 0 % N */ |
654 | + if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { | 240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); |
655 | + /* | 241 | return a; |
656 | + * The starting level depends on the virtual address size (which can | 242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, |
657 | + * be up to 48 bits) and the translation granule size. It indicates | 243 | if (unlikely(a->cls != float_class_normal)) { |
658 | + * the number of strides (stride bits at a time) needed to | 244 | switch (a->cls) { |
659 | + * consume the bits of the input address. In the pseudocode this is: | 245 | case float_class_denormal: |
660 | + * level = 4 - RoundUp((inputsize - grainsize) / stride) | 246 | + if (!a->sign) { |
661 | + * where their 'inputsize' is our 'inputsize', 'grainsize' is | 247 | + /* -ve denormal will be InvalidOperation */ |
662 | + * our 'stride + 3' and 'stride' is our 'stride'. | 248 | + float_raise(float_flag_input_denormal_used, status); |
663 | + * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying: | ||
664 | + * = 4 - (inputsize - stride - 3 + stride - 1) / stride | ||
665 | + * = 4 - (inputsize - 4) / stride; | ||
666 | + */ | ||
667 | + level = 4 - (inputsize - 4) / stride; | ||
668 | + } else { | ||
669 | + /* | ||
670 | + * For stage 2 translations the starting level is specified by the | ||
671 | + * VTCR_EL2.SL0 field (whose interpretation depends on the page size) | ||
672 | + */ | ||
673 | + uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2); | ||
674 | + uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1); | ||
675 | + uint32_t startlevel; | ||
676 | + bool ok; | ||
677 | + | ||
678 | + /* SL2 is RES0 unless DS=1 & 4kb granule. */ | ||
679 | + if (param.ds && stride == 9 && sl2) { | ||
680 | + if (sl0 != 0) { | ||
681 | + level = 0; | ||
682 | + fault_type = ARMFault_Translation; | ||
683 | + goto do_fault; | ||
684 | + } | 249 | + } |
685 | + startlevel = -1; | 250 | break; |
686 | + } else if (!aarch64 || stride == 9) { | 251 | case float_class_snan: |
687 | + /* AArch32 or 4KB pages */ | 252 | case float_class_qnan: |
688 | + startlevel = 2 - sl0; | 253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, |
689 | + | 254 | if ((flags & (minmax_isnum | minmax_isnumber)) |
690 | + if (cpu_isar_feature(aa64_st, cpu)) { | 255 | && !(ab_mask & float_cmask_snan) |
691 | + startlevel &= 3; | 256 | && (ab_mask & ~float_cmask_qnan)) { |
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
692 | + } | 259 | + } |
693 | + } else { | 260 | return is_nan(a->cls) ? b : a; |
694 | + /* 16KB or 64KB pages */ | 261 | } |
695 | + startlevel = 3 - sl0; | 262 | |
263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
264 | return parts_pick_nan(a, b, s); | ||
265 | } | ||
266 | |||
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
696 | + } | 280 | + } |
697 | + | 281 | + |
698 | + /* Check that the starting level is valid. */ | 282 | if (a->sign != b->sign) { |
699 | + ok = check_s2_mmu_setup(cpu, aarch64, startlevel, | 283 | goto a_sign; |
700 | + inputsize, stride, outputsize); | 284 | } |
701 | + if (!ok) { | 285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, |
702 | + fault_type = ARMFault_Translation; | 286 | return float_relation_unordered; |
703 | + goto do_fault; | 287 | } |
704 | + } | 288 | |
705 | + level = startlevel; | 289 | + if (ab_mask & float_cmask_denormal) { |
706 | + } | 290 | + float_raise(float_flag_input_denormal_used, s); |
707 | + | 291 | + } |
708 | + indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3); | 292 | + |
709 | + indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level))); | 293 | if (ab_mask & float_cmask_zero) { |
710 | + | 294 | if (ab_mask == float_cmask_zero) { |
711 | + /* Now we can extract the actual base address from the TTBR */ | 295 | return float_relation_equal; |
712 | + descaddr = extract64(ttbr, 0, 48); | 296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) |
713 | + | 297 | case float_class_zero: |
714 | + /* | 298 | case float_class_inf: |
715 | + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR. | 299 | break; |
716 | + * | 300 | - case float_class_normal: |
717 | + * Otherwise, if the base address is out of range, raise AddressSizeFault. | 301 | case float_class_denormal: |
718 | + * In the pseudocode, this is !IsZero(baseregister<47:outputsize>), | 302 | + float_raise(float_flag_input_denormal_used, s); |
719 | + * but we've just cleared the bits above 47, so simplify the test. | 303 | + /* fall through */ |
720 | + */ | 304 | + case float_class_normal: |
721 | + if (outputsize > 48) { | 305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); |
722 | + descaddr |= extract64(ttbr, 2, 4) << 48; | 306 | break; |
723 | + } else if (descaddr >> outputsize) { | 307 | default: |
724 | + level = 0; | 308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) |
725 | + fault_type = ARMFault_AddressSize; | 309 | if (unlikely(a->cls != float_class_normal)) { |
726 | + goto do_fault; | 310 | switch (a->cls) { |
727 | + } | 311 | case float_class_denormal: |
728 | + | 312 | + if (!a->sign) { |
729 | + /* | 313 | + /* -ve denormal will be InvalidOperation */ |
730 | + * We rely on this masking to clear the RES0 bits at the bottom of the TTBR | 314 | + float_raise(float_flag_input_denormal_used, s); |
731 | + * and also to mask out CnP (bit 0) which could validly be non-zero. | ||
732 | + */ | ||
733 | + descaddr &= ~indexmask; | ||
734 | + | ||
735 | + /* | ||
736 | + * For AArch32, the address field in the descriptor goes up to bit 39 | ||
737 | + * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0 | ||
738 | + * or an AddressSize fault is raised. So for v8 we extract those SBZ | ||
739 | + * bits as part of the address, which will be checked via outputsize. | ||
740 | + * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2; | ||
741 | + * the highest bits of a 52-bit output are placed elsewhere. | ||
742 | + */ | ||
743 | + if (param.ds) { | ||
744 | + descaddrmask = MAKE_64BIT_MASK(0, 50); | ||
745 | + } else if (arm_feature(env, ARM_FEATURE_V8)) { | ||
746 | + descaddrmask = MAKE_64BIT_MASK(0, 48); | ||
747 | + } else { | ||
748 | + descaddrmask = MAKE_64BIT_MASK(0, 40); | ||
749 | + } | ||
750 | + descaddrmask &= ~indexmask_grainsize; | ||
751 | + | ||
752 | + /* | ||
753 | + * Secure accesses start with the page table in secure memory and | ||
754 | + * can be downgraded to non-secure at any step. Non-secure accesses | ||
755 | + * remain non-secure. We implement this by just ORing in the NSTable/NS | ||
756 | + * bits at each step. | ||
757 | + */ | ||
758 | + tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4); | ||
759 | + for (;;) { | ||
760 | + uint64_t descriptor; | ||
761 | + bool nstable; | ||
762 | + | ||
763 | + descaddr |= (address >> (stride * (4 - level))) & indexmask; | ||
764 | + descaddr &= ~7ULL; | ||
765 | + nstable = extract32(tableattrs, 4, 1); | ||
766 | + descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi); | ||
767 | + if (fi->type != ARMFault_None) { | ||
768 | + goto do_fault; | ||
769 | + } | ||
770 | + | ||
771 | + if (!(descriptor & 1) || | ||
772 | + (!(descriptor & 2) && (level == 3))) { | ||
773 | + /* Invalid, or the Reserved level 3 encoding */ | ||
774 | + goto do_fault; | ||
775 | + } | ||
776 | + | ||
777 | + descaddr = descriptor & descaddrmask; | ||
778 | + | ||
779 | + /* | ||
780 | + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] | ||
781 | + * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of | ||
782 | + * descaddr are in [9:8]. Otherwise, if descaddr is out of range, | ||
783 | + * raise AddressSizeFault. | ||
784 | + */ | ||
785 | + if (outputsize > 48) { | ||
786 | + if (param.ds) { | ||
787 | + descaddr |= extract64(descriptor, 8, 2) << 50; | ||
788 | + } else { | ||
789 | + descaddr |= extract64(descriptor, 12, 4) << 48; | ||
790 | + } | 315 | + } |
791 | + } else if (descaddr >> outputsize) { | 316 | break; |
792 | + fault_type = ARMFault_AddressSize; | 317 | case float_class_snan: |
793 | + goto do_fault; | 318 | case float_class_qnan: |
794 | + } | ||
795 | + | ||
796 | + if ((descriptor & 2) && (level < 3)) { | ||
797 | + /* | ||
798 | + * Table entry. The top five bits are attributes which may | ||
799 | + * propagate down through lower levels of the table (and | ||
800 | + * which are all arranged so that 0 means "no effect", so | ||
801 | + * we can gather them up by ORing in the bits at each level). | ||
802 | + */ | ||
803 | + tableattrs |= extract64(descriptor, 59, 5); | ||
804 | + level++; | ||
805 | + indexmask = indexmask_grainsize; | ||
806 | + continue; | ||
807 | + } | ||
808 | + /* | ||
809 | + * Block entry at level 1 or 2, or page entry at level 3. | ||
810 | + * These are basically the same thing, although the number | ||
811 | + * of bits we pull in from the vaddr varies. Note that although | ||
812 | + * descaddrmask masks enough of the low bits of the descriptor | ||
813 | + * to give a correct page or table address, the address field | ||
814 | + * in a block descriptor is smaller; so we need to explicitly | ||
815 | + * clear the lower bits here before ORing in the low vaddr bits. | ||
816 | + */ | ||
817 | + page_size = (1ULL << ((stride * (4 - level)) + 3)); | ||
818 | + descaddr &= ~(page_size - 1); | ||
819 | + descaddr |= (address & (page_size - 1)); | ||
820 | + /* Extract attributes from the descriptor */ | ||
821 | + attrs = extract64(descriptor, 2, 10) | ||
822 | + | (extract64(descriptor, 52, 12) << 10); | ||
823 | + | ||
824 | + if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
825 | + /* Stage 2 table descriptors do not include any attribute fields */ | ||
826 | + break; | ||
827 | + } | ||
828 | + /* Merge in attributes from table descriptors */ | ||
829 | + attrs |= nstable << 3; /* NS */ | ||
830 | + guarded = extract64(descriptor, 50, 1); /* GP */ | ||
831 | + if (param.hpd) { | ||
832 | + /* HPD disables all the table attributes except NSTable. */ | ||
833 | + break; | ||
834 | + } | ||
835 | + attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ | ||
836 | + /* | ||
837 | + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 | ||
838 | + * means "force PL1 access only", which means forcing AP[1] to 0. | ||
839 | + */ | ||
840 | + attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ | ||
841 | + attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ | ||
842 | + break; | ||
843 | + } | ||
844 | + /* | ||
845 | + * Here descaddr is the final physical address, and attributes | ||
846 | + * are all in attrs. | ||
847 | + */ | ||
848 | + fault_type = ARMFault_AccessFlag; | ||
849 | + if ((attrs & (1 << 8)) == 0) { | ||
850 | + /* Access flag */ | ||
851 | + goto do_fault; | ||
852 | + } | ||
853 | + | ||
854 | + ap = extract32(attrs, 4, 2); | ||
855 | + | ||
856 | + if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
857 | + ns = mmu_idx == ARMMMUIdx_Stage2; | ||
858 | + xn = extract32(attrs, 11, 2); | ||
859 | + *prot = get_S2prot(env, ap, xn, s1_is_el0); | ||
860 | + } else { | ||
861 | + ns = extract32(attrs, 3, 1); | ||
862 | + xn = extract32(attrs, 12, 1); | ||
863 | + pxn = extract32(attrs, 11, 1); | ||
864 | + *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); | ||
865 | + } | ||
866 | + | ||
867 | + fault_type = ARMFault_Permission; | ||
868 | + if (!(*prot & (1 << access_type))) { | ||
869 | + goto do_fault; | ||
870 | + } | ||
871 | + | ||
872 | + if (ns) { | ||
873 | + /* | ||
874 | + * The NS bit will (as required by the architecture) have no effect if | ||
875 | + * the CPU doesn't support TZ or this is a non-secure translation | ||
876 | + * regime, because the attribute will already be non-secure. | ||
877 | + */ | ||
878 | + txattrs->secure = false; | ||
879 | + } | ||
880 | + /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */ | ||
881 | + if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { | ||
882 | + arm_tlb_bti_gp(txattrs) = true; | ||
883 | + } | ||
884 | + | ||
885 | + if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
886 | + cacheattrs->is_s2_format = true; | ||
887 | + cacheattrs->attrs = extract32(attrs, 0, 4); | ||
888 | + } else { | ||
889 | + /* Index into MAIR registers for cache attributes */ | ||
890 | + uint8_t attrindx = extract32(attrs, 0, 3); | ||
891 | + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; | ||
892 | + assert(attrindx <= 7); | ||
893 | + cacheattrs->is_s2_format = false; | ||
894 | + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); | ||
895 | + } | ||
896 | + | ||
897 | + /* | ||
898 | + * For FEAT_LPA2 and effective DS, the SH field in the attributes | ||
899 | + * was re-purposed for output address bits. The SH attribute in | ||
900 | + * that case comes from TCR_ELx, which we extracted earlier. | ||
901 | + */ | ||
902 | + if (param.ds) { | ||
903 | + cacheattrs->shareability = param.sh; | ||
904 | + } else { | ||
905 | + cacheattrs->shareability = extract32(attrs, 6, 2); | ||
906 | + } | ||
907 | + | ||
908 | + *phys_ptr = descaddr; | ||
909 | + *page_size_ptr = page_size; | ||
910 | + return false; | ||
911 | + | ||
912 | +do_fault: | ||
913 | + fi->type = fault_type; | ||
914 | + fi->level = level; | ||
915 | + /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ | ||
916 | + fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 || | ||
917 | + mmu_idx == ARMMMUIdx_Stage2_S); | ||
918 | + fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; | ||
919 | + return true; | ||
920 | +} | ||
921 | + | ||
922 | static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
923 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
924 | hwaddr *phys_ptr, int *prot, | ||
925 | -- | 319 | -- |
926 | 2.25.1 | 320 | 2.34.1 | diff view generated by jsdifflib |
1 | We have about 30 instances of the typo/variant spelling 'writeable', | 1 | Currently we handle flushing of output denormals in uncanon_normal |
---|---|---|---|
2 | and over 500 of the more common 'writable'. Standardize on the | 2 | always before we deal with rounding. This works for architectures |
3 | latter. | 3 | that detect tininess before rounding, but is usually not the right |
4 | 4 | place when the architecture detects tininess after rounding. For | |
5 | Change produced with: | 5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes |
6 | 6 | outputs to be flushed to zero "when it detects a floating-point | |
7 | sed -i -e 's/\([Ww][Rr][Ii][Tt]\)[Ee]\([Aa][Bb][Ll][Ee]\)/\1\2/g' $(git grep -il writeable) | 7 | underflow condition". This means that we mustn't flush to zero if |
8 | 8 | the input is such that after rounding it is no longer tiny. | |
9 | and then hand-undoing the instance in linux-headers/linux/kvm.h. | 9 | |
10 | 10 | At least one of our guest architectures does underflow detection | |
11 | Most of these changes are in comments or documentation; the | 11 | after rounding but flushing of denormals before rounding (MIPS MSA); |
12 | exceptions are: | 12 | this means we need to have a config knob for this that is separate |
13 | * a local variable in accel/hvf/hvf-accel-ops.c | 13 | from our existing tininess_before_rounding setting. |
14 | * a local variable in accel/kvm/kvm-all.c | 14 | |
15 | * the PMCR_WRITABLE_MASK macro in target/arm/internals.h | 15 | Add an ftz_detection flag. For consistency with |
16 | * the EPT_VIOLATION_GPA_WRITABLE macro in target/i386/hvf/vmcs.h | 16 | tininess_before_rounding, we make it default to "detect ftz after |
17 | (which is never used anywhere) | 17 | rounding"; this means that we need to explicitly set the flag to |
18 | * the AR_TYPE_WRITABLE_MASK macro in target/i386/hvf/vmx.h | 18 | "detect ftz before rounding" on every existing architecture that sets |
19 | (which is never used anywhere) | 19 | flush_to_zero, so that this commit has no behaviour change. |
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
20 | 37 | ||
21 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
22 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
23 | Reviewed-by: Stefan Weil <sw@weilnetz.de> | ||
24 | Message-id: 20220505095015.2714666-1-peter.maydell@linaro.org | ||
25 | --- | 40 | --- |
26 | docs/interop/vhost-user.rst | 2 +- | 41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ |
27 | docs/specs/vmgenid.txt | 4 ++-- | 42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ |
28 | hw/scsi/mfi.h | 2 +- | 43 | target/mips/fpu_helper.h | 6 ++++++ |
29 | target/arm/internals.h | 4 ++-- | 44 | target/alpha/cpu.c | 7 +++++++ |
30 | target/i386/hvf/vmcs.h | 2 +- | 45 | target/arm/cpu.c | 1 + |
31 | target/i386/hvf/vmx.h | 2 +- | 46 | target/hppa/fpu_helper.c | 11 +++++++++++ |
32 | accel/hvf/hvf-accel-ops.c | 4 ++-- | 47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ |
33 | accel/kvm/kvm-all.c | 4 ++-- | 48 | target/mips/msa.c | 9 +++++++++ |
34 | accel/tcg/user-exec.c | 6 +++--- | 49 | target/ppc/cpu_init.c | 3 +++ |
35 | hw/acpi/ghes.c | 2 +- | 50 | target/rx/cpu.c | 8 ++++++++ |
36 | hw/intc/arm_gicv3_cpuif.c | 2 +- | 51 | target/sh4/cpu.c | 8 ++++++++ |
37 | hw/intc/arm_gicv3_dist.c | 2 +- | 52 | target/tricore/helper.c | 1 + |
38 | hw/intc/arm_gicv3_redist.c | 4 ++-- | 53 | tests/fp/fp-bench.c | 1 + |
39 | hw/intc/riscv_aclint.c | 2 +- | 54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ |
40 | hw/intc/riscv_aplic.c | 2 +- | 55 | 14 files changed, 107 insertions(+), 6 deletions(-) |
41 | hw/pci/shpc.c | 2 +- | 56 | |
42 | hw/sparc64/sun4u_iommu.c | 2 +- | 57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h |
43 | hw/timer/sse-timer.c | 2 +- | 58 | index XXXXXXX..XXXXXXX 100644 |
44 | target/arm/gdbstub.c | 2 +- | 59 | --- a/include/fpu/softfloat-helpers.h |
45 | target/arm/helper.c | 4 ++-- | 60 | +++ b/include/fpu/softfloat-helpers.h |
46 | target/arm/hvf/hvf.c | 4 ++-- | 61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) |
47 | target/i386/cpu-sysemu.c | 2 +- | 62 | status->flush_inputs_to_zero = val; |
48 | target/s390x/ioinst.c | 2 +- | 63 | } |
49 | python/qemu/machine/machine.py | 2 +- | 64 | |
50 | tests/tcg/x86_64/system/boot.S | 2 +- | 65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, |
51 | 25 files changed, 34 insertions(+), 34 deletions(-) | 66 | + float_status *status) |
52 | 67 | +{ | |
53 | diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst | 68 | + status->ftz_detection = d; |
54 | index XXXXXXX..XXXXXXX 100644 | 69 | +} |
55 | --- a/docs/interop/vhost-user.rst | 70 | + |
56 | +++ b/docs/interop/vhost-user.rst | 71 | static inline void set_default_nan_mode(bool val, float_status *status) |
57 | @@ -XXX,XX +XXX,XX @@ Virtio device config space | 72 | { |
58 | :size: a 32-bit configuration space access size in bytes | 73 | status->default_nan_mode = val; |
59 | 74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | |
60 | :flags: a 32-bit value: | 75 | return status->default_nan_mode; |
61 | - - 0: Vhost front-end messages used for writeable fields | 76 | } |
62 | + - 0: Vhost front-end messages used for writable fields | 77 | |
63 | - 1: Vhost front-end messages used for live migration | 78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) |
64 | 79 | +{ | |
65 | :payload: Size bytes array holding the contents of the virtio | 80 | + return status->ftz_detection; |
66 | diff --git a/docs/specs/vmgenid.txt b/docs/specs/vmgenid.txt | 81 | +} |
67 | index XXXXXXX..XXXXXXX 100644 | 82 | + |
68 | --- a/docs/specs/vmgenid.txt | 83 | #endif /* SOFTFLOAT_HELPERS_H */ |
69 | +++ b/docs/specs/vmgenid.txt | 84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h |
70 | @@ -XXX,XX +XXX,XX @@ change the contents of the memory at runtime, specifically when starting a | 85 | index XXXXXXX..XXXXXXX 100644 |
71 | backed-up or snapshotted image. In order to do this, QEMU must know the | 86 | --- a/include/fpu/softfloat-types.h |
72 | address that has been allocated. | 87 | +++ b/include/fpu/softfloat-types.h |
73 | 88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | |
74 | -The mechanism chosen for this memory sharing is writeable fw_cfg blobs. | 89 | float_infzeronan_suppress_invalid = (1 << 7), |
75 | +The mechanism chosen for this memory sharing is writable fw_cfg blobs. | 90 | } FloatInfZeroNaNRule; |
76 | These are data object that are visible to both QEMU and guests, and are | 91 | |
77 | addressable as sequential files. | 92 | +/* |
78 | 93 | + * When flush_to_zero is set, should we detect denormal results to | |
79 | @@ -XXX,XX +XXX,XX @@ Two fw_cfg blobs are used in this case: | 94 | + * be flushed before or after rounding? For most architectures this |
80 | /etc/vmgenid_guid - contains the actual VM Generation ID GUID | 95 | + * should be set to match the tininess_before_rounding setting, |
81 | - read-only to the guest | 96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before |
82 | /etc/vmgenid_addr - contains the address of the downloaded vmgenid blob | 97 | + * rounding but tininess after rounding. |
83 | - - writeable by the guest | 98 | + * |
84 | + - writable by the guest | 99 | + * This enum is arranged so that the default if the target doesn't |
85 | 100 | + * configure it matches the default for tininess_before_rounding | |
86 | 101 | + * (i.e. "after rounding"). | |
87 | QEMU sends the following commands to the guest at startup: | 102 | + */ |
88 | diff --git a/hw/scsi/mfi.h b/hw/scsi/mfi.h | 103 | +typedef enum __attribute__((__packed__)) { |
89 | index XXXXXXX..XXXXXXX 100644 | 104 | + float_ftz_after_rounding = 0, |
90 | --- a/hw/scsi/mfi.h | 105 | + float_ftz_before_rounding = 1, |
91 | +++ b/hw/scsi/mfi.h | 106 | +} FloatFTZDetection; |
92 | @@ -XXX,XX +XXX,XX @@ struct mfi_ctrl_props { | 107 | + |
93 | * metadata and user data | ||
94 | * 1=5%, 2=10%, 3=15% and so on | ||
95 | */ | ||
96 | - uint8_t viewSpace; /* snapshot writeable VIEWs | ||
97 | + uint8_t viewSpace; /* snapshot writable VIEWs | ||
98 | * capacity as a % of source LD | ||
99 | * capacity. 0=READ only | ||
100 | * 1=5%, 2=10%, 3=15% and so on | ||
101 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/arm/internals.h | ||
104 | +++ b/target/arm/internals.h | ||
105 | @@ -XXX,XX +XXX,XX @@ enum MVEECIState { | ||
106 | #define PMCRP 0x2 | ||
107 | #define PMCRE 0x1 | ||
108 | /* | 108 | /* |
109 | - * Mask of PMCR bits writeable by guest (not including WO bits like C, P, | 109 | * Floating Point Status. Individual architectures may maintain |
110 | + * Mask of PMCR bits writable by guest (not including WO bits like C, P, | 110 | * several versions of float_status for different functions. The |
111 | * which can be written as 1 to trigger behaviour but which stay RAZ). | 111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { |
112 | */ | 112 | bool tininess_before_rounding; |
113 | -#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE) | 113 | /* should denormalised results go to zero and set output_denormal_flushed? */ |
114 | +#define PMCR_WRITABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE) | 114 | bool flush_to_zero; |
115 | 115 | + /* do we detect and flush denormal results before or after rounding? */ | |
116 | #define PMXEVTYPER_P 0x80000000 | 116 | + FloatFTZDetection ftz_detection; |
117 | #define PMXEVTYPER_U 0x40000000 | 117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ |
118 | diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h | 118 | bool flush_inputs_to_zero; |
119 | index XXXXXXX..XXXXXXX 100644 | 119 | bool default_nan_mode; |
120 | --- a/target/i386/hvf/vmcs.h | 120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h |
121 | +++ b/target/i386/hvf/vmcs.h | 121 | index XXXXXXX..XXXXXXX 100644 |
122 | @@ -XXX,XX +XXX,XX @@ | 122 | --- a/target/mips/fpu_helper.h |
123 | #define EPT_VIOLATION_DATA_WRITE (1UL << 1) | 123 | +++ b/target/mips/fpu_helper.h |
124 | #define EPT_VIOLATION_INST_FETCH (1UL << 2) | 124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) |
125 | #define EPT_VIOLATION_GPA_READABLE (1UL << 3) | 125 | */ |
126 | -#define EPT_VIOLATION_GPA_WRITEABLE (1UL << 4) | 126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, |
127 | +#define EPT_VIOLATION_GPA_WRITABLE (1UL << 4) | 127 | &env->active_fpu.fp_status); |
128 | #define EPT_VIOLATION_GPA_EXECUTABLE (1UL << 5) | 128 | + /* |
129 | #define EPT_VIOLATION_GLA_VALID (1UL << 7) | 129 | + * TODO: the spec doesn't say clearly whether FTZ happens before |
130 | #define EPT_VIOLATION_XLAT_VALID (1UL << 8) | 130 | + * or after rounding for normal FPU operations. |
131 | diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h | 131 | + */ |
132 | index XXXXXXX..XXXXXXX 100644 | 132 | + set_float_ftz_detection(float_ftz_before_rounding, |
133 | --- a/target/i386/hvf/vmx.h | 133 | + &env->active_fpu.fp_status); |
134 | +++ b/target/i386/hvf/vmx.h | 134 | } |
135 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t cap2ctrl(uint64_t cap, uint64_t ctrl) | 135 | |
136 | 136 | /* MSA */ | |
137 | #define AR_TYPE_ACCESSES_MASK 1 | 137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c |
138 | #define AR_TYPE_READABLE_MASK (1 << 1) | 138 | index XXXXXXX..XXXXXXX 100644 |
139 | -#define AR_TYPE_WRITEABLE_MASK (1 << 2) | 139 | --- a/target/alpha/cpu.c |
140 | +#define AR_TYPE_WRITABLE_MASK (1 << 2) | 140 | +++ b/target/alpha/cpu.c |
141 | #define AR_TYPE_CODE_MASK (1 << 3) | 141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) |
142 | #define AR_TYPE_MASK 0x0f | 142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); |
143 | #define AR_TYPE_BUSY_64_TSS 11 | 143 | /* Default NaN: sign bit clear, msb frac bit set */ |
144 | diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c | 144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); |
145 | index XXXXXXX..XXXXXXX 100644 | 145 | + /* |
146 | --- a/accel/hvf/hvf-accel-ops.c | 146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 |
147 | +++ b/accel/hvf/hvf-accel-ops.c | 147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so |
148 | @@ -XXX,XX +XXX,XX @@ static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) | 148 | + * this should be float_ftz_after_rounding to match the |
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
149 | { | 161 | { |
150 | hvf_slot *mem; | 162 | set_float_detect_tininess(float_tininess_before_rounding, s); |
151 | MemoryRegion *area = section->mr; | 163 | + set_float_ftz_detection(float_ftz_before_rounding, s); |
152 | - bool writeable = !area->readonly && !area->rom_device; | 164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); |
153 | + bool writable = !area->readonly && !area->rom_device; | 165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); |
154 | hv_memory_flags_t flags; | 166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); |
155 | uint64_t page_size = qemu_real_host_page_size(); | 167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c |
156 | 168 | index XXXXXXX..XXXXXXX 100644 | |
157 | if (!memory_region_is_ram(area)) { | 169 | --- a/target/hppa/fpu_helper.c |
158 | - if (writeable) { | 170 | +++ b/target/hppa/fpu_helper.c |
159 | + if (writable) { | 171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) |
160 | return; | 172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); |
161 | } else if (!memory_region_is_romd(area)) { | 173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ |
162 | /* | 174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); |
163 | diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c | 175 | + /* |
164 | index XXXXXXX..XXXXXXX 100644 | 176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing |
165 | --- a/accel/kvm/kvm-all.c | 177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" |
166 | +++ b/accel/kvm/kvm-all.c | 178 | + * for consistency with tininess detection. |
167 | @@ -XXX,XX +XXX,XX @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, | 179 | + */ |
168 | KVMSlot *mem; | 180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
169 | int err; | 181 | + /* |
170 | MemoryRegion *mr = section->mr; | 182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should |
171 | - bool writeable = !mr->readonly && !mr->rom_device; | 183 | + * detect tininess before rounding, but we don't set that here so we |
172 | + bool writable = !mr->readonly && !mr->rom_device; | 184 | + * get the default tininess after rounding. |
173 | hwaddr start_addr, size, slot_size, mr_offset; | 185 | + */ |
174 | ram_addr_t ram_start_offset; | 186 | } |
175 | void *ram; | 187 | |
176 | 188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | |
177 | if (!memory_region_is_ram(mr)) { | 189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c |
178 | - if (writeable || !kvm_readonly_mem_allowed) { | 190 | index XXXXXXX..XXXXXXX 100644 |
179 | + if (writable || !kvm_readonly_mem_allowed) { | 191 | --- a/target/i386/tcg/fpu_helper.c |
180 | return; | 192 | +++ b/target/i386/tcg/fpu_helper.c |
181 | } else if (!mr->romd_mode) { | 193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) |
182 | /* If the memory device is not in romd_mode, then we actually want | 194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); |
183 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | 195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); |
184 | index XXXXXXX..XXXXXXX 100644 | 196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); |
185 | --- a/accel/tcg/user-exec.c | 197 | + /* |
186 | +++ b/accel/tcg/user-exec.c | 198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM |
187 | @@ -XXX,XX +XXX,XX @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) | 199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush |
188 | * Return true if the write fault has been handled, and should be re-tried. | 200 | + * when we detect underflow, which x86 does after rounding). |
189 | * | 201 | + */ |
190 | * Note that it is important that we don't call page_unprotect() unless | 202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
191 | - * this is really a "write to nonwriteable page" fault, because | 203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); |
192 | + * this is really a "write to nonwritable page" fault, because | 204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); |
193 | * page_unprotect() assumes that if it is called for an access to | 205 | } |
194 | - * a page that's writeable this means we had two threads racing and | 206 | |
195 | - * another thread got there first and already made the page writeable; | 207 | static inline uint8_t save_exception_flags(CPUX86State *env) |
196 | + * a page that's writable this means we had two threads racing and | 208 | diff --git a/target/mips/msa.c b/target/mips/msa.c |
197 | + * another thread got there first and already made the page writable; | 209 | index XXXXXXX..XXXXXXX 100644 |
198 | * so we will retry the access. If we were to call page_unprotect() | 210 | --- a/target/mips/msa.c |
199 | * for some other kind of fault that should really be passed to the | 211 | +++ b/target/mips/msa.c |
200 | * guest, we'd end up in an infinite loop of retrying the faulting access. | 212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) |
201 | diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c | 213 | /* tininess detected after rounding.*/ |
202 | index XXXXXXX..XXXXXXX 100644 | 214 | set_float_detect_tininess(float_tininess_after_rounding, |
203 | --- a/hw/acpi/ghes.c | 215 | &env->active_tc.msa_fp_status); |
204 | +++ b/hw/acpi/ghes.c | 216 | + /* |
205 | @@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) | 217 | + * MSACSR.FS detects tiny results to flush to zero before rounding |
206 | for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) { | 218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD |
207 | /* | 219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it |
208 | * Initialize the value of read_ack_register to 1, so GHES can be | 220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 |
209 | - * writeable after (re)boot. | 221 | + * table 3.3). |
210 | + * writable after (re)boot. | 222 | + */ |
211 | * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2 | 223 | + set_float_ftz_detection(float_ftz_before_rounding, |
212 | * (GHESv2 - Type 10) | 224 | + &env->active_tc.msa_fp_status); |
213 | */ | ||
214 | diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c | ||
215 | index XXXXXXX..XXXXXXX 100644 | ||
216 | --- a/hw/intc/arm_gicv3_cpuif.c | ||
217 | +++ b/hw/intc/arm_gicv3_cpuif.c | ||
218 | @@ -XXX,XX +XXX,XX @@ static void icc_ctlr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
219 | cs->icc_ctlr_el1[GICV3_S] |= ICC_CTLR_EL1_CBPR; | ||
220 | } | ||
221 | |||
222 | - /* The only bit stored in icc_ctlr_el3 which is writeable is EOIMODE_EL3: */ | ||
223 | + /* The only bit stored in icc_ctlr_el3 which is writable is EOIMODE_EL3: */ | ||
224 | mask = ICC_CTLR_EL3_EOIMODE_EL3; | ||
225 | |||
226 | cs->icc_ctlr_el3 &= ~mask; | ||
227 | diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c | ||
228 | index XXXXXXX..XXXXXXX 100644 | ||
229 | --- a/hw/intc/arm_gicv3_dist.c | ||
230 | +++ b/hw/intc/arm_gicv3_dist.c | ||
231 | @@ -XXX,XX +XXX,XX @@ static bool gicd_writel(GICv3State *s, hwaddr offset, | ||
232 | if (value & mask & GICD_CTLR_DS) { | ||
233 | /* We just set DS, so the ARE_NS and EnG1S bits are now RES0. | ||
234 | * Note that this is a one-way transition because if DS is set | ||
235 | - * then it's not writeable, so it can only go back to 0 with a | ||
236 | + * then it's not writable, so it can only go back to 0 with a | ||
237 | * hardware reset. | ||
238 | */ | ||
239 | s->gicd_ctlr &= ~(GICD_CTLR_EN_GRP1S | GICD_CTLR_ARE_NS); | ||
240 | diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c | ||
241 | index XXXXXXX..XXXXXXX 100644 | ||
242 | --- a/hw/intc/arm_gicv3_redist.c | ||
243 | +++ b/hw/intc/arm_gicv3_redist.c | ||
244 | @@ -XXX,XX +XXX,XX @@ static void gicr_write_vpendbaser(GICv3CPUState *cs, uint64_t newval) | ||
245 | 225 | ||
246 | /* | 226 | /* |
247 | * The DIRTY bit is read-only and for us is always zero; | 227 | * According to MIPS specifications, if one of the two operands is |
248 | - * other fields are writeable. | 228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c |
249 | + * other fields are writable. | 229 | index XXXXXXX..XXXXXXX 100644 |
250 | */ | 230 | --- a/target/ppc/cpu_init.c |
251 | newval &= R_GICR_VPENDBASER_INNERCACHE_MASK | | 231 | +++ b/target/ppc/cpu_init.c |
252 | R_GICR_VPENDBASER_SHAREABILITY_MASK | | 232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) |
253 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset, | 233 | /* tininess for underflow is detected before rounding */ |
254 | /* RAZ/WI for our implementation */ | 234 | set_float_detect_tininess(float_tininess_before_rounding, |
255 | return MEMTX_OK; | 235 | &env->fp_status); |
256 | case GICR_WAKER: | 236 | + /* Similarly for flush-to-zero */ |
257 | - /* Only the ProcessorSleep bit is writeable. When the guest sets | 237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
258 | + /* Only the ProcessorSleep bit is writable. When the guest sets | 238 | + |
259 | * it it requests that we transition the channel between the | 239 | /* |
260 | * redistributor and the cpu interface to quiescent, and that | 240 | * PowerPC propagation rules: |
261 | * we set the ChildrenAsleep bit once the interface has reached the | 241 | * 1. A if it sNaN or qNaN |
262 | diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c | 242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c |
263 | index XXXXXXX..XXXXXXX 100644 | 243 | index XXXXXXX..XXXXXXX 100644 |
264 | --- a/hw/intc/riscv_aclint.c | 244 | --- a/target/rx/cpu.c |
265 | +++ b/hw/intc/riscv_aclint.c | 245 | +++ b/target/rx/cpu.c |
266 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp) | 246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) |
267 | /* Claim software interrupt bits */ | 247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); |
268 | for (i = 0; i < swi->num_harts; i++) { | 248 | /* Default NaN value: sign bit clear, set frac msb */ |
269 | RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i)); | 249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); |
270 | - /* We don't claim mip.SSIP because it is writeable by software */ | 250 | + /* |
271 | + /* We don't claim mip.SSIP because it is writable by software */ | 251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear |
272 | if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) { | 252 | + * on whether flush-to-zero should happen before or after rounding, but |
273 | error_report("MSIP already claimed"); | 253 | + * section 1.3.2 says that it happens when underflow is detected, and |
274 | exit(1); | 254 | + * implies that underflow is detected after rounding. So this may not |
275 | diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c | 255 | + * be the correct setting. |
276 | index XXXXXXX..XXXXXXX 100644 | 256 | + */ |
277 | --- a/hw/intc/riscv_aplic.c | 257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
278 | +++ b/hw/intc/riscv_aplic.c | 258 | } |
279 | @@ -XXX,XX +XXX,XX @@ static void riscv_aplic_write(void *opaque, hwaddr addr, uint64_t value, | 259 | |
280 | } | 260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) |
281 | 261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | |
282 | if (addr == APLIC_DOMAINCFG) { | 262 | index XXXXXXX..XXXXXXX 100644 |
283 | - /* Only IE bit writeable at the moment */ | 263 | --- a/target/sh4/cpu.c |
284 | + /* Only IE bit writable at the moment */ | 264 | +++ b/target/sh4/cpu.c |
285 | value &= APLIC_DOMAINCFG_IE; | 265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) |
286 | aplic->domaincfg = value; | 266 | set_default_nan_mode(1, &env->fp_status); |
287 | } else if ((APLIC_SOURCECFG_BASE <= addr) && | 267 | /* sign bit clear, set all frac bits other than msb */ |
288 | diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c | 268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); |
289 | index XXXXXXX..XXXXXXX 100644 | 269 | + /* |
290 | --- a/hw/pci/shpc.c | 270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether |
291 | +++ b/hw/pci/shpc.c | 271 | + * it detects tininess before or after rounding. Section 6.4 is clear |
292 | @@ -XXX,XX +XXX,XX @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp) | 272 | + * that flush-to-zero happens when the result underflows, though, so |
293 | pci_set_byte(config + SHPC_CAP_CxP, 0); | 273 | + * either this should be "detect ftz after rounding" or else we should |
294 | pci_set_long(config + SHPC_CAP_DWORD_DATA, 0); | 274 | + * be setting "detect tininess before rounding". |
295 | d->shpc->cap = config_offset; | 275 | + */ |
296 | - /* Make dword select and data writeable. */ | 276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
297 | + /* Make dword select and data writable. */ | 277 | } |
298 | pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff); | 278 | |
299 | pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff); | 279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) |
300 | return 0; | 280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c |
301 | diff --git a/hw/sparc64/sun4u_iommu.c b/hw/sparc64/sun4u_iommu.c | 281 | index XXXXXXX..XXXXXXX 100644 |
302 | index XXXXXXX..XXXXXXX 100644 | 282 | --- a/target/tricore/helper.c |
303 | --- a/hw/sparc64/sun4u_iommu.c | 283 | +++ b/target/tricore/helper.c |
304 | +++ b/hw/sparc64/sun4u_iommu.c | 284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) |
305 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry sun4u_translate_iommu(IOMMUMemoryRegion *iommu, | 285 | set_flush_inputs_to_zero(1, &env->fp_status); |
306 | } | 286 | set_flush_to_zero(1, &env->fp_status); |
307 | 287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | |
308 | if (tte & IOMMU_TTE_DATA_W) { | 288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); |
309 | - /* Writeable */ | 289 | set_default_nan_mode(1, &env->fp_status); |
310 | + /* Writable */ | 290 | /* Default NaN pattern: sign bit clear, frac msb set */ |
311 | ret.perm = IOMMU_RW; | 291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); |
312 | } else { | 292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c |
313 | ret.perm = IOMMU_RO; | 293 | index XXXXXXX..XXXXXXX 100644 |
314 | diff --git a/hw/timer/sse-timer.c b/hw/timer/sse-timer.c | 294 | --- a/tests/fp/fp-bench.c |
315 | index XXXXXXX..XXXXXXX 100644 | 295 | +++ b/tests/fp/fp-bench.c |
316 | --- a/hw/timer/sse-timer.c | 296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) |
317 | +++ b/hw/timer/sse-timer.c | 297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); |
318 | @@ -XXX,XX +XXX,XX @@ static void sse_timer_write(void *opaque, hwaddr offset, uint64_t value, | 298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); |
319 | { | 299 | set_float_default_nan_pattern(0b01000000, &soft_status); |
320 | uint32_t old_ctl = s->cntp_aival_ctl; | 300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); |
321 | 301 | ||
322 | - /* EN bit is writeable; CLR bit is write-0-to-clear, write-1-ignored */ | 302 | f = bench_funcs[operation][precision]; |
323 | + /* EN bit is writable; CLR bit is write-0-to-clear, write-1-ignored */ | 303 | g_assert(f); |
324 | s->cntp_aival_ctl &= ~R_CNTP_AIVAL_CTL_EN_MASK; | 304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc |
325 | s->cntp_aival_ctl |= value & R_CNTP_AIVAL_CTL_EN_MASK; | 305 | index XXXXXXX..XXXXXXX 100644 |
326 | if (!(value & R_CNTP_AIVAL_CTL_CLR_MASK)) { | 306 | --- a/fpu/softfloat-parts.c.inc |
327 | diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c | 307 | +++ b/fpu/softfloat-parts.c.inc |
328 | index XXXXXXX..XXXXXXX 100644 | 308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, |
329 | --- a/target/arm/gdbstub.c | 309 | p->frac_lo &= ~round_mask; |
330 | +++ b/target/arm/gdbstub.c | 310 | } |
331 | @@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) | 311 | frac_shr(p, frac_shift); |
332 | /* | 312 | - } else if (s->flush_to_zero) { |
333 | * Don't allow writing to XPSR.Exception as it can cause | 313 | + } else if (s->flush_to_zero && |
334 | * a transition into or out of handler mode (it's not | 314 | + s->ftz_detection == float_ftz_before_rounding) { |
335 | - * writeable via the MSR insn so this is a reasonable | 315 | flags |= float_flag_output_denormal_flushed; |
336 | + * writable via the MSR insn so this is a reasonable | 316 | p->cls = float_class_zero; |
337 | * restriction). Other fields are safe to update. | 317 | exp = 0; |
338 | */ | 318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, |
339 | xpsr_write(env, tmp, ~XPSR_EXCP); | 319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; |
340 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 320 | frac_shr(p, frac_shift); |
341 | index XXXXXXX..XXXXXXX 100644 | 321 | |
342 | --- a/target/arm/helper.c | 322 | - if (is_tiny && (flags & float_flag_inexact)) { |
343 | +++ b/target/arm/helper.c | 323 | - flags |= float_flag_underflow; |
344 | @@ -XXX,XX +XXX,XX @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, | 324 | - } |
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
345 | } | 340 | } |
346 | } | 341 | } |
347 | 342 | p->exp = exp; | |
348 | - env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK; | ||
349 | - env->cp15.c9_pmcr |= (value & PMCR_WRITEABLE_MASK); | ||
350 | + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; | ||
351 | + env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); | ||
352 | |||
353 | pmu_op_finish(env); | ||
354 | } | ||
355 | diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c | ||
356 | index XXXXXXX..XXXXXXX 100644 | ||
357 | --- a/target/arm/hvf/hvf.c | ||
358 | +++ b/target/arm/hvf/hvf.c | ||
359 | @@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) | ||
360 | } | ||
361 | } | ||
362 | |||
363 | - env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK; | ||
364 | - env->cp15.c9_pmcr |= (val & PMCR_WRITEABLE_MASK); | ||
365 | + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; | ||
366 | + env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK); | ||
367 | |||
368 | pmu_op_finish(env); | ||
369 | break; | ||
370 | diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c | ||
371 | index XXXXXXX..XXXXXXX 100644 | ||
372 | --- a/target/i386/cpu-sysemu.c | ||
373 | +++ b/target/i386/cpu-sysemu.c | ||
374 | @@ -XXX,XX +XXX,XX @@ static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) | ||
375 | |||
376 | /* Convert CPU model data from X86CPU object to a property dictionary | ||
377 | * that can recreate exactly the same CPU model, including every | ||
378 | - * writeable QOM property. | ||
379 | + * writable QOM property. | ||
380 | */ | ||
381 | static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) | ||
382 | { | ||
383 | diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c | ||
384 | index XXXXXXX..XXXXXXX 100644 | ||
385 | --- a/target/s390x/ioinst.c | ||
386 | +++ b/target/s390x/ioinst.c | ||
387 | @@ -XXX,XX +XXX,XX @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, | ||
388 | g_assert(!s390_is_pv()); | ||
389 | /* | ||
390 | * As operand exceptions have a lower priority than access exceptions, | ||
391 | - * we check whether the memory area is writeable (injecting the | ||
392 | + * we check whether the memory area is writable (injecting the | ||
393 | * access exception if it is not) first. | |
394 | */ | ||
395 | if (!s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib))) { | ||
396 | diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py | ||
397 | index XXXXXXX..XXXXXXX 100644 | ||
398 | --- a/python/qemu/machine/machine.py | ||
399 | +++ b/python/qemu/machine/machine.py | ||
400 | @@ -XXX,XX +XXX,XX @@ def _early_cleanup(self) -> None: | ||
401 | """ | ||
402 | # If we keep the console socket open, we may deadlock waiting | ||
403 | # for QEMU to exit, while QEMU is waiting for the socket to | ||
404 | - # become writeable. | ||
405 | + # become writable. | ||
406 | if self._console_socket is not None: | ||
407 | self._console_socket.close() | ||
408 | self._console_socket = None | ||
409 | diff --git a/tests/tcg/x86_64/system/boot.S b/tests/tcg/x86_64/system/boot.S | ||
410 | index XXXXXXX..XXXXXXX 100644 | ||
411 | --- a/tests/tcg/x86_64/system/boot.S | ||
412 | +++ b/tests/tcg/x86_64/system/boot.S | ||
413 | @@ -XXX,XX +XXX,XX @@ | ||
414 | * | ||
415 | * - `ebx`: contains the physical memory address where the loader has placed | ||
416 | * the boot start info structure. | ||
417 | - * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared. | ||
418 | + * - `cr0`: bit 0 (PE) must be set. All the other writable bits are cleared. | ||
419 | * - `cr4`: all bits are cleared. | ||
420 | * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’ | ||
421 | * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified. | ||
422 | -- | 343 | -- |
423 | 2.25.1 | 344 | 2.34.1 |
424 | |||
425 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The Armv8.7 FEAT_AFP feature defines three new control bits in |
---|---|---|---|
2 | the FPCR: | ||
3 | * FPCR.AH: "alternate floating point mode"; this changes floating | ||
4 | point behaviour in a variety of ways, including: | ||
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, denormals detected after rounding | ||
7 | with an unbounded exponent has been applied are flushed to zero | ||
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
2 | 15 | ||
3 | Begin creation of sve_ldst_internal.h by moving the primitives | 16 | This commit defines the new bits in the FPCR, and allows them to be |
4 | that access host and tlb memory. | 17 | read or written when FEAT_AFP is implemented. Actual behaviour |
18 | changes will be implemented in subsequent commits. | ||
5 | 19 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Note that these are the first FPCR bits which don't appear in the |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 21 | AArch32 FPSCR view of the register, and which share bit positions |
8 | Message-id: 20220607203306.657998-14-richard.henderson@linaro.org | 22 | with FPSR bits. |
23 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 26 | --- |
11 | target/arm/sve_ldst_internal.h | 127 +++++++++++++++++++++++++++++++++ | 27 | target/arm/cpu-features.h | 5 +++++ |
12 | target/arm/sve_helper.c | 107 +-------------------------- | 28 | target/arm/cpu.h | 3 +++ |
13 | 2 files changed, 128 insertions(+), 106 deletions(-) | 29 | target/arm/vfp_helper.c | 11 ++++++++--- |
14 | create mode 100644 target/arm/sve_ldst_internal.h | 30 | 3 files changed, 16 insertions(+), 3 deletions(-) |
15 | 31 | ||
16 | diff --git a/target/arm/sve_ldst_internal.h b/target/arm/sve_ldst_internal.h | 32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
17 | new file mode 100644 | 33 | index XXXXXXX..XXXXXXX 100644 |
18 | index XXXXXXX..XXXXXXX | 34 | --- a/target/arm/cpu-features.h |
19 | --- /dev/null | 35 | +++ b/target/arm/cpu-features.h |
20 | +++ b/target/arm/sve_ldst_internal.h | 36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) |
21 | @@ -XXX,XX +XXX,XX @@ | 37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; |
22 | +/* | 38 | } |
23 | + * ARM SVE Load/Store Helpers | 39 | |
24 | + * | 40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) |
25 | + * Copyright (c) 2018-2022 Linaro | 41 | +{ |
26 | + * | 42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; |
27 | + * This library is free software; you can redistribute it and/or | ||
28 | + * modify it under the terms of the GNU Lesser General Public | ||
29 | + * License as published by the Free Software Foundation; either | ||
30 | + * version 2.1 of the License, or (at your option) any later version. | ||
31 | + * | ||
32 | + * This library is distributed in the hope that it will be useful, | ||
33 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
34 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
35 | + * Lesser General Public License for more details. | ||
36 | + * | ||
37 | + * You should have received a copy of the GNU Lesser General Public | ||
38 | + * License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
39 | + */ | ||
40 | + | ||
41 | +#ifndef TARGET_ARM_SVE_LDST_INTERNAL_H | ||
42 | +#define TARGET_ARM_SVE_LDST_INTERNAL_H | ||
43 | + | ||
44 | +#include "exec/cpu_ldst.h" | ||
45 | + | ||
46 | +/* | ||
47 | + * Load one element into @vd + @reg_off from @host. | ||
48 | + * The controlling predicate is known to be true. | ||
49 | + */ | ||
50 | +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); | ||
51 | + | ||
52 | +/* | ||
53 | + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). | ||
54 | + * The controlling predicate is known to be true. | ||
55 | + */ | ||
56 | +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, | ||
57 | + target_ulong vaddr, uintptr_t retaddr); | ||
58 | + | ||
59 | +/* | ||
60 | + * Generate the above primitives. | ||
61 | + */ | ||
62 | + | ||
63 | +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ | ||
64 | +static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ | ||
65 | +{ TYPEM val = HOST(host); *(TYPEE *)(vd + H(reg_off)) = val; } | ||
66 | + | ||
67 | +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ | ||
68 | +static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ | ||
69 | +{ TYPEM val = *(TYPEE *)(vd + H(reg_off)); HOST(host, val); } | ||
70 | + | ||
71 | +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ | ||
72 | +static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ | ||
73 | + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ | ||
74 | +{ \ | ||
75 | + TYPEM val = TLB(env, useronly_clean_ptr(addr), ra); \ | ||
76 | + *(TYPEE *)(vd + H(reg_off)) = val; \ | ||
77 | +} | 43 | +} |
78 | + | 44 | + |
79 | +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ | 45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) |
80 | +static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ | 46 | { |
81 | + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ | 47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; |
82 | +{ \ | 48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
83 | + TYPEM val = *(TYPEE *)(vd + H(reg_off)); \ | ||
84 | + TLB(env, useronly_clean_ptr(addr), val, ra); \ | ||
85 | +} | ||
86 | + | ||
87 | +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ | ||
88 | + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ | ||
89 | + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) | ||
90 | + | ||
91 | +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) | ||
92 | +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) | ||
93 | +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) | ||
94 | +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) | ||
95 | +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) | ||
96 | +DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t) | ||
97 | +DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t) | ||
98 | + | ||
99 | +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ | ||
100 | + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ | ||
101 | + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) | ||
102 | + | ||
103 | +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) | ||
104 | +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) | ||
105 | +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) | ||
106 | +DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t) | ||
107 | + | ||
108 | +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ | ||
109 | + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ | ||
110 | + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ | ||
111 | + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ | ||
112 | + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) | ||
113 | + | ||
114 | +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ | ||
115 | + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ | ||
116 | + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ | ||
117 | + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ | ||
118 | + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) | ||
119 | + | ||
120 | +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) | ||
121 | +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) | ||
122 | +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) | ||
123 | +DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw) | ||
124 | +DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw) | ||
125 | + | ||
126 | +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) | ||
127 | +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) | ||
128 | +DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw) | ||
129 | + | ||
130 | +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) | ||
131 | +DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl) | ||
132 | +DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl) | ||
133 | + | ||
134 | +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) | ||
135 | +DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) | ||
136 | + | ||
137 | +DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) | ||
138 | +DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) | ||
139 | + | ||
140 | +#undef DO_LD_TLB | ||
141 | +#undef DO_ST_TLB | ||
142 | +#undef DO_LD_HOST | ||
143 | +#undef DO_LD_PRIM_1 | ||
144 | +#undef DO_ST_PRIM_1 | ||
145 | +#undef DO_LD_PRIM_2 | ||
146 | +#undef DO_ST_PRIM_2 | ||
147 | + | ||
148 | +#endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */ | ||
149 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
150 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
151 | --- a/target/arm/sve_helper.c | 50 | --- a/target/arm/cpu.h |
152 | +++ b/target/arm/sve_helper.c | 51 | +++ b/target/arm/cpu.h |
153 | @@ -XXX,XX +XXX,XX @@ | 52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); |
154 | #include "cpu.h" | ||
155 | #include "internals.h" | ||
156 | #include "exec/exec-all.h" | ||
157 | -#include "exec/cpu_ldst.h" | ||
158 | #include "exec/helper-proto.h" | ||
159 | #include "tcg/tcg-gvec-desc.h" | ||
160 | #include "fpu/softfloat.h" | ||
161 | #include "tcg/tcg.h" | ||
162 | #include "vec_internal.h" | ||
163 | +#include "sve_ldst_internal.h" | ||
164 | |||
165 | |||
166 | /* Return a value for NZCV as per the ARM PredTest pseudofunction. | ||
167 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
168 | * Load contiguous data, protected by a governing predicate. | ||
169 | */ | 53 | */ |
170 | 54 | ||
171 | -/* | 55 | /* FPCR bits */ |
172 | - * Load one element into @vd + @reg_off from @host. | 56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ |
173 | - * The controlling predicate is known to be true. | 57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ |
174 | - */ | 58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ |
175 | -typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); | 59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ |
176 | - | 60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ |
177 | -/* | 61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ |
178 | - * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). | 62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
179 | - * The controlling predicate is known to be true. | 63 | index XXXXXXX..XXXXXXX 100644 |
180 | - */ | 64 | --- a/target/arm/vfp_helper.c |
181 | -typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, | 65 | +++ b/target/arm/vfp_helper.c |
182 | - target_ulong vaddr, uintptr_t retaddr); | 66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) |
183 | - | 67 | if (!cpu_isar_feature(any_fp16, cpu)) { |
184 | -/* | 68 | val &= ~FPCR_FZ16; |
185 | - * Generate the above primitives. | 69 | } |
186 | - */ | 70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { |
187 | - | 71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); |
188 | -#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ | 72 | + } |
189 | -static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ | 73 | |
190 | -{ \ | 74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { |
191 | - TYPEM val = HOST(host); \ | 75 | val &= ~FPCR_EBF; |
192 | - *(TYPEE *)(vd + H(reg_off)) = val; \ | 76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) |
193 | -} | 77 | * We don't implement trapped exception handling, so the |
194 | - | 78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) |
195 | -#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ | 79 | * |
196 | -static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ | 80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF |
197 | -{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } | 81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits |
198 | - | 82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, |
199 | -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ | 83 | + * FIZ, AH, and NEP. |
200 | -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ | 84 | + * Len, Stride and LTPSIZE we just handled. Store those bits |
201 | - target_ulong addr, uintptr_t ra) \ | 85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI |
202 | -{ \ | 86 | * bits. |
203 | - *(TYPEE *)(vd + H(reg_off)) = \ | 87 | */ |
204 | - (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ | 88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; |
205 | -} | 89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | |
206 | - | 90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; |
207 | -#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ | 91 | env->vfp.fpcr &= ~mask; |
208 | -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ | 92 | env->vfp.fpcr |= val; |
209 | - target_ulong addr, uintptr_t ra) \ | 93 | } |
210 | -{ \ | ||
211 | - TLB(env, useronly_clean_ptr(addr), \ | ||
212 | - (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ | ||
213 | -} | ||
214 | - | ||
215 | -#define DO_LD_PRIM_1(NAME, H, TE, TM) \ | ||
216 | - DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ | ||
217 | - DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) | ||
218 | - | ||
219 | -DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) | ||
220 | -DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) | ||
221 | -DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) | ||
222 | -DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) | ||
223 | -DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) | ||
224 | -DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t) | ||
225 | -DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t) | ||
226 | - | ||
227 | -#define DO_ST_PRIM_1(NAME, H, TE, TM) \ | ||
228 | - DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ | ||
229 | - DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) | ||
230 | - | ||
231 | -DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) | ||
232 | -DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) | ||
233 | -DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) | ||
234 | -DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t) | ||
235 | - | ||
236 | -#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ | ||
237 | - DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ | ||
238 | - DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ | ||
239 | - DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ | ||
240 | - DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) | ||
241 | - | ||
242 | -#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ | ||
243 | - DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ | ||
244 | - DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ | ||
245 | - DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ | ||
246 | - DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) | ||
247 | - | ||
248 | -DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) | ||
249 | -DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) | ||
250 | -DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) | ||
251 | -DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw) | ||
252 | -DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw) | ||
253 | - | ||
254 | -DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) | ||
255 | -DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) | ||
256 | -DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw) | ||
257 | - | ||
258 | -DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) | ||
259 | -DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl) | ||
260 | -DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl) | ||
261 | - | ||
262 | -DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) | ||
263 | -DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) | ||
264 | - | ||
265 | -DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) | ||
266 | -DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) | ||
267 | - | ||
268 | -#undef DO_LD_TLB | ||
269 | -#undef DO_ST_TLB | ||
270 | -#undef DO_LD_HOST | ||
271 | -#undef DO_LD_PRIM_1 | ||
272 | -#undef DO_ST_PRIM_1 | ||
273 | -#undef DO_LD_PRIM_2 | ||
274 | -#undef DO_ST_PRIM_2 | ||
275 | - | ||
276 | /* | ||
277 | * Skip through a sequence of inactive elements in the guarding predicate @vg, | ||
278 | * beginning at @reg_off bounded by @reg_max. Return the offset of the active | ||
279 | -- | 94 | -- |
280 | 2.25.1 | 95 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects |
---|---|---|---|
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
2 | 5 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal |
4 | Message-id: 20220604040607.269301-11-richard.henderson@linaro.org | 7 | only because of FPCR.FIZ then we should *not* set the cumulative |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | exception bit FPSR.IDC. |
9 | |||
10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only | ||
11 | applies when FPCR.AH is 0. | ||
12 | |||
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal_flushed status flag into FPSR reads when it is the | |
16 | result only of FPCR.FIZ. | |
17 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 20 | --- |
8 | target/arm/ptw.h | 2 -- | 21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- |
9 | target/arm/helper.c | 19 ------------------- | 22 | 1 file changed, 50 insertions(+), 10 deletions(-) |
10 | target/arm/ptw.c | 21 +++++++++++++++++++++ | ||
11 | 3 files changed, 21 insertions(+), 21 deletions(-) | ||
12 | 23 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 26 | --- a/target/arm/vfp_helper.c |
16 | +++ b/target/arm/ptw.h | 27 | +++ b/target/arm/vfp_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) |
18 | bool m_is_ppb_region(CPUARMState *env, uint32_t address); | 29 | |
19 | bool m_is_system_region(CPUARMState *env, uint32_t address); | 30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
20 | 31 | { | |
21 | -bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user); | 32 | - uint32_t i = 0; |
22 | - | 33 | + uint32_t a32_flags = 0, a64_flags = 0; |
23 | bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 34 | |
24 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); |
25 | bool s1_is_el0, | 36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); |
26 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); |
27 | index XXXXXXX..XXXXXXX 100644 | 38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
28 | --- a/target/arm/helper.c | 39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); |
29 | +++ b/target/arm/helper.c | 40 | /* FZ16 does not generate an input denormal exception. */ |
30 | @@ -XXX,XX +XXX,XX @@ do_fault: | 41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
31 | return true; | 42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
32 | } | 65 | } |
33 | 66 | ||
34 | -bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user) | 67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
35 | -{ | 68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
36 | - /* Return true if we should use the default memory map as a | 69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); |
37 | - * "background" region if there are no hits against any MPU regions. | ||
38 | - */ | ||
39 | - CPUARMState *env = &cpu->env; | ||
40 | - | ||
41 | - if (is_user) { | ||
42 | - return false; | ||
43 | - } | ||
44 | - | ||
45 | - if (arm_feature(env, ARM_FEATURE_M)) { | ||
46 | - return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] | ||
47 | - & R_V7M_MPU_CTRL_PRIVDEFENA_MASK; | ||
48 | - } else { | ||
49 | - return regime_sctlr(env, mmu_idx) & SCTLR_BR; | ||
50 | - } | ||
51 | -} | ||
52 | - | ||
53 | bool m_is_ppb_region(CPUARMState *env, uint32_t address) | ||
54 | { | ||
55 | /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ | ||
56 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/ptw.c | ||
59 | +++ b/target/arm/ptw.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
61 | } | ||
62 | } | 70 | } |
63 | 71 | ||
64 | +static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, | 72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
65 | + bool is_user) | ||
66 | +{ | 73 | +{ |
67 | + /* | 74 | + /* |
68 | + * Return true if we should use the default memory map as a | 75 | + * Synchronize any pending exception-flag information in the |
69 | + * "background" region if there are no hits against any MPU regions. | 76 | + * float_status values into env->vfp.fpsr, and then clear out |
77 | + * the float_status data. | ||
70 | + */ | 78 | + */ |
71 | + CPUARMState *env = &cpu->env; | 79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); |
72 | + | 80 | + vfp_clear_float_status_exc_flags(env); |
73 | + if (is_user) { | ||
74 | + return false; | ||
75 | + } | ||
76 | + | ||
77 | + if (arm_feature(env, ARM_FEATURE_M)) { | ||
78 | + return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] | ||
79 | + & R_V7M_MPU_CTRL_PRIVDEFENA_MASK; | ||
80 | + } else { | ||
81 | + return regime_sctlr(env, mmu_idx) & SCTLR_BR; | ||
82 | + } | ||
83 | +} | 81 | +} |
84 | + | 82 | + |
85 | static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
86 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 84 | { |
87 | hwaddr *phys_ptr, int *prot, | 85 | uint64_t changed = env->vfp.fpcr; |
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
87 | if (changed & FPCR_FZ) { | ||
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
104 | } | ||
105 | if (changed & FPCR_DN) { | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
119 | } | ||
120 | |||
121 | #else | ||
88 | -- | 122 | -- |
89 | 2.25.1 | 123 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | When FPCR.AH is set, various behaviours of AArch64 floating point |
---|---|---|---|
2 | operations which are controlled by softfloat config settings change: | ||
3 | * tininess and ftz detection before/after rounding | ||
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
2 | 7 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | When the guest changes the value of the AH bit, switch these config |
4 | Message-id: 20220604040607.269301-20-richard.henderson@linaro.org | 9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | fields. |
11 | |||
12 | This requires us to make the arm_set_default_fp_behaviours() function | ||
13 | global, since we now need to call it from cpu.c and vfp_helper.c; we | ||
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
16 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 19 | --- |
8 | target/arm/ptw.h | 3 -- | 20 | target/arm/internals.h | 4 +++ |
9 | target/arm/helper.c | 128 -------------------------------------------- | 21 | target/arm/cpu.c | 23 ---------------- |
10 | target/arm/ptw.c | 128 ++++++++++++++++++++++++++++++++++++++++++++ | 22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- |
11 | 3 files changed, 128 insertions(+), 131 deletions(-) | 23 | 3 files changed, 61 insertions(+), 24 deletions(-) |
12 | 24 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 25 | diff --git a/target/arm/internals.h b/target/arm/internals.h |
14 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 27 | --- a/target/arm/internals.h |
16 | +++ b/target/arm/ptw.h | 28 | +++ b/target/arm/internals.h |
17 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | 29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); |
18 | ARMMMUIdx mmu_idx); | 30 | * all EL1" scope; this covers stage 1 and stage 2. |
19 | bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | 31 | */ |
20 | int inputsize, int stride, int outputsize); | 32 | int alle1_tlbmask(CPUARMState *env); |
21 | -int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0); | 33 | + |
22 | -int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 34 | +/* Set the float_status behaviour to match the Arm defaults */ |
23 | - int ap, int ns, int xn, int pxn); | 35 | +void arm_set_default_fp_behaviours(float_status *s); |
24 | 36 | + | |
25 | #endif /* !CONFIG_USER_ONLY */ | 37 | #endif |
26 | #endif /* TARGET_ARM_PTW_H */ | 38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
27 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/helper.c | 40 | --- a/target/arm/cpu.c |
30 | +++ b/target/arm/helper.c | 41 | +++ b/target/arm/cpu.c |
31 | @@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | 42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, |
32 | } | 43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); |
33 | } | 44 | } |
34 | 45 | ||
35 | -/* Translate S2 section/page access permissions to protection flags | 46 | -/* |
36 | - * | 47 | - * Set the float_status behaviour to match the Arm defaults: |
37 | - * @env: CPUARMState | 48 | - * * tininess-before-rounding |
38 | - * @s2ap: The 2-bit stage2 access permissions (S2AP) | 49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then |
39 | - * @xn: XN (execute-never) bits | 50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) |
40 | - * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 | 51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then |
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
41 | - */ | 58 | - */ |
42 | -int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) | 59 | -static void arm_set_default_fp_behaviours(float_status *s) |
43 | -{ | 60 | -{ |
44 | - int prot = 0; | 61 | - set_float_detect_tininess(float_tininess_before_rounding, s); |
45 | - | 62 | - set_float_ftz_detection(float_ftz_before_rounding, s); |
46 | - if (s2ap & 1) { | 63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); |
47 | - prot |= PAGE_READ; | 64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); |
48 | - } | 65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); |
49 | - if (s2ap & 2) { | 66 | - set_float_default_nan_pattern(0b01000000, s); |
50 | - prot |= PAGE_WRITE; | ||
51 | - } | ||
52 | - | ||
53 | - if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { | ||
54 | - switch (xn) { | ||
55 | - case 0: | ||
56 | - prot |= PAGE_EXEC; | ||
57 | - break; | ||
58 | - case 1: | ||
59 | - if (s1_is_el0) { | ||
60 | - prot |= PAGE_EXEC; | ||
61 | - } | ||
62 | - break; | ||
63 | - case 2: | ||
64 | - break; | ||
65 | - case 3: | ||
66 | - if (!s1_is_el0) { | ||
67 | - prot |= PAGE_EXEC; | ||
68 | - } | ||
69 | - break; | ||
70 | - default: | ||
71 | - g_assert_not_reached(); | ||
72 | - } | ||
73 | - } else { | ||
74 | - if (!extract32(xn, 1, 1)) { | ||
75 | - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { | ||
76 | - prot |= PAGE_EXEC; | ||
77 | - } | ||
78 | - } | ||
79 | - } | ||
80 | - return prot; | ||
81 | -} | 67 | -} |
82 | - | 68 | - |
83 | -/* Translate section/page access permissions to protection flags | 69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) |
84 | - * | 70 | { |
85 | - * @env: CPUARMState | 71 | /* Reset a single ARMCPRegInfo register */ |
86 | - * @mmu_idx: MMU index indicating required translation regime | 72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
87 | - * @is_aa64: TRUE if AArch64 | ||
88 | - * @ap: The 2-bit simple AP (AP[2:1]) | ||
89 | - * @ns: NS (non-secure) bit | ||
90 | - * @xn: XN (execute-never) bit | ||
91 | - * @pxn: PXN (privileged execute-never) bit | ||
92 | - */ | ||
93 | -int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
94 | - int ap, int ns, int xn, int pxn) | ||
95 | -{ | ||
96 | - bool is_user = regime_is_user(env, mmu_idx); | ||
97 | - int prot_rw, user_rw; | ||
98 | - bool have_wxn; | ||
99 | - int wxn = 0; | ||
100 | - | ||
101 | - assert(mmu_idx != ARMMMUIdx_Stage2); | ||
102 | - assert(mmu_idx != ARMMMUIdx_Stage2_S); | ||
103 | - | ||
104 | - user_rw = simple_ap_to_rw_prot_is_user(ap, true); | ||
105 | - if (is_user) { | ||
106 | - prot_rw = user_rw; | ||
107 | - } else { | ||
108 | - if (user_rw && regime_is_pan(env, mmu_idx)) { | ||
109 | - /* PAN forbids data accesses but doesn't affect insn fetch */ | ||
110 | - prot_rw = 0; | ||
111 | - } else { | ||
112 | - prot_rw = simple_ap_to_rw_prot_is_user(ap, false); | ||
113 | - } | ||
114 | - } | ||
115 | - | ||
116 | - if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) { | ||
117 | - return prot_rw; | ||
118 | - } | ||
119 | - | ||
120 | - /* TODO have_wxn should be replaced with | ||
121 | - * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2) | ||
122 | - * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE | ||
123 | - * compatible processors have EL2, which is required for [U]WXN. | ||
124 | - */ | ||
125 | - have_wxn = arm_feature(env, ARM_FEATURE_LPAE); | ||
126 | - | ||
127 | - if (have_wxn) { | ||
128 | - wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN; | ||
129 | - } | ||
130 | - | ||
131 | - if (is_aa64) { | ||
132 | - if (regime_has_2_ranges(mmu_idx) && !is_user) { | ||
133 | - xn = pxn || (user_rw & PAGE_WRITE); | ||
134 | - } | ||
135 | - } else if (arm_feature(env, ARM_FEATURE_V7)) { | ||
136 | - switch (regime_el(env, mmu_idx)) { | ||
137 | - case 1: | ||
138 | - case 3: | ||
139 | - if (is_user) { | ||
140 | - xn = xn || !(user_rw & PAGE_READ); | ||
141 | - } else { | ||
142 | - int uwxn = 0; | ||
143 | - if (have_wxn) { | ||
144 | - uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN; | ||
145 | - } | ||
146 | - xn = xn || !(prot_rw & PAGE_READ) || pxn || | ||
147 | - (uwxn && (user_rw & PAGE_WRITE)); | ||
148 | - } | ||
149 | - break; | ||
150 | - case 2: | ||
151 | - break; | ||
152 | - } | ||
153 | - } else { | ||
154 | - xn = wxn = 0; | ||
155 | - } | ||
156 | - | ||
157 | - if (xn || (wxn && (prot_rw & PAGE_WRITE))) { | ||
158 | - return prot_rw; | ||
159 | - } | ||
160 | - return prot_rw | PAGE_EXEC; | ||
161 | -} | ||
162 | - | ||
163 | /* | ||
164 | * check_s2_mmu_setup | ||
165 | * @cpu: ARMCPU | ||
166 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
167 | index XXXXXXX..XXXXXXX 100644 | 73 | index XXXXXXX..XXXXXXX 100644 |
168 | --- a/target/arm/ptw.c | 74 | --- a/target/arm/vfp_helper.c |
169 | +++ b/target/arm/ptw.c | 75 | +++ b/target/arm/vfp_helper.c |
170 | @@ -XXX,XX +XXX,XX @@ do_fault: | 76 | @@ -XXX,XX +XXX,XX @@ |
171 | return true; | 77 | #include "exec/helper-proto.h" |
172 | } | 78 | #include "internals.h" |
79 | #include "cpu-features.h" | ||
80 | +#include "fpu/softfloat.h" | ||
81 | #ifdef CONFIG_TCG | ||
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
173 | 89 | ||
174 | +/* | 90 | +/* |
175 | + * Translate S2 section/page access permissions to protection flags | 91 | + * Set the float_status behaviour to match the Arm defaults: |
176 | + * @env: CPUARMState | 92 | + * * tininess-before-rounding |
177 | + * @s2ap: The 2-bit stage2 access permissions (S2AP) | 93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then |
178 | + * @xn: XN (execute-never) bits | 94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) |
179 | + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 | 95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then |
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b.) | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
180 | + */ | 102 | + */ |
181 | +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) | 103 | +void arm_set_default_fp_behaviours(float_status *s) |
182 | +{ | 104 | +{ |
183 | + int prot = 0; | 105 | + set_float_detect_tininess(float_tininess_before_rounding, s); |
184 | + | 106 | + set_float_ftz_detection(float_ftz_before_rounding, s); |
185 | + if (s2ap & 1) { | 107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); |
186 | + prot |= PAGE_READ; | 108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); |
187 | + } | 109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); |
188 | + if (s2ap & 2) { | 110 | + set_float_default_nan_pattern(0b01000000, s); |
189 | + prot |= PAGE_WRITE; | ||
190 | + } | ||
191 | + | ||
192 | + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { | ||
193 | + switch (xn) { | ||
194 | + case 0: | ||
195 | + prot |= PAGE_EXEC; | ||
196 | + break; | ||
197 | + case 1: | ||
198 | + if (s1_is_el0) { | ||
199 | + prot |= PAGE_EXEC; | ||
200 | + } | ||
201 | + break; | ||
202 | + case 2: | ||
203 | + break; | ||
204 | + case 3: | ||
205 | + if (!s1_is_el0) { | ||
206 | + prot |= PAGE_EXEC; | ||
207 | + } | ||
208 | + break; | ||
209 | + default: | ||
210 | + g_assert_not_reached(); | ||
211 | + } | ||
212 | + } else { | ||
213 | + if (!extract32(xn, 1, 1)) { | ||
214 | + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { | ||
215 | + prot |= PAGE_EXEC; | ||
216 | + } | ||
217 | + } | ||
218 | + } | ||
219 | + return prot; | ||
220 | +} | 111 | +} |
221 | + | 112 | + |
222 | +/* | 113 | +/* |
223 | + * Translate section/page access permissions to protection flags | 114 | + * Set the float_status behaviour to match the FEAT_AFP |
224 | + * @env: CPUARMState | 115 | + * FPCR.AH=1 requirements: |
225 | + * @mmu_idx: MMU index indicating required translation regime | 116 | + * * tininess-after-rounding |
226 | + * @is_aa64: TRUE if AArch64 | 117 | + * * 2-input NaN propagation prefers the first NaN |
227 | + * @ap: The 2-bit simple AP (AP[2:1]) | 118 | + * * 3-input NaN propagation prefers a over b over c |
228 | + * @ns: NS (non-secure) bit | 119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't |
229 | + * @xn: XN (execute-never) bit | 120 | + * set Invalid for a QNaN |
230 | + * @pxn: PXN (privileged execute-never) bit | 121 | + * * default NaN has sign bit set, msb frac bit set |
231 | + */ | 122 | + */ |
232 | +static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 123 | +static void arm_set_ah_fp_behaviours(float_status *s) |
233 | + int ap, int ns, int xn, int pxn) | ||
234 | +{ | 124 | +{ |
235 | + bool is_user = regime_is_user(env, mmu_idx); | 125 | + set_float_detect_tininess(float_tininess_after_rounding, s); |
236 | + int prot_rw, user_rw; | 126 | + set_float_ftz_detection(float_ftz_after_rounding, s); |
237 | + bool have_wxn; | 127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); |
238 | + int wxn = 0; | 128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); |
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
239 | + | 133 | + |
240 | + assert(mmu_idx != ARMMMUIdx_Stage2); | 134 | #ifdef CONFIG_TCG |
241 | + assert(mmu_idx != ARMMMUIdx_Stage2_S); | 135 | |
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
140 | } | ||
141 | + if (changed & FPCR_AH) { | ||
142 | + bool ah_enabled = val & FPCR_AH; | ||
242 | + | 143 | + |
243 | + user_rw = simple_ap_to_rw_prot_is_user(ap, true); | 144 | + if (ah_enabled) { |
244 | + if (is_user) { | 145 | + /* Change behaviours for A64 FP operations */ |
245 | + prot_rw = user_rw; | 146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); |
246 | + } else { | 147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); |
247 | + if (user_rw && regime_is_pan(env, mmu_idx)) { | ||
248 | + /* PAN forbids data accesses but doesn't affect insn fetch */ | ||
249 | + prot_rw = 0; | ||
250 | + } else { | 148 | + } else { |
251 | + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); | 149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
252 | + } | 151 | + } |
253 | + } | 152 | + } |
254 | + | 153 | /* |
255 | + if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) { | 154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), |
256 | + return prot_rw; | 155 | * we must sync the float_status flags into vfp.fpsr now (under the |
257 | + } | ||
258 | + | ||
259 | + /* TODO have_wxn should be replaced with | ||
260 | + * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2) | ||
261 | + * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE | ||
262 | + * compatible processors have EL2, which is required for [U]WXN. | ||
263 | + */ | ||
264 | + have_wxn = arm_feature(env, ARM_FEATURE_LPAE); | ||
265 | + | ||
266 | + if (have_wxn) { | ||
267 | + wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN; | ||
268 | + } | ||
269 | + | ||
270 | + if (is_aa64) { | ||
271 | + if (regime_has_2_ranges(mmu_idx) && !is_user) { | ||
272 | + xn = pxn || (user_rw & PAGE_WRITE); | ||
273 | + } | ||
274 | + } else if (arm_feature(env, ARM_FEATURE_V7)) { | ||
275 | + switch (regime_el(env, mmu_idx)) { | ||
276 | + case 1: | ||
277 | + case 3: | ||
278 | + if (is_user) { | ||
279 | + xn = xn || !(user_rw & PAGE_READ); | ||
280 | + } else { | ||
281 | + int uwxn = 0; | ||
282 | + if (have_wxn) { | ||
283 | + uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN; | ||
284 | + } | ||
285 | + xn = xn || !(prot_rw & PAGE_READ) || pxn || | ||
286 | + (uwxn && (user_rw & PAGE_WRITE)); | ||
287 | + } | ||
288 | + break; | ||
289 | + case 2: | ||
290 | + break; | ||
291 | + } | ||
292 | + } else { | ||
293 | + xn = wxn = 0; | ||
294 | + } | ||
295 | + | ||
296 | + if (xn || (wxn && (prot_rw & PAGE_WRITE))) { | ||
297 | + return prot_rw; | ||
298 | + } | ||
299 | + return prot_rw | PAGE_EXEC; | ||
300 | +} | ||
301 | + | ||
302 | /** | ||
303 | * get_phys_addr_lpae: perform one stage of page table walk, LPAE format | ||
304 | * | ||
305 | -- | 156 | -- |
306 | 2.25.1 | 157 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
1 | 5 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | We are going to need to generate different code in some cases when |
---|---|---|---|
2 | FPCR.AH is 1. For example: | ||
3 | * Floating point neg and abs must not flip the sign bit of NaNs | ||
4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various | ||
5 | BFCVT and BFM bfloat16 ops) need to use a different float_status | ||
6 | to the usual one | ||
2 | 7 | ||
3 | The bitmap need only hold 15 bits; bitmap is over-complicated. | 8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at |
4 | We can simplify operations quite a bit with plain logical ops. | 9 | translate time. |
5 | 10 | ||
6 | The introduction of SVE_VQ_POW2_MAP eliminates the need for | 11 | Because we now have a bit in FPCR that affects codegen, we can't mark |
7 | looping in order to search for powers of two. Simply perform | 12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes |
8 | the logical ops and use count leading or trailing zeros as | 13 | to it will now end the TB and trigger a regeneration of hflags. |
9 | required to find the result. | ||
10 | 14 | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-id: 20220607203306.657998-12-richard.henderson@linaro.org | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | 17 | --- |
16 | target/arm/cpu.h | 6 +-- | 18 | target/arm/cpu.h | 1 + |
17 | target/arm/internals.h | 5 ++ | 19 | target/arm/tcg/translate.h | 2 ++ |
18 | target/arm/kvm_arm.h | 7 ++- | 20 | target/arm/helper.c | 2 +- |
19 | target/arm/cpu64.c | 117 ++++++++++++++++++++--------------------- | 21 | target/arm/tcg/hflags.c | 4 ++++ |
20 | target/arm/helper.c | 9 +--- | 22 | target/arm/tcg/translate-a64.c | 1 + |
21 | target/arm/kvm64.c | 36 +++---------- | 23 | 5 files changed, 9 insertions(+), 1 deletion(-) |
22 | 6 files changed, 75 insertions(+), 105 deletions(-) | ||
23 | 24 | ||
24 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
25 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/target/arm/cpu.h | 27 | --- a/target/arm/cpu.h |
27 | +++ b/target/arm/cpu.h | 28 | +++ b/target/arm/cpu.h |
28 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | 29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) |
29 | * Bits set in sve_vq_supported represent valid vector lengths for | 30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) |
30 | * the CPU type. | 31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ |
31 | */ | 32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) |
32 | - DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ); | 33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ |
33 | - DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ); | 34 | |
34 | - DECLARE_BITMAP(sve_vq_supported, ARM_MAX_VQ); | 35 | /* |
35 | + uint32_t sve_vq_map; | 36 | * Helpers for using the above. Note that only the A64 accessors use |
36 | + uint32_t sve_vq_init; | 37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
37 | + uint32_t sve_vq_supported; | ||
38 | |||
39 | /* Generic timer counter frequency, in Hz */ | ||
40 | uint64_t gt_cntfrq_hz; | ||
41 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
43 | --- a/target/arm/internals.h | 39 | --- a/target/arm/tcg/translate.h |
44 | +++ b/target/arm/internals.h | 40 | +++ b/target/arm/tcg/translate.h |
45 | @@ -XXX,XX +XXX,XX @@ bool el_is_in_host(CPUARMState *env, int el); | 41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { |
46 | 42 | bool nv2_mem_e20; | |
47 | void aa32_max_features(ARMCPU *cpu); | 43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ |
48 | 44 | bool nv2_mem_be; | |
49 | +/* Powers of 2 for sve_vq_map et al. */ | 45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ |
50 | +#define SVE_VQ_POW2_MAP \ | 46 | + bool fpcr_ah; |
51 | + ((1 << (1 - 1)) | (1 << (2 - 1)) | \ | ||
52 | + (1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1))) | ||
53 | + | ||
54 | #endif | ||
55 | diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/kvm_arm.h | ||
58 | +++ b/target/arm/kvm_arm.h | ||
59 | @@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); | ||
60 | /** | ||
61 | * kvm_arm_sve_get_vls: | ||
62 | * @cs: CPUState | ||
63 | - * @map: bitmap to fill in | ||
64 | * | ||
65 | * Get all the SVE vector lengths supported by the KVM host, setting | ||
66 | * the bits corresponding to their length in quadwords minus one | ||
67 | - * (vq - 1) in @map up to ARM_MAX_VQ. | ||
68 | + * (vq - 1) up to ARM_MAX_VQ. Return the resulting map. | ||
69 | */ | ||
70 | -void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); | ||
71 | +uint32_t kvm_arm_sve_get_vls(CPUState *cs); | ||
72 | |||
73 | /** | ||
74 | * kvm_arm_set_cpu_features_from_host: | ||
75 | @@ -XXX,XX +XXX,XX @@ static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) | ||
76 | g_assert_not_reached(); | ||
77 | } | ||
78 | |||
79 | -static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) | ||
80 | +static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs) | ||
81 | { | ||
82 | g_assert_not_reached(); | ||
83 | } | ||
84 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/target/arm/cpu64.c | ||
87 | +++ b/target/arm/cpu64.c | ||
88 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
89 | * any of the above. Finally, if SVE is not disabled, then at least one | ||
90 | * vector length must be enabled. | ||
91 | */ | ||
92 | - DECLARE_BITMAP(tmp, ARM_MAX_VQ); | ||
93 | - uint32_t vq, max_vq = 0; | ||
94 | + uint32_t vq_map = cpu->sve_vq_map; | ||
95 | + uint32_t vq_init = cpu->sve_vq_init; | ||
96 | + uint32_t vq_supported; | ||
97 | + uint32_t vq_mask = 0; | ||
98 | + uint32_t tmp, vq, max_vq = 0; | ||
99 | |||
100 | /* | 47 | /* |
101 | * CPU models specify a set of supported vector lengths which are | 48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. |
102 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | 49 | * < 0, set by the current instruction. |
103 | * in the supported bitmap results in an error. When KVM is enabled we | ||
104 | * fetch the supported bitmap from the host. | ||
105 | */ | ||
106 | - if (kvm_enabled() && kvm_arm_sve_supported()) { | ||
107 | - kvm_arm_sve_get_vls(CPU(cpu), cpu->sve_vq_supported); | ||
108 | - } else if (kvm_enabled()) { | ||
109 | - assert(!cpu_isar_feature(aa64_sve, cpu)); | ||
110 | + if (kvm_enabled()) { | ||
111 | + if (kvm_arm_sve_supported()) { | ||
112 | + cpu->sve_vq_supported = kvm_arm_sve_get_vls(CPU(cpu)); | ||
113 | + vq_supported = cpu->sve_vq_supported; | ||
114 | + } else { | ||
115 | + assert(!cpu_isar_feature(aa64_sve, cpu)); | ||
116 | + vq_supported = 0; | ||
117 | + } | ||
118 | + } else { | ||
119 | + vq_supported = cpu->sve_vq_supported; | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
124 | * From the properties, sve_vq_map<N> implies sve_vq_init<N>. | ||
125 | * Check first for any sve<N> enabled. | ||
126 | */ | ||
127 | - if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) { | ||
128 | - max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1; | ||
129 | + if (vq_map != 0) { | ||
130 | + max_vq = 32 - clz32(vq_map); | ||
131 | + vq_mask = MAKE_64BIT_MASK(0, max_vq); | ||
132 | |||
133 | if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) { | ||
134 | error_setg(errp, "cannot enable sve%d", max_vq * 128); | ||
135 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
136 | * For KVM we have to automatically enable all supported unitialized | ||
137 | * lengths, even when the smaller lengths are not all powers-of-two. | ||
138 | */ | ||
139 | - bitmap_andnot(tmp, cpu->sve_vq_supported, cpu->sve_vq_init, max_vq); | ||
140 | - bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq); | ||
141 | + vq_map |= vq_supported & ~vq_init & vq_mask; | ||
142 | } else { | ||
143 | /* Propagate enabled bits down through required powers-of-two. */ | ||
144 | - for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { | ||
145 | - if (!test_bit(vq - 1, cpu->sve_vq_init)) { | ||
146 | - set_bit(vq - 1, cpu->sve_vq_map); | ||
147 | - } | ||
148 | - } | ||
149 | + vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask; | ||
150 | } | ||
151 | } else if (cpu->sve_max_vq == 0) { | ||
152 | /* | ||
153 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
154 | |||
155 | if (kvm_enabled()) { | ||
156 | /* Disabling a supported length disables all larger lengths. */ | ||
157 | - for (vq = 1; vq <= ARM_MAX_VQ; ++vq) { | ||
158 | - if (test_bit(vq - 1, cpu->sve_vq_init) && | ||
159 | - test_bit(vq - 1, cpu->sve_vq_supported)) { | ||
160 | - break; | ||
161 | - } | ||
162 | - } | ||
163 | + tmp = vq_init & vq_supported; | ||
164 | } else { | ||
165 | /* Disabling a power-of-two disables all larger lengths. */ | ||
166 | - for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) { | ||
167 | - if (test_bit(vq - 1, cpu->sve_vq_init)) { | ||
168 | - break; | ||
169 | - } | ||
170 | - } | ||
171 | + tmp = vq_init & SVE_VQ_POW2_MAP; | ||
172 | } | ||
173 | + vq = ctz32(tmp) + 1; | ||
174 | |||
175 | max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ; | ||
176 | - bitmap_andnot(cpu->sve_vq_map, cpu->sve_vq_supported, | ||
177 | - cpu->sve_vq_init, max_vq); | ||
178 | - if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) { | ||
179 | + vq_mask = MAKE_64BIT_MASK(0, max_vq); | ||
180 | + vq_map = vq_supported & ~vq_init & vq_mask; | ||
181 | + | ||
182 | + if (max_vq == 0 || vq_map == 0) { | ||
183 | error_setg(errp, "cannot disable sve%d", vq * 128); | ||
184 | error_append_hint(errp, "Disabling sve%d results in all " | ||
185 | "vector lengths being disabled.\n", | ||
186 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
187 | return; | ||
188 | } | ||
189 | |||
190 | - max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1; | ||
191 | + max_vq = 32 - clz32(vq_map); | ||
192 | + vq_mask = MAKE_64BIT_MASK(0, max_vq); | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
197 | */ | ||
198 | if (cpu->sve_max_vq != 0) { | ||
199 | max_vq = cpu->sve_max_vq; | ||
200 | + vq_mask = MAKE_64BIT_MASK(0, max_vq); | ||
201 | |||
202 | - if (!test_bit(max_vq - 1, cpu->sve_vq_map) && | ||
203 | - test_bit(max_vq - 1, cpu->sve_vq_init)) { | ||
204 | + if (vq_init & ~vq_map & (1 << (max_vq - 1))) { | ||
205 | error_setg(errp, "cannot disable sve%d", max_vq * 128); | ||
206 | error_append_hint(errp, "The maximum vector length must be " | ||
207 | "enabled, sve-max-vq=%d (%d bits)\n", | ||
208 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
209 | } | ||
210 | |||
211 | /* Set all bits not explicitly set within sve-max-vq. */ | ||
212 | - bitmap_complement(tmp, cpu->sve_vq_init, max_vq); | ||
213 | - bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq); | ||
214 | + vq_map |= ~vq_init & vq_mask; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
219 | * are clear, just in case anybody looks. | ||
220 | */ | ||
221 | assert(max_vq != 0); | ||
222 | - bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq); | ||
223 | + assert(vq_mask != 0); | ||
224 | + vq_map &= vq_mask; | ||
225 | |||
226 | /* Ensure the set of lengths matches what is supported. */ | ||
227 | - bitmap_xor(tmp, cpu->sve_vq_map, cpu->sve_vq_supported, max_vq); | ||
228 | - if (!bitmap_empty(tmp, max_vq)) { | ||
229 | - vq = find_last_bit(tmp, max_vq) + 1; | ||
230 | - if (test_bit(vq - 1, cpu->sve_vq_map)) { | ||
231 | + tmp = vq_map ^ (vq_supported & vq_mask); | ||
232 | + if (tmp) { | ||
233 | + vq = 32 - clz32(tmp); | ||
234 | + if (vq_map & (1 << (vq - 1))) { | ||
235 | if (cpu->sve_max_vq) { | ||
236 | error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq); | ||
237 | error_append_hint(errp, "This CPU does not support " | ||
238 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
239 | return; | ||
240 | } else { | ||
241 | /* Ensure all required powers-of-two are enabled. */ | ||
242 | - for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { | ||
243 | - if (!test_bit(vq - 1, cpu->sve_vq_map)) { | ||
244 | - error_setg(errp, "cannot disable sve%d", vq * 128); | ||
245 | - error_append_hint(errp, "sve%d is required as it " | ||
246 | - "is a power-of-two length smaller " | ||
247 | - "than the maximum, sve%d\n", | ||
248 | - vq * 128, max_vq * 128); | ||
249 | - return; | ||
250 | - } | ||
251 | + tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map; | ||
252 | + if (tmp) { | ||
253 | + vq = 32 - clz32(tmp); | ||
254 | + error_setg(errp, "cannot disable sve%d", vq * 128); | ||
255 | + error_append_hint(errp, "sve%d is required as it " | ||
256 | + "is a power-of-two length smaller " | ||
257 | + "than the maximum, sve%d\n", | ||
258 | + vq * 128, max_vq * 128); | ||
259 | + return; | ||
260 | } | ||
261 | } | ||
262 | } | ||
263 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) | ||
264 | |||
265 | /* From now on sve_max_vq is the actual maximum supported length. */ | ||
266 | cpu->sve_max_vq = max_vq; | ||
267 | + cpu->sve_vq_map = vq_map; | ||
268 | } | ||
269 | |||
270 | static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, | ||
271 | @@ -XXX,XX +XXX,XX @@ static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name, | ||
272 | if (!cpu_isar_feature(aa64_sve, cpu)) { | ||
273 | value = false; | ||
274 | } else { | ||
275 | - value = test_bit(vq - 1, cpu->sve_vq_map); | ||
276 | + value = extract32(cpu->sve_vq_map, vq - 1, 1); | ||
277 | } | ||
278 | visit_type_bool(v, name, &value, errp); | ||
279 | } | ||
280 | @@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name, | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | - if (value) { | ||
285 | - set_bit(vq - 1, cpu->sve_vq_map); | ||
286 | - } else { | ||
287 | - clear_bit(vq - 1, cpu->sve_vq_map); | ||
288 | - } | ||
289 | - set_bit(vq - 1, cpu->sve_vq_init); | ||
290 | + cpu->sve_vq_map = deposit32(cpu->sve_vq_map, vq - 1, 1, value); | ||
291 | + cpu->sve_vq_init |= 1 << (vq - 1); | ||
292 | } | ||
293 | |||
294 | static bool cpu_arm_get_sve(Object *obj, Error **errp) | ||
295 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | ||
296 | cpu->dcz_blocksize = 7; /* 512 bytes */ | ||
297 | #endif | ||
298 | |||
299 | - bitmap_fill(cpu->sve_vq_supported, ARM_MAX_VQ); | ||
300 | + cpu->sve_vq_supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ); | ||
301 | |||
302 | aarch64_add_pauth_properties(obj); | ||
303 | aarch64_add_sve_properties(obj); | ||
304 | @@ -XXX,XX +XXX,XX @@ static void aarch64_a64fx_initfn(Object *obj) | ||
305 | cpu->gic_vprebits = 5; | ||
306 | cpu->gic_pribits = 5; | ||
307 | |||
308 | - /* Suppport of A64FX's vector length are 128,256 and 512bit only */ | ||
309 | + /* The A64FX supports only 128, 256 and 512 bit vector lengths */ | ||
310 | aarch64_add_sve_properties(obj); | ||
311 | - bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ); | ||
312 | - set_bit(0, cpu->sve_vq_supported); /* 128bit */ | ||
313 | - set_bit(1, cpu->sve_vq_supported); /* 256bit */ | ||
314 | - set_bit(3, cpu->sve_vq_supported); /* 512bit */ | ||
315 | + cpu->sve_vq_supported = (1 << 0) /* 128bit */ | ||
316 | + | (1 << 1) /* 256bit */ | ||
317 | + | (1 << 3); /* 512bit */ | ||
318 | |||
319 | cpu->isar.reset_pmcr_el0 = 0x46014040; | ||
320 | |||
321 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 50 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
322 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
323 | --- a/target/arm/helper.c | 52 | --- a/target/arm/helper.c |
324 | +++ b/target/arm/helper.c | 53 | +++ b/target/arm/helper.c |
325 | @@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) | 54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { |
326 | { | 55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, |
327 | ARMCPU *cpu = env_archcpu(env); | 56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, |
328 | uint32_t len = cpu->sve_max_vq - 1; | 57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, |
329 | - uint32_t end_len; | 58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, |
330 | 59 | + .access = PL0_RW, .type = ARM_CP_FPU, | |
331 | if (el <= 1 && !el_is_in_host(env, el)) { | 60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, |
332 | len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]); | 61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, |
333 | @@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) | 62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, |
334 | len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]); | 63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c |
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/tcg/hflags.c | ||
66 | +++ b/target/arm/tcg/hflags.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); | ||
335 | } | 69 | } |
336 | 70 | ||
337 | - end_len = len; | 71 | + if (env->vfp.fpcr & FPCR_AH) { |
338 | - if (!test_bit(len, cpu->sve_vq_map)) { | 72 | + DP_TBFLAG_A64(flags, AH, 1); |
339 | - end_len = find_last_bit(cpu->sve_vq_map, len); | 73 | + } |
340 | - assert(end_len < len); | 74 | + |
341 | - } | 75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); |
342 | - return end_len; | ||
343 | + len = 31 - clz32(cpu->sve_vq_map & MAKE_64BIT_MASK(0, len + 1)); | ||
344 | + return len; | ||
345 | } | 76 | } |
346 | 77 | ||
347 | static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, | 78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
348 | diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c | ||
349 | index XXXXXXX..XXXXXXX 100644 | 79 | index XXXXXXX..XXXXXXX 100644 |
350 | --- a/target/arm/kvm64.c | 80 | --- a/target/arm/tcg/translate-a64.c |
351 | +++ b/target/arm/kvm64.c | 81 | +++ b/target/arm/tcg/translate-a64.c |
352 | @@ -XXX,XX +XXX,XX @@ bool kvm_arm_steal_time_supported(void) | 82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, |
353 | 83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); | |
354 | QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); | 84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); |
355 | 85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | |
356 | -void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) | 86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); |
357 | +uint32_t kvm_arm_sve_get_vls(CPUState *cs) | 87 | dc->vec_len = 0; |
358 | { | 88 | dc->vec_stride = 0; |
359 | /* Only call this function if kvm_arm_sve_supported() returns true. */ | 89 | dc->cp_regs = arm_cpu->cp_regs; |
360 | static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; | ||
361 | static bool probed; | ||
362 | uint32_t vq = 0; | ||
363 | - int i, j; | ||
364 | - | ||
365 | - bitmap_zero(map, ARM_MAX_VQ); | ||
366 | + int i; | ||
367 | |||
368 | /* | ||
369 | * KVM ensures all host CPUs support the same set of vector lengths. | ||
370 | @@ -XXX,XX +XXX,XX @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) | ||
371 | if (vq > ARM_MAX_VQ) { | ||
372 | warn_report("KVM supports vector lengths larger than " | ||
373 | "QEMU can enable"); | ||
374 | + vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ); | ||
375 | } | ||
376 | } | ||
377 | |||
378 | - for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) { | ||
379 | - if (!vls[i]) { | ||
380 | - continue; | ||
381 | - } | ||
382 | - for (j = 1; j <= 64; ++j) { | ||
383 | - vq = j + i * 64; | ||
384 | - if (vq > ARM_MAX_VQ) { | ||
385 | - return; | ||
386 | - } | ||
387 | - if (vls[i] & (1UL << (j - 1))) { | ||
388 | - set_bit(vq - 1, map); | ||
389 | - } | ||
390 | - } | ||
391 | - } | ||
392 | + return vls[0]; | ||
393 | } | ||
394 | |||
395 | static int kvm_arm_sve_set_vls(CPUState *cs) | ||
396 | { | ||
397 | - uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0}; | ||
398 | + ARMCPU *cpu = ARM_CPU(cs); | ||
399 | + uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq_map }; | ||
400 | struct kvm_one_reg reg = { | ||
401 | .id = KVM_REG_ARM64_SVE_VLS, | ||
402 | .addr = (uint64_t)&vls[0], | ||
403 | }; | ||
404 | - ARMCPU *cpu = ARM_CPU(cs); | ||
405 | - uint32_t vq; | ||
406 | - int i, j; | ||
407 | |||
408 | assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); | ||
409 | |||
410 | - for (vq = 1; vq <= cpu->sve_max_vq; ++vq) { | ||
411 | - if (test_bit(vq - 1, cpu->sve_vq_map)) { | ||
412 | - i = (vq - 1) / 64; | ||
413 | - j = (vq - 1) % 64; | ||
414 | - vls[i] |= 1UL << j; | ||
415 | - } | ||
416 | - } | ||
417 | - | ||
418 | return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); | ||
419 | } | ||
420 | |||
421 | -- | 90 | -- |
422 | 2.25.1 | 91 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | When FPCR.AH is 1, the behaviour of some instructions changes: |
---|---|---|---|
2 | 2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | |
3 | This function is used only once, and will need modification | 3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT |
4 | for Streaming SVE mode. | 4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which |
5 | 5 | QEMU does not yet implement) | |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | |
8 | Message-id: 20220607203306.657998-11-richard.henderson@linaro.org | 8 | The behaviour change is: |
9 | * the instructions do not update the FPSR cumulative exception flags | ||
10 | * trapped floating point exceptions are disabled (a no-op for QEMU, | ||
11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) | ||
12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode | ||
13 | * denormalized inputs and outputs are always flushed to zero, as if | ||
14 | FPCR.{FZ,FIZ} is {1,1} | ||
15 | * FPCR.FZ16 is still honoured for half-precision inputs | ||
16 | |||
17 | (See the Arm ARM DDI0487L.a section A1.5.9.) | ||
18 | |||
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPCR is updated. In subsequent commits | ||
28 | we will arrange to use these new fp_status fields for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 33 | --- |
11 | target/arm/internals.h | 11 ----------- | 34 | target/arm/cpu.h | 15 +++++++++++++++ |
12 | target/arm/helper.c | 30 +++++++++++------------------- | 35 | target/arm/internals.h | 2 ++ |
13 | 2 files changed, 11 insertions(+), 30 deletions(-) | 36 | target/arm/tcg/translate.h | 14 ++++++++++++++ |
14 | 37 | target/arm/cpu.c | 4 ++++ | |
38 | target/arm/vfp_helper.c | 13 ++++++++++++- | ||
39 | 5 files changed, 47 insertions(+), 1 deletion(-) | ||
40 | |||
41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/cpu.h | ||
44 | +++ b/target/arm/cpu.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
46 | * standard_fp_status : the ARM "Standard FPSCR Value" | ||
47 | * standard_fp_status_fp16 : used for half-precision | ||
48 | * calculations with the ARM "Standard FPSCR Value" | ||
49 | + * ah_fp_status: used for the A64 insns which change behaviour | ||
50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
51 | + * and the reciprocal and square root estimate/step insns) | ||
52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
54 | + * and the reciprocal and square root estimate/step insns); | ||
55 | + * for half-precision | ||
56 | * | ||
57 | * Half-precision operations are governed by a separate | ||
58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
15 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 81 | diff --git a/target/arm/internals.h b/target/arm/internals.h |
16 | index XXXXXXX..XXXXXXX 100644 | 82 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/internals.h | 83 | --- a/target/arm/internals.h |
18 | +++ b/target/arm/internals.h | 84 | +++ b/target/arm/internals.h |
19 | @@ -XXX,XX +XXX,XX @@ void arm_translate_init(void); | 85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); |
20 | void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); | 86 | |
21 | #endif /* CONFIG_TCG */ | 87 | /* Set the float_status behaviour to match the Arm defaults */ |
22 | 88 | void arm_set_default_fp_behaviours(float_status *s); | |
23 | -/** | 89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ |
24 | - * aarch64_sve_zcr_get_valid_len: | 90 | +void arm_set_ah_fp_behaviours(float_status *s); |
25 | - * @cpu: cpu context | 91 | |
26 | - * @start_len: maximum len to consider | 92 | #endif |
27 | - * | 93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h |
28 | - * Return the maximum supported sve vector length <= @start_len. | 94 | index XXXXXXX..XXXXXXX 100644 |
29 | - * Note that both @start_len and the return value are in units | 95 | --- a/target/arm/tcg/translate.h |
30 | - * of ZCR_ELx.LEN, so the vector bit length is (x + 1) * 128. | 96 | +++ b/target/arm/tcg/translate.h |
31 | - */ | 97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
32 | -uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len); | 98 | FPST_A64, |
33 | - | 99 | FPST_A32_F16, |
34 | enum arm_fprounding { | 100 | FPST_A64_F16, |
35 | FPROUNDING_TIEEVEN, | 101 | + FPST_AH, |
36 | FPROUNDING_POSINF, | 102 | + FPST_AH_F16, |
37 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 103 | FPST_STD, |
38 | index XXXXXXX..XXXXXXX 100644 | 104 | FPST_STD_F16, |
39 | --- a/target/arm/helper.c | 105 | } ARMFPStatusFlavour; |
40 | +++ b/target/arm/helper.c | 106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { |
41 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used |
42 | return 0; | 108 | * FPST_A64_F16 |
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
154 | */ | ||
155 | -static void arm_set_ah_fp_behaviours(float_status *s) | ||
156 | +void arm_set_ah_fp_behaviours(float_status *s) | ||
157 | { | ||
158 | set_float_detect_tininess(float_tininess_after_rounding, s); | ||
159 | set_float_ftz_detection(float_ftz_after_rounding, s); | ||
160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
164 | + /* | ||
165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
166 | + * they are used for insns that must not set the cumulative exception bits. | ||
167 | + */ | ||
168 | + | ||
169 | /* | ||
170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
43 | } | 178 | } |
44 | 179 | ||
45 | -uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len) | 180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
46 | -{ | 181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
47 | - uint32_t end_len; | 182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
48 | - | 183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
49 | - start_len = MIN(start_len, ARM_MAX_VQ - 1); | 184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
50 | - end_len = start_len; | 185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
51 | - | 186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
52 | - if (!test_bit(start_len, cpu->sve_vq_map)) { | 187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
53 | - end_len = find_last_bit(cpu->sve_vq_map, start_len); | 188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); |
54 | - assert(end_len < start_len); | 189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
55 | - } | ||
56 | - return end_len; | ||
57 | -} | ||
58 | - | ||
59 | /* | ||
60 | * Given that SVE is enabled, return the vector length for EL. | ||
61 | */ | ||
62 | uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) | ||
63 | { | ||
64 | ARMCPU *cpu = env_archcpu(env); | ||
65 | - uint32_t zcr_len = cpu->sve_max_vq - 1; | ||
66 | + uint32_t len = cpu->sve_max_vq - 1; | ||
67 | + uint32_t end_len; | ||
68 | |||
69 | if (el <= 1 && !el_is_in_host(env, el)) { | ||
70 | - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); | ||
71 | + len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]); | ||
72 | } | 190 | } |
73 | if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) { | 191 | if (changed & FPCR_FZ) { |
74 | - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); | 192 | bool ftz_enabled = val & FPCR_FZ; |
75 | + len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[2]); | 193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
76 | } | 199 | } |
77 | if (arm_feature(env, ARM_FEATURE_EL3)) { | 200 | if (changed & FPCR_AH) { |
78 | - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); | 201 | bool ah_enabled = val & FPCR_AH; |
79 | + len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]); | ||
80 | } | ||
81 | |||
82 | - return aarch64_sve_zcr_get_valid_len(cpu, zcr_len); | ||
83 | + end_len = len; | ||
84 | + if (!test_bit(len, cpu->sve_vq_map)) { | ||
85 | + end_len = find_last_bit(cpu->sve_vq_map, len); | ||
86 | + assert(end_len < len); | ||
87 | + } | ||
88 | + return end_len; | ||
89 | } | ||
90 | |||
91 | static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
92 | -- | 202 | -- |
93 | 2.25.1 | 203 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use |
---|---|---|---|
2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get | ||
3 | the required behaviour changes. | ||
2 | 4 | ||
3 | Instead of checking these bits in fp_exception_el and | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | also in sve_exception_el, document that we must compare | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | the results. The only place where we have not already | 7 | --- |
6 | checked that FP EL is zero is in rebuild_hflags_a64. | 8 | target/arm/tcg/translate-a64.h | 13 ++++ |
9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- | ||
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
7 | 12 | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h |
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20220607203306.657998-5-richard.henderson@linaro.org | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | target/arm/helper.c | 58 +++++++++++++++------------------------------ | ||
14 | 1 file changed, 19 insertions(+), 39 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/helper.c | 15 | --- a/target/arm/tcg/translate-a64.h |
19 | +++ b/target/arm/helper.c | 16 | +++ b/target/arm/tcg/translate-a64.h |
20 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo minimal_ras_reginfo[] = { | 17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) |
21 | .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) }, | 18 | return ret; |
22 | }; | 19 | } |
23 | 20 | ||
24 | -/* Return the exception level to which exceptions should be taken | ||
25 | - * via SVEAccessTrap. If an exception should be routed through | ||
26 | - * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should | ||
27 | - * take care of raising that exception. | ||
28 | - * C.f. the ARM pseudocode function CheckSVEEnabled. | ||
29 | +/* | 21 | +/* |
30 | + * Return the exception level to which exceptions should be taken | 22 | + * Return the ARMFPStatusFlavour to use based on element size and |
31 | + * via SVEAccessTrap. This excludes the check for whether the exception | 23 | + * whether FPCR.AH is set. |
32 | + * should be routed through AArch64.AdvSIMDFPAccessTrap. That can easily | 24 | + */ |
33 | + * be found by testing 0 < fp_exception_el < sve_exception_el. | 25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) |
34 | + * | 26 | +{ |
35 | + * C.f. the ARM pseudocode function CheckSVEEnabled. Note that the | 27 | + if (s->fpcr_ah) { |
36 | + * pseudocode does *not* separate out the FP trap checks, but has them | 28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; |
37 | + * all in one function. | 29 | + } else { |
30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
38 | */ | 43 | */ |
39 | int sve_exception_el(CPUARMState *env, int el) | 44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, |
40 | { | 45 | - int rm, bool is_fp16, int data, |
41 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 46 | + int rm, ARMFPStatusFlavour fpsttype, int data, |
42 | case 2: | 47 | gen_helper_gvec_3_ptr *fn) |
43 | return 1; | 48 | { |
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
44 | } | 71 | } |
45 | - | 72 | break; |
46 | - /* Check CPACR.FPEN. */ | 73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
47 | - switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN)) { | 74 | if (fp_access_check(s)) { |
48 | - case 1: | 75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); |
49 | - if (el != 0) { | 76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); |
50 | - break; | 77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
51 | - } | 78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); |
52 | - /* fall through */ | 79 | write_fp_sreg(s, a->rd, t0); |
53 | - case 0: | 80 | } |
54 | - case 2: | 81 | break; |
55 | - return 0; | 82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
56 | - } | 83 | if (fp_access_check(s)) { |
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
57 | } | 142 | } |
58 | 143 | ||
59 | /* | 144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
60 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 145 | - esz == MO_16, data, fns[esz - 1]); |
61 | case 2: | 146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, |
62 | return 2; | 147 | + data, fns[esz - 1]); |
63 | } | 148 | return true; |
64 | - | 149 | } |
65 | - switch (FIELD_EX32(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) { | 150 | |
66 | - case 1: | 151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
67 | - if (el == 2 || !(hcr_el2 & HCR_TGE)) { | 152 | + gen_helper_gvec_3_ptr * const fns[3]) |
68 | - break; | 153 | +{ |
69 | - } | 154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, |
70 | - /* fall through */ | 155 | + a->esz == MO_16 ? |
71 | - case 0: | 156 | + FPST_A64_F16 : FPST_A64); |
72 | - case 2: | 157 | +} |
73 | - return 0; | 158 | + |
74 | - } | 159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, |
75 | } else if (arm_is_el2_enabled(env)) { | 160 | + gen_helper_gvec_3_ptr * const f[3]) |
76 | if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) { | 161 | +{ |
77 | return 2; | 162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, |
78 | } | 163 | + select_ah_fpst(s, a->esz)); |
79 | - if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) { | 164 | +} |
80 | - return 0; | 165 | + |
81 | - } | 166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { |
82 | } | 167 | gen_helper_gvec_fadd_h, |
168 | gen_helper_gvec_fadd_s, | ||
169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
170 | gen_helper_gvec_recps_s, | ||
171 | gen_helper_gvec_recps_d, | ||
172 | }; | ||
173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) | ||
174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
175 | |||
176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
177 | gen_helper_gvec_rsqrts_h, | ||
178 | gen_helper_gvec_rsqrts_s, | ||
179 | gen_helper_gvec_rsqrts_d, | ||
180 | }; | ||
181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) | ||
182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
183 | |||
184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
185 | gen_helper_gvec_faddp_h, | ||
186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | ||
83 | } | 187 | } |
84 | 188 | ||
85 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | 189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
86 | 190 | - esz == MO_16, a->idx, fns[esz - 1]); | |
87 | if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
88 | int sve_el = sve_exception_el(env, el); | 192 | + a->idx, fns[esz - 1]); |
89 | - uint32_t zcr_len; | 193 | return true; |
90 | 194 | } | |
91 | /* | 195 | |
92 | - * If SVE is disabled, but FP is enabled, | 196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { |
93 | - * then the effective len is 0. | 197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); |
94 | + * If either FP or SVE are disabled, translator does not need len. | 198 | } FPScalar1; |
95 | + * If SVE EL > FP EL, FP exception has precedence, and translator | 199 | |
96 | + * does not need SVE EL. Save potential re-translations by forcing | 200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, |
97 | + * the unneeded data to zero. | 201 | - const FPScalar1 *f, int rmode) |
98 | */ | 202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, |
99 | - if (sve_el != 0 && fp_el == 0) { | 203 | + const FPScalar1 *f, int rmode, |
100 | - zcr_len = 0; | 204 | + ARMFPStatusFlavour fpsttype) |
101 | - } else { | 205 | { |
102 | - zcr_len = sve_zcr_len_for_el(env, el); | 206 | TCGv_i32 tcg_rmode = NULL; |
103 | + if (fp_el != 0) { | 207 | TCGv_ptr fpst; |
104 | + if (sve_el > fp_el) { | 208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, |
105 | + sve_el = 0; | 209 | return check == 0; |
106 | + } | ||
107 | + } else if (sve_el == 0) { | ||
108 | + DP_TBFLAG_A64(flags, VL, sve_zcr_len_for_el(env, el)); | ||
109 | } | ||
110 | DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); | ||
111 | - DP_TBFLAG_A64(flags, VL, zcr_len); | ||
112 | } | 210 | } |
113 | 211 | ||
114 | sctlr = regime_sctlr(env, stage1); | 212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
115 | -- | 406 | -- |
116 | 2.25.1 | 407 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | |
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Frederic Konrad <fkonrad@amd.com> | 1 | When FPCR.AH is 1, use FPST_AH for:
---|---|---|---|
2 | * AdvSIMD BFMLALB, BFMLALT | ||
3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
2 | 4 | ||
3 | The core and the vblend registers size are wrong, they should respectively be | 5 | so that they get the required behaviour changes. |
4 | 0x3B0 and 0x1E0 according to: | ||
5 | https://www.xilinx.com/htmldocs/registers/ug1087/ug1087-zynq-ultrascale-registers.html. | ||
6 | 6 | ||
7 | Let's fix that and use macros when creating the mmio region. | 7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour |
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | |
10 | the boolean. | ||
8 | 11 | ||
9 | Fixes: 58ac482a66d ("introduce xlnx-dp") | ||
10 | Signed-off-by: Frederic Konrad <fkonrad@amd.com> | ||
11 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@amd.com> | ||
12 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-id: 20220601172353.3220232-2-fkonrad@xilinx.com | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | 14 | --- |
16 | include/hw/display/xlnx_dp.h | 9 +++++++-- | 15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- |
17 | hw/display/xlnx_dp.c | 17 ++++++++++------- | 16 | target/arm/tcg/translate-sve.c | 6 ++++-- |
18 | 2 files changed, 17 insertions(+), 9 deletions(-) | 17 | 2 files changed, 17 insertions(+), 9 deletions(-) |
19 | 18 | ||
20 | diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h | 19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
21 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/hw/display/xlnx_dp.h | 21 | --- a/target/arm/tcg/translate-a64.c |
23 | +++ b/include/hw/display/xlnx_dp.h | 22 | +++ b/target/arm/tcg/translate-a64.c |
24 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, |
25 | #define AUD_CHBUF_MAX_DEPTH (32 * KiB) | 24 | * an out-of-line helper. |
26 | #define MAX_QEMU_BUFFER_SIZE (4 * KiB) | 25 | */ |
27 | 26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | |
28 | -#define DP_CORE_REG_ARRAY_SIZE (0x3AF >> 2) | 27 | - int rm, int ra, bool is_fp16, int data, |
29 | +#define DP_CORE_REG_OFFSET (0x0000) | 28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, |
30 | +#define DP_CORE_REG_ARRAY_SIZE (0x3B0 >> 2) | 29 | + int data, |
31 | +#define DP_AVBUF_REG_OFFSET (0xB000) | 30 | gen_helper_gvec_4_ptr *fn) |
32 | #define DP_AVBUF_REG_ARRAY_SIZE (0x238 >> 2) | 31 | { |
33 | -#define DP_VBLEND_REG_ARRAY_SIZE (0x1DF >> 2) | 32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); |
34 | +#define DP_VBLEND_REG_OFFSET (0xA000) | 33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); |
35 | +#define DP_VBLEND_REG_ARRAY_SIZE (0x1E0 >> 2) | 34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), |
36 | +#define DP_AUDIO_REG_OFFSET (0xC000) | 35 | vec_full_reg_offset(s, rn), |
37 | #define DP_AUDIO_REG_ARRAY_SIZE (0x50 >> 2) | 36 | vec_full_reg_offset(s, rm), |
38 | +#define DP_CONTAINER_SIZE (0xC050) | 37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) |
39 | 38 | } | |
40 | struct PixmanPlane { | 39 | if (fp_access_check(s)) { |
41 | pixman_format_code_t format; | 40 | /* Q bit selects BFMLALB vs BFMLALT. */ |
42 | diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c | 41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, |
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
58 | } | ||
59 | |||
60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
61 | - esz == MO_16, (a->idx << 1) | neg, | ||
62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
63 | + (a->idx << 1) | neg, | ||
64 | fns[esz - 1]); | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | 88 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/hw/display/xlnx_dp.c | 89 | --- a/target/arm/tcg/translate-sve.c |
45 | +++ b/hw/display/xlnx_dp.c | 90 | +++ b/target/arm/tcg/translate-sve.c |
46 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_init(Object *obj) | 91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, |
47 | SysBusDevice *sbd = SYS_BUS_DEVICE(obj); | 92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) |
48 | XlnxDPState *s = XLNX_DP(obj); | 93 | { |
49 | 94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | |
50 | - memory_region_init(&s->container, obj, TYPE_XLNX_DP, 0xC050); | 95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); |
51 | + memory_region_init(&s->container, obj, TYPE_XLNX_DP, DP_CONTAINER_SIZE); | 96 | + a->rd, a->rn, a->rm, a->ra, sel, |
52 | 97 | + s->fpcr_ah ? FPST_AH : FPST_A64); | |
53 | memory_region_init_io(&s->core_iomem, obj, &dp_ops, s, TYPE_XLNX_DP | 98 | } |
54 | - ".core", 0x3AF); | 99 | |
55 | - memory_region_add_subregion(&s->container, 0x0000, &s->core_iomem); | 100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) |
56 | + ".core", sizeof(s->core_registers)); | 101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) |
57 | + memory_region_add_subregion(&s->container, DP_CORE_REG_OFFSET, | 102 | { |
58 | + &s->core_iomem); | 103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, |
59 | 104 | a->rd, a->rn, a->rm, a->ra, | |
60 | memory_region_init_io(&s->vblend_iomem, obj, &vblend_ops, s, TYPE_XLNX_DP | 105 | - (a->index << 1) | sel, FPST_A64); |
61 | - ".v_blend", 0x1DF); | 106 | + (a->index << 1) | sel, |
62 | - memory_region_add_subregion(&s->container, 0xA000, &s->vblend_iomem); | 107 | + s->fpcr_ah ? FPST_AH : FPST_A64); |
63 | + ".v_blend", sizeof(s->vblend_registers)); | 108 | } |
64 | + memory_region_add_subregion(&s->container, DP_VBLEND_REG_OFFSET, | 109 | |
65 | + &s->vblend_iomem); | 110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) |
66 | |||
67 | memory_region_init_io(&s->avbufm_iomem, obj, &avbufm_ops, s, TYPE_XLNX_DP | ||
68 | - ".av_buffer_manager", 0x238); | ||
69 | - memory_region_add_subregion(&s->container, 0xB000, &s->avbufm_iomem); | ||
70 | + ".av_buffer_manager", sizeof(s->avbufm_registers)); | ||
71 | + memory_region_add_subregion(&s->container, DP_AVBUF_REG_OFFSET, | ||
72 | + &s->avbufm_iomem); | ||
73 | |||
74 | memory_region_init_io(&s->audio_iomem, obj, &audio_ops, s, TYPE_XLNX_DP | ||
75 | ".audio", sizeof(s->audio_registers)); | ||
76 | -- | 111 | -- |
77 | 2.25.1 | 112 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so | ||
2 | that rather than zeroing the high elements of a vector register when | |
3 | we write the output of a scalar operation to it, we instead merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
1 | 6 | ||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/tcg/translate.h | 2 ++ | ||
16 | target/arm/tcg/hflags.c | 9 +++++++++ | ||
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
19 | |||
20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/cpu.h | ||
23 | +++ b/target/arm/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
29 | |||
30 | /* | ||
31 | * Helpers for using the above. Note that only the A64 accessors use | ||
32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/tcg/translate.h | ||
35 | +++ b/target/arm/tcg/translate.h | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
37 | bool nv2_mem_be; | ||
38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
39 | bool fpcr_ah; | ||
40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ | ||
41 | + bool fpcr_nep; | ||
42 | /* | ||
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
64 | } | ||
65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/arm/tcg/translate-a64.c | ||
68 | +++ b/target/arm/tcg/translate-a64.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); | ||
74 | dc->vec_len = 0; | ||
75 | dc->vec_stride = 0; | ||
76 | dc->cp_regs = arm_cpu->cp_regs; | ||
77 | -- | ||
78 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the |
---|---|---|---|
2 | 2 | behaviour of the writeback of the result for most SIMD scalar | |
3 | This function has one private helper, v8m_is_sau_exempt, | 3 | operations, so that instead of zeroing the upper part of the result |
4 | so move that at the same time. | 4 | register it merges the upper elements from one of the input |
5 | 5 | registers. | |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | |
7 | Message-id: 20220604040607.269301-12-richard.henderson@linaro.org | 7 | Provide new functions write_fp_*reg_merging() which can be used |
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | instead of the existing write_fp_*reg() functions when we want this |
9 | "merge the result with one of the input registers if FPCR.NEP is | ||
10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). | ||
11 | |||
12 | Note that (as documented in the description of the FPCR.NEP bit) | ||
13 | which input register to use as the merge source varies by | ||
14 | instruction: for these 2-input scalar operations, the comparison | ||
15 | instructions take from Rm, not Rn. | ||
16 | |||
17 | We'll extend this to also provide the merging behaviour for | ||
18 | the remaining scalar insns in subsequent commits. | ||
19 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 22 | --- |
11 | target/arm/helper.c | 123 ------------------------------------------ | 23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- |
12 | target/arm/ptw.c | 126 ++++++++++++++++++++++++++++++++++++++++++++ | 24 | 1 file changed, 91 insertions(+), 26 deletions(-) |
13 | 2 files changed, 126 insertions(+), 123 deletions(-) | 25 | |
14 | 26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | |
15 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/helper.c | 28 | --- a/target/arm/tcg/translate-a64.c |
18 | +++ b/target/arm/helper.c | 29 | +++ b/target/arm/tcg/translate-a64.c |
19 | @@ -XXX,XX +XXX,XX @@ | 30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) |
20 | #include "qemu/osdep.h" | 31 | write_fp_dreg(s, reg, tmp); |
21 | #include "qemu/units.h" | ||
22 | #include "qemu/log.h" | ||
23 | -#include "target/arm/idau.h" | ||
24 | #include "trace.h" | ||
25 | #include "cpu.h" | ||
26 | #include "internals.h" | ||
27 | @@ -XXX,XX +XXX,XX @@ bool m_is_system_region(CPUARMState *env, uint32_t address) | ||
28 | return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; | ||
29 | } | 32 | } |
30 | 33 | ||
31 | -static bool v8m_is_sau_exempt(CPUARMState *env, | 34 | +/* |
32 | - uint32_t address, MMUAccessType access_type) | 35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: |
33 | -{ | 36 | + * - if FPCR.NEP == 0, clear the high elements of reg |
34 | - /* The architecture specifies that certain address ranges are | 37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg |
35 | - * exempt from v8M SAU/IDAU checks. | 38 | + * (i.e. merge the result with those high elements) |
36 | - */ | 39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). |
37 | - return | 40 | + */ |
38 | - (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) || | 41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, |
39 | - (address >= 0xe0000000 && address <= 0xe0002fff) || | 42 | + TCGv_i64 v) |
40 | - (address >= 0xe000e000 && address <= 0xe000efff) || | ||
41 | - (address >= 0xe002e000 && address <= 0xe002efff) || | ||
42 | - (address >= 0xe0040000 && address <= 0xe0041fff) || | ||
43 | - (address >= 0xe00ff000 && address <= 0xe00fffff); | ||
44 | -} | ||
45 | - | ||
46 | -void v8m_security_lookup(CPUARMState *env, uint32_t address, | ||
47 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
48 | - V8M_SAttributes *sattrs) | ||
49 | -{ | ||
50 | - /* Look up the security attributes for this address. Compare the | ||
51 | - * pseudocode SecurityCheck() function. | ||
52 | - * We assume the caller has zero-initialized *sattrs. | ||
53 | - */ | ||
54 | - ARMCPU *cpu = env_archcpu(env); | ||
55 | - int r; | ||
56 | - bool idau_exempt = false, idau_ns = true, idau_nsc = true; | ||
57 | - int idau_region = IREGION_NOTVALID; | ||
58 | - uint32_t addr_page_base = address & TARGET_PAGE_MASK; | ||
59 | - uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); | ||
60 | - | ||
61 | - if (cpu->idau) { | ||
62 | - IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau); | ||
63 | - IDAUInterface *ii = IDAU_INTERFACE(cpu->idau); | ||
64 | - | ||
65 | - iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns, | ||
66 | - &idau_nsc); | ||
67 | - } | ||
68 | - | ||
69 | - if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) { | ||
70 | - /* 0xf0000000..0xffffffff is always S for insn fetches */ | ||
71 | - return; | ||
72 | - } | ||
73 | - | ||
74 | - if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) { | ||
75 | - sattrs->ns = !regime_is_secure(env, mmu_idx); | ||
76 | - return; | ||
77 | - } | ||
78 | - | ||
79 | - if (idau_region != IREGION_NOTVALID) { | ||
80 | - sattrs->irvalid = true; | ||
81 | - sattrs->iregion = idau_region; | ||
82 | - } | ||
83 | - | ||
84 | - switch (env->sau.ctrl & 3) { | ||
85 | - case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */ | ||
86 | - break; | ||
87 | - case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */ | ||
88 | - sattrs->ns = true; | ||
89 | - break; | ||
90 | - default: /* SAU.ENABLE == 1 */ | ||
91 | - for (r = 0; r < cpu->sau_sregion; r++) { | ||
92 | - if (env->sau.rlar[r] & 1) { | ||
93 | - uint32_t base = env->sau.rbar[r] & ~0x1f; | ||
94 | - uint32_t limit = env->sau.rlar[r] | 0x1f; | ||
95 | - | ||
96 | - if (base <= address && limit >= address) { | ||
97 | - if (base > addr_page_base || limit < addr_page_limit) { | ||
98 | - sattrs->subpage = true; | ||
99 | - } | ||
100 | - if (sattrs->srvalid) { | ||
101 | - /* If we hit in more than one region then we must report | ||
102 | - * as Secure, not NS-Callable, with no valid region | ||
103 | - * number info. | ||
104 | - */ | ||
105 | - sattrs->ns = false; | ||
106 | - sattrs->nsc = false; | ||
107 | - sattrs->sregion = 0; | ||
108 | - sattrs->srvalid = false; | ||
109 | - break; | ||
110 | - } else { | ||
111 | - if (env->sau.rlar[r] & 2) { | ||
112 | - sattrs->nsc = true; | ||
113 | - } else { | ||
114 | - sattrs->ns = true; | ||
115 | - } | ||
116 | - sattrs->srvalid = true; | ||
117 | - sattrs->sregion = r; | ||
118 | - } | ||
119 | - } else { | ||
120 | - /* | ||
121 | - * Address not in this region. We must check whether the | ||
122 | - * region covers addresses in the same page as our address. | ||
123 | - * In that case we must not report a size that covers the | ||
124 | - * whole page for a subsequent hit against a different MPU | ||
125 | - * region or the background region, because it would result | ||
126 | - * in incorrect TLB hits for subsequent accesses to | ||
127 | - * addresses that are in this MPU region. | ||
128 | - */ | ||
129 | - if (limit >= base && | ||
130 | - ranges_overlap(base, limit - base + 1, | ||
131 | - addr_page_base, | ||
132 | - TARGET_PAGE_SIZE)) { | ||
133 | - sattrs->subpage = true; | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - } | ||
138 | - break; | ||
139 | - } | ||
140 | - | ||
141 | - /* | ||
142 | - * The IDAU will override the SAU lookup results if it specifies | ||
143 | - * higher security than the SAU does. | ||
144 | - */ | ||
145 | - if (!idau_ns) { | ||
146 | - if (sattrs->ns || (!idau_nsc && sattrs->nsc)) { | ||
147 | - sattrs->ns = false; | ||
148 | - sattrs->nsc = idau_nsc; | ||
149 | - } | ||
150 | - } | ||
151 | -} | ||
152 | - | ||
153 | /* Combine either inner or outer cacheability attributes for normal | ||
154 | * memory, according to table D4-42 and pseudocode procedure | ||
155 | * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
156 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/ptw.c | ||
159 | +++ b/target/arm/ptw.c | ||
160 | @@ -XXX,XX +XXX,XX @@ | ||
161 | #include "qemu/range.h" | ||
162 | #include "cpu.h" | ||
163 | #include "internals.h" | ||
164 | +#include "idau.h" | ||
165 | #include "ptw.h" | ||
166 | |||
167 | |||
168 | @@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | ||
169 | return !(*prot & (1 << access_type)); | ||
170 | } | ||
171 | |||
172 | +static bool v8m_is_sau_exempt(CPUARMState *env, | ||
173 | + uint32_t address, MMUAccessType access_type) | ||
174 | +{ | 43 | +{ |
175 | + /* | 44 | + if (!s->fpcr_nep) { |
176 | + * The architecture specifies that certain address ranges are | 45 | + write_fp_dreg(s, reg, v); |
177 | + * exempt from v8M SAU/IDAU checks. | ||
178 | + */ | ||
179 | + return | ||
180 | + (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) || | ||
181 | + (address >= 0xe0000000 && address <= 0xe0002fff) || | ||
182 | + (address >= 0xe000e000 && address <= 0xe000efff) || | ||
183 | + (address >= 0xe002e000 && address <= 0xe002efff) || | ||
184 | + (address >= 0xe0040000 && address <= 0xe0041fff) || | ||
185 | + (address >= 0xe00ff000 && address <= 0xe00fffff); | ||
186 | +} | ||
187 | + | ||
188 | +void v8m_security_lookup(CPUARMState *env, uint32_t address, | ||
189 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
190 | + V8M_SAttributes *sattrs) | ||
191 | +{ | ||
192 | + /* | ||
193 | + * Look up the security attributes for this address. Compare the | ||
194 | + * pseudocode SecurityCheck() function. | ||
195 | + * We assume the caller has zero-initialized *sattrs. | ||
196 | + */ | ||
197 | + ARMCPU *cpu = env_archcpu(env); | ||
198 | + int r; | ||
199 | + bool idau_exempt = false, idau_ns = true, idau_nsc = true; | ||
200 | + int idau_region = IREGION_NOTVALID; | ||
201 | + uint32_t addr_page_base = address & TARGET_PAGE_MASK; | ||
202 | + uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); | ||
203 | + | ||
204 | + if (cpu->idau) { | ||
205 | + IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau); | ||
206 | + IDAUInterface *ii = IDAU_INTERFACE(cpu->idau); | ||
207 | + | ||
208 | + iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns, | ||
209 | + &idau_nsc); | ||
210 | + } | ||
211 | + | ||
212 | + if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) { | ||
213 | + /* 0xf0000000..0xffffffff is always S for insn fetches */ | ||
214 | + return; | 46 | + return; |
215 | + } | 47 | + } |
216 | + | 48 | + |
217 | + if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) { | 49 | + /* |
218 | + sattrs->ns = !regime_is_secure(env, mmu_idx); | 50 | + * Move from mergereg to reg; this sets the high elements and |
51 | + * clears the bits above 128 as a side effect. | ||
52 | + */ | ||
53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
54 | + vec_full_reg_offset(s, mergereg), | ||
55 | + 16, vec_full_reg_size(s)); | ||
56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); | ||
57 | +} | ||
58 | + | ||
59 | +/* | ||
60 | + * Write a single-prec result, but only clear the higher elements | ||
61 | + * of the destination register if FPCR.NEP is 0; otherwise preserve them. | ||
62 | + */ | ||
63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, | ||
64 | + TCGv_i32 v) | ||
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
219 | + return; | 68 | + return; |
220 | + } | 69 | + } |
221 | + | 70 | + |
222 | + if (idau_region != IREGION_NOTVALID) { | 71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
223 | + sattrs->irvalid = true; | 72 | + vec_full_reg_offset(s, mergereg), |
224 | + sattrs->iregion = idau_region; | 73 | + 16, vec_full_reg_size(s)); |
74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); | ||
75 | +} | ||
76 | + | ||
77 | +/* | ||
78 | + * Write a half-prec result, but only clear the higher elements | ||
79 | + * of the destination register if FPCR.NEP is 0; otherwise preserve them. | ||
80 | + * The caller must ensure that the top 16 bits of v are zero. | ||
81 | + */ | ||
82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, | ||
83 | + TCGv_i32 v) | ||
84 | +{ | ||
85 | + if (!s->fpcr_nep) { | ||
86 | + write_fp_sreg(s, reg, v); | ||
87 | + return; | ||
225 | + } | 88 | + } |
226 | + | 89 | + |
227 | + switch (env->sau.ctrl & 3) { | 90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
228 | + case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */ | 91 | + vec_full_reg_offset(s, mergereg), |
229 | + break; | 92 | + 16, vec_full_reg_size(s)); |
230 | + case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */ | 93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); |
231 | + sattrs->ns = true; | ||
232 | + break; | ||
233 | + default: /* SAU.ENABLE == 1 */ | ||
234 | + for (r = 0; r < cpu->sau_sregion; r++) { | ||
235 | + if (env->sau.rlar[r] & 1) { | ||
236 | + uint32_t base = env->sau.rbar[r] & ~0x1f; | ||
237 | + uint32_t limit = env->sau.rlar[r] | 0x1f; | ||
238 | + | ||
239 | + if (base <= address && limit >= address) { | ||
240 | + if (base > addr_page_base || limit < addr_page_limit) { | ||
241 | + sattrs->subpage = true; | ||
242 | + } | ||
243 | + if (sattrs->srvalid) { | ||
244 | + /* | ||
245 | + * If we hit in more than one region then we must report | ||
246 | + * as Secure, not NS-Callable, with no valid region | ||
247 | + * number info. | ||
248 | + */ | ||
249 | + sattrs->ns = false; | ||
250 | + sattrs->nsc = false; | ||
251 | + sattrs->sregion = 0; | ||
252 | + sattrs->srvalid = false; | ||
253 | + break; | ||
254 | + } else { | ||
255 | + if (env->sau.rlar[r] & 2) { | ||
256 | + sattrs->nsc = true; | ||
257 | + } else { | ||
258 | + sattrs->ns = true; | ||
259 | + } | ||
260 | + sattrs->srvalid = true; | ||
261 | + sattrs->sregion = r; | ||
262 | + } | ||
263 | + } else { | ||
264 | + /* | ||
265 | + * Address not in this region. We must check whether the | ||
266 | + * region covers addresses in the same page as our address. | ||
267 | + * In that case we must not report a size that covers the | ||
268 | + * whole page for a subsequent hit against a different MPU | ||
269 | + * region or the background region, because it would result | ||
270 | + * in incorrect TLB hits for subsequent accesses to | ||
271 | + * addresses that are in this MPU region. | ||
272 | + */ | ||
273 | + if (limit >= base && | ||
274 | + ranges_overlap(base, limit - base + 1, | ||
275 | + addr_page_base, | ||
276 | + TARGET_PAGE_SIZE)) { | ||
277 | + sattrs->subpage = true; | ||
278 | + } | ||
279 | + } | ||
280 | + } | ||
281 | + } | ||
282 | + break; | ||
283 | + } | ||
284 | + | ||
285 | + /* | ||
286 | + * The IDAU will override the SAU lookup results if it specifies | ||
287 | + * higher security than the SAU does. | ||
288 | + */ | ||
289 | + if (!idau_ns) { | ||
290 | + if (sattrs->ns || (!idau_nsc && sattrs->nsc)) { | ||
291 | + sattrs->ns = false; | ||
292 | + sattrs->nsc = idau_nsc; | ||
293 | + } | ||
294 | + } | ||
295 | +} | 94 | +} |
296 | + | 95 | + |
297 | static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ |
298 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, |
299 | hwaddr *phys_ptr, MemTxAttrs *txattrs, | 98 | GVecGen2Fn *gvec_fn, int vece) |
99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
100 | } FPScalar; | ||
101 | |||
102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
103 | - const FPScalar *f, | ||
104 | + const FPScalar *f, int mergereg, | ||
105 | ARMFPStatusFlavour fpsttype) | ||
106 | { | ||
107 | switch (a->esz) { | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
112 | - write_fp_dreg(s, a->rd, t0); | ||
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
114 | } | ||
115 | break; | ||
116 | case MO_32: | ||
117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
121 | - write_fp_sreg(s, a->rd, t0); | ||
122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); | ||
123 | } | ||
124 | break; | ||
125 | case MO_16: | ||
126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
132 | } | ||
133 | break; | ||
134 | default: | ||
135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
136 | return true; | ||
137 | } | ||
138 | |||
139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
141 | + int mergereg) | ||
142 | { | ||
143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | ||
144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
145 | a->esz == MO_16 ? | ||
146 | FPST_A64_F16 : FPST_A64); | ||
147 | } | ||
148 | |||
149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
151 | + int mergereg) | ||
152 | { | ||
153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
155 | + select_ah_fpst(s, a->esz)); | ||
156 | } | ||
157 | |||
158 | static const FPScalar f_scalar_fadd = { | ||
159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { | ||
160 | gen_helper_vfp_adds, | ||
161 | gen_helper_vfp_addd, | ||
162 | }; | ||
163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) | ||
164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) | ||
165 | |||
166 | static const FPScalar f_scalar_fsub = { | ||
167 | gen_helper_vfp_subh, | ||
168 | gen_helper_vfp_subs, | ||
169 | gen_helper_vfp_subd, | ||
170 | }; | ||
171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) | ||
172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) | ||
173 | |||
174 | static const FPScalar f_scalar_fdiv = { | ||
175 | gen_helper_vfp_divh, | ||
176 | gen_helper_vfp_divs, | ||
177 | gen_helper_vfp_divd, | ||
178 | }; | ||
179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) | ||
180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) | ||
181 | |||
182 | static const FPScalar f_scalar_fmul = { | ||
183 | gen_helper_vfp_mulh, | ||
184 | gen_helper_vfp_muls, | ||
185 | gen_helper_vfp_muld, | ||
186 | }; | ||
187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) | ||
188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) | ||
189 | |||
190 | static const FPScalar f_scalar_fmax = { | ||
191 | gen_helper_vfp_maxh, | ||
192 | gen_helper_vfp_maxs, | ||
193 | gen_helper_vfp_maxd, | ||
194 | }; | ||
195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) | ||
196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
197 | |||
198 | static const FPScalar f_scalar_fmin = { | ||
199 | gen_helper_vfp_minh, | ||
200 | gen_helper_vfp_mins, | ||
201 | gen_helper_vfp_mind, | ||
202 | }; | ||
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
300 | -- | 306 | -- |
301 | 2.25.1 | 307 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works |
---|---|---|---|
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
2 | 8 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-21-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 11 | --- |
8 | target/arm/ptw.h | 2 -- | 12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- |
9 | target/arm/helper.c | 70 --------------------------------------------- | 13 | 1 file changed, 21 insertions(+), 4 deletions(-) |
10 | target/arm/ptw.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ | ||
11 | 3 files changed, 70 insertions(+), 72 deletions(-) | ||
12 | 14 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 17 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 18 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { |
18 | 20 | }; | |
19 | ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | 21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) |
20 | ARMMMUIdx mmu_idx); | 22 | |
21 | -bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | 23 | -static const FPScalar1 f_scalar_bfcvt = { |
22 | - int inputsize, int stride, int outputsize); | 24 | - .gen_s = gen_helper_bfcvt, |
23 | 25 | -}; | |
24 | #endif /* !CONFIG_USER_ONLY */ | 26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) |
25 | #endif /* TARGET_ARM_PTW_H */ | 27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) |
26 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/helper.c | ||
29 | +++ b/target/arm/helper.c | ||
30 | @@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
31 | g_assert_not_reached(); | ||
32 | } | ||
33 | } | ||
34 | - | ||
35 | -/* | ||
36 | - * check_s2_mmu_setup | ||
37 | - * @cpu: ARMCPU | ||
38 | - * @is_aa64: True if the translation regime is in AArch64 state | ||
39 | - * @startlevel: Suggested starting level | ||
40 | - * @inputsize: Bitsize of IPAs | ||
41 | - * @stride: Page-table stride (See the ARM ARM) | ||
42 | - * | ||
43 | - * Returns true if the suggested S2 translation parameters are OK and | ||
44 | - * false otherwise. | ||
45 | - */ | ||
46 | -bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
47 | - int inputsize, int stride, int outputsize) | ||
48 | -{ | ||
49 | - const int grainsize = stride + 3; | ||
50 | - int startsizecheck; | ||
51 | - | ||
52 | - /* | ||
53 | - * Negative levels are usually not allowed... | ||
54 | - * Except for FEAT_LPA2, 4k page table, 52-bit address space, which | ||
55 | - * begins with level -1. Note that previous feature tests will have | ||
56 | - * eliminated this combination if it is not enabled. | ||
57 | - */ | ||
58 | - if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) { | ||
59 | - return false; | ||
60 | - } | ||
61 | - | ||
62 | - startsizecheck = inputsize - ((3 - level) * stride + grainsize); | ||
63 | - if (startsizecheck < 1 || startsizecheck > stride + 4) { | ||
64 | - return false; | ||
65 | - } | ||
66 | - | ||
67 | - if (is_aa64) { | ||
68 | - switch (stride) { | ||
69 | - case 13: /* 64KB Pages. */ | ||
70 | - if (level == 0 || (level == 1 && outputsize <= 42)) { | ||
71 | - return false; | ||
72 | - } | ||
73 | - break; | ||
74 | - case 11: /* 16KB Pages. */ | ||
75 | - if (level == 0 || (level == 1 && outputsize <= 40)) { | ||
76 | - return false; | ||
77 | - } | ||
78 | - break; | ||
79 | - case 9: /* 4KB Pages. */ | ||
80 | - if (level == 0 && outputsize <= 42) { | ||
81 | - return false; | ||
82 | - } | ||
83 | - break; | ||
84 | - default: | ||
85 | - g_assert_not_reached(); | ||
86 | - } | ||
87 | - | ||
88 | - /* Inputsize checks. */ | ||
89 | - if (inputsize > outputsize && | ||
90 | - (arm_el_is_aa64(&cpu->env, 1) || inputsize > 40)) { | ||
91 | - /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */ | ||
92 | - return false; | ||
93 | - } | ||
94 | - } else { | ||
95 | - /* AArch32 only supports 4KB pages. Assert on that. */ | ||
96 | - assert(stride == 9); | ||
97 | - | ||
98 | - if (level == 0) { | ||
99 | - return false; | ||
100 | - } | ||
101 | - } | ||
102 | - return true; | ||
103 | -} | ||
104 | #endif /* !CONFIG_USER_ONLY */ | ||
105 | |||
106 | int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
107 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/arm/ptw.c | ||
110 | +++ b/target/arm/ptw.c | ||
111 | @@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
112 | return prot_rw | PAGE_EXEC; | ||
113 | } | ||
114 | |||
115 | +/* | ||
116 | + * check_s2_mmu_setup | ||
117 | + * @cpu: ARMCPU | ||
118 | + * @is_aa64: True if the translation regime is in AArch64 state | ||
119 | + * @startlevel: Suggested starting level | ||
120 | + * @inputsize: Bitsize of IPAs | ||
121 | + * @stride: Page-table stride (See the ARM ARM) | ||
122 | + * | ||
123 | + * Returns true if the suggested S2 translation parameters are OK and | ||
124 | + * false otherwise. | ||
125 | + */ | ||
126 | +static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
127 | + int inputsize, int stride, int outputsize) | ||
128 | +{ | 28 | +{ |
129 | + const int grainsize = stride + 3; | 29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; |
130 | + int startsizecheck; | 30 | + TCGv_i32 t32; |
31 | + int check; | ||
131 | + | 32 | + |
132 | + /* | 33 | + if (!dc_isar_feature(aa64_bf16, s)) { |
133 | + * Negative levels are usually not allowed... | ||
134 | + * Except for FEAT_LPA2, 4k page table, 52-bit address space, which | ||
135 | + * begins with level -1. Note that previous feature tests will have | ||
136 | + * eliminated this combination if it is not enabled. | ||
137 | + */ | ||
138 | + if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) { | ||
139 | + return false; | 34 | + return false; |
140 | + } | 35 | + } |
141 | + | 36 | + |
142 | + startsizecheck = inputsize - ((3 - level) * stride + grainsize); | 37 | + check = fp_access_check_scalar_hsd(s, a->esz); |
143 | + if (startsizecheck < 1 || startsizecheck > stride + 4) { | 38 | + |
144 | + return false; | 39 | + if (check <= 0) { |
40 | + return check == 0; | ||
145 | + } | 41 | + } |
146 | + | 42 | + |
147 | + if (is_aa64) { | 43 | + t32 = read_fp_sreg(s, a->rn); |
148 | + switch (stride) { | 44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); |
149 | + case 13: /* 64KB Pages. */ | 45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); |
150 | + if (level == 0 || (level == 1 && outputsize <= 42)) { | ||
151 | + return false; | ||
152 | + } | ||
153 | + break; | ||
154 | + case 11: /* 16KB Pages. */ | ||
155 | + if (level == 0 || (level == 1 && outputsize <= 40)) { | ||
156 | + return false; | ||
157 | + } | ||
158 | + break; | ||
159 | + case 9: /* 4KB Pages. */ | ||
160 | + if (level == 0 && outputsize <= 42) { | ||
161 | + return false; | ||
162 | + } | ||
163 | + break; | ||
164 | + default: | ||
165 | + g_assert_not_reached(); | ||
166 | + } | ||
167 | + | ||
168 | + /* Inputsize checks. */ | ||
169 | + if (inputsize > outputsize && | ||
170 | + (arm_el_is_aa64(&cpu->env, 1) || inputsize > 40)) { | ||
171 | + /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */ | ||
172 | + return false; | ||
173 | + } | ||
174 | + } else { | ||
175 | + /* AArch32 only supports 4KB pages. Assert on that. */ | ||
176 | + assert(stride == 9); | ||
177 | + | ||
178 | + if (level == 0) { | ||
179 | + return false; | ||
180 | + } | ||
181 | + } | ||
182 | + return true; | 46 | + return true; |
183 | +} | 47 | +} |
184 | + | 48 | |
185 | /** | 49 | static const FPScalar1 f_scalar_frint32 = { |
186 | * get_phys_addr_lpae: perform one stage of page table walk, LPAE format | 50 | NULL, |
187 | * | ||
188 | -- | 51 | -- |
189 | 2.25.1 | 52 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 1-input scalar operations. | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ | ||
7 | 1 file changed, 14 insertions(+), 12 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
14 | case MO_64: | ||
15 | t64 = read_fp_dreg(s, a->rn); | ||
16 | f->gen_d(t64, t64, fpst); | ||
17 | - write_fp_dreg(s, a->rd, t64); | ||
18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); | ||
19 | break; | ||
20 | case MO_32: | ||
21 | t32 = read_fp_sreg(s, a->rn); | ||
22 | f->gen_s(t32, t32, fpst); | ||
23 | - write_fp_sreg(s, a->rd, t32); | ||
24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); | ||
25 | break; | ||
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
99 | + } | ||
100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); | ||
101 | return true; | ||
102 | } | ||
103 | -- | ||
104 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
7 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
14 | } else { | ||
15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); | ||
16 | } | ||
17 | - write_fp_dreg(s, rd, tcg_double); | ||
18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); | ||
19 | break; | ||
20 | |||
21 | case MO_32: | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
40 | -- | ||
41 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) |
---|---|---|---|
2 | is always double-to-single and must honour FPCR.NEP. Implement this | ||
3 | directly in a trans function rather than using | ||
4 | do_2misc_narrow_scalar(). | ||
2 | 5 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for |
4 | Message-id: 20220604040607.269301-9-richard.henderson@linaro.org | 7 | the FCVTXN (vector) insn, so we move those down in the file to |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | where they are used. |
9 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 12 | --- |
8 | target/arm/ptw.h | 5 --- | 13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ |
9 | target/arm/helper.c | 75 ------------------------------------------- | 14 | 1 file changed, 28 insertions(+), 15 deletions(-) |
10 | target/arm/ptw.c | 77 +++++++++++++++++++++++++++++++++++++++++++++ | ||
11 | 3 files changed, 77 insertions(+), 80 deletions(-) | ||
12 | 15 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 18 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 19 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ void get_phys_addr_pmsav7_default(CPUARMState *env, | 20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { |
18 | int32_t address, int *prot); | 21 | }; |
19 | bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user); | 22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) |
20 | 23 | ||
21 | -bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) |
22 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) |
23 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, | 26 | { |
24 | - int *prot, target_ulong *page_size, | 27 | - /* |
25 | - ARMMMUFaultInfo *fi); | 28 | - * 64 bit to 32 bit float conversion |
26 | bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 29 | - * with von Neumann rounding (round to odd) |
27 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 30 | - */ |
28 | bool s1_is_el0, | 31 | - TCGv_i32 tmp = tcg_temp_new_i32(); |
29 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); |
30 | index XXXXXXX..XXXXXXX 100644 | 33 | - tcg_gen_extu_i32_i64(d, tmp); |
31 | --- a/target/arm/helper.c | 34 | + if (fp_access_check(s)) { |
32 | +++ b/target/arm/helper.c | 35 | + /* |
33 | @@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | 36 | + * 64 bit to 32 bit float conversion |
34 | return !(*prot & (1 << access_type)); | 37 | + * with von Neumann rounding (round to odd) |
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
35 | } | 45 | } |
36 | 46 | ||
47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
48 | - NULL, | ||
49 | - NULL, | ||
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
37 | - | 53 | - |
38 | -bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 54 | #undef WRAP_ENV |
39 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 55 | |
40 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, | 56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) |
41 | - int *prot, target_ulong *page_size, | 57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) |
42 | - ARMMMUFaultInfo *fi) | 58 | tcg_gen_extu_i32_i64(d, tmp); |
43 | -{ | ||
44 | - uint32_t secure = regime_is_secure(env, mmu_idx); | ||
45 | - V8M_SAttributes sattrs = {}; | ||
46 | - bool ret; | ||
47 | - bool mpu_is_subpage; | ||
48 | - | ||
49 | - if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { | ||
50 | - v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs); | ||
51 | - if (access_type == MMU_INST_FETCH) { | ||
52 | - /* Instruction fetches always use the MMU bank and the | ||
53 | - * transaction attribute determined by the fetch address, | ||
54 | - * regardless of CPU state. This is painful for QEMU | ||
55 | - * to handle, because it would mean we need to encode | ||
56 | - * into the mmu_idx not just the (user, negpri) information | ||
57 | - * for the current security state but also that for the | ||
58 | - * other security state, which would balloon the number | ||
59 | - * of mmu_idx values needed alarmingly. | ||
60 | - * Fortunately we can avoid this because it's not actually | ||
61 | - * possible to arbitrarily execute code from memory with | ||
62 | - * the wrong security attribute: it will always generate | ||
63 | - * an exception of some kind or another, apart from the | ||
64 | - * special case of an NS CPU executing an SG instruction | ||
65 | - * in S&NSC memory. So we always just fail the translation | ||
66 | - * here and sort things out in the exception handler | ||
67 | - * (including possibly emulating an SG instruction). | ||
68 | - */ | ||
69 | - if (sattrs.ns != !secure) { | ||
70 | - if (sattrs.nsc) { | ||
71 | - fi->type = ARMFault_QEMU_NSCExec; | ||
72 | - } else { | ||
73 | - fi->type = ARMFault_QEMU_SFault; | ||
74 | - } | ||
75 | - *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; | ||
76 | - *phys_ptr = address; | ||
77 | - *prot = 0; | ||
78 | - return true; | ||
79 | - } | ||
80 | - } else { | ||
81 | - /* For data accesses we always use the MMU bank indicated | ||
82 | - * by the current CPU state, but the security attributes | ||
83 | - * might downgrade a secure access to nonsecure. | ||
84 | - */ | ||
85 | - if (sattrs.ns) { | ||
86 | - txattrs->secure = false; | ||
87 | - } else if (!secure) { | ||
88 | - /* NS access to S memory must fault. | ||
89 | - * Architecturally we should first check whether the | ||
90 | - * MPU information for this address indicates that we | ||
91 | - * are doing an unaligned access to Device memory, which | ||
92 | - * should generate a UsageFault instead. QEMU does not | ||
93 | - * currently check for that kind of unaligned access though. | ||
94 | - * If we added it we would need to do so as a special case | ||
95 | - * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt(). | ||
96 | - */ | ||
97 | - fi->type = ARMFault_QEMU_SFault; | ||
98 | - *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; | ||
99 | - *phys_ptr = address; | ||
100 | - *prot = 0; | ||
101 | - return true; | ||
102 | - } | ||
103 | - } | ||
104 | - } | ||
105 | - | ||
106 | - ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr, | ||
107 | - txattrs, prot, &mpu_is_subpage, fi, NULL); | ||
108 | - *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE; | ||
109 | - return ret; | ||
110 | -} | ||
111 | - | ||
112 | /* Combine either inner or outer cacheability attributes for normal | ||
113 | * memory, according to table D4-42 and pseudocode procedure | ||
114 | * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
115 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/target/arm/ptw.c | ||
118 | +++ b/target/arm/ptw.c | ||
119 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | ||
120 | return !(*prot & (1 << access_type)); | ||
121 | } | 59 | } |
122 | 60 | ||
123 | +static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) |
124 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
125 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
126 | + int *prot, target_ulong *page_size, | ||
127 | + ARMMMUFaultInfo *fi) | ||
128 | +{ | 62 | +{ |
129 | + uint32_t secure = regime_is_secure(env, mmu_idx); | 63 | + /* |
130 | + V8M_SAttributes sattrs = {}; | 64 | + * 64 bit to 32 bit float conversion |
131 | + bool ret; | 65 | + * with von Neumann rounding (round to odd) |
132 | + bool mpu_is_subpage; | 66 | + */ |
133 | + | 67 | + TCGv_i32 tmp = tcg_temp_new_i32(); |
134 | + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { | 68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); |
135 | + v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs); | 69 | + tcg_gen_extu_i32_i64(d, tmp); |
136 | + if (access_type == MMU_INST_FETCH) { | ||
137 | + /* | ||
138 | + * Instruction fetches always use the MMU bank and the | ||
139 | + * transaction attribute determined by the fetch address, | ||
140 | + * regardless of CPU state. This is painful for QEMU | ||
141 | + * to handle, because it would mean we need to encode | ||
142 | + * into the mmu_idx not just the (user, negpri) information | ||
143 | + * for the current security state but also that for the | ||
144 | + * other security state, which would balloon the number | ||
145 | + * of mmu_idx values needed alarmingly. | ||
146 | + * Fortunately we can avoid this because it's not actually | ||
147 | + * possible to arbitrarily execute code from memory with | ||
148 | + * the wrong security attribute: it will always generate | ||
149 | + * an exception of some kind or another, apart from the | ||
150 | + * special case of an NS CPU executing an SG instruction | ||
151 | + * in S&NSC memory. So we always just fail the translation | ||
152 | + * here and sort things out in the exception handler | ||
153 | + * (including possibly emulating an SG instruction). | ||
154 | + */ | ||
155 | + if (sattrs.ns != !secure) { | ||
156 | + if (sattrs.nsc) { | ||
157 | + fi->type = ARMFault_QEMU_NSCExec; | ||
158 | + } else { | ||
159 | + fi->type = ARMFault_QEMU_SFault; | ||
160 | + } | ||
161 | + *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; | ||
162 | + *phys_ptr = address; | ||
163 | + *prot = 0; | ||
164 | + return true; | ||
165 | + } | ||
166 | + } else { | ||
167 | + /* | ||
168 | + * For data accesses we always use the MMU bank indicated | ||
169 | + * by the current CPU state, but the security attributes | ||
170 | + * might downgrade a secure access to nonsecure. | ||
171 | + */ | ||
172 | + if (sattrs.ns) { | ||
173 | + txattrs->secure = false; | ||
174 | + } else if (!secure) { | ||
175 | + /* | ||
176 | + * NS access to S memory must fault. | ||
177 | + * Architecturally we should first check whether the | ||
178 | + * MPU information for this address indicates that we | ||
179 | + * are doing an unaligned access to Device memory, which | ||
180 | + * should generate a UsageFault instead. QEMU does not | ||
181 | + * currently check for that kind of unaligned access though. | ||
182 | + * If we added it we would need to do so as a special case | ||
183 | + * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt(). | ||
184 | + */ | ||
185 | + fi->type = ARMFault_QEMU_SFault; | ||
186 | + *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; | ||
187 | + *phys_ptr = address; | ||
188 | + *prot = 0; | ||
189 | + return true; | ||
190 | + } | ||
191 | + } | ||
192 | + } | ||
193 | + | ||
194 | + ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr, | ||
195 | + txattrs, prot, &mpu_is_subpage, fi, NULL); | ||
196 | + *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE; | ||
197 | + return ret; | ||
198 | +} | 70 | +} |
199 | + | 71 | + |
200 | /** | 72 | static ArithOneOp * const f_vector_fcvtn[] = { |
201 | * get_phys_addr - get the physical address for this virtual address | 73 | NULL, |
202 | * | 74 | gen_fcvtn_hs, |
75 | gen_fcvtn_sd, | ||
76 | }; | ||
77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
78 | + NULL, | ||
79 | + NULL, | ||
80 | + gen_fcvtxn_sd, | ||
81 | +}; | ||
82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) | ||
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
84 | |||
203 | -- | 85 | -- |
204 | 2.25.1 | 86 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Sai Pavan Boddu <sai.pavan.boddu@xilinx.com> | 1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by |
---|---|---|---|
2 | element instructions; these both need to merge the result with the Rn | ||
3 | register when FPCR.NEP is set. | ||
2 | 4 | ||
3 | Fix interrupt disable logic. Mask value 1 indicates that interrupts are | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | disabled. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
5 | 10 | ||
6 | Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com> | 11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
7 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
8 | Signed-off-by: Frederic Konrad <fkonrad@amd.com> | ||
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | Message-id: 20220601172353.3220232-4-fkonrad@xilinx.com | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | hw/display/xlnx_dp.c | 2 +- | ||
14 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
15 | |||
16 | diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/display/xlnx_dp.c | 13 | --- a/target/arm/tcg/translate-a64.c |
19 | +++ b/hw/display/xlnx_dp.c | 14 | +++ b/target/arm/tcg/translate-a64.c |
20 | @@ -XXX,XX +XXX,XX @@ static void xlnx_dp_write(void *opaque, hwaddr offset, uint64_t value, | 15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
21 | xlnx_dp_update_irq(s); | 16 | |
17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | ||
18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
21 | } | ||
22 | break; | 22 | break; |
23 | case DP_INT_DS: | 23 | case MO_32: |
24 | - s->core_registers[DP_INT_MASK] |= ~value; | 24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
25 | + s->core_registers[DP_INT_MASK] |= value; | 25 | |
26 | xlnx_dp_update_irq(s); | 26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); |
27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
34 | |||
35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | ||
36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | ||
27 | break; | 40 | break; |
28 | default: | 41 | default: |
29 | -- | 42 | -- |
30 | 2.25.1 | 43 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special |
---|---|---|---|
2 | cases: | ||
2 | 3 | ||
3 | This (newish) ARM pseudocode function is easier to work with | 4 | * comparing two zeroes (even of different sign) or comparing a NaN |
4 | than open-coded tests for HCR_E2H etc. Use of the function | 5 | with anything always returns the second argument (possibly |
5 | will be staged into the code base in parts. | 6 | squashed to zero) |
7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 | ||
6 | 8 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Implement these semantics in new helper functions and select them at |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. |
9 | Message-id: 20220607203306.657998-6-richard.henderson@linaro.org | 11 | (We will convert the other FMAX and FMIN insns in subsequent |
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
15 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | 18 | --- |
12 | target/arm/internals.h | 2 ++ | 19 | target/arm/tcg/helper-a64.h | 7 +++++++ |
13 | target/arm/helper.c | 28 ++++++++++++++++++++++++++++ | 20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ |
14 | 2 files changed, 30 insertions(+) | 21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- |
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
15 | 23 | ||
16 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h |
17 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/internals.h | 26 | --- a/target/arm/tcg/helper-a64.h |
19 | +++ b/target/arm/internals.h | 27 | +++ b/target/arm/tcg/helper-a64.h |
20 | @@ -XXX,XX +XXX,XX @@ static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { } | 28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) |
21 | void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu); | 29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) |
22 | #endif | 30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) |
23 | 31 | ||
24 | +bool el_is_in_host(CPUARMState *env, int el); | 32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) |
33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) | ||
34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) | ||
35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) | ||
36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) | ||
37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | ||
25 | + | 38 | + |
26 | void aa32_max_features(ARMCPU *cpu); | 39 | DEF_HELPER_2(exception_return, void, env, i64) |
27 | 40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) | |
28 | #endif | 41 | |
29 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c |
30 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/target/arm/helper.c | 44 | --- a/target/arm/tcg/helper-a64.c |
32 | +++ b/target/arm/helper.c | 45 | +++ b/target/arm/tcg/helper-a64.c |
33 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) | 46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) |
34 | return ret; | 47 | return r; |
35 | } | 48 | } |
36 | 49 | ||
37 | +/* | 50 | +/* |
38 | + * Corresponds to ARM pseudocode function ELIsInHost(). | 51 | + * AH=1 min/max have some odd special cases: |
52 | + * comparing two zeroes (regardless of sign), (NaN, anything), | ||
53 | + * or (anything, NaN) should return the second argument (possibly | ||
54 | + * squashed to zero). | ||
55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. | ||
39 | + */ | 56 | + */ |
40 | +bool el_is_in_host(CPUARMState *env, int el) | 57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ |
41 | +{ | 58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ |
42 | + uint64_t mask; | 59 | + { \ |
43 | + | 60 | + bool save; \ |
44 | + /* | 61 | + CTYPE r; \ |
45 | + * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff(). | 62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ |
46 | + * Perform the simplest bit tests first, and validate EL2 afterward. | 63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ |
47 | + */ | 64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ |
48 | + if (el & 1) { | 65 | + return b; \ |
49 | + return false; /* EL1 or EL3 */ | 66 | + } \ |
67 | + if (FLOATTYPE ## _is_any_nan(a) || \ | ||
68 | + FLOATTYPE ## _is_any_nan(b)) { \ | ||
69 | + float_raise(float_flag_invalid, fpst); \ | ||
70 | + return b; \ | ||
71 | + } \ | ||
72 | + save = get_flush_to_zero(fpst); \ | ||
73 | + set_flush_to_zero(false, fpst); \ | ||
74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ | ||
75 | + set_flush_to_zero(save, fpst); \ | ||
76 | + return r; \ | ||
50 | + } | 77 | + } |
51 | + | 78 | + |
52 | + /* | 79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) |
53 | + * Note that hcr_write() checks isar_feature_aa64_vh(), | 80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) |
54 | + * aka HaveVirtHostExt(), in allowing HCR_E2H to be set. | 81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) |
55 | + */ | 82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) |
56 | + mask = el ? HCR_E2H : HCR_E2H | HCR_TGE; | 83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) |
57 | + if ((env->cp15.hcr_el2 & mask) != mask) { | 84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) |
58 | + return false; | ||
59 | + } | ||
60 | + | 85 | + |
61 | + /* TGE and/or E2H set: double check those bits are currently legal. */ | 86 | /* 64-bit versions of the CRC helpers. Note that although the operation |
62 | + return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2); | 87 | * (and the prototypes of crc32c() and crc32() mean that only the bottom |
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
94 | select_ah_fpst(s, a->esz)); | ||
95 | } | ||
96 | |||
97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, | ||
99 | + const FPScalar *fnormal, | ||
100 | + const FPScalar *fah, | ||
101 | + int mergereg) | ||
102 | +{ | ||
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
63 | +} | 104 | +} |
64 | + | 105 | + |
65 | static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, | 106 | static const FPScalar f_scalar_fadd = { |
66 | uint64_t value) | 107 | gen_helper_vfp_addh, |
67 | { | 108 | gen_helper_vfp_adds, |
109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { | ||
110 | gen_helper_vfp_maxs, | ||
111 | gen_helper_vfp_maxd, | ||
112 | }; | ||
113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
114 | +static const FPScalar f_scalar_fmax_ah = { | ||
115 | + gen_helper_vfp_ah_maxh, | ||
116 | + gen_helper_vfp_ah_maxs, | ||
117 | + gen_helper_vfp_ah_maxd, | ||
118 | +}; | ||
119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) | ||
120 | |||
121 | static const FPScalar f_scalar_fmin = { | ||
122 | gen_helper_vfp_minh, | ||
123 | gen_helper_vfp_mins, | ||
124 | gen_helper_vfp_mind, | ||
125 | }; | ||
126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
127 | +static const FPScalar f_scalar_fmin_ah = { | ||
128 | + gen_helper_vfp_ah_minh, | ||
129 | + gen_helper_vfp_ah_mins, | ||
130 | + gen_helper_vfp_ah_mind, | ||
131 | +}; | ||
132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fmaxnm = { | ||
135 | gen_helper_vfp_maxnumh, | ||
68 | -- | 136 | -- |
69 | 2.25.1 | 137 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by |
---|---|---|---|
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar fmin_ah and fmax_ah helpers on each element. | ||
2 | 4 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-26-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 7 | --- |
8 | target/arm/ptw.h | 17 ---------------- | 8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ |
9 | target/arm/helper.c | 47 --------------------------------------------- | 9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- |
10 | target/arm/ptw.c | 47 ++++++++++++++++++++++++++++++++++++++++++++- | 10 | target/arm/tcg/vec_helper.c | 8 ++++++++ |
11 | 3 files changed, 46 insertions(+), 65 deletions(-) | 11 | 3 files changed, 41 insertions(+), 2 deletions(-) |
12 | delete mode 100644 target/arm/ptw.h | ||
13 | 12 | ||
14 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
15 | deleted file mode 100644 | ||
16 | index XXXXXXX..XXXXXXX | ||
17 | --- a/target/arm/ptw.h | ||
18 | +++ /dev/null | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | -/* | ||
21 | - * ARM page table walking. | ||
22 | - * | ||
23 | - * This code is licensed under the GNU GPL v2 or later. | ||
24 | - * | ||
25 | - * SPDX-License-Identifier: GPL-2.0-or-later | ||
26 | - */ | ||
27 | - | ||
28 | -#ifndef TARGET_ARM_PTW_H | ||
29 | -#define TARGET_ARM_PTW_H | ||
30 | - | ||
31 | -#ifndef CONFIG_USER_ONLY | ||
32 | - | ||
33 | -bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | ||
34 | - | ||
35 | -#endif /* !CONFIG_USER_ONLY */ | ||
36 | -#endif /* TARGET_ARM_PTW_H */ | ||
37 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/target/arm/helper.c | 15 | --- a/target/arm/tcg/helper-sve.h |
40 | +++ b/target/arm/helper.c | 16 | +++ b/target/arm/tcg/helper-sve.h |
41 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, |
42 | #include "semihosting/common-semi.h" | 18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, |
43 | #endif | 19 | void, ptr, ptr, ptr, fpst, i32) |
44 | #include "cpregs.h" | 20 | |
45 | -#include "ptw.h" | 21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, |
46 | 22 | + void, ptr, ptr, ptr, fpst, i32) | |
47 | #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ | 23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, |
48 | 24 | + void, ptr, ptr, ptr, fpst, i32) | |
49 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el) | 25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, |
26 | + void, ptr, ptr, ptr, fpst, i32) | ||
27 | + | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, | ||
29 | + void, ptr, ptr, ptr, fpst, i32) | ||
30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
31 | + void, ptr, ptr, ptr, fpst, i32) | ||
32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, fpst, i32) | ||
34 | + | ||
35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
36 | i64, ptr, ptr, fpst, i32) | ||
37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/tcg/translate-a64.c | ||
41 | +++ b/target/arm/tcg/translate-a64.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
43 | FPST_A64_F16 : FPST_A64); | ||
50 | } | 44 | } |
51 | 45 | ||
52 | #ifndef CONFIG_USER_ONLY | 46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
53 | - | 47 | + gen_helper_gvec_3_ptr * const fnormal[3], |
54 | -/* Return true if the specified stage of address translation is disabled */ | 48 | + gen_helper_gvec_3_ptr * const fah[3]) |
55 | -bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
56 | -{ | ||
57 | - uint64_t hcr_el2; | ||
58 | - | ||
59 | - if (arm_feature(env, ARM_FEATURE_M)) { | ||
60 | - switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] & | ||
61 | - (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { | ||
62 | - case R_V7M_MPU_CTRL_ENABLE_MASK: | ||
63 | - /* Enabled, but not for HardFault and NMI */ | ||
64 | - return mmu_idx & ARM_MMU_IDX_M_NEGPRI; | ||
65 | - case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK: | ||
66 | - /* Enabled for all cases */ | ||
67 | - return false; | ||
68 | - case 0: | ||
69 | - default: | ||
70 | - /* HFNMIENA set and ENABLE clear is UNPREDICTABLE, but | ||
71 | - * we warned about that in armv7m_nvic.c when the guest set it. | ||
72 | - */ | ||
73 | - return true; | ||
74 | - } | ||
75 | - } | ||
76 | - | ||
77 | - hcr_el2 = arm_hcr_el2_eff(env); | ||
78 | - | ||
79 | - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
80 | - /* HCR.DC means HCR.VM behaves as 1 */ | ||
81 | - return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; | ||
82 | - } | ||
83 | - | ||
84 | - if (hcr_el2 & HCR_TGE) { | ||
85 | - /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */ | ||
86 | - if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) { | ||
87 | - return true; | ||
88 | - } | ||
89 | - } | ||
90 | - | ||
91 | - if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { | ||
92 | - /* HCR.DC means SCTLR_EL1.M behaves as 0 */ | ||
93 | - return true; | ||
94 | - } | ||
95 | - | ||
96 | - return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; | ||
97 | -} | ||
98 | - | ||
99 | /* Convert a possible stage1+2 MMU index into the appropriate | ||
100 | * stage 1 MMU index | ||
101 | */ | ||
102 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/arm/ptw.c | ||
105 | +++ b/target/arm/ptw.c | ||
106 | @@ -XXX,XX +XXX,XX @@ | ||
107 | #include "cpu.h" | ||
108 | #include "internals.h" | ||
109 | #include "idau.h" | ||
110 | -#include "ptw.h" | ||
111 | |||
112 | |||
113 | static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
114 | @@ -XXX,XX +XXX,XX @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | ||
115 | } | ||
116 | } | ||
117 | |||
118 | +/* Return true if the specified stage of address translation is disabled */ | ||
119 | +static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
120 | +{ | 49 | +{ |
121 | + uint64_t hcr_el2; | 50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); |
122 | + | ||
123 | + if (arm_feature(env, ARM_FEATURE_M)) { | ||
124 | + switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] & | ||
125 | + (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { | ||
126 | + case R_V7M_MPU_CTRL_ENABLE_MASK: | ||
127 | + /* Enabled, but not for HardFault and NMI */ | ||
128 | + return mmu_idx & ARM_MMU_IDX_M_NEGPRI; | ||
129 | + case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK: | ||
130 | + /* Enabled for all cases */ | ||
131 | + return false; | ||
132 | + case 0: | ||
133 | + default: | ||
134 | + /* | ||
135 | + * HFNMIENA set and ENABLE clear is UNPREDICTABLE, but | ||
136 | + * we warned about that in armv7m_nvic.c when the guest set it. | ||
137 | + */ | ||
138 | + return true; | ||
139 | + } | ||
140 | + } | ||
141 | + | ||
142 | + hcr_el2 = arm_hcr_el2_eff(env); | ||
143 | + | ||
144 | + if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { | ||
145 | + /* HCR.DC means HCR.VM behaves as 1 */ | ||
146 | + return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; | ||
147 | + } | ||
148 | + | ||
149 | + if (hcr_el2 & HCR_TGE) { | ||
150 | + /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */ | ||
151 | + if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) { | ||
152 | + return true; | ||
153 | + } | ||
154 | + } | ||
155 | + | ||
156 | + if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { | ||
157 | + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ | ||
158 | + return true; | ||
159 | + } | ||
160 | + | ||
161 | + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; | ||
162 | +} | 51 | +} |
163 | + | 52 | + |
164 | static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | 53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, |
54 | gen_helper_gvec_3_ptr * const f[3]) | ||
165 | { | 55 | { |
166 | /* | 56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { |
57 | gen_helper_gvec_fmax_s, | ||
58 | gen_helper_gvec_fmax_d, | ||
59 | }; | ||
60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) | ||
61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { | ||
62 | + gen_helper_gvec_ah_fmax_h, | ||
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
101 | |||
167 | -- | 102 | -- |
168 | 2.25.1 | 103 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the FPCR.AH semantics for FMAXV and FMINV. These are the |
---|---|---|---|
2 | "recursively reduce all lanes of a vector to a scalar result" insns; | ||
3 | we just need to use the _ah_ helper for the reduction step when | ||
4 | FPCR.AH == 1. | ||
2 | 5 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-7-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 8 | --- |
8 | target/arm/ptw.h | 3 +++ | 9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- |
9 | target/arm/helper.c | 41 ----------------------------------------- | 10 | 1 file changed, 18 insertions(+), 10 deletions(-) |
10 | target/arm/ptw.c | 41 +++++++++++++++++++++++++++++++++++++++++ | ||
11 | 3 files changed, 44 insertions(+), 41 deletions(-) | ||
12 | 11 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 14 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 15 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, |
18 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | ||
19 | } | 17 | } |
20 | 18 | ||
21 | +void get_phys_addr_pmsav7_default(CPUARMState *env, | 19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, |
22 | + ARMMMUIdx mmu_idx, | 20 | - NeonGenTwoSingleOpFn *fn) |
23 | + int32_t address, int *prot); | 21 | + NeonGenTwoSingleOpFn *fnormal, |
24 | bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 22 | + NeonGenTwoSingleOpFn *fah) |
25 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 23 | { |
26 | hwaddr *phys_ptr, int *prot, | 24 | if (fp_access_check(s)) { |
27 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 25 | MemOp esz = a->esz; |
28 | index XXXXXXX..XXXXXXX 100644 | 26 | int elts = (a->q ? 16 : 8) >> esz; |
29 | --- a/target/arm/helper.c | 27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
30 | +++ b/target/arm/helper.c | 28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); |
31 | @@ -XXX,XX +XXX,XX @@ do_fault: | 29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, |
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
32 | return true; | 33 | return true; |
33 | } | 34 | } |
34 | 35 | ||
35 | -static inline void get_phys_addr_pmsav7_default(CPUARMState *env, | 36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) |
36 | - ARMMMUIdx mmu_idx, | 37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) |
37 | - int32_t address, int *prot) | 38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) |
38 | -{ | 39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) |
39 | - if (!arm_feature(env, ARM_FEATURE_M)) { | 40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, |
40 | - *prot = PAGE_READ | PAGE_WRITE; | 41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) |
41 | - switch (address) { | 42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, |
42 | - case 0xF0000000 ... 0xFFFFFFFF: | 43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) |
43 | - if (regime_sctlr(env, mmu_idx) & SCTLR_V) { | 44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, |
44 | - /* hivecs execing is ok */ | 45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) |
45 | - *prot |= PAGE_EXEC; | 46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, |
46 | - } | 47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) |
47 | - break; | 48 | |
48 | - case 0x00000000 ... 0x7FFFFFFF: | 49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) |
49 | - *prot |= PAGE_EXEC; | 50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) |
50 | - break; | 51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) |
51 | - } | 52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) |
52 | - } else { | 53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, |
53 | - /* Default system address map for M profile cores. | 54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) |
54 | - * The architecture specifies which regions are execute-never; | 55 | +TRANS(FMINNMV_s, do_fp_reduction, a, |
55 | - * at the MPU level no other checks are defined. | 56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) |
56 | - */ | 57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) |
57 | - switch (address) { | 58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) |
58 | - case 0x00000000 ... 0x1fffffff: /* ROM */ | 59 | |
59 | - case 0x20000000 ... 0x3fffffff: /* SRAM */ | 60 | /* |
60 | - case 0x60000000 ... 0x7fffffff: /* RAM */ | 61 | * Floating-point Immediate |
61 | - case 0x80000000 ... 0x9fffffff: /* RAM */ | ||
62 | - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | ||
63 | - break; | ||
64 | - case 0x40000000 ... 0x5fffffff: /* Peripheral */ | ||
65 | - case 0xa0000000 ... 0xbfffffff: /* Device */ | ||
66 | - case 0xc0000000 ... 0xdfffffff: /* Device */ | ||
67 | - case 0xe0000000 ... 0xffffffff: /* System */ | ||
68 | - *prot = PAGE_READ | PAGE_WRITE; | ||
69 | - break; | ||
70 | - default: | ||
71 | - g_assert_not_reached(); | ||
72 | - } | ||
73 | - } | ||
74 | -} | ||
75 | - | ||
76 | static bool pmsav7_use_background_region(ARMCPU *cpu, | ||
77 | ARMMMUIdx mmu_idx, bool is_user) | ||
78 | { | ||
79 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/target/arm/ptw.c | ||
82 | +++ b/target/arm/ptw.c | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
84 | return false; | ||
85 | } | ||
86 | |||
87 | +void get_phys_addr_pmsav7_default(CPUARMState *env, | ||
88 | + ARMMMUIdx mmu_idx, | ||
89 | + int32_t address, int *prot) | ||
90 | +{ | ||
91 | + if (!arm_feature(env, ARM_FEATURE_M)) { | ||
92 | + *prot = PAGE_READ | PAGE_WRITE; | ||
93 | + switch (address) { | ||
94 | + case 0xF0000000 ... 0xFFFFFFFF: | ||
95 | + if (regime_sctlr(env, mmu_idx) & SCTLR_V) { | ||
96 | + /* hivecs execing is ok */ | ||
97 | + *prot |= PAGE_EXEC; | ||
98 | + } | ||
99 | + break; | ||
100 | + case 0x00000000 ... 0x7FFFFFFF: | ||
101 | + *prot |= PAGE_EXEC; | ||
102 | + break; | ||
103 | + } | ||
104 | + } else { | ||
105 | + /* Default system address map for M profile cores. | ||
106 | + * The architecture specifies which regions are execute-never; | ||
107 | + * at the MPU level no other checks are defined. | ||
108 | + */ | ||
109 | + switch (address) { | ||
110 | + case 0x00000000 ... 0x1fffffff: /* ROM */ | ||
111 | + case 0x20000000 ... 0x3fffffff: /* SRAM */ | ||
112 | + case 0x60000000 ... 0x7fffffff: /* RAM */ | ||
113 | + case 0x80000000 ... 0x9fffffff: /* RAM */ | ||
114 | + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | ||
115 | + break; | ||
116 | + case 0x40000000 ... 0x5fffffff: /* Peripheral */ | ||
117 | + case 0xa0000000 ... 0xbfffffff: /* Device */ | ||
118 | + case 0xc0000000 ... 0xdfffffff: /* Device */ | ||
119 | + case 0xe0000000 ... 0xffffffff: /* System */ | ||
120 | + *prot = PAGE_READ | PAGE_WRITE; | ||
121 | + break; | ||
122 | + default: | ||
123 | + g_assert_not_reached(); | ||
124 | + } | ||
125 | + } | ||
126 | +} | ||
127 | + | ||
128 | /** | ||
129 | * get_phys_addr - get the physical address for this virtual address | ||
130 | * | ||
131 | -- | 62 | -- |
132 | 2.25.1 | 63 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the FPCR.AH semantics for the pairwise floating |
---|---|---|---|
2 | point minimum/maximum insns FMINP and FMAXP. | ||
2 | 3 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-13-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 6 | --- |
8 | target/arm/ptw.h | 3 --- | 7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ |
9 | target/arm/helper.c | 15 --------------- | 8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- |
10 | target/arm/ptw.c | 16 ++++++++++++++++ | 9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ |
11 | 3 files changed, 16 insertions(+), 18 deletions(-) | 10 | 3 files changed, 45 insertions(+), 4 deletions(-) |
12 | 11 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
14 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 14 | --- a/target/arm/tcg/helper-sve.h |
16 | +++ b/target/arm/ptw.h | 15 | +++ b/target/arm/tcg/helper-sve.h |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, |
18 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, |
19 | } | 18 | void, ptr, ptr, ptr, fpst, i32) |
20 | 19 | ||
21 | -bool m_is_ppb_region(CPUARMState *env, uint32_t address); | 20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, |
22 | -bool m_is_system_region(CPUARMState *env, uint32_t address); | 21 | + void, ptr, ptr, ptr, fpst, i32) |
23 | - | 22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, |
24 | bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 23 | + void, ptr, ptr, ptr, fpst, i32) |
25 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, |
26 | bool s1_is_el0, | 25 | + void, ptr, ptr, ptr, fpst, i32) |
27 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 26 | + |
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/helper.c | 39 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/target/arm/helper.c | 40 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ do_fault: | 41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { |
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
32 | return true; | 69 | return true; |
33 | } | 70 | } |
34 | 71 | ||
35 | -bool m_is_ppb_region(CPUARMState *env, uint32_t address) | 72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, |
36 | -{ | 73 | + const FPScalar *fnormal, |
37 | - /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ | 74 | + const FPScalar *fah) |
38 | - return arm_feature(env, ARM_FEATURE_M) && | ||
39 | - extract32(address, 20, 12) == 0xe00; | ||
40 | -} | ||
41 | - | ||
42 | -bool m_is_system_region(CPUARMState *env, uint32_t address) | ||
43 | -{ | ||
44 | - /* True if address is in the M profile system region | ||
45 | - * 0xe0000000 - 0xffffffff | ||
46 | - */ | ||
47 | - return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; | ||
48 | -} | ||
49 | - | ||
50 | /* Combine either inner or outer cacheability attributes for normal | ||
51 | * memory, according to table D4-42 and pseudocode procedure | ||
52 | * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
53 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/ptw.c | ||
56 | +++ b/target/arm/ptw.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
58 | } | ||
59 | } | ||
60 | |||
61 | +static bool m_is_ppb_region(CPUARMState *env, uint32_t address) | ||
62 | +{ | 75 | +{ |
63 | + /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ | 76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); |
64 | + return arm_feature(env, ARM_FEATURE_M) && | ||
65 | + extract32(address, 20, 12) == 0xe00; | ||
66 | +} | 77 | +} |
67 | + | 78 | + |
68 | +static bool m_is_system_region(CPUARMState *env, uint32_t address) | 79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) |
69 | +{ | 80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) |
70 | + /* | 81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) |
71 | + * True if address is in the M profile system region | 82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) |
72 | + * 0xe0000000 - 0xffffffff | 83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) |
73 | + */ | 84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) |
74 | + return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; | 85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) |
75 | +} | 86 | |
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
76 | + | 99 | + |
77 | static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, | 100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) |
78 | bool is_user) | 101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) |
79 | { | 102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) |
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
80 | -- | 108 | -- |
81 | 2.25.1 | 109 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV |
---|---|---|---|
2 | vector-reduction-to-scalar max/min operations. | ||
2 | 3 | ||
3 | Put the inline function near the array declaration. | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
4 | 11 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220607203306.657998-16-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/vec_internal.h | 8 +++++++- | ||
11 | target/arm/sve_helper.c | 9 --------- | ||
12 | 2 files changed, 7 insertions(+), 10 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/vec_internal.h | 14 | --- a/target/arm/tcg/helper-sve.h |
17 | +++ b/target/arm/vec_internal.h | 15 | +++ b/target/arm/tcg/helper-sve.h |
18 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, |
19 | #define H8(x) (x) | 17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, |
20 | #define H1_8(x) (x) | 18 | i64, ptr, ptr, fpst, i32) |
21 | 19 | ||
22 | -/* Data for expanding active predicate bits to bytes, for byte elements. */ | 20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, |
23 | +/* | 21 | + i64, ptr, ptr, fpst, i32) |
24 | + * Expand active predicate bits to bytes, for byte elements. | 22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, |
25 | + */ | 23 | + i64, ptr, ptr, fpst, i32) |
26 | extern const uint64_t expand_pred_b_data[256]; | 24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, |
27 | +static inline uint64_t expand_pred_b(uint8_t byte) | 25 | + i64, ptr, ptr, fpst, i32) |
28 | +{ | 26 | + |
29 | + return expand_pred_b_data[byte]; | 27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, |
30 | +} | 28 | + i64, ptr, ptr, fpst, i32) |
31 | 29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | |
32 | static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) | 30 | + i64, ptr, ptr, fpst, i32) |
33 | { | 31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, |
34 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | 32 | + i64, ptr, ptr, fpst, i32) |
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/target/arm/sve_helper.c | 39 | --- a/target/arm/tcg/sve_helper.c |
37 | +++ b/target/arm/sve_helper.c | 40 | +++ b/target/arm/tcg/sve_helper.c |
38 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) | 41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ |
39 | return flags; | 42 | uintptr_t half = n / 2; \ |
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
40 | } | 52 | } |
41 | 53 | ||
42 | -/* | 54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) |
43 | - * Expand active predicate bits to bytes, for byte elements. | 55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) |
44 | - * (The data table itself is in vec_helper.c as MVE also needs it.) | 56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) |
45 | - */ | 57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) |
46 | -static inline uint64_t expand_pred_b(uint8_t byte) | 58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) |
47 | -{ | 59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) |
48 | - return expand_pred_b_data[byte]; | 60 | |
49 | -} | 61 | /* Identity is floatN_default_nan, without the function call. */ |
50 | - | 62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) |
51 | /* Similarly for half-word elements. | 63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) |
52 | * for (i = 0; i < 256; ++i) { | 64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) |
53 | * unsigned long m = 0; | 65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) |
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
54 | -- | 133 | -- |
55 | 2.25.1 | 134 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip |
---|---|---|---|
2 | 2 | its sign bit. This means we can no longer use gen_vfp_neg*() | |
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | everywhere but must instead generate slightly more complex code when |
4 | Message-id: 20220604040607.269301-6-richard.henderson@linaro.org | 4 | FPCR.AH is set. |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | |
6 | Make this change for the scalar FNEG and for those places in | ||
7 | translate-a64.c which were previously directly calling | ||
8 | gen_vfp_neg*(). | ||
9 | |||
10 | This change in semantics also affects any other instruction whose | ||
11 | pseudocode calls FPNeg(); in following commits we extend this | ||
12 | change to the other affected instructions. | ||
13 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 16 | --- |
8 | target/arm/ptw.h | 4 --- | 17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- |
9 | target/arm/helper.c | 85 --------------------------------------------- | 18 | 1 file changed, 114 insertions(+), 11 deletions(-) |
10 | target/arm/ptw.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ | 19 | |
11 | 3 files changed, 85 insertions(+), 89 deletions(-) | 20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
12 | |||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 22 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 23 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, |
18 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); |
19 | } | 26 | } |
20 | 27 | ||
21 | -bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | 28 | +/* |
22 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. |
23 | - hwaddr *phys_ptr, int *prot, | 30 | + * These functions implement |
24 | - ARMMMUFaultInfo *fi); | 31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) |
25 | bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 32 | + * which for float32 is |
26 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 33 | + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) |
27 | hwaddr *phys_ptr, int *prot, | 34 | + * and similarly for the other float sizes. |
28 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 35 | + */ |
29 | index XXXXXXX..XXXXXXX 100644 | 36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) |
30 | --- a/target/arm/helper.c | 37 | +{ |
31 | +++ b/target/arm/helper.c | 38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); |
32 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | 39 | + |
33 | return ret; | 40 | + gen_vfp_negh(chs_s, s); |
41 | + gen_vfp_absh(abs_s, s); | ||
42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
43 | + abs_s, tcg_constant_i32(0x7c00), | ||
44 | + s, chs_s); | ||
45 | +} | ||
46 | + | ||
47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) | ||
48 | +{ | ||
49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
50 | + | ||
51 | + gen_vfp_negs(chs_s, s); | ||
52 | + gen_vfp_abss(abs_s, s); | ||
53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
54 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
55 | + s, chs_s); | ||
56 | +} | ||
57 | + | ||
58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
59 | +{ | ||
60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); | ||
61 | + | ||
62 | + gen_vfp_negd(chs_s, s); | ||
63 | + gen_vfp_absd(abs_s, s); | ||
64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
66 | + s, chs_s); | ||
67 | +} | ||
68 | + | ||
69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
70 | +{ | ||
71 | + if (dc->fpcr_ah) { | ||
72 | + gen_vfp_ah_negh(d, s); | ||
73 | + } else { | ||
74 | + gen_vfp_negh(d, s); | ||
75 | + } | ||
76 | +} | ||
77 | + | ||
78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
79 | +{ | ||
80 | + if (dc->fpcr_ah) { | ||
81 | + gen_vfp_ah_negs(d, s); | ||
82 | + } else { | ||
83 | + gen_vfp_negs(d, s); | ||
84 | + } | ||
85 | +} | ||
86 | + | ||
87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) | ||
88 | +{ | ||
89 | + if (dc->fpcr_ah) { | ||
90 | + gen_vfp_ah_negd(d, s); | ||
91 | + } else { | ||
92 | + gen_vfp_negd(d, s); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier | ||
97 | * than the 32 bit equivalent. | ||
98 | */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
100 | gen_vfp_negd(d, d); | ||
34 | } | 101 | } |
35 | 102 | ||
36 | -bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | 103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
37 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 104 | +{ |
38 | - hwaddr *phys_ptr, int *prot, | 105 | + gen_helper_vfp_mulh(d, n, m, s); |
39 | - ARMMMUFaultInfo *fi) | 106 | + gen_vfp_ah_negh(d, d); |
40 | -{ | 107 | +} |
41 | - int n; | 108 | + |
42 | - uint32_t mask; | 109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
43 | - uint32_t base; | 110 | +{ |
44 | - bool is_user = regime_is_user(env, mmu_idx); | 111 | + gen_helper_vfp_muls(d, n, m, s); |
45 | - | 112 | + gen_vfp_ah_negs(d, d); |
46 | - if (regime_translation_disabled(env, mmu_idx)) { | 113 | +} |
47 | - /* MPU disabled. */ | 114 | + |
48 | - *phys_ptr = address; | 115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
49 | - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | 116 | +{ |
50 | - return false; | 117 | + gen_helper_vfp_muld(d, n, m, s); |
51 | - } | 118 | + gen_vfp_ah_negd(d, d); |
52 | - | 119 | +} |
53 | - *phys_ptr = address; | 120 | + |
54 | - for (n = 7; n >= 0; n--) { | 121 | static const FPScalar f_scalar_fnmul = { |
55 | - base = env->cp15.c6_region[n]; | 122 | gen_fnmul_h, |
56 | - if ((base & 1) == 0) { | 123 | gen_fnmul_s, |
57 | - continue; | 124 | gen_fnmul_d, |
58 | - } | 125 | }; |
59 | - mask = 1 << ((base >> 1) & 0x1f); | 126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) |
60 | - /* Keep this shift separate from the above to avoid an | 127 | +static const FPScalar f_scalar_ah_fnmul = { |
61 | - (undefined) << 32. */ | 128 | + gen_fnmul_ah_h, |
62 | - mask = (mask << 1) - 1; | 129 | + gen_fnmul_ah_s, |
63 | - if (((base ^ address) & ~mask) == 0) { | 130 | + gen_fnmul_ah_d, |
64 | - break; | 131 | +}; |
65 | - } | 132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) |
66 | - } | 133 | |
67 | - if (n < 0) { | 134 | static const FPScalar f_scalar_fcmeq = { |
68 | - fi->type = ARMFault_Background; | 135 | gen_helper_advsimd_ceq_f16, |
69 | - return true; | 136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
70 | - } | 137 | |
71 | - | 138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); |
72 | - if (access_type == MMU_INST_FETCH) { | 139 | if (neg) { |
73 | - mask = env->cp15.pmsav5_insn_ap; | 140 | - gen_vfp_negd(t1, t1); |
74 | - } else { | 141 | + gen_vfp_maybe_ah_negd(s, t1, t1); |
75 | - mask = env->cp15.pmsav5_data_ap; | 142 | } |
76 | - } | 143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); |
77 | - mask = (mask >> (n * 4)) & 0xf; | 144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); |
78 | - switch (mask) { | 145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
79 | - case 0: | 146 | |
80 | - fi->type = ARMFault_Permission; | 147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); |
81 | - fi->level = 1; | 148 | if (neg) { |
82 | - return true; | 149 | - gen_vfp_negs(t1, t1); |
83 | - case 1: | 150 | + gen_vfp_maybe_ah_negs(s, t1, t1); |
84 | - if (is_user) { | 151 | } |
85 | - fi->type = ARMFault_Permission; | 152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); |
86 | - fi->level = 1; | 153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); |
87 | - return true; | 154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) |
88 | - } | 155 | |
89 | - *prot = PAGE_READ | PAGE_WRITE; | 156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); |
90 | - break; | 157 | if (neg) { |
91 | - case 2: | 158 | - gen_vfp_negh(t1, t1); |
92 | - *prot = PAGE_READ; | 159 | + gen_vfp_maybe_ah_negh(s, t1, t1); |
93 | - if (!is_user) { | 160 | } |
94 | - *prot |= PAGE_WRITE; | 161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, |
95 | - } | 162 | fpstatus_ptr(FPST_A64_F16)); |
96 | - break; | 163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
97 | - case 3: | 164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); |
98 | - *prot = PAGE_READ | PAGE_WRITE; | 165 | |
99 | - break; | 166 | if (neg_a) { |
100 | - case 5: | 167 | - gen_vfp_negd(ta, ta); |
101 | - if (is_user) { | 168 | + gen_vfp_maybe_ah_negd(s, ta, ta); |
102 | - fi->type = ARMFault_Permission; | 169 | } |
103 | - fi->level = 1; | 170 | if (neg_n) { |
104 | - return true; | 171 | - gen_vfp_negd(tn, tn); |
105 | - } | 172 | + gen_vfp_maybe_ah_negd(s, tn, tn); |
106 | - *prot = PAGE_READ; | 173 | } |
107 | - break; | 174 | fpst = fpstatus_ptr(FPST_A64); |
108 | - case 6: | 175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); |
109 | - *prot = PAGE_READ; | 176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
110 | - break; | 177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); |
111 | - default: | 178 | |
112 | - /* Bad permission. */ | 179 | if (neg_a) { |
113 | - fi->type = ARMFault_Permission; | 180 | - gen_vfp_negs(ta, ta); |
114 | - fi->level = 1; | 181 | + gen_vfp_maybe_ah_negs(s, ta, ta); |
115 | - return true; | 182 | } |
116 | - } | 183 | if (neg_n) { |
117 | - *prot |= PAGE_EXEC; | 184 | - gen_vfp_negs(tn, tn); |
118 | - return false; | 185 | + gen_vfp_maybe_ah_negs(s, tn, tn); |
119 | -} | 186 | } |
120 | - | 187 | fpst = fpstatus_ptr(FPST_A64); |
121 | /* Combine either inner or outer cacheability attributes for normal | 188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); |
122 | * memory, according to table D4-42 and pseudocode procedure | 189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) |
123 | * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | 190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); |
124 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 191 | |
125 | index XXXXXXX..XXXXXXX 100644 | 192 | if (neg_a) { |
126 | --- a/target/arm/ptw.c | 193 | - gen_vfp_negh(ta, ta); |
127 | +++ b/target/arm/ptw.c | 194 | + gen_vfp_maybe_ah_negh(s, ta, ta); |
128 | @@ -XXX,XX +XXX,XX @@ do_fault: | 195 | } |
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
129 | return true; | 203 | return true; |
130 | } | 204 | } |
131 | 205 | ||
132 | +static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | 206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, |
133 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | 207 | + const FPScalar1Int *fnormal, |
134 | + hwaddr *phys_ptr, int *prot, | 208 | + const FPScalar1Int *fah) |
135 | + ARMMMUFaultInfo *fi) | 209 | +{ |
136 | +{ | 210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); |
137 | + int n; | 211 | +} |
138 | + uint32_t mask; | 212 | + |
139 | + uint32_t base; | 213 | static const FPScalar1Int f_scalar_fmov = { |
140 | + bool is_user = regime_is_user(env, mmu_idx); | 214 | tcg_gen_mov_i32, |
141 | + | 215 | tcg_gen_mov_i32, |
142 | + if (regime_translation_disabled(env, mmu_idx)) { | 216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { |
143 | + /* MPU disabled. */ | 217 | gen_vfp_negs, |
144 | + *phys_ptr = address; | 218 | gen_vfp_negd, |
145 | + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | 219 | }; |
146 | + return false; | 220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) |
147 | + } | 221 | +static const FPScalar1Int f_scalar_ah_fneg = { |
148 | + | 222 | + gen_vfp_ah_negh, |
149 | + *phys_ptr = address; | 223 | + gen_vfp_ah_negs, |
150 | + for (n = 7; n >= 0; n--) { | 224 | + gen_vfp_ah_negd, |
151 | + base = env->cp15.c6_region[n]; | 225 | +}; |
152 | + if ((base & 1) == 0) { | 226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) |
153 | + continue; | 227 | |
154 | + } | 228 | typedef struct FPScalar1 { |
155 | + mask = 1 << ((base >> 1) & 0x1f); | 229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); |
156 | + /* Keep this shift separate from the above to avoid an | ||
157 | + (undefined) << 32. */ | ||
158 | + mask = (mask << 1) - 1; | ||
159 | + if (((base ^ address) & ~mask) == 0) { | ||
160 | + break; | ||
161 | + } | ||
162 | + } | ||
163 | + if (n < 0) { | ||
164 | + fi->type = ARMFault_Background; | ||
165 | + return true; | ||
166 | + } | ||
167 | + | ||
168 | + if (access_type == MMU_INST_FETCH) { | ||
169 | + mask = env->cp15.pmsav5_insn_ap; | ||
170 | + } else { | ||
171 | + mask = env->cp15.pmsav5_data_ap; | ||
172 | + } | ||
173 | + mask = (mask >> (n * 4)) & 0xf; | ||
174 | + switch (mask) { | ||
175 | + case 0: | ||
176 | + fi->type = ARMFault_Permission; | ||
177 | + fi->level = 1; | ||
178 | + return true; | ||
179 | + case 1: | ||
180 | + if (is_user) { | ||
181 | + fi->type = ARMFault_Permission; | ||
182 | + fi->level = 1; | ||
183 | + return true; | ||
184 | + } | ||
185 | + *prot = PAGE_READ | PAGE_WRITE; | ||
186 | + break; | ||
187 | + case 2: | ||
188 | + *prot = PAGE_READ; | ||
189 | + if (!is_user) { | ||
190 | + *prot |= PAGE_WRITE; | ||
191 | + } | ||
192 | + break; | ||
193 | + case 3: | ||
194 | + *prot = PAGE_READ | PAGE_WRITE; | ||
195 | + break; | ||
196 | + case 5: | ||
197 | + if (is_user) { | ||
198 | + fi->type = ARMFault_Permission; | ||
199 | + fi->level = 1; | ||
200 | + return true; | ||
201 | + } | ||
202 | + *prot = PAGE_READ; | ||
203 | + break; | ||
204 | + case 6: | ||
205 | + *prot = PAGE_READ; | ||
206 | + break; | ||
207 | + default: | ||
208 | + /* Bad permission. */ | ||
209 | + fi->type = ARMFault_Permission; | ||
210 | + fi->level = 1; | ||
211 | + return true; | ||
212 | + } | ||
213 | + *prot |= PAGE_EXEC; | ||
214 | + return false; | ||
215 | +} | ||
216 | + | ||
217 | /** | ||
218 | * get_phys_addr - get the physical address for this virtual address | ||
219 | * | ||
220 | -- | 230 | -- |
221 | 2.25.1 | 231 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should |
---|---|---|---|
2 | not change its sign bit. This means we can no longer use | ||
3 | gen_vfp_abs*() everywhere but must instead generate slightly more | ||
4 | complex code when FPCR.AH is set. | ||
2 | 5 | ||
3 | There are a handful of helpers for combine_cacheattrs | 6 | Implement these semantics for scalar FABS and FABD. This change also |
4 | that we can move at the same time as the main entry point. | 7 | affects all other instructions whose pseudocode calls FPAbs(); we |
8 | will extend the change to those instructions in following commits. | ||
5 | 9 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220604040607.269301-15-richard.henderson@linaro.org | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 12 | --- |
11 | target/arm/ptw.h | 3 - | 13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- |
12 | target/arm/helper.c | 218 ------------------------------------------- | 14 | 1 file changed, 67 insertions(+), 2 deletions(-) |
13 | target/arm/ptw.c | 221 ++++++++++++++++++++++++++++++++++++++++++++ | ||
14 | 3 files changed, 221 insertions(+), 221 deletions(-) | ||
15 | 15 | ||
16 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/ptw.h | 18 | --- a/target/arm/tcg/translate-a64.c |
19 | +++ b/target/arm/ptw.h | 19 | +++ b/target/arm/tcg/translate-a64.c |
20 | @@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) |
21 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 21 | s, chs_s); |
22 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | ||
23 | |||
24 | -ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
25 | - ARMCacheAttrs s1, ARMCacheAttrs s2); | ||
26 | - | ||
27 | int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
28 | int ap, int domain_prot); | ||
29 | int simple_ap_to_rw_prot_is_user(int ap, bool is_user); | ||
30 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/helper.c | ||
33 | +++ b/target/arm/helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | ||
35 | } | ||
36 | return true; | ||
37 | } | 22 | } |
38 | - | ||
39 | -/* Translate from the 4-bit stage 2 representation of | ||
40 | - * memory attributes (without cache-allocation hints) to | ||
41 | - * the 8-bit representation of the stage 1 MAIR registers | ||
42 | - * (which includes allocation hints). | ||
43 | - * | ||
44 | - * ref: shared/translation/attrs/S2AttrDecode() | ||
45 | - * .../S2ConvertAttrsHints() | ||
46 | - */ | ||
47 | -static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs) | ||
48 | -{ | ||
49 | - uint8_t hiattr = extract32(s2attrs, 2, 2); | ||
50 | - uint8_t loattr = extract32(s2attrs, 0, 2); | ||
51 | - uint8_t hihint = 0, lohint = 0; | ||
52 | - | ||
53 | - if (hiattr != 0) { /* normal memory */ | ||
54 | - if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */ | ||
55 | - hiattr = loattr = 1; /* non-cacheable */ | ||
56 | - } else { | ||
57 | - if (hiattr != 1) { /* Write-through or write-back */ | ||
58 | - hihint = 3; /* RW allocate */ | ||
59 | - } | ||
60 | - if (loattr != 1) { /* Write-through or write-back */ | ||
61 | - lohint = 3; /* RW allocate */ | ||
62 | - } | ||
63 | - } | ||
64 | - } | ||
65 | - | ||
66 | - return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint; | ||
67 | -} | ||
68 | #endif /* !CONFIG_USER_ONLY */ | ||
69 | |||
70 | /* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ | ||
71 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
72 | return true; | ||
73 | } | ||
74 | |||
75 | -/* Combine either inner or outer cacheability attributes for normal | ||
76 | - * memory, according to table D4-42 and pseudocode procedure | ||
77 | - * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
78 | - * | ||
79 | - * NB: only stage 1 includes allocation hints (RW bits), leading to | ||
80 | - * some asymmetry. | ||
81 | - */ | ||
82 | -static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) | ||
83 | -{ | ||
84 | - if (s1 == 4 || s2 == 4) { | ||
85 | - /* non-cacheable has precedence */ | ||
86 | - return 4; | ||
87 | - } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) { | ||
88 | - /* stage 1 write-through takes precedence */ | ||
89 | - return s1; | ||
90 | - } else if (extract32(s2, 2, 2) == 2) { | ||
91 | - /* stage 2 write-through takes precedence, but the allocation hint | ||
92 | - * is still taken from stage 1 | ||
93 | - */ | ||
94 | - return (2 << 2) | extract32(s1, 0, 2); | ||
95 | - } else { /* write-back */ | ||
96 | - return s1; | ||
97 | - } | ||
98 | -} | ||
99 | - | ||
100 | -/* | ||
101 | - * Combine the memory type and cacheability attributes of | ||
102 | - * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the | ||
103 | - * combined attributes in MAIR_EL1 format. | ||
104 | - */ | ||
105 | -static uint8_t combined_attrs_nofwb(CPUARMState *env, | ||
106 | - ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
107 | -{ | ||
108 | - uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs; | ||
109 | - | ||
110 | - s2_mair_attrs = convert_stage2_attrs(env, s2.attrs); | ||
111 | - | ||
112 | - s1lo = extract32(s1.attrs, 0, 4); | ||
113 | - s2lo = extract32(s2_mair_attrs, 0, 4); | ||
114 | - s1hi = extract32(s1.attrs, 4, 4); | ||
115 | - s2hi = extract32(s2_mair_attrs, 4, 4); | ||
116 | - | ||
117 | - /* Combine memory type and cacheability attributes */ | ||
118 | - if (s1hi == 0 || s2hi == 0) { | ||
119 | - /* Device has precedence over normal */ | ||
120 | - if (s1lo == 0 || s2lo == 0) { | ||
121 | - /* nGnRnE has precedence over anything */ | ||
122 | - ret_attrs = 0; | ||
123 | - } else if (s1lo == 4 || s2lo == 4) { | ||
124 | - /* non-Reordering has precedence over Reordering */ | ||
125 | - ret_attrs = 4; /* nGnRE */ | ||
126 | - } else if (s1lo == 8 || s2lo == 8) { | ||
127 | - /* non-Gathering has precedence over Gathering */ | ||
128 | - ret_attrs = 8; /* nGRE */ | ||
129 | - } else { | ||
130 | - ret_attrs = 0xc; /* GRE */ | ||
131 | - } | ||
132 | - } else { /* Normal memory */ | ||
133 | - /* Outer/inner cacheability combine independently */ | ||
134 | - ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4 | ||
135 | - | combine_cacheattr_nibble(s1lo, s2lo); | ||
136 | - } | ||
137 | - return ret_attrs; | ||
138 | -} | ||
139 | - | ||
140 | -static uint8_t force_cacheattr_nibble_wb(uint8_t attr) | ||
141 | -{ | ||
142 | - /* | ||
143 | - * Given the 4 bits specifying the outer or inner cacheability | ||
144 | - * in MAIR format, return a value specifying Normal Write-Back, | ||
145 | - * with the allocation and transient hints taken from the input | ||
146 | - * if the input specified some kind of cacheable attribute. | ||
147 | - */ | ||
148 | - if (attr == 0 || attr == 4) { | ||
149 | - /* | ||
150 | - * 0 == an UNPREDICTABLE encoding | ||
151 | - * 4 == Non-cacheable | ||
152 | - * Either way, force Write-Back RW allocate non-transient | ||
153 | - */ | ||
154 | - return 0xf; | ||
155 | - } | ||
156 | - /* Change WriteThrough to WriteBack, keep allocation and transient hints */ | ||
157 | - return attr | 4; | ||
158 | -} | ||
159 | - | ||
160 | -/* | ||
161 | - * Combine the memory type and cacheability attributes of | ||
162 | - * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the | ||
163 | - * combined attributes in MAIR_EL1 format. | ||
164 | - */ | ||
165 | -static uint8_t combined_attrs_fwb(CPUARMState *env, | ||
166 | - ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
167 | -{ | ||
168 | - switch (s2.attrs) { | ||
169 | - case 7: | ||
170 | - /* Use stage 1 attributes */ | ||
171 | - return s1.attrs; | ||
172 | - case 6: | ||
173 | - /* | ||
174 | - * Force Normal Write-Back. Note that if S1 is Normal cacheable | ||
175 | - * then we take the allocation hints from it; otherwise it is | ||
176 | - * RW allocate, non-transient. | ||
177 | - */ | ||
178 | - if ((s1.attrs & 0xf0) == 0) { | ||
179 | - /* S1 is Device */ | ||
180 | - return 0xff; | ||
181 | - } | ||
182 | - /* Need to check the Inner and Outer nibbles separately */ | ||
183 | - return force_cacheattr_nibble_wb(s1.attrs & 0xf) | | ||
184 | - force_cacheattr_nibble_wb(s1.attrs >> 4) << 4; | ||
185 | - case 5: | ||
186 | - /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */ | ||
187 | - if ((s1.attrs & 0xf0) == 0) { | ||
188 | - return s1.attrs; | ||
189 | - } | ||
190 | - return 0x44; | ||
191 | - case 0 ... 3: | ||
192 | - /* Force Device, of subtype specified by S2 */ | ||
193 | - return s2.attrs << 2; | ||
194 | - default: | ||
195 | - /* | ||
196 | - * RESERVED values (including RES0 descriptor bit [5] being nonzero); | ||
197 | - * arbitrarily force Device. | ||
198 | - */ | ||
199 | - return 0; | ||
200 | - } | ||
201 | -} | ||
202 | - | ||
203 | -/* Combine S1 and S2 cacheability/shareability attributes, per D4.5.4 | ||
204 | - * and CombineS1S2Desc() | ||
205 | - * | ||
206 | - * @env: CPUARMState | ||
207 | - * @s1: Attributes from stage 1 walk | ||
208 | - * @s2: Attributes from stage 2 walk | ||
209 | - */ | ||
210 | -ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
211 | - ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
212 | -{ | ||
213 | - ARMCacheAttrs ret; | ||
214 | - bool tagged = false; | ||
215 | - | ||
216 | - assert(s2.is_s2_format && !s1.is_s2_format); | ||
217 | - ret.is_s2_format = false; | ||
218 | - | ||
219 | - if (s1.attrs == 0xf0) { | ||
220 | - tagged = true; | ||
221 | - s1.attrs = 0xff; | ||
222 | - } | ||
223 | - | ||
224 | - /* Combine shareability attributes (table D4-43) */ | ||
225 | - if (s1.shareability == 2 || s2.shareability == 2) { | ||
226 | - /* if either are outer-shareable, the result is outer-shareable */ | ||
227 | - ret.shareability = 2; | ||
228 | - } else if (s1.shareability == 3 || s2.shareability == 3) { | ||
229 | - /* if either are inner-shareable, the result is inner-shareable */ | ||
230 | - ret.shareability = 3; | ||
231 | - } else { | ||
232 | - /* both non-shareable */ | ||
233 | - ret.shareability = 0; | ||
234 | - } | ||
235 | - | ||
236 | - /* Combine memory type and cacheability attributes */ | ||
237 | - if (arm_hcr_el2_eff(env) & HCR_FWB) { | ||
238 | - ret.attrs = combined_attrs_fwb(env, s1, s2); | ||
239 | - } else { | ||
240 | - ret.attrs = combined_attrs_nofwb(env, s1, s2); | ||
241 | - } | ||
242 | - | ||
243 | - /* | ||
244 | - * Any location for which the resultant memory type is any | ||
245 | - * type of Device memory is always treated as Outer Shareable. | ||
246 | - * Any location for which the resultant memory type is Normal | ||
247 | - * Inner Non-cacheable, Outer Non-cacheable is always treated | ||
248 | - * as Outer Shareable. | ||
249 | - * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC | ||
250 | - */ | ||
251 | - if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) { | ||
252 | - ret.shareability = 2; | ||
253 | - } | ||
254 | - | ||
255 | - /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ | ||
256 | - if (tagged && ret.attrs == 0xff) { | ||
257 | - ret.attrs = 0xf0; | ||
258 | - } | ||
259 | - | ||
260 | - return ret; | ||
261 | -} | ||
262 | - | ||
263 | hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
264 | MemTxAttrs *attrs) | ||
265 | { | ||
266 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
267 | index XXXXXXX..XXXXXXX 100644 | ||
268 | --- a/target/arm/ptw.c | ||
269 | +++ b/target/arm/ptw.c | ||
270 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
271 | return ret; | ||
272 | } | ||
273 | 23 | ||
274 | +/* | 24 | +/* |
275 | + * Translate from the 4-bit stage 2 representation of | 25 | + * These functions implement |
276 | + * memory attributes (without cache-allocation hints) to | 26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) |
277 | + * the 8-bit representation of the stage 1 MAIR registers | 27 | + * which for float32 is |
278 | + * (which includes allocation hints). | 28 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
279 | + * | 29 | + * and similarly for the other float sizes. |
280 | + * ref: shared/translation/attrs/S2AttrDecode() | ||
281 | + * .../S2ConvertAttrsHints() | ||
282 | + */ | 30 | + */ |
283 | +static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs) | 31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) |
284 | +{ | 32 | +{ |
285 | + uint8_t hiattr = extract32(s2attrs, 2, 2); | 33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); |
286 | + uint8_t loattr = extract32(s2attrs, 0, 2); | ||
287 | + uint8_t hihint = 0, lohint = 0; | ||
288 | + | 34 | + |
289 | + if (hiattr != 0) { /* normal memory */ | 35 | + gen_vfp_absh(abs_s, s); |
290 | + if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */ | 36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
291 | + hiattr = loattr = 1; /* non-cacheable */ | 37 | + abs_s, tcg_constant_i32(0x7c00), |
292 | + } else { | 38 | + s, abs_s); |
293 | + if (hiattr != 1) { /* Write-through or write-back */ | ||
294 | + hihint = 3; /* RW allocate */ | ||
295 | + } | ||
296 | + if (loattr != 1) { /* Write-through or write-back */ | ||
297 | + lohint = 3; /* RW allocate */ | ||
298 | + } | ||
299 | + } | ||
300 | + } | ||
301 | + | ||
302 | + return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint; | ||
303 | +} | 39 | +} |
304 | + | 40 | + |
305 | +/* | 41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) |
306 | + * Combine either inner or outer cacheability attributes for normal | ||
307 | + * memory, according to table D4-42 and pseudocode procedure | ||
308 | + * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
309 | + * | ||
310 | + * NB: only stage 1 includes allocation hints (RW bits), leading to | ||
311 | + * some asymmetry. | ||
312 | + */ | ||
313 | +static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) | ||
314 | +{ | 42 | +{ |
315 | + if (s1 == 4 || s2 == 4) { | 43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); |
316 | + /* non-cacheable has precedence */ | 44 | + |
317 | + return 4; | 45 | + gen_vfp_abss(abs_s, s); |
318 | + } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) { | 46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
319 | + /* stage 1 write-through takes precedence */ | 47 | + abs_s, tcg_constant_i32(0x7f800000UL), |
320 | + return s1; | 48 | + s, abs_s); |
321 | + } else if (extract32(s2, 2, 2) == 2) { | ||
322 | + /* stage 2 write-through takes precedence, but the allocation hint | ||
323 | + * is still taken from stage 1 | ||
324 | + */ | ||
325 | + return (2 << 2) | extract32(s1, 0, 2); | ||
326 | + } else { /* write-back */ | ||
327 | + return s1; | ||
328 | + } | ||
329 | +} | 49 | +} |
330 | + | 50 | + |
331 | +/* | 51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) |
332 | + * Combine the memory type and cacheability attributes of | ||
333 | + * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the | ||
334 | + * combined attributes in MAIR_EL1 format. | ||
335 | + */ | ||
336 | +static uint8_t combined_attrs_nofwb(CPUARMState *env, | ||
337 | + ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
338 | +{ | 52 | +{ |
339 | + uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs; | 53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); |
340 | + | 54 | + |
341 | + s2_mair_attrs = convert_stage2_attrs(env, s2.attrs); | 55 | + gen_vfp_absd(abs_s, s); |
342 | + | 56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, |
343 | + s1lo = extract32(s1.attrs, 0, 4); | 57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), |
344 | + s2lo = extract32(s2_mair_attrs, 0, 4); | 58 | + s, abs_s); |
345 | + s1hi = extract32(s1.attrs, 4, 4); | ||
346 | + s2hi = extract32(s2_mair_attrs, 4, 4); | ||
347 | + | ||
348 | + /* Combine memory type and cacheability attributes */ | ||
349 | + if (s1hi == 0 || s2hi == 0) { | ||
350 | + /* Device has precedence over normal */ | ||
351 | + if (s1lo == 0 || s2lo == 0) { | ||
352 | + /* nGnRnE has precedence over anything */ | ||
353 | + ret_attrs = 0; | ||
354 | + } else if (s1lo == 4 || s2lo == 4) { | ||
355 | + /* non-Reordering has precedence over Reordering */ | ||
356 | + ret_attrs = 4; /* nGnRE */ | ||
357 | + } else if (s1lo == 8 || s2lo == 8) { | ||
358 | + /* non-Gathering has precedence over Gathering */ | ||
359 | + ret_attrs = 8; /* nGRE */ | ||
360 | + } else { | ||
361 | + ret_attrs = 0xc; /* GRE */ | ||
362 | + } | ||
363 | + } else { /* Normal memory */ | ||
364 | + /* Outer/inner cacheability combine independently */ | ||
365 | + ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4 | ||
366 | + | combine_cacheattr_nibble(s1lo, s2lo); | ||
367 | + } | ||
368 | + return ret_attrs; | ||
369 | +} | 59 | +} |
370 | + | 60 | + |
371 | +static uint8_t force_cacheattr_nibble_wb(uint8_t attr) | 61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
62 | { | ||
63 | if (dc->fpcr_ah) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
65 | gen_vfp_absd(d, d); | ||
66 | } | ||
67 | |||
68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
372 | +{ | 69 | +{ |
373 | + /* | 70 | + gen_helper_vfp_subh(d, n, m, s); |
374 | + * Given the 4 bits specifying the outer or inner cacheability | 71 | + gen_vfp_ah_absh(d, d); |
375 | + * in MAIR format, return a value specifying Normal Write-Back, | ||
376 | + * with the allocation and transient hints taken from the input | ||
377 | + * if the input specified some kind of cacheable attribute. | ||
378 | + */ | ||
379 | + if (attr == 0 || attr == 4) { | ||
380 | + /* | ||
381 | + * 0 == an UNPREDICTABLE encoding | ||
382 | + * 4 == Non-cacheable | ||
383 | + * Either way, force Write-Back RW allocate non-transient | ||
384 | + */ | ||
385 | + return 0xf; | ||
386 | + } | ||
387 | + /* Change WriteThrough to WriteBack, keep allocation and transient hints */ | ||
388 | + return attr | 4; | ||
389 | +} | 72 | +} |
390 | + | 73 | + |
391 | +/* | 74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
392 | + * Combine the memory type and cacheability attributes of | ||
393 | + * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the | ||
394 | + * combined attributes in MAIR_EL1 format. | ||
395 | + */ | ||
396 | +static uint8_t combined_attrs_fwb(CPUARMState *env, | ||
397 | + ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
398 | +{ | 75 | +{ |
399 | + switch (s2.attrs) { | 76 | + gen_helper_vfp_subs(d, n, m, s); |
400 | + case 7: | 77 | + gen_vfp_ah_abss(d, d); |
401 | + /* Use stage 1 attributes */ | ||
402 | + return s1.attrs; | ||
403 | + case 6: | ||
404 | + /* | ||
405 | + * Force Normal Write-Back. Note that if S1 is Normal cacheable | ||
406 | + * then we take the allocation hints from it; otherwise it is | ||
407 | + * RW allocate, non-transient. | ||
408 | + */ | ||
409 | + if ((s1.attrs & 0xf0) == 0) { | ||
410 | + /* S1 is Device */ | ||
411 | + return 0xff; | ||
412 | + } | ||
413 | + /* Need to check the Inner and Outer nibbles separately */ | ||
414 | + return force_cacheattr_nibble_wb(s1.attrs & 0xf) | | ||
415 | + force_cacheattr_nibble_wb(s1.attrs >> 4) << 4; | ||
416 | + case 5: | ||
417 | + /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */ | ||
418 | + if ((s1.attrs & 0xf0) == 0) { | ||
419 | + return s1.attrs; | ||
420 | + } | ||
421 | + return 0x44; | ||
422 | + case 0 ... 3: | ||
423 | + /* Force Device, of subtype specified by S2 */ | ||
424 | + return s2.attrs << 2; | ||
425 | + default: | ||
426 | + /* | ||
427 | + * RESERVED values (including RES0 descriptor bit [5] being nonzero); | ||
428 | + * arbitrarily force Device. | ||
429 | + */ | ||
430 | + return 0; | ||
431 | + } | ||
432 | +} | 78 | +} |
433 | + | 79 | + |
434 | +/* | 80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
435 | + * Combine S1 and S2 cacheability/shareability attributes, per D4.5.4 | ||
436 | + * and CombineS1S2Desc() | ||
437 | + * | ||
438 | + * @env: CPUARMState | ||
439 | + * @s1: Attributes from stage 1 walk | ||
440 | + * @s2: Attributes from stage 2 walk | ||
441 | + */ | ||
442 | +static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
443 | + ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
444 | +{ | 81 | +{ |
445 | + ARMCacheAttrs ret; | 82 | + gen_helper_vfp_subd(d, n, m, s); |
446 | + bool tagged = false; | 83 | + gen_vfp_ah_absd(d, d); |
447 | + | ||
448 | + assert(s2.is_s2_format && !s1.is_s2_format); | ||
449 | + ret.is_s2_format = false; | ||
450 | + | ||
451 | + if (s1.attrs == 0xf0) { | ||
452 | + tagged = true; | ||
453 | + s1.attrs = 0xff; | ||
454 | + } | ||
455 | + | ||
456 | + /* Combine shareability attributes (table D4-43) */ | ||
457 | + if (s1.shareability == 2 || s2.shareability == 2) { | ||
458 | + /* if either are outer-shareable, the result is outer-shareable */ | ||
459 | + ret.shareability = 2; | ||
460 | + } else if (s1.shareability == 3 || s2.shareability == 3) { | ||
461 | + /* if either are inner-shareable, the result is inner-shareable */ | ||
462 | + ret.shareability = 3; | ||
463 | + } else { | ||
464 | + /* both non-shareable */ | ||
465 | + ret.shareability = 0; | ||
466 | + } | ||
467 | + | ||
468 | + /* Combine memory type and cacheability attributes */ | ||
469 | + if (arm_hcr_el2_eff(env) & HCR_FWB) { | ||
470 | + ret.attrs = combined_attrs_fwb(env, s1, s2); | ||
471 | + } else { | ||
472 | + ret.attrs = combined_attrs_nofwb(env, s1, s2); | ||
473 | + } | ||
474 | + | ||
475 | + /* | ||
476 | + * Any location for which the resultant memory type is any | ||
477 | + * type of Device memory is always treated as Outer Shareable. | ||
478 | + * Any location for which the resultant memory type is Normal | ||
479 | + * Inner Non-cacheable, Outer Non-cacheable is always treated | ||
480 | + * as Outer Shareable. | ||
481 | + * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC | ||
482 | + */ | ||
483 | + if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) { | ||
484 | + ret.shareability = 2; | ||
485 | + } | ||
486 | + | ||
487 | + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ | ||
488 | + if (tagged && ret.attrs == 0xff) { | ||
489 | + ret.attrs = 0xf0; | ||
490 | + } | ||
491 | + | ||
492 | + return ret; | ||
493 | +} | 84 | +} |
494 | + | 85 | + |
495 | /** | 86 | static const FPScalar f_scalar_fabd = { |
496 | * get_phys_addr - get the physical address for this virtual address | 87 | gen_fabd_h, |
497 | * | 88 | gen_fabd_s, |
89 | gen_fabd_d, | ||
90 | }; | ||
91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
92 | +static const FPScalar f_scalar_ah_fabd = { | ||
93 | + gen_fabd_ah_h, | ||
94 | + gen_fabd_ah_s, | ||
95 | + gen_fabd_ah_d, | ||
96 | +}; | ||
97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) | ||
98 | |||
99 | static const FPScalar f_scalar_frecps = { | ||
100 | gen_helper_recpsf_f16, | ||
101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { | ||
102 | gen_vfp_abss, | ||
103 | gen_vfp_absd, | ||
104 | }; | ||
105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
106 | +static const FPScalar1Int f_scalar_ah_fabs = { | ||
107 | + gen_vfp_ah_absh, | ||
108 | + gen_vfp_ah_abss, | ||
109 | + gen_vfp_ah_absd, | ||
110 | +}; | ||
111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) | ||
112 | |||
113 | static const FPScalar1Int f_scalar_fneg = { | ||
114 | gen_vfp_negh, | ||
498 | -- | 115 | -- |
499 | 2.25.1 | 116 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Split the handling of vector FABD so that it calls a different set |
---|---|---|---|
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
2 | 4 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-22-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 7 | --- |
8 | target/arm/ptw.h | 3 --- | 8 | target/arm/helper.h | 4 ++++ |
9 | target/arm/helper.c | 64 --------------------------------------------- | 9 | target/arm/tcg/translate-a64.c | 7 ++++++- |
10 | target/arm/ptw.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ | 10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ |
11 | 3 files changed, 64 insertions(+), 67 deletions(-) | 11 | 3 files changed, 33 insertions(+), 1 deletion(-) |
12 | 12 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 13 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
14 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 15 | --- a/target/arm/helper.h |
16 | +++ b/target/arm/ptw.h | 16 | +++ b/target/arm/helper.h |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
18 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
19 | } | 52 | } |
20 | 53 | ||
21 | -ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | 54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ |
22 | - ARMMMUIdx mmu_idx); | 55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) |
23 | - | ||
24 | #endif /* !CONFIG_USER_ONLY */ | ||
25 | #endif /* TARGET_ARM_PTW_H */ | ||
26 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/helper.c | ||
29 | +++ b/target/arm/helper.c | ||
30 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | ||
31 | } | ||
32 | |||
33 | #ifndef CONFIG_USER_ONLY | ||
34 | -ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | ||
35 | - ARMMMUIdx mmu_idx) | ||
36 | -{ | ||
37 | - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; | ||
38 | - uint32_t el = regime_el(env, mmu_idx); | ||
39 | - int select, tsz; | ||
40 | - bool epd, hpd; | ||
41 | - | ||
42 | - assert(mmu_idx != ARMMMUIdx_Stage2_S); | ||
43 | - | ||
44 | - if (mmu_idx == ARMMMUIdx_Stage2) { | ||
45 | - /* VTCR */ | ||
46 | - bool sext = extract32(tcr, 4, 1); | ||
47 | - bool sign = extract32(tcr, 3, 1); | ||
48 | - | ||
49 | - /* | ||
50 | - * If the sign-extend bit is not the same as t0sz[3], the result | ||
51 | - * is unpredictable. Flag this as a guest error. | ||
52 | - */ | ||
53 | - if (sign != sext) { | ||
54 | - qemu_log_mask(LOG_GUEST_ERROR, | ||
55 | - "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n"); | ||
56 | - } | ||
57 | - tsz = sextract32(tcr, 0, 4) + 8; | ||
58 | - select = 0; | ||
59 | - hpd = false; | ||
60 | - epd = false; | ||
61 | - } else if (el == 2) { | ||
62 | - /* HTCR */ | ||
63 | - tsz = extract32(tcr, 0, 3); | ||
64 | - select = 0; | ||
65 | - hpd = extract64(tcr, 24, 1); | ||
66 | - epd = false; | ||
67 | - } else { | ||
68 | - int t0sz = extract32(tcr, 0, 3); | ||
69 | - int t1sz = extract32(tcr, 16, 3); | ||
70 | - | ||
71 | - if (t1sz == 0) { | ||
72 | - select = va > (0xffffffffu >> t0sz); | ||
73 | - } else { | ||
74 | - /* Note that we will detect errors later. */ | ||
75 | - select = va >= ~(0xffffffffu >> t1sz); | ||
76 | - } | ||
77 | - if (!select) { | ||
78 | - tsz = t0sz; | ||
79 | - epd = extract32(tcr, 7, 1); | ||
80 | - hpd = extract64(tcr, 41, 1); | ||
81 | - } else { | ||
82 | - tsz = t1sz; | ||
83 | - epd = extract32(tcr, 23, 1); | ||
84 | - hpd = extract64(tcr, 42, 1); | ||
85 | - } | ||
86 | - /* For aarch32, hpd0 is not enabled without t2e as well. */ | ||
87 | - hpd &= extract32(tcr, 6, 1); | ||
88 | - } | ||
89 | - | ||
90 | - return (ARMVAParameters) { | ||
91 | - .tsz = tsz, | ||
92 | - .select = select, | ||
93 | - .epd = epd, | ||
94 | - .hpd = hpd, | ||
95 | - }; | ||
96 | -} | ||
97 | - | ||
98 | hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
99 | MemTxAttrs *attrs) | ||
100 | { | ||
101 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/arm/ptw.c | ||
104 | +++ b/target/arm/ptw.c | ||
105 | @@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
106 | return prot_rw | PAGE_EXEC; | ||
107 | } | ||
108 | |||
109 | +static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | ||
110 | + ARMMMUIdx mmu_idx) | ||
111 | +{ | 56 | +{ |
112 | + uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; | 57 | + float16 r = float16_sub(op1, op2, stat); |
113 | + uint32_t el = regime_el(env, mmu_idx); | 58 | + return float16_is_any_nan(r) ? r : float16_abs(r); |
114 | + int select, tsz; | 59 | +} |
115 | + bool epd, hpd; | ||
116 | + | 60 | + |
117 | + assert(mmu_idx != ARMMMUIdx_Stage2_S); | 61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) |
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
118 | + | 66 | + |
119 | + if (mmu_idx == ARMMMUIdx_Stage2) { | 67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) |
120 | + /* VTCR */ | 68 | +{ |
121 | + bool sext = extract32(tcr, 4, 1); | 69 | + float64 r = float64_sub(op1, op2, stat); |
122 | + bool sign = extract32(tcr, 3, 1); | 70 | + return float64_is_any_nan(r) ? r : float64_abs(r); |
123 | + | ||
124 | + /* | ||
125 | + * If the sign-extend bit is not the same as t0sz[3], the result | ||
126 | + * is unpredictable. Flag this as a guest error. | ||
127 | + */ | ||
128 | + if (sign != sext) { | ||
129 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
130 | + "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n"); | ||
131 | + } | ||
132 | + tsz = sextract32(tcr, 0, 4) + 8; | ||
133 | + select = 0; | ||
134 | + hpd = false; | ||
135 | + epd = false; | ||
136 | + } else if (el == 2) { | ||
137 | + /* HTCR */ | ||
138 | + tsz = extract32(tcr, 0, 3); | ||
139 | + select = 0; | ||
140 | + hpd = extract64(tcr, 24, 1); | ||
141 | + epd = false; | ||
142 | + } else { | ||
143 | + int t0sz = extract32(tcr, 0, 3); | ||
144 | + int t1sz = extract32(tcr, 16, 3); | ||
145 | + | ||
146 | + if (t1sz == 0) { | ||
147 | + select = va > (0xffffffffu >> t0sz); | ||
148 | + } else { | ||
149 | + /* Note that we will detect errors later. */ | ||
150 | + select = va >= ~(0xffffffffu >> t1sz); | ||
151 | + } | ||
152 | + if (!select) { | ||
153 | + tsz = t0sz; | ||
154 | + epd = extract32(tcr, 7, 1); | ||
155 | + hpd = extract64(tcr, 41, 1); | ||
156 | + } else { | ||
157 | + tsz = t1sz; | ||
158 | + epd = extract32(tcr, 23, 1); | ||
159 | + hpd = extract64(tcr, 42, 1); | ||
160 | + } | ||
161 | + /* For aarch32, hpd0 is not enabled without t2e as well. */ | ||
162 | + hpd &= extract32(tcr, 6, 1); | ||
163 | + } | ||
164 | + | ||
165 | + return (ARMVAParameters) { | ||
166 | + .tsz = tsz, | ||
167 | + .select = select, | ||
168 | + .epd = epd, | ||
169 | + .hpd = hpd, | ||
170 | + }; | ||
171 | +} | 71 | +} |
172 | + | 72 | + |
173 | /* | 73 | /* |
174 | * check_s2_mmu_setup | 74 | * Reciprocal step. These are the AArch32 version which uses a |
175 | * @cpu: ARMCPU | 75 | * non-fused multiply-and-subtract. |
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
176 | -- | 87 | -- |
177 | 2.25.1 | 88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign |
---|---|---|---|
2 | of a NaN" semantics. | ||
2 | 3 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-28-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 6 | --- |
8 | target/arm/helper.c | 32 -------------------------------- | 7 | target/arm/tcg/helper-sve.h | 7 +++++++ |
9 | target/arm/ptw.c | 28 ++++++++++++++++++++++++++++ | 8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ |
10 | 2 files changed, 28 insertions(+), 32 deletions(-) | 9 | target/arm/tcg/translate-sve.c | 2 +- |
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | 11 | ||
12 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
13 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/arm/helper.c | 14 | --- a/target/arm/tcg/helper-sve.h |
15 | +++ b/target/arm/helper.c | 15 | +++ b/target/arm/tcg/helper-sve.h |
16 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el) | 16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, |
17 | return env->cp15.sctlr_el[el]; | 17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, |
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
18 | } | 36 | } |
19 | 37 | ||
20 | -#ifndef CONFIG_USER_ONLY | 38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ |
21 | -/* Convert a possible stage1+2 MMU index into the appropriate | 39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) |
22 | - * stage 1 MMU index | ||
23 | - */ | ||
24 | -ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
25 | -{ | ||
26 | - switch (mmu_idx) { | ||
27 | - case ARMMMUIdx_SE10_0: | ||
28 | - return ARMMMUIdx_Stage1_SE0; | ||
29 | - case ARMMMUIdx_SE10_1: | ||
30 | - return ARMMMUIdx_Stage1_SE1; | ||
31 | - case ARMMMUIdx_SE10_1_PAN: | ||
32 | - return ARMMMUIdx_Stage1_SE1_PAN; | ||
33 | - case ARMMMUIdx_E10_0: | ||
34 | - return ARMMMUIdx_Stage1_E0; | ||
35 | - case ARMMMUIdx_E10_1: | ||
36 | - return ARMMMUIdx_Stage1_E1; | ||
37 | - case ARMMMUIdx_E10_1_PAN: | ||
38 | - return ARMMMUIdx_Stage1_E1_PAN; | ||
39 | - default: | ||
40 | - return mmu_idx; | ||
41 | - } | ||
42 | -} | ||
43 | -#endif /* !CONFIG_USER_ONLY */ | ||
44 | - | ||
45 | int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
46 | { | ||
47 | if (regime_has_2_ranges(mmu_idx)) { | ||
48 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env) | ||
49 | return arm_mmu_idx_el(env, arm_current_el(env)); | ||
50 | } | ||
51 | |||
52 | -#ifndef CONFIG_USER_ONLY | ||
53 | -ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) | ||
54 | -{ | ||
55 | - return stage_1_mmu_idx(arm_mmu_idx(env)); | ||
56 | -} | ||
57 | -#endif | ||
58 | - | ||
59 | static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el, | ||
60 | ARMMMUIdx mmu_idx, | ||
61 | CPUARMTBFlags flags) | ||
62 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/ptw.c | ||
65 | +++ b/target/arm/ptw.c | ||
66 | @@ -XXX,XX +XXX,XX @@ unsigned int arm_pamax(ARMCPU *cpu) | ||
67 | return pamax_map[parange]; | ||
68 | } | ||
69 | |||
70 | +/* | ||
71 | + * Convert a possible stage1+2 MMU index into the appropriate stage 1 MMU index | ||
72 | + */ | ||
73 | +ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
74 | +{ | 40 | +{ |
75 | + switch (mmu_idx) { | 41 | + float16 r = float16_sub(op1, op2, stat); |
76 | + case ARMMMUIdx_SE10_0: | 42 | + return float16_is_any_nan(r) ? r : float16_abs(r); |
77 | + return ARMMMUIdx_Stage1_SE0; | ||
78 | + case ARMMMUIdx_SE10_1: | ||
79 | + return ARMMMUIdx_Stage1_SE1; | ||
80 | + case ARMMMUIdx_SE10_1_PAN: | ||
81 | + return ARMMMUIdx_Stage1_SE1_PAN; | ||
82 | + case ARMMMUIdx_E10_0: | ||
83 | + return ARMMMUIdx_Stage1_E0; | ||
84 | + case ARMMMUIdx_E10_1: | ||
85 | + return ARMMMUIdx_Stage1_E1; | ||
86 | + case ARMMMUIdx_E10_1_PAN: | ||
87 | + return ARMMMUIdx_Stage1_E1_PAN; | ||
88 | + default: | ||
89 | + return mmu_idx; | ||
90 | + } | ||
91 | +} | 43 | +} |
92 | + | 44 | + |
93 | +ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) | 45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) |
94 | +{ | 46 | +{ |
95 | + return stage_1_mmu_idx(arm_mmu_idx(env)); | 47 | + float32 r = float32_sub(op1, op2, stat); |
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
96 | +} | 49 | +} |
97 | + | 50 | + |
98 | static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) | 51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) |
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
99 | { | 65 | { |
100 | return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; | 66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
101 | -- | 79 | -- |
102 | 2.25.1 | 80 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The negation steps in FCADD must honour FPCR.AH's "don't change the |
---|---|---|---|
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
2 | 6 | ||
3 | We will need this over in sme_helper.c. | 7 | The construction of neg_imag and neg_real was done to make it easy |
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
4 | 13 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Note that there was a naming issue with neg_imag and neg_real. |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | They were named backward, with neg_imag being non-zero for rot=1, |
7 | Message-id: 20220607203306.657998-19-richard.henderson@linaro.org | 16 | and vice versa. This was combined with reversed usage within the |
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | 21 | --- |
10 | target/arm/vec_internal.h | 13 +++++++++++++ | 22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ |
11 | target/arm/vec_helper.c | 2 +- | 23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- |
12 | 2 files changed, 14 insertions(+), 1 deletion(-) | 24 | target/arm/tcg/translate-sve.c | 2 +- |
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
13 | 26 | ||
14 | diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h | 27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h |
15 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/vec_internal.h | 29 | --- a/target/arm/tcg/vec_internal.h |
17 | +++ b/target/arm/vec_internal.h | 30 | +++ b/target/arm/tcg/vec_internal.h |
18 | @@ -XXX,XX +XXX,XX @@ uint64_t pmull_h(uint64_t op1, uint64_t op2); | 31 | @@ -XXX,XX +XXX,XX @@ |
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
19 | */ | 41 | */ |
20 | uint64_t pmull_w(uint64_t op1, uint64_t op2); | 42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); |
21 | 43 | ||
22 | +/** | 44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) |
23 | + * bfdotadd: | 45 | +{ |
24 | + * @sum: addend | 46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); |
25 | + * @e1, @e2: multiplicand vectors | 47 | +} |
26 | + * | 48 | + |
27 | + * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. | 49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) |
28 | + * The @e1 and @e2 operands correspond to the 32-bit source vector | 50 | +{ |
29 | + * slots and contain two Bfloat16 values each. | 51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); |
30 | + * | 52 | +} |
31 | + * Corresponds to the ARM pseudocode function BFDotAdd. | 53 | + |
32 | + */ | 54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) |
33 | +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2); | 55 | +{ |
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
34 | + | 58 | + |
35 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | 59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ |
36 | diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c | 60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
37 | index XXXXXXX..XXXXXXX 100644 | 61 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/target/arm/vec_helper.c | 62 | --- a/target/arm/tcg/sve_helper.c |
39 | +++ b/target/arm/vec_helper.c | 63 | +++ b/target/arm/tcg/sve_helper.c |
40 | @@ -XXX,XX +XXX,XX @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b) | 64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, |
41 | * BFloat16 Dot Product | ||
42 | */ | ||
43 | |||
44 | -static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) | ||
45 | +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) | ||
46 | { | 65 | { |
47 | /* FPCR is ignored for BFDOT and BFMMLA. */ | 66 | intptr_t j, i = simd_oprsz(desc); |
48 | float_status bf_status = { | 67 | uint64_t *g = vg; |
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
49 | -- | 164 | -- |
50 | 2.25.1 | 165 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into | ||
3 | the SIMD data field passed to the helper and using that to decide | ||
4 | whether to negate the values. | ||
1 | 5 | ||
6 | The construction of neg_imag and neg_real was done to make it easy | ||
7 | to apply both in parallel with two simple logical operations. This | ||
8 | changed with FPCR.AH, which is more complex than that. Switch to | ||
9 | an approach closer to the pseudocode, where we extract the rot | ||
10 | parameter from the SIMD data word and negate the appropriate | ||
11 | input value. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | target/arm/tcg/translate-a64.c | 10 +++++-- | ||
17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- | ||
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { | ||
25 | gen_helper_gvec_fcadds, | ||
26 | gen_helper_gvec_fcaddd, | ||
27 | }; | ||
28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) | ||
29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) | ||
30 | +/* | ||
31 | + * Encode FPCR.AH into the data so the helper knows whether the | ||
32 | + * negations it does should avoid flipping the sign bit on a NaN | ||
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
135 | -- | ||
136 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Handle the FPCR.AH semantics that we do not change the sign of an |
---|---|---|---|
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
2 | 5 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Since the extra helper functions would be very repetitive if written |
4 | Message-id: 20220604040607.269301-23-richard.henderson@linaro.org | 7 | out longhand, we condense them and the existing non-AH helpers into |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | being emitted via macros. |
9 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 12 | --- |
8 | target/arm/ptw.h | 10 ------ | 13 | target/arm/tcg/helper-a64.h | 6 ++ |
9 | target/arm/helper.c | 77 ------------------------------------------ | 14 | target/arm/tcg/vec_internal.h | 18 ++++++ |
10 | target/arm/ptw.c | 81 +++++++++++++++++++++++++++++++++++++++++++++ | 15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- |
11 | 3 files changed, 81 insertions(+), 87 deletions(-) | 16 | target/arm/tcg/translate-a64.c | 25 +++++-- |
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
12 | 18 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h |
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 21 | --- a/target/arm/tcg/helper-a64.h |
16 | +++ b/target/arm/ptw.h | 22 | +++ b/target/arm/tcg/helper-a64.h |
17 | @@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) |
18 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
19 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
20 | 26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | |
21 | -int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | 27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
22 | - int ap, int domain_prot); | 28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
23 | -int simple_ap_to_rw_prot_is_user(int ap, bool is_user); | 29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
24 | - | 30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
25 | -static inline int | 31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
26 | -simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
27 | -{ | 33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
28 | - return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
29 | -} | 35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
30 | - | 36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
31 | #endif /* !CONFIG_USER_ONLY */ | 37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
32 | #endif /* TARGET_ARM_PTW_H */ | 38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
33 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h |
34 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/target/arm/helper.c | 41 | --- a/target/arm/tcg/vec_internal.h |
36 | +++ b/target/arm/helper.c | 42 | +++ b/target/arm/tcg/vec_internal.h |
37 | @@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | 43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, |
38 | g_assert_not_reached(); | 44 | */ |
39 | } | 45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); |
40 | } | ||
41 | - | ||
42 | -/* Translate section/page access permissions to page | ||
43 | - * R/W protection flags | ||
44 | - * | ||
45 | - * @env: CPUARMState | ||
46 | - * @mmu_idx: MMU index indicating required translation regime | ||
47 | - * @ap: The 3-bit access permissions (AP[2:0]) | ||
48 | - * @domain_prot: The 2-bit domain access permissions | ||
49 | - */ | ||
50 | -int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot) | ||
51 | -{ | ||
52 | - bool is_user = regime_is_user(env, mmu_idx); | ||
53 | - | ||
54 | - if (domain_prot == 3) { | ||
55 | - return PAGE_READ | PAGE_WRITE; | ||
56 | - } | ||
57 | - | ||
58 | - switch (ap) { | ||
59 | - case 0: | ||
60 | - if (arm_feature(env, ARM_FEATURE_V7)) { | ||
61 | - return 0; | ||
62 | - } | ||
63 | - switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { | ||
64 | - case SCTLR_S: | ||
65 | - return is_user ? 0 : PAGE_READ; | ||
66 | - case SCTLR_R: | ||
67 | - return PAGE_READ; | ||
68 | - default: | ||
69 | - return 0; | ||
70 | - } | ||
71 | - case 1: | ||
72 | - return is_user ? 0 : PAGE_READ | PAGE_WRITE; | ||
73 | - case 2: | ||
74 | - if (is_user) { | ||
75 | - return PAGE_READ; | ||
76 | - } else { | ||
77 | - return PAGE_READ | PAGE_WRITE; | ||
78 | - } | ||
79 | - case 3: | ||
80 | - return PAGE_READ | PAGE_WRITE; | ||
81 | - case 4: /* Reserved. */ | ||
82 | - return 0; | ||
83 | - case 5: | ||
84 | - return is_user ? 0 : PAGE_READ; | ||
85 | - case 6: | ||
86 | - return PAGE_READ; | ||
87 | - case 7: | ||
88 | - if (!arm_feature(env, ARM_FEATURE_V6K)) { | ||
89 | - return 0; | ||
90 | - } | ||
91 | - return PAGE_READ; | ||
92 | - default: | ||
93 | - g_assert_not_reached(); | ||
94 | - } | ||
95 | -} | ||
96 | - | ||
97 | -/* Translate section/page access permissions to page | ||
98 | - * R/W protection flags. | ||
99 | - * | ||
100 | - * @ap: The 2-bit simple AP (AP[2:1]) | ||
101 | - * @is_user: TRUE if accessing from PL0 | ||
102 | - */ | ||
103 | -int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
104 | -{ | ||
105 | - switch (ap) { | ||
106 | - case 0: | ||
107 | - return is_user ? 0 : PAGE_READ | PAGE_WRITE; | ||
108 | - case 1: | ||
109 | - return PAGE_READ | PAGE_WRITE; | ||
110 | - case 2: | ||
111 | - return is_user ? 0 : PAGE_READ; | ||
112 | - case 3: | ||
113 | - return PAGE_READ; | ||
114 | - default: | ||
115 | - g_assert_not_reached(); | ||
116 | - } | ||
117 | -} | ||
118 | #endif /* !CONFIG_USER_ONLY */ | ||
119 | |||
120 | int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
121 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/target/arm/ptw.c | ||
124 | +++ b/target/arm/ptw.c | ||
125 | @@ -XXX,XX +XXX,XX @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
126 | return true; | ||
127 | } | ||
128 | 46 | ||
129 | +/* | 47 | +/* |
130 | + * Translate section/page access permissions to page R/W protection flags | 48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. |
131 | + * @env: CPUARMState | ||
132 | + * @mmu_idx: MMU index indicating required translation regime | ||
133 | + * @ap: The 3-bit access permissions (AP[2:0]) | ||
134 | + * @domain_prot: The 2-bit domain access permissions | ||
135 | + */ | 49 | + */ |
136 | +static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | 50 | +static inline float16 float16_ah_chs(float16 a) |
137 | + int ap, int domain_prot) | ||
138 | +{ | 51 | +{ |
139 | + bool is_user = regime_is_user(env, mmu_idx); | 52 | + return float16_is_any_nan(a) ? a : float16_chs(a); |
140 | + | ||
141 | + if (domain_prot == 3) { | ||
142 | + return PAGE_READ | PAGE_WRITE; | ||
143 | + } | ||
144 | + | ||
145 | + switch (ap) { | ||
146 | + case 0: | ||
147 | + if (arm_feature(env, ARM_FEATURE_V7)) { | ||
148 | + return 0; | ||
149 | + } | ||
150 | + switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { | ||
151 | + case SCTLR_S: | ||
152 | + return is_user ? 0 : PAGE_READ; | ||
153 | + case SCTLR_R: | ||
154 | + return PAGE_READ; | ||
155 | + default: | ||
156 | + return 0; | ||
157 | + } | ||
158 | + case 1: | ||
159 | + return is_user ? 0 : PAGE_READ | PAGE_WRITE; | ||
160 | + case 2: | ||
161 | + if (is_user) { | ||
162 | + return PAGE_READ; | ||
163 | + } else { | ||
164 | + return PAGE_READ | PAGE_WRITE; | ||
165 | + } | ||
166 | + case 3: | ||
167 | + return PAGE_READ | PAGE_WRITE; | ||
168 | + case 4: /* Reserved. */ | ||
169 | + return 0; | ||
170 | + case 5: | ||
171 | + return is_user ? 0 : PAGE_READ; | ||
172 | + case 6: | ||
173 | + return PAGE_READ; | ||
174 | + case 7: | ||
175 | + if (!arm_feature(env, ARM_FEATURE_V6K)) { | ||
176 | + return 0; | ||
177 | + } | ||
178 | + return PAGE_READ; | ||
179 | + default: | ||
180 | + g_assert_not_reached(); | ||
181 | + } | ||
182 | +} | 53 | +} |
183 | + | 54 | + |
184 | +/* | 55 | +static inline float32 float32_ah_chs(float32 a) |
185 | + * Translate section/page access permissions to page R/W protection flags. | ||
186 | + * @ap: The 2-bit simple AP (AP[2:1]) | ||
187 | + * @is_user: TRUE if accessing from PL0 | ||
188 | + */ | ||
189 | +static int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
190 | +{ | 56 | +{ |
191 | + switch (ap) { | 57 | + return float32_is_any_nan(a) ? a : float32_chs(a); |
192 | + case 0: | ||
193 | + return is_user ? 0 : PAGE_READ | PAGE_WRITE; | ||
194 | + case 1: | ||
195 | + return PAGE_READ | PAGE_WRITE; | ||
196 | + case 2: | ||
197 | + return is_user ? 0 : PAGE_READ; | ||
198 | + case 3: | ||
199 | + return PAGE_READ; | ||
200 | + default: | ||
201 | + g_assert_not_reached(); | ||
202 | + } | ||
203 | +} | 58 | +} |
204 | + | 59 | + |
205 | +static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 60 | +static inline float64 float64_ah_chs(float64 a) |
206 | +{ | 61 | +{ |
207 | + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 62 | + return float64_is_any_nan(a) ? a : float64_chs(a); |
208 | +} | 63 | +} |
209 | + | 64 | + |
210 | static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) |
211 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 66 | { |
212 | hwaddr *phys_ptr, int *prot, | 67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); |
68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/helper-a64.c | ||
71 | +++ b/target/arm/tcg/helper-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #ifdef CONFIG_USER_ONLY | ||
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
82 | } | ||
83 | |||
84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
85 | +/* | ||
86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
87 | * versions, these do a fully fused multiply-add or | ||
88 | * multiply-add-and-halve. | ||
89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. | ||
90 | */ | ||
91 | - | ||
92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
93 | -{ | ||
94 | - a = float16_squash_input_denormal(a, fpst); | ||
95 | - b = float16_squash_input_denormal(b, fpst); | ||
96 | - | ||
97 | - a = float16_chs(a); | ||
98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
112 | } | ||
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
232 | }; | ||
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
213 | -- | 258 | -- |
214 | 2.25.1 | 259 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics |
---|---|---|---|
2 | in the vector versions of FRECPS and FRSQRTS, by implementing | ||
3 | new vector wrappers that call the _ah_ scalar helpers. | ||
2 | 4 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220604040607.269301-14-richard.henderson@linaro.org | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | 7 | --- |
8 | target/arm/ptw.h | 4 ++-- | 8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ |
9 | target/arm/helper.c | 26 +------------------------- | 9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- |
10 | target/arm/ptw.c | 23 +++++++++++++++++++++++ | 10 | target/arm/tcg/translate-sve.c | 7 ++++++- |
11 | 3 files changed, 26 insertions(+), 27 deletions(-) | 11 | target/arm/tcg/vec_helper.c | 8 ++++++++ |
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
12 | 13 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
14 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 16 | --- a/target/arm/tcg/helper-sve.h |
16 | +++ b/target/arm/ptw.h | 17 | +++ b/target/arm/tcg/helper-sve.h |
17 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, |
18 | 19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | |
19 | bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 20 | void, ptr, ptr, ptr, fpst, i32) |
20 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 21 | |
21 | +uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, |
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | + | 28 | + |
23 | ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | 29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, |
24 | ARMCacheAttrs s1, ARMCacheAttrs s2); | 30 | + void, ptr, ptr, ptr, fpst, i32) |
25 | 31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, | |
26 | -bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 32 | + void, ptr, ptr, ptr, fpst, i32) |
27 | - uint32_t *table, uint32_t address); | 33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, |
28 | int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | 34 | + void, ptr, ptr, ptr, fpst, i32) |
29 | int ap, int domain_prot); | 35 | + |
30 | int simple_ap_to_rw_prot_is_user(int ap, bool is_user); | 36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, |
31 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 37 | void, ptr, ptr, ptr, fpst, i32) |
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/target/arm/helper.c | 41 | --- a/target/arm/tcg/translate-a64.c |
34 | +++ b/target/arm/helper.c | 42 | +++ b/target/arm/tcg/translate-a64.c |
35 | @@ -XXX,XX +XXX,XX @@ static inline bool regime_translation_big_endian(CPUARMState *env, | 43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
36 | } | 45 | } |
37 | 46 | ||
38 | /* Return the TTBR associated with this translation regime */ | 47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, |
39 | -static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, | 48 | - gen_helper_gvec_3_ptr * const f[3]) |
40 | - int ttbrn) | 49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
41 | +uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | 50 | + gen_helper_gvec_3_ptr * const fnormal[3], |
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
42 | { | 52 | { |
43 | if (mmu_idx == ARMMMUIdx_Stage2) { | 53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, |
44 | return env->cp15.vttbr_el2; | 54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, |
45 | @@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | 55 | select_ah_fpst(s, a->esz)); |
46 | return prot_rw | PAGE_EXEC; | ||
47 | } | 56 | } |
48 | 57 | ||
49 | -bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { |
50 | - uint32_t *table, uint32_t address) | 59 | gen_helper_gvec_recps_s, |
51 | -{ | 60 | gen_helper_gvec_recps_d, |
52 | - /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ | 61 | }; |
53 | - TCR *tcr = regime_tcr(env, mmu_idx); | 62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) |
54 | - | 63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { |
55 | - if (address & tcr->mask) { | 64 | + gen_helper_gvec_ah_recps_h, |
56 | - if (tcr->raw_tcr & TTBCR_PD1) { | 65 | + gen_helper_gvec_ah_recps_s, |
57 | - /* Translation table walk disabled for TTBR1 */ | 66 | + gen_helper_gvec_ah_recps_d, |
58 | - return false; | 67 | +}; |
59 | - } | 68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) |
60 | - *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000; | 69 | |
61 | - } else { | 70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { |
62 | - if (tcr->raw_tcr & TTBCR_PD0) { | 71 | gen_helper_gvec_rsqrts_h, |
63 | - /* Translation table walk disabled for TTBR0 */ | 72 | gen_helper_gvec_rsqrts_s, |
64 | - return false; | 73 | gen_helper_gvec_rsqrts_d, |
65 | - } | 74 | }; |
66 | - *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask; | 75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) |
67 | - } | 76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { |
68 | - *table |= (address >> 18) & 0x3ffc; | 77 | + gen_helper_gvec_ah_rsqrts_h, |
69 | - return true; | 78 | + gen_helper_gvec_ah_rsqrts_s, |
70 | -} | 79 | + gen_helper_gvec_ah_rsqrts_d, |
71 | - | 80 | +}; |
72 | static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | 81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) |
73 | { | 82 | |
74 | /* | 83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { |
75 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 84 | gen_helper_gvec_faddp_h, |
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | 86 | index XXXXXXX..XXXXXXX 100644 |
77 | --- a/target/arm/ptw.c | 87 | --- a/target/arm/tcg/translate-sve.c |
78 | +++ b/target/arm/ptw.c | 88 | +++ b/target/arm/tcg/translate-sve.c |
79 | @@ -XXX,XX +XXX,XX @@ | 89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) |
80 | #include "ptw.h" | 90 | NULL, gen_helper_gvec_##name##_h, \ |
81 | 91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | |
82 | 92 | }; \ | |
83 | +static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) |
84 | + uint32_t *table, uint32_t address) | 94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ |
85 | +{ | 95 | + NULL, gen_helper_gvec_ah_##name##_h, \ |
86 | + /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ | 96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ |
87 | + TCR *tcr = regime_tcr(env, mmu_idx); | 97 | + }; \ |
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
88 | + | 114 | + |
89 | + if (address & tcr->mask) { | 115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) |
90 | + if (tcr->raw_tcr & TTBCR_PD1) { | 116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) |
91 | + /* Translation table walk disabled for TTBR1 */ | 117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) |
92 | + return false; | ||
93 | + } | ||
94 | + *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000; | ||
95 | + } else { | ||
96 | + if (tcr->raw_tcr & TTBCR_PD0) { | ||
97 | + /* Translation table walk disabled for TTBR0 */ | ||
98 | + return false; | ||
99 | + } | ||
100 | + *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask; | ||
101 | + } | ||
102 | + *table |= (address >> 18) & 0x3ffc; | ||
103 | + return true; | ||
104 | +} | ||
105 | + | 118 | + |
106 | static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) |
107 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | 120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) |
108 | hwaddr *phys_ptr, int *prot, | 121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) |
109 | -- | 122 | -- |
110 | 2.25.1 | 123 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS |
---|---|---|---|
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
2 | 5 | ||
3 | Export all of the support functions for performing bulk | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | fault analysis on a set of elements at contiguous addresses | 7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] |
5 | controlled by a predicate. | 8 | fixed] |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
6 | 16 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 17 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20220607203306.657998-15-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/sve_ldst_internal.h | 94 ++++++++++++++++++++++++++++++++++ | ||
13 | target/arm/sve_helper.c | 87 ++++++------------------------- | ||
14 | 2 files changed, 111 insertions(+), 70 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/sve_ldst_internal.h b/target/arm/sve_ldst_internal.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/sve_ldst_internal.h | 19 | --- a/target/arm/helper.h |
19 | +++ b/target/arm/sve_ldst_internal.h | 20 | +++ b/target/arm/helper.h |
20 | @@ -XXX,XX +XXX,XX @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) | 21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, |
21 | #undef DO_LD_PRIM_2 | 22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, |
22 | #undef DO_ST_PRIM_2 | 23 | void, ptr, ptr, ptr, ptr, fpst, i32) |
23 | 24 | ||
24 | +/* | 25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, |
25 | + * Resolve the guest virtual address to info->host and info->flags. | 26 | + void, ptr, ptr, ptr, ptr, fpst, i32) |
26 | + * If @nofault, return false if the page is invalid, otherwise | 27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, |
27 | + * exit via page fault exception. | 28 | + void, ptr, ptr, ptr, ptr, fpst, i32) |
28 | + */ | 29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, |
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | + | 31 | + |
30 | +typedef struct { | 32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, |
31 | + void *host; | 33 | + void, ptr, ptr, ptr, ptr, fpst, i32) |
32 | + int flags; | 34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, |
33 | + MemTxAttrs attrs; | 35 | + void, ptr, ptr, ptr, ptr, fpst, i32) |
34 | +} SVEHostPage; | 36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, |
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
35 | + | 38 | + |
36 | +bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, | 39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, |
37 | + target_ulong addr, int mem_off, MMUAccessType access_type, | 40 | void, ptr, ptr, ptr, ptr, i32) |
38 | + int mmu_idx, uintptr_t retaddr); | 41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, |
39 | + | 42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
40 | +/* | ||
41 | + * Analyse contiguous data, protected by a governing predicate. | ||
42 | + */ | ||
43 | + | ||
44 | +typedef enum { | ||
45 | + FAULT_NO, | ||
46 | + FAULT_FIRST, | ||
47 | + FAULT_ALL, | ||
48 | +} SVEContFault; | ||
49 | + | ||
50 | +typedef struct { | ||
51 | + /* | ||
52 | + * First and last element wholly contained within the two pages. | ||
53 | + * mem_off_first[0] and reg_off_first[0] are always set >= 0. | ||
54 | + * reg_off_last[0] may be < 0 if the first element crosses pages. | ||
55 | + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] | ||
56 | + * are set >= 0 only if there are complete elements on a second page. | ||
57 | + * | ||
58 | + * The reg_off_* offsets are relative to the internal vector register. | ||
59 | + * The mem_off_first offset is relative to the memory address; the | ||
60 | + * two offsets are different when a load operation extends, a store | ||
61 | + * operation truncates, or for multi-register operations. | ||
62 | + */ | ||
63 | + int16_t mem_off_first[2]; | ||
64 | + int16_t reg_off_first[2]; | ||
65 | + int16_t reg_off_last[2]; | ||
66 | + | ||
67 | + /* | ||
68 | + * One element that is misaligned and spans both pages, | ||
69 | + * or -1 if there is no such active element. | ||
70 | + */ | ||
71 | + int16_t mem_off_split; | ||
72 | + int16_t reg_off_split; | ||
73 | + | ||
74 | + /* | ||
75 | + * The byte offset at which the entire operation crosses a page boundary. | ||
76 | + * Set >= 0 if and only if the entire operation spans two pages. | ||
77 | + */ | ||
78 | + int16_t page_split; | ||
79 | + | ||
80 | + /* TLB data for the two pages. */ | ||
81 | + SVEHostPage page[2]; | ||
82 | +} SVEContLdSt; | ||
83 | + | ||
84 | +/* | ||
85 | + * Find first active element on each page, and a loose bound for the | ||
86 | + * final element on each page. Identify any single element that spans | ||
87 | + * the page boundary. Return true if there are any active elements. | ||
88 | + */ | ||
89 | +bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg, | ||
90 | + intptr_t reg_max, int esz, int msize); | ||
91 | + | ||
92 | +/* | ||
93 | + * Resolve the guest virtual addresses to info->page[]. | ||
94 | + * Control the generation of page faults with @fault. Return false if | ||
95 | + * there is no work to do, which can only happen with @fault == FAULT_NO. | ||
96 | + */ | ||
97 | +bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, | ||
98 | + CPUARMState *env, target_ulong addr, | ||
99 | + MMUAccessType access_type, uintptr_t retaddr); | ||
100 | + | ||
101 | +#ifdef CONFIG_USER_ONLY | ||
102 | +static inline void | ||
103 | +sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, | ||
104 | + target_ulong addr, int esize, int msize, | ||
105 | + int wp_access, uintptr_t retaddr) | ||
106 | +{ } | ||
107 | +#else | ||
108 | +void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, | ||
109 | + uint64_t *vg, target_ulong addr, | ||
110 | + int esize, int msize, int wp_access, | ||
111 | + uintptr_t retaddr); | ||
112 | +#endif | ||
113 | + | ||
114 | +void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, | ||
115 | + target_ulong addr, int esize, int msize, | ||
116 | + uint32_t mtedesc, uintptr_t ra); | ||
117 | + | ||
118 | #endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */ | ||
119 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
120 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
121 | --- a/target/arm/sve_helper.c | 44 | --- a/target/arm/tcg/translate-a64.c |
122 | +++ b/target/arm/sve_helper.c | 45 | +++ b/target/arm/tcg/translate-a64.c |
123 | @@ -XXX,XX +XXX,XX @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, | 46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) |
124 | * exit via page fault exception. | 47 | |
125 | */ | 48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) |
126 | |||
127 | -typedef struct { | ||
128 | - void *host; | ||
129 | - int flags; | ||
130 | - MemTxAttrs attrs; | ||
131 | -} SVEHostPage; | ||
132 | - | ||
133 | -static bool sve_probe_page(SVEHostPage *info, bool nofault, | ||
134 | - CPUARMState *env, target_ulong addr, | ||
135 | - int mem_off, MMUAccessType access_type, | ||
136 | - int mmu_idx, uintptr_t retaddr) | ||
137 | +bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, | ||
138 | + target_ulong addr, int mem_off, MMUAccessType access_type, | ||
139 | + int mmu_idx, uintptr_t retaddr) | ||
140 | { | 49 | { |
141 | int flags; | 50 | - static gen_helper_gvec_4_ptr * const fns[3] = { |
142 | 51 | - gen_helper_gvec_fmla_idx_h, | |
143 | @@ -XXX,XX +XXX,XX @@ static bool sve_probe_page(SVEHostPage *info, bool nofault, | 52 | - gen_helper_gvec_fmla_idx_s, |
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
144 | return true; | 74 | return true; |
145 | } | 75 | } |
146 | 76 | ||
147 | - | 77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
148 | -/* | 78 | index XXXXXXX..XXXXXXX 100644 |
149 | - * Analyse contiguous data, protected by a governing predicate. | 79 | --- a/target/arm/tcg/translate-sve.c |
150 | - */ | 80 | +++ b/target/arm/tcg/translate-sve.c |
151 | - | 81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) |
152 | -typedef enum { | 82 | *** SVE Floating Point Multiply-Add Indexed Group |
153 | - FAULT_NO, | 83 | */ |
154 | - FAULT_FIRST, | 84 | |
155 | - FAULT_ALL, | 85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) |
156 | -} SVEContFault; | 86 | -{ |
157 | - | 87 | - static gen_helper_gvec_4_ptr * const fns[4] = { |
158 | -typedef struct { | 88 | - NULL, |
159 | - /* | 89 | - gen_helper_gvec_fmla_idx_h, |
160 | - * First and last element wholly contained within the two pages. | 90 | - gen_helper_gvec_fmla_idx_s, |
161 | - * mem_off_first[0] and reg_off_first[0] are always set >= 0. | 91 | - gen_helper_gvec_fmla_idx_d, |
162 | - * reg_off_last[0] may be < 0 if the first element crosses pages. | 92 | - }; |
163 | - * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] | 93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, |
164 | - * are set >= 0 only if there are complete elements on a second page. | 94 | - (a->index << 1) | sub, |
165 | - * | 95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); |
166 | - * The reg_off_* offsets are relative to the internal vector register. | 96 | -} |
167 | - * The mem_off_first offset is relative to the memory address; the | 97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { |
168 | - * two offsets are different when a load operation extends, a store | 98 | + NULL, gen_helper_gvec_fmla_idx_h, |
169 | - * operation truncates, or for multi-register operations. | 99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d |
170 | - */ | 100 | +}; |
171 | - int16_t mem_off_first[2]; | 101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, |
172 | - int16_t reg_off_first[2]; | 102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, |
173 | - int16_t reg_off_last[2]; | 103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
174 | - | 104 | |
175 | - /* | 105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) |
176 | - * One element that is misaligned and spans both pages, | 106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) |
177 | - * or -1 if there is no such active element. | 107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { |
178 | - */ | 108 | + { NULL, NULL }, |
179 | - int16_t mem_off_split; | 109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, |
180 | - int16_t reg_off_split; | 110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, |
181 | - | 111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, |
182 | - /* | 112 | +}; |
183 | - * The byte offset at which the entire operation crosses a page boundary. | 113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, |
184 | - * Set >= 0 if and only if the entire operation spans two pages. | 114 | + fmls_idx_fns[a->esz][s->fpcr_ah], |
185 | - */ | 115 | + a->rd, a->rn, a->rm, a->ra, a->index, |
186 | - int16_t page_split; | 116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
187 | - | 117 | |
188 | - /* TLB data for the two pages. */ | ||
189 | - SVEHostPage page[2]; | ||
190 | -} SVEContLdSt; | ||
191 | - | ||
192 | /* | 118 | /* |
193 | * Find first active element on each page, and a loose bound for the | 119 | *** SVE Floating Point Multiply Indexed Group |
194 | * final element on each page. Identify any single element that spans | 120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
195 | * the page boundary. Return true if there are any active elements. | 121 | index XXXXXXX..XXXXXXX 100644 |
196 | */ | 122 | --- a/target/arm/tcg/vec_helper.c |
197 | -static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, | 123 | +++ b/target/arm/tcg/vec_helper.c |
198 | - uint64_t *vg, intptr_t reg_max, | 124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) |
199 | - int esz, int msize) | 125 | |
200 | +bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg, | 126 | #undef DO_FMUL_IDX |
201 | + intptr_t reg_max, int esz, int msize) | 127 | |
202 | { | 128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ |
203 | const int esize = 1 << esz; | 129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ |
204 | const uint64_t pg_mask = pred_esz_masks[esz]; | 130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ |
205 | @@ -XXX,XX +XXX,XX @@ static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, | 131 | float_status *stat, uint32_t desc) \ |
206 | * Control the generation of page faults with @fault. Return false if | 132 | { \ |
207 | * there is no work to do, which can only happen with @fault == FAULT_NO. | 133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ |
208 | */ | 134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ |
209 | -static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, | 135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ |
210 | - CPUARMState *env, target_ulong addr, | 136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ |
211 | - MMUAccessType access_type, uintptr_t retaddr) | 137 | + intptr_t idx = simd_data(desc); \ |
212 | +bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, | 138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ |
213 | + CPUARMState *env, target_ulong addr, | 139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ |
214 | + MMUAccessType access_type, uintptr_t retaddr) | 140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ |
215 | { | 141 | TYPE mm = m[H(i + idx)]; \ |
216 | int mmu_idx = cpu_mmu_index(env, false); | 142 | for (j = 0; j < segment; j++) { \ |
217 | int mem_off = info->mem_off_first[0]; | 143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ |
218 | @@ -XXX,XX +XXX,XX @@ static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, | 144 | - mm, a[i + j], 0, stat); \ |
219 | return have_work; | 145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ |
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
220 | } | 150 | } |
221 | 151 | ||
222 | -static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, | 152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) |
223 | - uint64_t *vg, target_ulong addr, | 153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) |
224 | - int esize, int msize, int wp_access, | 154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) |
225 | - uintptr_t retaddr) | 155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) |
226 | -{ | 156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) |
227 | #ifndef CONFIG_USER_ONLY | 157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) |
228 | +void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, | 158 | + |
229 | + uint64_t *vg, target_ulong addr, | 159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) |
230 | + int esize, int msize, int wp_access, | 160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) |
231 | + uintptr_t retaddr) | 161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) |
232 | +{ | 162 | + |
233 | intptr_t mem_off, reg_off, reg_last; | 163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) |
234 | int flags0 = info->page[0].flags; | 164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) |
235 | int flags1 = info->page[1].flags; | 165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) |
236 | @@ -XXX,XX +XXX,XX @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, | 166 | |
237 | } while (reg_off & 63); | 167 | #undef DO_FMLA_IDX |
238 | } while (reg_off <= reg_last); | ||
239 | } | ||
240 | -#endif | ||
241 | } | ||
242 | +#endif | ||
243 | |||
244 | -static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, | ||
245 | - uint64_t *vg, target_ulong addr, int esize, | ||
246 | - int msize, uint32_t mtedesc, uintptr_t ra) | ||
247 | +void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, | ||
248 | + uint64_t *vg, target_ulong addr, int esize, | ||
249 | + int msize, uint32_t mtedesc, uintptr_t ra) | ||
250 | { | ||
251 | intptr_t mem_off, reg_off, reg_last; | ||
252 | 168 | ||
253 | -- | 169 | -- |
254 | 2.25.1 | 170 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics |
---|---|---|---|
2 | in FMLS (vector), by implementing a new set of helpers for | ||
3 | the AH=1 case. | ||
2 | 4 | ||
3 | Move the ptw load functions, plus 3 common subroutines: | 5 | The float_muladd_negate_product flag produces the same result |
4 | S1_ptw_translate, ptw_attrs_are_device, and regime_translation_big_endian. | 6 | as negating either of the multiplication operands, assuming |
5 | This also allows get_phys_addr_lpae to become static again. | 7 | neither of the operands are NaNs. But since FEAT_AFP does not |
8 | negate NaNs, this behaviour is exactly what we need. | ||
6 | 9 | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220604040607.269301-17-richard.henderson@linaro.org | ||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | 12 | --- |
12 | target/arm/ptw.h | 13 ---- | 13 | target/arm/helper.h | 4 ++++ |
13 | target/arm/helper.c | 141 -------------------------------------- | 14 | target/arm/tcg/translate-a64.c | 7 ++++++- |
14 | target/arm/ptw.c | 160 ++++++++++++++++++++++++++++++++++++++++++-- | 15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ |
15 | 3 files changed, 154 insertions(+), 160 deletions(-) | 16 | 3 files changed, 32 insertions(+), 1 deletion(-) |
16 | 17 | ||
17 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 18 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
18 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/ptw.h | 20 | --- a/target/arm/helper.h |
20 | +++ b/target/arm/ptw.h | 21 | +++ b/target/arm/helper.h |
21 | @@ -XXX,XX +XXX,XX @@ | 22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
22 | 23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | |
23 | extern const uint8_t pamax_map[7]; | 24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
24 | 25 | ||
25 | -uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
26 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi); | 27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
27 | -uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
28 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi); | 29 | + |
29 | - | 30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, |
30 | bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 31 | void, ptr, ptr, ptr, fpst, i32) |
31 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, |
32 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
33 | @@ -XXX,XX +XXX,XX @@ int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0); | ||
34 | int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
35 | int ap, int ns, int xn, int pxn); | ||
36 | |||
37 | -bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
38 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
39 | - bool s1_is_el0, | ||
40 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
41 | - target_ulong *page_size_ptr, | ||
42 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
43 | - __attribute__((nonnull)); | ||
44 | - | ||
45 | #endif /* !CONFIG_USER_ONLY */ | ||
46 | #endif /* TARGET_ARM_PTW_H */ | ||
47 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/target/arm/helper.c | 35 | --- a/target/arm/tcg/translate-a64.c |
50 | +++ b/target/arm/helper.c | 36 | +++ b/target/arm/tcg/translate-a64.c |
51 | @@ -XXX,XX +XXX,XX @@ bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) | 37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { |
52 | return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; | 38 | gen_helper_gvec_vfms_s, |
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
53 | } | 57 | } |
54 | 58 | ||
55 | -static inline bool regime_translation_big_endian(CPUARMState *env, | 59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, |
56 | - ARMMMUIdx mmu_idx) | 60 | + float_status *stat) |
57 | -{ | ||
58 | - return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; | ||
59 | -} | ||
60 | - | ||
61 | /* Return the TTBR associated with this translation regime */ | ||
62 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | ||
63 | { | ||
64 | @@ -XXX,XX +XXX,XX @@ int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
65 | return prot_rw | PAGE_EXEC; | ||
66 | } | ||
67 | |||
68 | -static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | ||
69 | -{ | ||
70 | - /* | ||
71 | - * For an S1 page table walk, the stage 1 attributes are always | ||
72 | - * some form of "this is Normal memory". The combined S1+S2 | ||
73 | - * attributes are therefore only Device if stage 2 specifies Device. | ||
74 | - * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00, | ||
75 | - * ie when cacheattrs.attrs bits [3:2] are 0b00. | ||
76 | - * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie | ||
77 | - * when cacheattrs.attrs bit [2] is 0. | ||
78 | - */ | ||
79 | - assert(cacheattrs.is_s2_format); | ||
80 | - if (arm_hcr_el2_eff(env) & HCR_FWB) { | ||
81 | - return (cacheattrs.attrs & 0x4) == 0; | ||
82 | - } else { | ||
83 | - return (cacheattrs.attrs & 0xc) == 0; | ||
84 | - } | ||
85 | -} | ||
86 | - | ||
87 | -/* Translate a S1 pagetable walk through S2 if needed. */ | ||
88 | -static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
89 | - hwaddr addr, bool *is_secure, | ||
90 | - ARMMMUFaultInfo *fi) | ||
91 | -{ | ||
92 | - if (arm_mmu_idx_is_stage1_of_2(mmu_idx) && | ||
93 | - !regime_translation_disabled(env, ARMMMUIdx_Stage2)) { | ||
94 | - target_ulong s2size; | ||
95 | - hwaddr s2pa; | ||
96 | - int s2prot; | ||
97 | - int ret; | ||
98 | - ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S | ||
99 | - : ARMMMUIdx_Stage2; | ||
100 | - ARMCacheAttrs cacheattrs = {}; | ||
101 | - MemTxAttrs txattrs = {}; | ||
102 | - | ||
103 | - ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false, | ||
104 | - &s2pa, &txattrs, &s2prot, &s2size, fi, | ||
105 | - &cacheattrs); | ||
106 | - if (ret) { | ||
107 | - assert(fi->type != ARMFault_None); | ||
108 | - fi->s2addr = addr; | ||
109 | - fi->stage2 = true; | ||
110 | - fi->s1ptw = true; | ||
111 | - fi->s1ns = !*is_secure; | ||
112 | - return ~0; | ||
113 | - } | ||
114 | - if ((arm_hcr_el2_eff(env) & HCR_PTW) && | ||
115 | - ptw_attrs_are_device(env, cacheattrs)) { | ||
116 | - /* | ||
117 | - * PTW set and S1 walk touched S2 Device memory: | ||
118 | - * generate Permission fault. | ||
119 | - */ | ||
120 | - fi->type = ARMFault_Permission; | ||
121 | - fi->s2addr = addr; | ||
122 | - fi->stage2 = true; | ||
123 | - fi->s1ptw = true; | ||
124 | - fi->s1ns = !*is_secure; | ||
125 | - return ~0; | ||
126 | - } | ||
127 | - | ||
128 | - if (arm_is_secure_below_el3(env)) { | ||
129 | - /* Check if page table walk is to secure or non-secure PA space. */ | ||
130 | - if (*is_secure) { | ||
131 | - *is_secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW); | ||
132 | - } else { | ||
133 | - *is_secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW); | ||
134 | - } | ||
135 | - } else { | ||
136 | - assert(!*is_secure); | ||
137 | - } | ||
138 | - | ||
139 | - addr = s2pa; | ||
140 | - } | ||
141 | - return addr; | ||
142 | -} | ||
143 | - | ||
144 | -/* All loads done in the course of a page table walk go through here. */ | ||
145 | -uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
146 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | ||
147 | -{ | ||
148 | - ARMCPU *cpu = ARM_CPU(cs); | ||
149 | - CPUARMState *env = &cpu->env; | ||
150 | - MemTxAttrs attrs = {}; | ||
151 | - MemTxResult result = MEMTX_OK; | ||
152 | - AddressSpace *as; | ||
153 | - uint32_t data; | ||
154 | - | ||
155 | - addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); | ||
156 | - attrs.secure = is_secure; | ||
157 | - as = arm_addressspace(cs, attrs); | ||
158 | - if (fi->s1ptw) { | ||
159 | - return 0; | ||
160 | - } | ||
161 | - if (regime_translation_big_endian(env, mmu_idx)) { | ||
162 | - data = address_space_ldl_be(as, addr, attrs, &result); | ||
163 | - } else { | ||
164 | - data = address_space_ldl_le(as, addr, attrs, &result); | ||
165 | - } | ||
166 | - if (result == MEMTX_OK) { | ||
167 | - return data; | ||
168 | - } | ||
169 | - fi->type = ARMFault_SyncExternalOnWalk; | ||
170 | - fi->ea = arm_extabort_type(result); | ||
171 | - return 0; | ||
172 | -} | ||
173 | - | ||
174 | -uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
175 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | ||
176 | -{ | ||
177 | - ARMCPU *cpu = ARM_CPU(cs); | ||
178 | - CPUARMState *env = &cpu->env; | ||
179 | - MemTxAttrs attrs = {}; | ||
180 | - MemTxResult result = MEMTX_OK; | ||
181 | - AddressSpace *as; | ||
182 | - uint64_t data; | ||
183 | - | ||
184 | - addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); | ||
185 | - attrs.secure = is_secure; | ||
186 | - as = arm_addressspace(cs, attrs); | ||
187 | - if (fi->s1ptw) { | ||
188 | - return 0; | ||
189 | - } | ||
190 | - if (regime_translation_big_endian(env, mmu_idx)) { | ||
191 | - data = address_space_ldq_be(as, addr, attrs, &result); | ||
192 | - } else { | ||
193 | - data = address_space_ldq_le(as, addr, attrs, &result); | ||
194 | - } | ||
195 | - if (result == MEMTX_OK) { | ||
196 | - return data; | ||
197 | - } | ||
198 | - fi->type = ARMFault_SyncExternalOnWalk; | ||
199 | - fi->ea = arm_extabort_type(result); | ||
200 | - return 0; | ||
201 | -} | ||
202 | - | ||
203 | /* | ||
204 | * check_s2_mmu_setup | ||
205 | * @cpu: ARMCPU | ||
206 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
207 | index XXXXXXX..XXXXXXX 100644 | ||
208 | --- a/target/arm/ptw.c | ||
209 | +++ b/target/arm/ptw.c | ||
210 | @@ -XXX,XX +XXX,XX @@ | ||
211 | #include "ptw.h" | ||
212 | |||
213 | |||
214 | +static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
215 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
216 | + bool s1_is_el0, hwaddr *phys_ptr, | ||
217 | + MemTxAttrs *txattrs, int *prot, | ||
218 | + target_ulong *page_size_ptr, | ||
219 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
220 | + __attribute__((nonnull)); | ||
221 | + | ||
222 | +static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
223 | +{ | 61 | +{ |
224 | + return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; | 62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
225 | +} | 63 | +} |
226 | + | 64 | + |
227 | +static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | 65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, |
66 | + float_status *stat) | ||
228 | +{ | 67 | +{ |
229 | + /* | 68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
230 | + * For an S1 page table walk, the stage 1 attributes are always | ||
231 | + * some form of "this is Normal memory". The combined S1+S2 | ||
232 | + * attributes are therefore only Device if stage 2 specifies Device. | ||
233 | + * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00, | ||
234 | + * ie when cacheattrs.attrs bits [3:2] are 0b00. | ||
235 | + * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie | ||
236 | + * when cacheattrs.attrs bit [2] is 0. | ||
237 | + */ | ||
238 | + assert(cacheattrs.is_s2_format); | ||
239 | + if (arm_hcr_el2_eff(env) & HCR_FWB) { | ||
240 | + return (cacheattrs.attrs & 0x4) == 0; | ||
241 | + } else { | ||
242 | + return (cacheattrs.attrs & 0xc) == 0; | ||
243 | + } | ||
244 | +} | 69 | +} |
245 | + | 70 | + |
246 | +/* Translate a S1 pagetable walk through S2 if needed. */ | 71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, |
247 | +static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, | 72 | + float_status *stat) |
248 | + hwaddr addr, bool *is_secure, | ||
249 | + ARMMMUFaultInfo *fi) | ||
250 | +{ | 73 | +{ |
251 | + if (arm_mmu_idx_is_stage1_of_2(mmu_idx) && | 74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
252 | + !regime_translation_disabled(env, ARMMMUIdx_Stage2)) { | ||
253 | + target_ulong s2size; | ||
254 | + hwaddr s2pa; | ||
255 | + int s2prot; | ||
256 | + int ret; | ||
257 | + ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S | ||
258 | + : ARMMMUIdx_Stage2; | ||
259 | + ARMCacheAttrs cacheattrs = {}; | ||
260 | + MemTxAttrs txattrs = {}; | ||
261 | + | ||
262 | + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false, | ||
263 | + &s2pa, &txattrs, &s2prot, &s2size, fi, | ||
264 | + &cacheattrs); | ||
265 | + if (ret) { | ||
266 | + assert(fi->type != ARMFault_None); | ||
267 | + fi->s2addr = addr; | ||
268 | + fi->stage2 = true; | ||
269 | + fi->s1ptw = true; | ||
270 | + fi->s1ns = !*is_secure; | ||
271 | + return ~0; | ||
272 | + } | ||
273 | + if ((arm_hcr_el2_eff(env) & HCR_PTW) && | ||
274 | + ptw_attrs_are_device(env, cacheattrs)) { | ||
275 | + /* | ||
276 | + * PTW set and S1 walk touched S2 Device memory: | ||
277 | + * generate Permission fault. | ||
278 | + */ | ||
279 | + fi->type = ARMFault_Permission; | ||
280 | + fi->s2addr = addr; | ||
281 | + fi->stage2 = true; | ||
282 | + fi->s1ptw = true; | ||
283 | + fi->s1ns = !*is_secure; | ||
284 | + return ~0; | ||
285 | + } | ||
286 | + | ||
287 | + if (arm_is_secure_below_el3(env)) { | ||
288 | + /* Check if page table walk is to secure or non-secure PA space. */ | ||
289 | + if (*is_secure) { | ||
290 | + *is_secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW); | ||
291 | + } else { | ||
292 | + *is_secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW); | ||
293 | + } | ||
294 | + } else { | ||
295 | + assert(!*is_secure); | ||
296 | + } | ||
297 | + | ||
298 | + addr = s2pa; | ||
299 | + } | ||
300 | + return addr; | ||
301 | +} | 75 | +} |
302 | + | 76 | + |
303 | +/* All loads done in the course of a page table walk go through here. */ | 77 | #define DO_MULADD(NAME, FUNC, TYPE) \ |
304 | +static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ |
305 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 79 | float_status *stat, uint32_t desc) \ |
306 | +{ | 80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) |
307 | + ARMCPU *cpu = ARM_CPU(cs); | 81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) |
308 | + CPUARMState *env = &cpu->env; | 82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) |
309 | + MemTxAttrs attrs = {}; | 83 | |
310 | + MemTxResult result = MEMTX_OK; | 84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) |
311 | + AddressSpace *as; | 85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) |
312 | + uint32_t data; | 86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) |
313 | + | 87 | + |
314 | + addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); | 88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. |
315 | + attrs.secure = is_secure; | 89 | * For AdvSIMD, there is of course only one such vector segment. |
316 | + as = arm_addressspace(cs, attrs); | ||
317 | + if (fi->s1ptw) { | ||
318 | + return 0; | ||
319 | + } | ||
320 | + if (regime_translation_big_endian(env, mmu_idx)) { | ||
321 | + data = address_space_ldl_be(as, addr, attrs, &result); | ||
322 | + } else { | ||
323 | + data = address_space_ldl_le(as, addr, attrs, &result); | ||
324 | + } | ||
325 | + if (result == MEMTX_OK) { | ||
326 | + return data; | ||
327 | + } | ||
328 | + fi->type = ARMFault_SyncExternalOnWalk; | ||
329 | + fi->ea = arm_extabort_type(result); | ||
330 | + return 0; | ||
331 | +} | ||
332 | + | ||
333 | +static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
334 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | ||
335 | +{ | ||
336 | + ARMCPU *cpu = ARM_CPU(cs); | ||
337 | + CPUARMState *env = &cpu->env; | ||
338 | + MemTxAttrs attrs = {}; | ||
339 | + MemTxResult result = MEMTX_OK; | ||
340 | + AddressSpace *as; | ||
341 | + uint64_t data; | ||
342 | + | ||
343 | + addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); | ||
344 | + attrs.secure = is_secure; | ||
345 | + as = arm_addressspace(cs, attrs); | ||
346 | + if (fi->s1ptw) { | ||
347 | + return 0; | ||
348 | + } | ||
349 | + if (regime_translation_big_endian(env, mmu_idx)) { | ||
350 | + data = address_space_ldq_be(as, addr, attrs, &result); | ||
351 | + } else { | ||
352 | + data = address_space_ldq_le(as, addr, attrs, &result); | ||
353 | + } | ||
354 | + if (result == MEMTX_OK) { | ||
355 | + return data; | ||
356 | + } | ||
357 | + fi->type = ARMFault_SyncExternalOnWalk; | ||
358 | + fi->ea = arm_extabort_type(result); | ||
359 | + return 0; | ||
360 | +} | ||
361 | + | ||
362 | static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
363 | uint32_t *table, uint32_t address) | ||
364 | { | ||
365 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
366 | * @fi: set to fault info if the translation fails | ||
367 | * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | ||
368 | */ | 90 | */ |
369 | -bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
370 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
371 | - bool s1_is_el0, | ||
372 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
373 | - target_ulong *page_size_ptr, | ||
374 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
375 | +static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
376 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
377 | + bool s1_is_el0, hwaddr *phys_ptr, | ||
378 | + MemTxAttrs *txattrs, int *prot, | ||
379 | + target_ulong *page_size_ptr, | ||
380 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
381 | { | ||
382 | ARMCPU *cpu = env_archcpu(env); | ||
383 | CPUState *cs = CPU(cpu); | ||
384 | -- | 91 | -- |
385 | 2.25.1 | 92 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the |
---|---|---|---|
2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case | ||
3 | which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions | ||
4 | that do the work. | ||
2 | 5 | ||
3 | The ARM pseudocode function CheckNormalSVEEnabled uses this | 6 | The float*_muladd functions have a flags argument that can |
4 | predicate now, and I think it's a bit clearer. | 7 | perform optional negation of various operands. We don't use |
8 | that for "normal" arm fmla, because the muladd flags are not | ||
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | The non-AH helpers pass in a zero flags argument and control the |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 13 | negation via the neg1 and neg3 arguments; the AH helpers always pass |
8 | Message-id: 20220607203306.657998-8-richard.henderson@linaro.org | 14 | in neg1 and neg3 as zero and control the negation via the flags |
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
17 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 20 | --- |
11 | target/arm/helper.c | 5 ++--- | 21 | target/arm/tcg/helper-sve.h | 21 ++++++++ |
12 | 1 file changed, 2 insertions(+), 3 deletions(-) | 22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- |
23 | target/arm/tcg/translate-sve.c | 18 ++++--- | ||
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
13 | 25 | ||
14 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
15 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper.c | 28 | --- a/target/arm/tcg/helper-sve.h |
17 | +++ b/target/arm/helper.c | 29 | +++ b/target/arm/tcg/helper-sve.h |
18 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo minimal_ras_reginfo[] = { | 30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, |
19 | int sve_exception_el(CPUARMState *env, int el) | 31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, |
20 | { | 32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
21 | #ifndef CONFIG_USER_ONLY | 33 | |
22 | - uint64_t hcr_el2 = arm_hcr_el2_eff(env); | 34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, |
23 | - | 35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
24 | - if (el <= 1 && (hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { | 36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, |
25 | + if (el <= 1 && !el_is_in_host(env, el)) { | 37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
26 | switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, ZEN)) { | 38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, |
27 | case 1: | 39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
28 | if (el != 0) { | 40 | + |
29 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, |
30 | * CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). | 42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
31 | */ | 43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, |
32 | if (el <= 2) { | 44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
33 | + uint64_t hcr_el2 = arm_hcr_el2_eff(env); | 45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, |
34 | if (hcr_el2 & HCR_E2H) { | 46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
35 | switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) { | 47 | + |
36 | case 1: | 48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, |
49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/tcg/sve_helper.c | ||
61 | +++ b/target/arm/tcg/sve_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
63 | |||
64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
65 | float_status *status, uint32_t desc, | ||
66 | - uint16_t neg1, uint16_t neg3) | ||
67 | + uint16_t neg1, uint16_t neg3, int flags) | ||
68 | { | ||
69 | intptr_t i = simd_oprsz(desc); | ||
70 | uint64_t *g = vg; | ||
71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | ||
73 | e2 = *(uint16_t *)(vm + H1_2(i)); | ||
74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | ||
75 | - r = float16_muladd(e1, e2, e3, 0, status); | ||
76 | + r = float16_muladd(e1, e2, e3, flags, status); | ||
77 | *(uint16_t *)(vd + H1_2(i)) = r; | ||
78 | } | ||
79 | } while (i & 63); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
82 | void *vg, float_status *status, uint32_t desc) | ||
83 | { | ||
84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
86 | } | ||
87 | |||
88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
89 | void *vg, float_status *status, uint32_t desc) | ||
90 | { | ||
91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); | ||
92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | ||
93 | } | ||
94 | |||
95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
96 | void *vg, float_status *status, uint32_t desc) | ||
97 | { | ||
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); | ||
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | ||
100 | } | ||
101 | |||
102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
103 | void *vg, float_status *status, uint32_t desc) | ||
104 | { | ||
105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); | ||
106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | ||
107 | +} | ||
108 | + | ||
109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
110 | + void *vg, float_status *status, uint32_t desc) | ||
111 | +{ | ||
112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
113 | + float_muladd_negate_product); | ||
114 | +} | ||
115 | + | ||
116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
117 | + void *vg, float_status *status, uint32_t desc) | ||
118 | +{ | ||
119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
120 | + float_muladd_negate_product | float_muladd_negate_c); | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
124 | + void *vg, float_status *status, uint32_t desc) | ||
125 | +{ | ||
126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
298 | |||
37 | -- | 299 | -- |
38 | 2.25.1 | 300 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when |
---|---|---|---|
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field | ||
3 | and use that to determine whether to do the negation. | ||
2 | 4 | ||
3 | Use the function instead of the array directly. | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- | ||
9 | target/arm/tcg/translate-sve.c | 4 ++-- | ||
10 | 2 files changed, 17 insertions(+), 5 deletions(-) | ||
4 | 11 | ||
5 | Because the function performs its own masking, via the uint8_t | 12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
6 | parameter, we need to do nothing extra within the users: the bits | ||
7 | above the first 2 (_uh) or 4 (_uw) will be discarded by assignment | ||
8 | to the local bmask variables, and of course _uq uses the entire | ||
9 | uint64_t result. | ||
10 | |||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-id: 20220607203306.657998-17-richard.henderson@linaro.org | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | --- | ||
16 | target/arm/mve_helper.c | 6 +++--- | ||
17 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/mve_helper.c | 14 | --- a/target/arm/tcg/sve_helper.c |
22 | +++ b/target/arm/mve_helper.c | 15 | +++ b/target/arm/tcg/sve_helper.c |
23 | @@ -XXX,XX +XXX,XX @@ static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask) | 16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) |
24 | 17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | |
25 | static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask) | ||
26 | { | 18 | { |
27 | - uint16_t bmask = expand_pred_b_data[mask & 3]; | 19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; |
28 | + uint16_t bmask = expand_pred_b(mask); | 20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
29 | *d = (*d & ~bmask) | (r & bmask); | 21 | uint16_t *d = vd, *n = vn, *m = vm; |
22 | for (i = 0; i < opr_sz; i += 1) { | ||
23 | uint16_t nn = n[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
25 | if (mm & 1) { | ||
26 | nn = float16_one; | ||
27 | } | ||
28 | - d[i] = nn ^ (mm & 2) << 14; | ||
29 | + if (mm & 2) { | ||
30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | ||
31 | + } | ||
32 | + d[i] = nn; | ||
33 | } | ||
30 | } | 34 | } |
31 | 35 | ||
32 | @@ -XXX,XX +XXX,XX @@ static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask) | 36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) |
33 | |||
34 | static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask) | ||
35 | { | 37 | { |
36 | - uint32_t bmask = expand_pred_b_data[mask & 0xf]; | 38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; |
37 | + uint32_t bmask = expand_pred_b(mask); | 39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
38 | *d = (*d & ~bmask) | (r & bmask); | 40 | uint32_t *d = vd, *n = vn, *m = vm; |
41 | for (i = 0; i < opr_sz; i += 1) { | ||
42 | uint32_t nn = n[i]; | ||
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
44 | if (mm & 1) { | ||
45 | nn = float32_one; | ||
46 | } | ||
47 | - d[i] = nn ^ (mm & 2) << 30; | ||
48 | + if (mm & 2) { | ||
49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | ||
50 | + } | ||
51 | + d[i] = nn; | ||
52 | } | ||
39 | } | 53 | } |
40 | 54 | ||
41 | @@ -XXX,XX +XXX,XX @@ static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask) | 55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) |
42 | |||
43 | static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask) | ||
44 | { | 56 | { |
45 | - uint64_t bmask = expand_pred_b_data[mask & 0xff]; | 57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; |
46 | + uint64_t bmask = expand_pred_b(mask); | 58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
47 | *d = (*d & ~bmask) | (r & bmask); | 59 | uint64_t *d = vd, *n = vn, *m = vm; |
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
48 | } | 72 | } |
49 | 73 | ||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
92 | |||
93 | /* | ||
94 | *** SVE Predicate Logical Operations Group | ||
50 | -- | 95 | -- |
51 | 2.25.1 | 96 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when |
---|---|---|---|
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, | ||
3 | so we can select the correct behaviour. | ||
2 | 4 | ||
3 | Begin moving all of the page table walking functions | 5 | Because the operand is known to be negative, negating the operand |
4 | out of helper.c, starting with get_phys_addr(). | 6 | is the same as taking the absolute value. Defer this to the muladd |
7 | operation via flags, so that it happens after NaN detection, which | ||
8 | is correct for FPCR.AH. | ||
5 | 9 | ||
6 | Create a temporary header file, "ptw.h", in which to | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | share declarations between the two C files while we | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | are moving functions. | 12 | --- |
13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- | ||
14 | target/arm/tcg/translate-sve.c | 3 ++- | ||
15 | 2 files changed, 35 insertions(+), 10 deletions(-) | ||
9 | 16 | ||
10 | Move a few declarations to "internals.h", which will | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
11 | remain used by multiple C files. | ||
12 | |||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20220604040607.269301-3-richard.henderson@linaro.org | ||
15 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | --- | ||
18 | target/arm/internals.h | 18 ++- | ||
19 | target/arm/ptw.h | 51 ++++++ | ||
20 | target/arm/helper.c | 344 +++++------------------------------------ | ||
21 | target/arm/ptw.c | 267 ++++++++++++++++++++++++++++++++ | ||
22 | target/arm/meson.build | 1 + | ||
23 | 5 files changed, 372 insertions(+), 309 deletions(-) | ||
24 | create mode 100644 target/arm/ptw.h | ||
25 | create mode 100644 target/arm/ptw.c | ||
26 | |||
27 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/internals.h | 19 | --- a/target/arm/tcg/sve_helper.c |
30 | +++ b/target/arm/internals.h | 20 | +++ b/target/arm/tcg/sve_helper.c |
31 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env, | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, |
32 | /* Return the MMU index for a v7M CPU in the specified security state */ | 22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
33 | ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); | 23 | }; |
34 | 24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); | |
35 | -/* Return true if the stage 1 translation regime is using LPAE format page | 25 | - intptr_t x = simd_data(desc); |
36 | - * tables */ | 26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
37 | +/* Return true if the translation regime is using LPAE format page tables */ | 27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
38 | +bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx); | 28 | float16 *d = vd, *n = vn, *m = vm; |
39 | + | 29 | + |
40 | +/* | 30 | for (i = 0; i < opr_sz; i++) { |
41 | + * Return true if the stage 1 translation regime is using LPAE | 31 | float16 mm = m[i]; |
42 | + * format page tables | 32 | intptr_t xx = x; |
43 | + */ | 33 | + int flags = 0; |
44 | bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx); | 34 | + |
45 | 35 | if (float16_is_neg(mm)) { | |
46 | /* Raise a data fault alignment exception for the specified virtual address */ | 36 | - mm = float16_abs(mm); |
47 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) | 37 | + if (fpcr_ah) { |
38 | + flags = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + mm = float16_abs(mm); | ||
41 | + } | ||
42 | xx += 8; | ||
43 | } | ||
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
48 | } | 46 | } |
49 | } | 47 | } |
50 | 48 | ||
51 | +/* Return the SCTLR value which controls this address translation regime */ | 49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, |
52 | +static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) | 50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, |
53 | +{ | 51 | }; |
54 | + return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; | 52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); |
55 | +} | 53 | - intptr_t x = simd_data(desc); |
54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
56 | float32 *d = vd, *n = vn, *m = vm; | ||
56 | + | 57 | + |
57 | /* Return the TCR controlling this translation regime */ | 58 | for (i = 0; i < opr_sz; i++) { |
58 | static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) | 59 | float32 mm = m[i]; |
59 | { | 60 | intptr_t xx = x; |
60 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMVAParameters { | 61 | + int flags = 0; |
61 | ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | ||
62 | ARMMMUIdx mmu_idx, bool data); | ||
63 | |||
64 | +int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); | ||
65 | +int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx); | ||
66 | + | 62 | + |
67 | static inline int exception_target_el(CPUARMState *env) | 63 | if (float32_is_neg(mm)) { |
68 | { | 64 | - mm = float32_abs(mm); |
69 | int target_el = MAX(1, arm_current_el(env)); | 65 | + if (fpcr_ah) { |
70 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 66 | + flags = float_muladd_negate_product; |
71 | new file mode 100644 | 67 | + } else { |
72 | index XXXXXXX..XXXXXXX | 68 | + mm = float32_abs(mm); |
73 | --- /dev/null | 69 | + } |
74 | +++ b/target/arm/ptw.h | 70 | xx += 8; |
75 | @@ -XXX,XX +XXX,XX @@ | 71 | } |
76 | +/* | 72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); |
77 | + * ARM page table walking. | 73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); |
78 | + * | ||
79 | + * This code is licensed under the GNU GPL v2 or later. | ||
80 | + * | ||
81 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
82 | + */ | ||
83 | + | ||
84 | +#ifndef TARGET_ARM_PTW_H | ||
85 | +#define TARGET_ARM_PTW_H | ||
86 | + | ||
87 | +#ifndef CONFIG_USER_ONLY | ||
88 | + | ||
89 | +bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | ||
90 | +bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | ||
91 | +ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
92 | + ARMCacheAttrs s1, ARMCacheAttrs s2); | ||
93 | + | ||
94 | +bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | ||
95 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
96 | + hwaddr *phys_ptr, int *prot, | ||
97 | + target_ulong *page_size, | ||
98 | + ARMMMUFaultInfo *fi); | ||
99 | +bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
100 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
101 | + hwaddr *phys_ptr, int *prot, | ||
102 | + ARMMMUFaultInfo *fi); | ||
103 | +bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
104 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
105 | + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
106 | + target_ulong *page_size, ARMMMUFaultInfo *fi); | ||
107 | +bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | ||
108 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
109 | + hwaddr *phys_ptr, int *prot, | ||
110 | + target_ulong *page_size, | ||
111 | + ARMMMUFaultInfo *fi); | ||
112 | +bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
113 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
114 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
115 | + int *prot, target_ulong *page_size, | ||
116 | + ARMMMUFaultInfo *fi); | ||
117 | +bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
118 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
119 | + bool s1_is_el0, | ||
120 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
121 | + target_ulong *page_size_ptr, | ||
122 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
123 | + __attribute__((nonnull)); | ||
124 | + | ||
125 | +#endif /* !CONFIG_USER_ONLY */ | ||
126 | +#endif /* TARGET_ARM_PTW_H */ | ||
127 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
128 | index XXXXXXX..XXXXXXX 100644 | ||
129 | --- a/target/arm/helper.c | ||
130 | +++ b/target/arm/helper.c | ||
131 | @@ -XXX,XX +XXX,XX @@ | ||
132 | #include "semihosting/common-semi.h" | ||
133 | #endif | ||
134 | #include "cpregs.h" | ||
135 | +#include "ptw.h" | ||
136 | |||
137 | #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ | ||
138 | |||
139 | -#ifndef CONFIG_USER_ONLY | ||
140 | - | ||
141 | -static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
142 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
143 | - bool s1_is_el0, | ||
144 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | ||
145 | - target_ulong *page_size_ptr, | ||
146 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
147 | - __attribute__((nonnull)); | ||
148 | -#endif | ||
149 | - | ||
150 | static void switch_mode(CPUARMState *env, int mode); | ||
151 | -static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); | ||
152 | |||
153 | static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) | ||
154 | { | ||
155 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el) | ||
156 | return env->cp15.sctlr_el[el]; | ||
157 | } | ||
158 | |||
159 | -/* Return the SCTLR value which controls this address translation regime */ | ||
160 | -static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
161 | -{ | ||
162 | - return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; | ||
163 | -} | ||
164 | - | ||
165 | #ifndef CONFIG_USER_ONLY | ||
166 | |||
167 | /* Return true if the specified stage of address translation is disabled */ | ||
168 | -static inline bool regime_translation_disabled(CPUARMState *env, | ||
169 | - ARMMMUIdx mmu_idx) | ||
170 | +bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
171 | { | ||
172 | uint64_t hcr_el2; | ||
173 | |||
174 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
175 | #endif /* !CONFIG_USER_ONLY */ | ||
176 | |||
177 | /* Return true if the translation regime is using LPAE format page tables */ | ||
178 | -static inline bool regime_using_lpae_format(CPUARMState *env, | ||
179 | - ARMMMUIdx mmu_idx) | ||
180 | +bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
181 | { | ||
182 | int el = regime_el(env, mmu_idx); | ||
183 | if (el == 2 || arm_el_is_aa64(env, el)) { | ||
184 | @@ -XXX,XX +XXX,XX @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
185 | } | ||
186 | |||
187 | #ifndef CONFIG_USER_ONLY | ||
188 | -static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
189 | +bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
190 | { | ||
191 | switch (mmu_idx) { | ||
192 | case ARMMMUIdx_SE10_0: | ||
193 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | -static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | ||
198 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
199 | - hwaddr *phys_ptr, int *prot, | ||
200 | - target_ulong *page_size, | ||
201 | - ARMMMUFaultInfo *fi) | ||
202 | +bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | ||
203 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
204 | + hwaddr *phys_ptr, int *prot, | ||
205 | + target_ulong *page_size, | ||
206 | + ARMMMUFaultInfo *fi) | ||
207 | { | ||
208 | CPUState *cs = env_cpu(env); | ||
209 | int level = 1; | ||
210 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
211 | return true; | ||
212 | } | ||
213 | |||
214 | -static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
215 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
216 | - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
217 | - target_ulong *page_size, ARMMMUFaultInfo *fi) | ||
218 | +bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
219 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
220 | + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
221 | + target_ulong *page_size, ARMMMUFaultInfo *fi) | ||
222 | { | ||
223 | CPUState *cs = env_cpu(env); | ||
224 | ARMCPU *cpu = env_archcpu(env); | ||
225 | @@ -XXX,XX +XXX,XX @@ unsigned int arm_pamax(ARMCPU *cpu) | ||
226 | return pamax_map[parange]; | ||
227 | } | ||
228 | |||
229 | -static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
230 | +int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
231 | { | ||
232 | if (regime_has_2_ranges(mmu_idx)) { | ||
233 | return extract64(tcr, 37, 2); | ||
234 | @@ -XXX,XX +XXX,XX @@ static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
235 | } | 74 | } |
236 | } | 75 | } |
237 | 76 | ||
238 | -static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, |
239 | +int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) | 78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, |
240 | { | 79 | }; |
241 | if (regime_has_2_ranges(mmu_idx)) { | 80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); |
242 | return extract64(tcr, 51, 2); | 81 | - intptr_t x = simd_data(desc); |
243 | @@ -XXX,XX +XXX,XX @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, | 82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
244 | * @fi: set to fault info if the translation fails | 83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
245 | * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | 84 | float64 *d = vd, *n = vn, *m = vm; |
246 | */ | 85 | + |
247 | -static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 86 | for (i = 0; i < opr_sz; i++) { |
248 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 87 | float64 mm = m[i]; |
249 | - bool s1_is_el0, | 88 | intptr_t xx = x; |
250 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | 89 | + int flags = 0; |
251 | - target_ulong *page_size_ptr, | 90 | + |
252 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | 91 | if (float64_is_neg(mm)) { |
253 | +bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 92 | - mm = float64_abs(mm); |
254 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | 93 | + if (fpcr_ah) { |
255 | + bool s1_is_el0, | 94 | + flags = float_muladd_negate_product; |
256 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, | 95 | + } else { |
257 | + target_ulong *page_size_ptr, | 96 | + mm = float64_abs(mm); |
258 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | 97 | + } |
259 | { | 98 | xx += 8; |
260 | ARMCPU *cpu = env_archcpu(env); | 99 | } |
261 | CPUState *cs = CPU(cpu); | 100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); |
262 | @@ -XXX,XX +XXX,XX @@ static inline bool m_is_system_region(CPUARMState *env, uint32_t address) | 101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); |
263 | return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; | 102 | } |
264 | } | 103 | } |
265 | 104 | ||
266 | -static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c |
267 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
268 | - hwaddr *phys_ptr, int *prot, | ||
269 | - target_ulong *page_size, | ||
270 | - ARMMMUFaultInfo *fi) | ||
271 | +bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | ||
272 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
273 | + hwaddr *phys_ptr, int *prot, | ||
274 | + target_ulong *page_size, | ||
275 | + ARMMMUFaultInfo *fi) | ||
276 | { | ||
277 | ARMCPU *cpu = env_archcpu(env); | ||
278 | int n; | ||
279 | @@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | ||
280 | } | ||
281 | |||
282 | |||
283 | -static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
284 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
285 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
286 | - int *prot, target_ulong *page_size, | ||
287 | - ARMMMUFaultInfo *fi) | ||
288 | +bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
289 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
290 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
291 | + int *prot, target_ulong *page_size, | ||
292 | + ARMMMUFaultInfo *fi) | ||
293 | { | ||
294 | uint32_t secure = regime_is_secure(env, mmu_idx); | ||
295 | V8M_SAttributes sattrs = {}; | ||
296 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
297 | return ret; | ||
298 | } | ||
299 | |||
300 | -static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
301 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
302 | - hwaddr *phys_ptr, int *prot, | ||
303 | - ARMMMUFaultInfo *fi) | ||
304 | +bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
305 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
306 | + hwaddr *phys_ptr, int *prot, | ||
307 | + ARMMMUFaultInfo *fi) | ||
308 | { | ||
309 | int n; | ||
310 | uint32_t mask; | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint8_t combined_attrs_fwb(CPUARMState *env, | ||
312 | * @s1: Attributes from stage 1 walk | ||
313 | * @s2: Attributes from stage 2 walk | ||
314 | */ | ||
315 | -static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
316 | - ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
317 | +ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
318 | + ARMCacheAttrs s1, ARMCacheAttrs s2) | ||
319 | { | ||
320 | ARMCacheAttrs ret; | ||
321 | bool tagged = false; | ||
322 | @@ -XXX,XX +XXX,XX @@ static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
323 | return ret; | ||
324 | } | ||
325 | |||
326 | - | ||
327 | -/* get_phys_addr - get the physical address for this virtual address | ||
328 | - * | ||
329 | - * Find the physical address corresponding to the given virtual address, | ||
330 | - * by doing a translation table walk on MMU based systems or using the | ||
331 | - * MPU state on MPU based systems. | ||
332 | - * | ||
333 | - * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, | ||
334 | - * prot and page_size may not be filled in, and the populated fsr value provides | ||
335 | - * information on why the translation aborted, in the format of a | ||
336 | - * DFSR/IFSR fault register, with the following caveats: | ||
337 | - * * we honour the short vs long DFSR format differences. | ||
338 | - * * the WnR bit is never set (the caller must do this). | ||
339 | - * * for PSMAv5 based systems we don't bother to return a full FSR format | ||
340 | - * value. | ||
341 | - * | ||
342 | - * @env: CPUARMState | ||
343 | - * @address: virtual address to get physical address for | ||
344 | - * @access_type: 0 for read, 1 for write, 2 for execute | ||
345 | - * @mmu_idx: MMU index indicating required translation regime | ||
346 | - * @phys_ptr: set to the physical address corresponding to the virtual address | ||
347 | - * @attrs: set to the memory transaction attributes to use | ||
348 | - * @prot: set to the permissions for the page containing phys_ptr | ||
349 | - * @page_size: set to the size of the page containing phys_ptr | ||
350 | - * @fi: set to fault info if the translation fails | ||
351 | - * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | ||
352 | - */ | ||
353 | -bool get_phys_addr(CPUARMState *env, target_ulong address, | ||
354 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
355 | - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
356 | - target_ulong *page_size, | ||
357 | - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
358 | -{ | ||
359 | - ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
360 | - | ||
361 | - if (mmu_idx != s1_mmu_idx) { | ||
362 | - /* Call ourselves recursively to do the stage 1 and then stage 2 | ||
363 | - * translations if mmu_idx is a two-stage regime. | ||
364 | - */ | ||
365 | - if (arm_feature(env, ARM_FEATURE_EL2)) { | ||
366 | - hwaddr ipa; | ||
367 | - int s2_prot; | ||
368 | - int ret; | ||
369 | - bool ipa_secure; | ||
370 | - ARMCacheAttrs cacheattrs2 = {}; | ||
371 | - ARMMMUIdx s2_mmu_idx; | ||
372 | - bool is_el0; | ||
373 | - | ||
374 | - ret = get_phys_addr(env, address, access_type, s1_mmu_idx, &ipa, | ||
375 | - attrs, prot, page_size, fi, cacheattrs); | ||
376 | - | ||
377 | - /* If S1 fails or S2 is disabled, return early. */ | ||
378 | - if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) { | ||
379 | - *phys_ptr = ipa; | ||
380 | - return ret; | ||
381 | - } | ||
382 | - | ||
383 | - ipa_secure = attrs->secure; | ||
384 | - if (arm_is_secure_below_el3(env)) { | ||
385 | - if (ipa_secure) { | ||
386 | - attrs->secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW); | ||
387 | - } else { | ||
388 | - attrs->secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW); | ||
389 | - } | ||
390 | - } else { | ||
391 | - assert(!ipa_secure); | ||
392 | - } | ||
393 | - | ||
394 | - s2_mmu_idx = attrs->secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; | ||
395 | - is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0; | ||
396 | - | ||
397 | - /* S1 is done. Now do S2 translation. */ | ||
398 | - ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, is_el0, | ||
399 | - phys_ptr, attrs, &s2_prot, | ||
400 | - page_size, fi, &cacheattrs2); | ||
401 | - fi->s2addr = ipa; | ||
402 | - /* Combine the S1 and S2 perms. */ | ||
403 | - *prot &= s2_prot; | ||
404 | - | ||
405 | - /* If S2 fails, return early. */ | ||
406 | - if (ret) { | ||
407 | - return ret; | ||
408 | - } | ||
409 | - | ||
410 | - /* Combine the S1 and S2 cache attributes. */ | ||
411 | - if (arm_hcr_el2_eff(env) & HCR_DC) { | ||
412 | - /* | ||
413 | - * HCR.DC forces the first stage attributes to | ||
414 | - * Normal Non-Shareable, | ||
415 | - * Inner Write-Back Read-Allocate Write-Allocate, | ||
416 | - * Outer Write-Back Read-Allocate Write-Allocate. | ||
417 | - * Do not overwrite Tagged within attrs. | ||
418 | - */ | ||
419 | - if (cacheattrs->attrs != 0xf0) { | ||
420 | - cacheattrs->attrs = 0xff; | ||
421 | - } | ||
422 | - cacheattrs->shareability = 0; | ||
423 | - } | ||
424 | - *cacheattrs = combine_cacheattrs(env, *cacheattrs, cacheattrs2); | ||
425 | - | ||
426 | - /* Check if IPA translates to secure or non-secure PA space. */ | ||
427 | - if (arm_is_secure_below_el3(env)) { | ||
428 | - if (ipa_secure) { | ||
429 | - attrs->secure = | ||
430 | - !(env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW)); | ||
431 | - } else { | ||
432 | - attrs->secure = | ||
433 | - !((env->cp15.vtcr_el2.raw_tcr & (VTCR_NSA | VTCR_NSW)) | ||
434 | - || (env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW))); | ||
435 | - } | ||
436 | - } | ||
437 | - return 0; | ||
438 | - } else { | ||
439 | - /* | ||
440 | - * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. | ||
441 | - */ | ||
442 | - mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
443 | - } | ||
444 | - } | ||
445 | - | ||
446 | - /* The page table entries may downgrade secure to non-secure, but | ||
447 | - * cannot upgrade an non-secure translation regime's attributes | ||
448 | - * to secure. | ||
449 | - */ | ||
450 | - attrs->secure = regime_is_secure(env, mmu_idx); | ||
451 | - attrs->user = regime_is_user(env, mmu_idx); | ||
452 | - | ||
453 | - /* Fast Context Switch Extension. This doesn't exist at all in v8. | ||
454 | - * In v7 and earlier it affects all stage 1 translations. | ||
455 | - */ | ||
456 | - if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 | ||
457 | - && !arm_feature(env, ARM_FEATURE_V8)) { | ||
458 | - if (regime_el(env, mmu_idx) == 3) { | ||
459 | - address += env->cp15.fcseidr_s; | ||
460 | - } else { | ||
461 | - address += env->cp15.fcseidr_ns; | ||
462 | - } | ||
463 | - } | ||
464 | - | ||
465 | - if (arm_feature(env, ARM_FEATURE_PMSA)) { | ||
466 | - bool ret; | ||
467 | - *page_size = TARGET_PAGE_SIZE; | ||
468 | - | ||
469 | - if (arm_feature(env, ARM_FEATURE_V8)) { | ||
470 | - /* PMSAv8 */ | ||
471 | - ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx, | ||
472 | - phys_ptr, attrs, prot, page_size, fi); | ||
473 | - } else if (arm_feature(env, ARM_FEATURE_V7)) { | ||
474 | - /* PMSAv7 */ | ||
475 | - ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx, | ||
476 | - phys_ptr, prot, page_size, fi); | ||
477 | - } else { | ||
478 | - /* Pre-v7 MPU */ | ||
479 | - ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx, | ||
480 | - phys_ptr, prot, fi); | ||
481 | - } | ||
482 | - qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32 | ||
483 | - " mmu_idx %u -> %s (prot %c%c%c)\n", | ||
484 | - access_type == MMU_DATA_LOAD ? "reading" : | ||
485 | - (access_type == MMU_DATA_STORE ? "writing" : "execute"), | ||
486 | - (uint32_t)address, mmu_idx, | ||
487 | - ret ? "Miss" : "Hit", | ||
488 | - *prot & PAGE_READ ? 'r' : '-', | ||
489 | - *prot & PAGE_WRITE ? 'w' : '-', | ||
490 | - *prot & PAGE_EXEC ? 'x' : '-'); | ||
491 | - | ||
492 | - return ret; | ||
493 | - } | ||
494 | - | ||
495 | - /* Definitely a real MMU, not an MPU */ | ||
496 | - | ||
497 | - if (regime_translation_disabled(env, mmu_idx)) { | ||
498 | - uint64_t hcr; | ||
499 | - uint8_t memattr; | ||
500 | - | ||
501 | - /* | ||
502 | - * MMU disabled. S1 addresses within aa64 translation regimes are | ||
503 | - * still checked for bounds -- see AArch64.TranslateAddressS1Off. | ||
504 | - */ | ||
505 | - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { | ||
506 | - int r_el = regime_el(env, mmu_idx); | ||
507 | - if (arm_el_is_aa64(env, r_el)) { | ||
508 | - int pamax = arm_pamax(env_archcpu(env)); | ||
509 | - uint64_t tcr = env->cp15.tcr_el[r_el].raw_tcr; | ||
510 | - int addrtop, tbi; | ||
511 | - | ||
512 | - tbi = aa64_va_parameter_tbi(tcr, mmu_idx); | ||
513 | - if (access_type == MMU_INST_FETCH) { | ||
514 | - tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); | ||
515 | - } | ||
516 | - tbi = (tbi >> extract64(address, 55, 1)) & 1; | ||
517 | - addrtop = (tbi ? 55 : 63); | ||
518 | - | ||
519 | - if (extract64(address, pamax, addrtop - pamax + 1) != 0) { | ||
520 | - fi->type = ARMFault_AddressSize; | ||
521 | - fi->level = 0; | ||
522 | - fi->stage2 = false; | ||
523 | - return 1; | ||
524 | - } | ||
525 | - | ||
526 | - /* | ||
527 | - * When TBI is disabled, we've just validated that all of the | ||
528 | - * bits above PAMax are zero, so logically we only need to | ||
529 | - * clear the top byte for TBI. But it's clearer to follow | ||
530 | - * the pseudocode set of addrdesc.paddress. | ||
531 | - */ | ||
532 | - address = extract64(address, 0, 52); | ||
533 | - } | ||
534 | - } | ||
535 | - *phys_ptr = address; | ||
536 | - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | ||
537 | - *page_size = TARGET_PAGE_SIZE; | ||
538 | - | ||
539 | - /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ | ||
540 | - hcr = arm_hcr_el2_eff(env); | ||
541 | - cacheattrs->shareability = 0; | ||
542 | - cacheattrs->is_s2_format = false; | ||
543 | - if (hcr & HCR_DC) { | ||
544 | - if (hcr & HCR_DCT) { | ||
545 | - memattr = 0xf0; /* Tagged, Normal, WB, RWA */ | ||
546 | - } else { | ||
547 | - memattr = 0xff; /* Normal, WB, RWA */ | ||
548 | - } | ||
549 | - } else if (access_type == MMU_INST_FETCH) { | ||
550 | - if (regime_sctlr(env, mmu_idx) & SCTLR_I) { | ||
551 | - memattr = 0xee; /* Normal, WT, RA, NT */ | ||
552 | - } else { | ||
553 | - memattr = 0x44; /* Normal, NC, No */ | ||
554 | - } | ||
555 | - cacheattrs->shareability = 2; /* outer sharable */ | ||
556 | - } else { | ||
557 | - memattr = 0x00; /* Device, nGnRnE */ | ||
558 | - } | ||
559 | - cacheattrs->attrs = memattr; | ||
560 | - return 0; | ||
561 | - } | ||
562 | - | ||
563 | - if (regime_using_lpae_format(env, mmu_idx)) { | ||
564 | - return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, | ||
565 | - phys_ptr, attrs, prot, page_size, | ||
566 | - fi, cacheattrs); | ||
567 | - } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { | ||
568 | - return get_phys_addr_v6(env, address, access_type, mmu_idx, | ||
569 | - phys_ptr, attrs, prot, page_size, fi); | ||
570 | - } else { | ||
571 | - return get_phys_addr_v5(env, address, access_type, mmu_idx, | ||
572 | - phys_ptr, prot, page_size, fi); | ||
573 | - } | ||
574 | -} | ||
575 | - | ||
576 | hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
577 | MemTxAttrs *attrs) | ||
578 | { | ||
579 | @@ -XXX,XX +XXX,XX @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
580 | } | ||
581 | return phys_addr; | ||
582 | } | ||
583 | - | ||
584 | #endif | ||
585 | |||
586 | /* Note that signed overflow is undefined in C. The following routines are | ||
587 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
588 | new file mode 100644 | ||
589 | index XXXXXXX..XXXXXXX | ||
590 | --- /dev/null | ||
591 | +++ b/target/arm/ptw.c | ||
592 | @@ -XXX,XX +XXX,XX @@ | ||
593 | +/* | ||
594 | + * ARM page table walking. | ||
595 | + * | ||
596 | + * This code is licensed under the GNU GPL v2 or later. | ||
597 | + * | ||
598 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
599 | + */ | ||
600 | + | ||
601 | +#include "qemu/osdep.h" | ||
602 | +#include "qemu/log.h" | ||
603 | +#include "cpu.h" | ||
604 | +#include "internals.h" | ||
605 | +#include "ptw.h" | ||
606 | + | ||
607 | + | ||
608 | +/** | ||
609 | + * get_phys_addr - get the physical address for this virtual address | ||
610 | + * | ||
611 | + * Find the physical address corresponding to the given virtual address, | ||
612 | + * by doing a translation table walk on MMU based systems or using the | ||
613 | + * MPU state on MPU based systems. | ||
614 | + * | ||
615 | + * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, | ||
616 | + * prot and page_size may not be filled in, and the populated fsr value provides | ||
617 | + * information on why the translation aborted, in the format of a | ||
618 | + * DFSR/IFSR fault register, with the following caveats: | ||
619 | + * * we honour the short vs long DFSR format differences. | ||
620 | + * * the WnR bit is never set (the caller must do this). | ||
621 | + * * for PMSAv5 based systems we don't bother to return a full FSR format | |
622 | + * value. | ||
623 | + * | ||
624 | + * @env: CPUARMState | ||
625 | + * @address: virtual address to get physical address for | ||
626 | + * @access_type: 0 for read, 1 for write, 2 for execute | ||
627 | + * @mmu_idx: MMU index indicating required translation regime | ||
628 | + * @phys_ptr: set to the physical address corresponding to the virtual address | ||
629 | + * @attrs: set to the memory transaction attributes to use | ||
630 | + * @prot: set to the permissions for the page containing phys_ptr | ||
631 | + * @page_size: set to the size of the page containing phys_ptr | ||
632 | + * @fi: set to fault info if the translation fails | ||
633 | + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes | ||
634 | + */ | ||
635 | +bool get_phys_addr(CPUARMState *env, target_ulong address, | ||
636 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
637 | + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
638 | + target_ulong *page_size, | ||
639 | + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
640 | +{ | ||
641 | + ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
642 | + | ||
643 | + if (mmu_idx != s1_mmu_idx) { | ||
644 | + /* | ||
645 | + * Call ourselves recursively to do the stage 1 and then stage 2 | ||
646 | + * translations if mmu_idx is a two-stage regime. | ||
647 | + */ | ||
648 | + if (arm_feature(env, ARM_FEATURE_EL2)) { | ||
649 | + hwaddr ipa; | ||
650 | + int s2_prot; | ||
651 | + int ret; | ||
652 | + bool ipa_secure; | ||
653 | + ARMCacheAttrs cacheattrs2 = {}; | ||
654 | + ARMMMUIdx s2_mmu_idx; | ||
655 | + bool is_el0; | ||
656 | + | ||
657 | + ret = get_phys_addr(env, address, access_type, s1_mmu_idx, &ipa, | ||
658 | + attrs, prot, page_size, fi, cacheattrs); | ||
659 | + | ||
660 | + /* If S1 fails or S2 is disabled, return early. */ | ||
661 | + if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) { | ||
662 | + *phys_ptr = ipa; | ||
663 | + return ret; | ||
664 | + } | ||
665 | + | ||
666 | + ipa_secure = attrs->secure; | ||
667 | + if (arm_is_secure_below_el3(env)) { | ||
668 | + if (ipa_secure) { | ||
669 | + attrs->secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW); | ||
670 | + } else { | ||
671 | + attrs->secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW); | ||
672 | + } | ||
673 | + } else { | ||
674 | + assert(!ipa_secure); | ||
675 | + } | ||
676 | + | ||
677 | + s2_mmu_idx = attrs->secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; | ||
678 | + is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0; | ||
679 | + | ||
680 | + /* S1 is done. Now do S2 translation. */ | ||
681 | + ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, is_el0, | ||
682 | + phys_ptr, attrs, &s2_prot, | ||
683 | + page_size, fi, &cacheattrs2); | ||
684 | + fi->s2addr = ipa; | ||
685 | + /* Combine the S1 and S2 perms. */ | ||
686 | + *prot &= s2_prot; | ||
687 | + | ||
688 | + /* If S2 fails, return early. */ | ||
689 | + if (ret) { | ||
690 | + return ret; | ||
691 | + } | ||
692 | + | ||
693 | + /* Combine the S1 and S2 cache attributes. */ | ||
694 | + if (arm_hcr_el2_eff(env) & HCR_DC) { | ||
695 | + /* | ||
696 | + * HCR.DC forces the first stage attributes to | ||
697 | + * Normal Non-Shareable, | ||
698 | + * Inner Write-Back Read-Allocate Write-Allocate, | ||
699 | + * Outer Write-Back Read-Allocate Write-Allocate. | ||
700 | + * Do not overwrite Tagged within attrs. | ||
701 | + */ | ||
702 | + if (cacheattrs->attrs != 0xf0) { | ||
703 | + cacheattrs->attrs = 0xff; | ||
704 | + } | ||
705 | + cacheattrs->shareability = 0; | ||
706 | + } | ||
707 | + *cacheattrs = combine_cacheattrs(env, *cacheattrs, cacheattrs2); | ||
708 | + | ||
709 | + /* Check if IPA translates to secure or non-secure PA space. */ | ||
710 | + if (arm_is_secure_below_el3(env)) { | ||
711 | + if (ipa_secure) { | ||
712 | + attrs->secure = | ||
713 | + !(env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW)); | ||
714 | + } else { | ||
715 | + attrs->secure = | ||
716 | + !((env->cp15.vtcr_el2.raw_tcr & (VTCR_NSA | VTCR_NSW)) | ||
717 | + || (env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW))); | ||
718 | + } | ||
719 | + } | ||
720 | + return 0; | ||
721 | + } else { | ||
722 | + /* | ||
723 | + * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. | ||
724 | + */ | ||
725 | + mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
726 | + } | ||
727 | + } | ||
728 | + | ||
729 | + /* | ||
730 | + * The page table entries may downgrade secure to non-secure, but | ||
731 | + * cannot upgrade a non-secure translation regime's attributes | |
732 | + * to secure. | ||
733 | + */ | ||
734 | + attrs->secure = regime_is_secure(env, mmu_idx); | ||
735 | + attrs->user = regime_is_user(env, mmu_idx); | ||
736 | + | ||
737 | + /* | ||
738 | + * Fast Context Switch Extension. This doesn't exist at all in v8. | ||
739 | + * In v7 and earlier it affects all stage 1 translations. | ||
740 | + */ | ||
741 | + if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 | ||
742 | + && !arm_feature(env, ARM_FEATURE_V8)) { | ||
743 | + if (regime_el(env, mmu_idx) == 3) { | ||
744 | + address += env->cp15.fcseidr_s; | ||
745 | + } else { | ||
746 | + address += env->cp15.fcseidr_ns; | ||
747 | + } | ||
748 | + } | ||
749 | + | ||
750 | + if (arm_feature(env, ARM_FEATURE_PMSA)) { | ||
751 | + bool ret; | ||
752 | + *page_size = TARGET_PAGE_SIZE; | ||
753 | + | ||
754 | + if (arm_feature(env, ARM_FEATURE_V8)) { | ||
755 | + /* PMSAv8 */ | ||
756 | + ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx, | ||
757 | + phys_ptr, attrs, prot, page_size, fi); | ||
758 | + } else if (arm_feature(env, ARM_FEATURE_V7)) { | ||
759 | + /* PMSAv7 */ | ||
760 | + ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx, | ||
761 | + phys_ptr, prot, page_size, fi); | ||
762 | + } else { | ||
763 | + /* Pre-v7 MPU */ | ||
764 | + ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx, | ||
765 | + phys_ptr, prot, fi); | ||
766 | + } | ||
767 | + qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32 | ||
768 | + " mmu_idx %u -> %s (prot %c%c%c)\n", | ||
769 | + access_type == MMU_DATA_LOAD ? "reading" : | ||
770 | + (access_type == MMU_DATA_STORE ? "writing" : "execute"), | ||
771 | + (uint32_t)address, mmu_idx, | ||
772 | + ret ? "Miss" : "Hit", | ||
773 | + *prot & PAGE_READ ? 'r' : '-', | ||
774 | + *prot & PAGE_WRITE ? 'w' : '-', | ||
775 | + *prot & PAGE_EXEC ? 'x' : '-'); | ||
776 | + | ||
777 | + return ret; | ||
778 | + } | ||
779 | + | ||
780 | + /* Definitely a real MMU, not an MPU */ | ||
781 | + | ||
782 | + if (regime_translation_disabled(env, mmu_idx)) { | ||
783 | + uint64_t hcr; | ||
784 | + uint8_t memattr; | ||
785 | + | ||
786 | + /* | ||
787 | + * MMU disabled. S1 addresses within aa64 translation regimes are | ||
788 | + * still checked for bounds -- see AArch64.TranslateAddressS1Off. | ||
789 | + */ | ||
790 | + if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { | ||
791 | + int r_el = regime_el(env, mmu_idx); | ||
792 | + if (arm_el_is_aa64(env, r_el)) { | ||
793 | + int pamax = arm_pamax(env_archcpu(env)); | ||
794 | + uint64_t tcr = env->cp15.tcr_el[r_el].raw_tcr; | ||
795 | + int addrtop, tbi; | ||
796 | + | ||
797 | + tbi = aa64_va_parameter_tbi(tcr, mmu_idx); | ||
798 | + if (access_type == MMU_INST_FETCH) { | ||
799 | + tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); | ||
800 | + } | ||
801 | + tbi = (tbi >> extract64(address, 55, 1)) & 1; | ||
802 | + addrtop = (tbi ? 55 : 63); | ||
803 | + | ||
804 | + if (extract64(address, pamax, addrtop - pamax + 1) != 0) { | ||
805 | + fi->type = ARMFault_AddressSize; | ||
806 | + fi->level = 0; | ||
807 | + fi->stage2 = false; | ||
808 | + return 1; | ||
809 | + } | ||
810 | + | ||
811 | + /* | ||
812 | + * When TBI is disabled, we've just validated that all of the | ||
813 | + * bits above PAMax are zero, so logically we only need to | ||
814 | + * clear the top byte for TBI. But it's clearer to follow | ||
815 | + * the pseudocode set of addrdesc.paddress. | ||
816 | + */ | ||
817 | + address = extract64(address, 0, 52); | ||
818 | + } | ||
819 | + } | ||
820 | + *phys_ptr = address; | ||
821 | + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | ||
822 | + *page_size = TARGET_PAGE_SIZE; | ||
823 | + | ||
824 | + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ | ||
825 | + hcr = arm_hcr_el2_eff(env); | ||
826 | + cacheattrs->shareability = 0; | ||
827 | + cacheattrs->is_s2_format = false; | ||
828 | + if (hcr & HCR_DC) { | ||
829 | + if (hcr & HCR_DCT) { | ||
830 | + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ | ||
831 | + } else { | ||
832 | + memattr = 0xff; /* Normal, WB, RWA */ | ||
833 | + } | ||
834 | + } else if (access_type == MMU_INST_FETCH) { | ||
835 | + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { | ||
836 | + memattr = 0xee; /* Normal, WT, RA, NT */ | ||
837 | + } else { | ||
838 | + memattr = 0x44; /* Normal, NC, No */ | ||
839 | + } | ||
840 | + cacheattrs->shareability = 2; /* outer shareable */ | |
841 | + } else { | ||
842 | + memattr = 0x00; /* Device, nGnRnE */ | ||
843 | + } | ||
844 | + cacheattrs->attrs = memattr; | ||
845 | + return 0; | ||
846 | + } | ||
847 | + | ||
848 | + if (regime_using_lpae_format(env, mmu_idx)) { | ||
849 | + return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, | ||
850 | + phys_ptr, attrs, prot, page_size, | ||
851 | + fi, cacheattrs); | ||
852 | + } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { | ||
853 | + return get_phys_addr_v6(env, address, access_type, mmu_idx, | ||
854 | + phys_ptr, attrs, prot, page_size, fi); | ||
855 | + } else { | ||
856 | + return get_phys_addr_v5(env, address, access_type, mmu_idx, | ||
857 | + phys_ptr, prot, page_size, fi); | ||
858 | + } | ||
859 | +} | ||
860 | diff --git a/target/arm/meson.build b/target/arm/meson.build | ||
861 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
862 | --- a/target/arm/meson.build | 107 | --- a/target/arm/tcg/translate-sve.c |
863 | +++ b/target/arm/meson.build | 108 | +++ b/target/arm/tcg/translate-sve.c |
864 | @@ -XXX,XX +XXX,XX @@ arm_softmmu_ss.add(files( | 109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { |
865 | 'machine.c', | 110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, |
866 | 'monitor.c', | 111 | }; |
867 | 'psci.c', | 112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, |
868 | + 'ptw.c', | 113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, |
869 | )) | 114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, |
870 | 115 | + a->imm | (s->fpcr_ah << 3), | |
871 | subdir('hvf') | 116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
117 | |||
118 | /* | ||
872 | -- | 119 | -- |
873 | 2.25.1 | 120 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH | ||
4 | is set. Handle this by passing FPCR.AH to the helper via the | ||
5 | SIMD data field, and use this to select whether to do the | ||
6 | negation via XOR or via the muladd negate_product flag. | ||
7 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-5-richard.henderson@linaro.org | 9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org |
10 | [PMM: Expanded commit message] | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 13 | --- |
8 | target/arm/ptw.h | 11 +-- | 14 | target/arm/tcg/translate-a64.c | 2 +- |
9 | target/arm/helper.c | 161 +------------------------------------------- | 15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- |
10 | target/arm/ptw.c | 153 +++++++++++++++++++++++++++++++++++++++++ | 16 | 2 files changed, 40 insertions(+), 28 deletions(-) |
11 | 3 files changed, 161 insertions(+), 164 deletions(-) | ||
12 | 17 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 20 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 21 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) |
18 | uint32_t *table, uint32_t address); | 23 | |
19 | int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | 24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
20 | int ap, int domain_prot); | 25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, |
21 | +int simple_ap_to_rw_prot_is_user(int ap, bool is_user); | 26 | - a->rot, fn[a->esz]); |
22 | + | 27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); |
23 | +static inline int | ||
24 | +simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | ||
25 | +{ | ||
26 | + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | ||
27 | +} | ||
28 | |||
29 | bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
30 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
31 | hwaddr *phys_ptr, int *prot, | ||
32 | ARMMMUFaultInfo *fi); | ||
33 | -bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
34 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
35 | - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
36 | - target_ulong *page_size, ARMMMUFaultInfo *fi); | ||
37 | bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | ||
38 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
39 | hwaddr *phys_ptr, int *prot, | ||
40 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/arm/helper.c | ||
43 | +++ b/target/arm/helper.c | ||
44 | @@ -XXX,XX +XXX,XX @@ int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot) | ||
45 | * @ap: The 2-bit simple AP (AP[2:1]) | ||
46 | * @is_user: TRUE if accessing from PL0 | ||
47 | */ | ||
48 | -static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
49 | +int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
50 | { | ||
51 | switch (ap) { | ||
52 | case 0: | ||
53 | @@ -XXX,XX +XXX,XX @@ static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user) | ||
54 | } | ||
55 | } | ||
56 | |||
57 | -static inline int | ||
58 | -simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | ||
59 | -{ | ||
60 | - return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | ||
61 | -} | ||
62 | - | ||
63 | /* Translate S2 section/page access permissions to protection flags | ||
64 | * | ||
65 | * @env: CPUARMState | ||
66 | @@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | -bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
71 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
72 | - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
73 | - target_ulong *page_size, ARMMMUFaultInfo *fi) | ||
74 | -{ | ||
75 | - CPUState *cs = env_cpu(env); | ||
76 | - ARMCPU *cpu = env_archcpu(env); | ||
77 | - int level = 1; | ||
78 | - uint32_t table; | ||
79 | - uint32_t desc; | ||
80 | - uint32_t xn; | ||
81 | - uint32_t pxn = 0; | ||
82 | - int type; | ||
83 | - int ap; | ||
84 | - int domain = 0; | ||
85 | - int domain_prot; | ||
86 | - hwaddr phys_addr; | ||
87 | - uint32_t dacr; | ||
88 | - bool ns; | ||
89 | - | ||
90 | - /* Pagetable walk. */ | ||
91 | - /* Lookup l1 descriptor. */ | ||
92 | - if (!get_level1_table_address(env, mmu_idx, &table, address)) { | ||
93 | - /* Section translation fault if page walk is disabled by PD0 or PD1 */ | ||
94 | - fi->type = ARMFault_Translation; | ||
95 | - goto do_fault; | ||
96 | - } | ||
97 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
98 | - mmu_idx, fi); | ||
99 | - if (fi->type != ARMFault_None) { | ||
100 | - goto do_fault; | ||
101 | - } | ||
102 | - type = (desc & 3); | ||
103 | - if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) { | ||
104 | - /* Section translation fault, or attempt to use the encoding | ||
105 | - * which is Reserved on implementations without PXN. | ||
106 | - */ | ||
107 | - fi->type = ARMFault_Translation; | ||
108 | - goto do_fault; | ||
109 | - } | ||
110 | - if ((type == 1) || !(desc & (1 << 18))) { | ||
111 | - /* Page or Section. */ | ||
112 | - domain = (desc >> 5) & 0x0f; | ||
113 | - } | ||
114 | - if (regime_el(env, mmu_idx) == 1) { | ||
115 | - dacr = env->cp15.dacr_ns; | ||
116 | - } else { | ||
117 | - dacr = env->cp15.dacr_s; | ||
118 | - } | ||
119 | - if (type == 1) { | ||
120 | - level = 2; | ||
121 | - } | ||
122 | - domain_prot = (dacr >> (domain * 2)) & 3; | ||
123 | - if (domain_prot == 0 || domain_prot == 2) { | ||
124 | - /* Section or Page domain fault */ | ||
125 | - fi->type = ARMFault_Domain; | ||
126 | - goto do_fault; | ||
127 | - } | ||
128 | - if (type != 1) { | ||
129 | - if (desc & (1 << 18)) { | ||
130 | - /* Supersection. */ | ||
131 | - phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); | ||
132 | - phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; | ||
133 | - phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; | ||
134 | - *page_size = 0x1000000; | ||
135 | - } else { | ||
136 | - /* Section. */ | ||
137 | - phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); | ||
138 | - *page_size = 0x100000; | ||
139 | - } | ||
140 | - ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); | ||
141 | - xn = desc & (1 << 4); | ||
142 | - pxn = desc & 1; | ||
143 | - ns = extract32(desc, 19, 1); | ||
144 | - } else { | ||
145 | - if (cpu_isar_feature(aa32_pxn, cpu)) { | ||
146 | - pxn = (desc >> 2) & 1; | ||
147 | - } | ||
148 | - ns = extract32(desc, 3, 1); | ||
149 | - /* Lookup l2 entry. */ | ||
150 | - table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); | ||
151 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
152 | - mmu_idx, fi); | ||
153 | - if (fi->type != ARMFault_None) { | ||
154 | - goto do_fault; | ||
155 | - } | ||
156 | - ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); | ||
157 | - switch (desc & 3) { | ||
158 | - case 0: /* Page translation fault. */ | ||
159 | - fi->type = ARMFault_Translation; | ||
160 | - goto do_fault; | ||
161 | - case 1: /* 64k page. */ | ||
162 | - phys_addr = (desc & 0xffff0000) | (address & 0xffff); | ||
163 | - xn = desc & (1 << 15); | ||
164 | - *page_size = 0x10000; | ||
165 | - break; | ||
166 | - case 2: case 3: /* 4k page. */ | ||
167 | - phys_addr = (desc & 0xfffff000) | (address & 0xfff); | ||
168 | - xn = desc & 1; | ||
169 | - *page_size = 0x1000; | ||
170 | - break; | ||
171 | - default: | ||
172 | - /* Never happens, but compiler isn't smart enough to tell. */ | ||
173 | - g_assert_not_reached(); | ||
174 | - } | ||
175 | - } | ||
176 | - if (domain_prot == 3) { | ||
177 | - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | ||
178 | - } else { | ||
179 | - if (pxn && !regime_is_user(env, mmu_idx)) { | ||
180 | - xn = 1; | ||
181 | - } | ||
182 | - if (xn && access_type == MMU_INST_FETCH) { | ||
183 | - fi->type = ARMFault_Permission; | ||
184 | - goto do_fault; | ||
185 | - } | ||
186 | - | ||
187 | - if (arm_feature(env, ARM_FEATURE_V6K) && | ||
188 | - (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { | ||
189 | - /* The simplified model uses AP[0] as an access control bit. */ | ||
190 | - if ((ap & 1) == 0) { | ||
191 | - /* Access flag fault. */ | ||
192 | - fi->type = ARMFault_AccessFlag; | ||
193 | - goto do_fault; | ||
194 | - } | ||
195 | - *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); | ||
196 | - } else { | ||
197 | - *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); | ||
198 | - } | ||
199 | - if (*prot && !xn) { | ||
200 | - *prot |= PAGE_EXEC; | ||
201 | - } | ||
202 | - if (!(*prot & (1 << access_type))) { | ||
203 | - /* Access permission fault. */ | ||
204 | - fi->type = ARMFault_Permission; | ||
205 | - goto do_fault; | ||
206 | - } | ||
207 | - } | ||
208 | - if (ns) { | ||
209 | - /* The NS bit will (as required by the architecture) have no effect if | ||
210 | - * the CPU doesn't support TZ or this is a non-secure translation | ||
211 | - * regime, because the attribute will already be non-secure. | ||
212 | - */ | ||
213 | - attrs->secure = false; | ||
214 | - } | ||
215 | - *phys_ptr = phys_addr; | ||
216 | - return false; | ||
217 | -do_fault: | ||
218 | - fi->domain = domain; | ||
219 | - fi->level = level; | ||
220 | - return true; | ||
221 | -} | ||
222 | - | ||
223 | /* | ||
224 | * check_s2_mmu_setup | ||
225 | * @cpu: ARMCPU | ||
226 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
227 | index XXXXXXX..XXXXXXX 100644 | ||
228 | --- a/target/arm/ptw.c | ||
229 | +++ b/target/arm/ptw.c | ||
230 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
231 | return true; | 28 | return true; |
232 | } | 29 | } |
233 | 30 | ||
234 | +static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | 31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
235 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | 32 | index XXXXXXX..XXXXXXX 100644 |
236 | + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | 33 | --- a/target/arm/tcg/vec_helper.c |
237 | + target_ulong *page_size, ARMMMUFaultInfo *fi) | 34 | +++ b/target/arm/tcg/vec_helper.c |
238 | +{ | 35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, |
239 | + CPUState *cs = env_cpu(env); | 36 | uintptr_t opr_sz = simd_oprsz(desc); |
240 | + ARMCPU *cpu = env_archcpu(env); | 37 | float16 *d = vd, *n = vn, *m = vm, *a = va; |
241 | + int level = 1; | 38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
242 | + uint32_t table; | 39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
243 | + uint32_t desc; | 40 | - uint32_t neg_real = flip ^ neg_imag; |
244 | + uint32_t xn; | 41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); |
245 | + uint32_t pxn = 0; | 42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
246 | + int type; | 43 | + uint32_t negf_real = flip ^ negf_imag; |
247 | + int ap; | 44 | + float16 negx_imag, negx_real; |
248 | + int domain = 0; | 45 | uintptr_t i; |
249 | + int domain_prot; | 46 | |
250 | + hwaddr phys_addr; | 47 | - /* Shift boolean to the sign bit so we can xor to negate. */ |
251 | + uint32_t dacr; | 48 | - neg_real <<= 15; |
252 | + bool ns; | 49 | - neg_imag <<= 15; |
253 | + | 50 | + /* With AH=0, use negx; with AH=1 use negf. */ |
254 | + /* Pagetable walk. */ | 51 | + negx_real = (negf_real & ~fpcr_ah) << 15; |
255 | + /* Lookup l1 descriptor. */ | 52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; |
256 | + if (!get_level1_table_address(env, mmu_idx, &table, address)) { | 53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); |
257 | + /* Section translation fault if page walk is disabled by PD0 or PD1 */ | 54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); |
258 | + fi->type = ARMFault_Translation; | 55 | |
259 | + goto do_fault; | 56 | for (i = 0; i < opr_sz / 2; i += 2) { |
260 | + } | 57 | float16 e2 = n[H2(i + flip)]; |
261 | + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | 58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; |
262 | + mmu_idx, fi); | 59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; |
263 | + if (fi->type != ARMFault_None) { | 60 | float16 e4 = e2; |
264 | + goto do_fault; | 61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; |
265 | + } | 62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; |
266 | + type = (desc & 3); | 63 | |
267 | + if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) { | 64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); |
268 | + /* Section translation fault, or attempt to use the encoding | 65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); |
269 | + * which is Reserved on implementations without PXN. | 66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); |
270 | + */ | 67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); |
271 | + fi->type = ARMFault_Translation; | 68 | } |
272 | + goto do_fault; | 69 | clear_tail(d, opr_sz, simd_maxsz(desc)); |
273 | + } | 70 | } |
274 | + if ((type == 1) || !(desc & (1 << 18))) { | 71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, |
275 | + /* Page or Section. */ | 72 | uintptr_t opr_sz = simd_oprsz(desc); |
276 | + domain = (desc >> 5) & 0x0f; | 73 | float32 *d = vd, *n = vn, *m = vm, *a = va; |
277 | + } | 74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
278 | + if (regime_el(env, mmu_idx) == 1) { | 75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
279 | + dacr = env->cp15.dacr_ns; | 76 | - uint32_t neg_real = flip ^ neg_imag; |
280 | + } else { | 77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); |
281 | + dacr = env->cp15.dacr_s; | 78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
282 | + } | 79 | + uint32_t negf_real = flip ^ negf_imag; |
283 | + if (type == 1) { | 80 | + float32 negx_imag, negx_real; |
284 | + level = 2; | 81 | uintptr_t i; |
285 | + } | 82 | |
286 | + domain_prot = (dacr >> (domain * 2)) & 3; | 83 | - /* Shift boolean to the sign bit so we can xor to negate. */ |
287 | + if (domain_prot == 0 || domain_prot == 2) { | 84 | - neg_real <<= 31; |
288 | + /* Section or Page domain fault */ | 85 | - neg_imag <<= 31; |
289 | + fi->type = ARMFault_Domain; | 86 | + /* With AH=0, use negx; with AH=1 use negf. */ |
290 | + goto do_fault; | 87 | + negx_real = (negf_real & ~fpcr_ah) << 31; |
291 | + } | 88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; |
292 | + if (type != 1) { | 89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); |
293 | + if (desc & (1 << 18)) { | 90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); |
294 | + /* Supersection. */ | 91 | |
295 | + phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); | 92 | for (i = 0; i < opr_sz / 4; i += 2) { |
296 | + phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; | 93 | float32 e2 = n[H4(i + flip)]; |
297 | + phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; | 94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; |
298 | + *page_size = 0x1000000; | 95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; |
299 | + } else { | 96 | float32 e4 = e2; |
300 | + /* Section. */ | 97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; |
301 | + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); | 98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; |
302 | + *page_size = 0x100000; | 99 | |
303 | + } | 100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); |
304 | + ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); | 101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); |
305 | + xn = desc & (1 << 4); | 102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); |
306 | + pxn = desc & 1; | 103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); |
307 | + ns = extract32(desc, 19, 1); | 104 | } |
308 | + } else { | 105 | clear_tail(d, opr_sz, simd_maxsz(desc)); |
309 | + if (cpu_isar_feature(aa32_pxn, cpu)) { | 106 | } |
310 | + pxn = (desc >> 2) & 1; | 107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, |
311 | + } | 108 | uintptr_t opr_sz = simd_oprsz(desc); |
312 | + ns = extract32(desc, 3, 1); | 109 | float64 *d = vd, *n = vn, *m = vm, *a = va; |
313 | + /* Lookup l2 entry. */ | 110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
314 | + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); | 111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
315 | + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | 112 | - uint64_t neg_real = flip ^ neg_imag; |
316 | + mmu_idx, fi); | 113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); |
317 | + if (fi->type != ARMFault_None) { | 114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
318 | + goto do_fault; | 115 | + uint32_t negf_real = flip ^ negf_imag; |
319 | + } | 116 | + float64 negx_real, negx_imag; |
320 | + ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); | 117 | uintptr_t i; |
321 | + switch (desc & 3) { | 118 | |
322 | + case 0: /* Page translation fault. */ | 119 | - /* Shift boolean to the sign bit so we can xor to negate. */ |
323 | + fi->type = ARMFault_Translation; | 120 | - neg_real <<= 63; |
324 | + goto do_fault; | 121 | - neg_imag <<= 63; |
325 | + case 1: /* 64k page. */ | 122 | + /* With AH=0, use negx; with AH=1 use negf. */ |
326 | + phys_addr = (desc & 0xffff0000) | (address & 0xffff); | 123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; |
327 | + xn = desc & (1 << 15); | 124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; |
328 | + *page_size = 0x10000; | 125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); |
329 | + break; | 126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); |
330 | + case 2: case 3: /* 4k page. */ | 127 | |
331 | + phys_addr = (desc & 0xfffff000) | (address & 0xfff); | 128 | for (i = 0; i < opr_sz / 8; i += 2) { |
332 | + xn = desc & 1; | 129 | float64 e2 = n[i + flip]; |
333 | + *page_size = 0x1000; | 130 | - float64 e1 = m[i + flip] ^ neg_real; |
334 | + break; | 131 | + float64 e1 = m[i + flip] ^ negx_real; |
335 | + default: | 132 | float64 e4 = e2; |
336 | + /* Never happens, but compiler isn't smart enough to tell. */ | 133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; |
337 | + g_assert_not_reached(); | 134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; |
338 | + } | 135 | |
339 | + } | 136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); |
340 | + if (domain_prot == 3) { | 137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); |
341 | + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; | 138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); |
342 | + } else { | 139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); |
343 | + if (pxn && !regime_is_user(env, mmu_idx)) { | 140 | } |
344 | + xn = 1; | 141 | clear_tail(d, opr_sz, simd_maxsz(desc)); |
345 | + } | 142 | } |
346 | + if (xn && access_type == MMU_INST_FETCH) { | ||
347 | + fi->type = ARMFault_Permission; | ||
348 | + goto do_fault; | ||
349 | + } | ||
350 | + | ||
351 | + if (arm_feature(env, ARM_FEATURE_V6K) && | ||
352 | + (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { | ||
353 | + /* The simplified model uses AP[0] as an access control bit. */ | ||
354 | + if ((ap & 1) == 0) { | ||
355 | + /* Access flag fault. */ | ||
356 | + fi->type = ARMFault_AccessFlag; | ||
357 | + goto do_fault; | ||
358 | + } | ||
359 | + *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); | ||
360 | + } else { | ||
361 | + *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); | ||
362 | + } | ||
363 | + if (*prot && !xn) { | ||
364 | + *prot |= PAGE_EXEC; | ||
365 | + } | ||
366 | + if (!(*prot & (1 << access_type))) { | ||
367 | + /* Access permission fault. */ | ||
368 | + fi->type = ARMFault_Permission; | ||
369 | + goto do_fault; | ||
370 | + } | ||
371 | + } | ||
372 | + if (ns) { | ||
373 | + /* The NS bit will (as required by the architecture) have no effect if | ||
374 | + * the CPU doesn't support TZ or this is a non-secure translation | ||
375 | + * regime, because the attribute will already be non-secure. | ||
376 | + */ | ||
377 | + attrs->secure = false; | ||
378 | + } | ||
379 | + *phys_ptr = phys_addr; | ||
380 | + return false; | ||
381 | +do_fault: | ||
382 | + fi->domain = domain; | ||
383 | + fi->level = level; | ||
384 | + return true; | ||
385 | +} | ||
386 | + | ||
387 | /** | ||
388 | * get_phys_addr - get the physical address for this virtual address | ||
389 | * | ||
390 | -- | 143 | -- |
391 | 2.25.1 | 144 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The negation step in FCMLA by index mustn't negate a NaN when | ||
4 | FPCR.AH is set. Use the same approach as vector FCMLA of | ||
5 | passing in FPCR.AH and using it to select whether to negate | ||
6 | by XOR or by the muladd negate_product flag. | ||
7 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-19-richard.henderson@linaro.org | 9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org |
10 | [PMM: Expanded commit message] | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 13 | --- |
8 | target/arm/ptw.h | 2 -- | 14 | target/arm/tcg/translate-a64.c | 2 +- |
9 | target/arm/helper.c | 25 ------------------------- | 15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- |
10 | target/arm/ptw.c | 25 +++++++++++++++++++++++++ | 16 | 2 files changed, 27 insertions(+), 19 deletions(-) |
11 | 3 files changed, 25 insertions(+), 27 deletions(-) | ||
12 | 17 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
14 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 20 | --- a/target/arm/tcg/translate-a64.c |
16 | +++ b/target/arm/ptw.h | 21 | +++ b/target/arm/tcg/translate-a64.c |
17 | @@ -XXX,XX +XXX,XX @@ | 22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) |
18 | 23 | if (fp_access_check(s)) { | |
19 | #ifndef CONFIG_USER_ONLY | 24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, |
20 | 25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | |
21 | -extern const uint8_t pamax_map[7]; | 26 | - (a->idx << 2) | a->rot, fn); |
22 | - | 27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); |
23 | bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 28 | } |
24 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 29 | return true; |
25 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 30 | } |
26 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
27 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/arm/helper.c | 33 | --- a/target/arm/tcg/vec_helper.c |
29 | +++ b/target/arm/helper.c | 34 | +++ b/target/arm/tcg/vec_helper.c |
30 | @@ -XXX,XX +XXX,XX @@ bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, | 35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, |
31 | } | 36 | uintptr_t opr_sz = simd_oprsz(desc); |
32 | #endif /* !CONFIG_USER_ONLY */ | 37 | float16 *d = vd, *n = vn, *m = vm, *a = va; |
33 | 38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | |
34 | -/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ | 39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
35 | -const uint8_t pamax_map[] = { | 40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
36 | - [0] = 32, | 41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); |
37 | - [1] = 36, | 42 | - uint32_t neg_real = flip ^ neg_imag; |
38 | - [2] = 40, | 43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); |
39 | - [3] = 42, | 44 | + uint32_t negf_real = flip ^ negf_imag; |
40 | - [4] = 44, | 45 | intptr_t elements = opr_sz / sizeof(float16); |
41 | - [5] = 48, | 46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); |
42 | - [6] = 52, | 47 | + float16 negx_imag, negx_real; |
43 | -}; | 48 | intptr_t i, j; |
44 | - | 49 | |
45 | -/* The cpu-specific constant value of PAMax; also used by hw/arm/virt. */ | 50 | - /* Shift boolean to the sign bit so we can xor to negate. */ |
46 | -unsigned int arm_pamax(ARMCPU *cpu) | 51 | - neg_real <<= 15; |
47 | -{ | 52 | - neg_imag <<= 15; |
48 | - unsigned int parange = | 53 | + /* With AH=0, use negx; with AH=1 use negf. */ |
49 | - FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); | 54 | + negx_real = (negf_real & ~fpcr_ah) << 15; |
50 | - | 55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; |
51 | - /* | 56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); |
52 | - * id_aa64mmfr0 is a read-only register so values outside of the | 57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); |
53 | - * supported mappings can be considered an implementation error. | 58 | |
54 | - */ | 59 | for (i = 0; i < elements; i += eltspersegment) { |
55 | - assert(parange < ARRAY_SIZE(pamax_map)); | 60 | float16 mr = m[H2(i + 2 * index + 0)]; |
56 | - return pamax_map[parange]; | 61 | float16 mi = m[H2(i + 2 * index + 1)]; |
57 | -} | 62 | - float16 e1 = neg_real ^ (flip ? mi : mr); |
58 | - | 63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); |
59 | int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | 64 | + float16 e1 = negx_real ^ (flip ? mi : mr); |
60 | { | 65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); |
61 | if (regime_has_2_ranges(mmu_idx)) { | 66 | |
62 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 67 | for (j = i; j < i + eltspersegment; j += 2) { |
63 | index XXXXXXX..XXXXXXX 100644 | 68 | float16 e2 = n[H2(j + flip)]; |
64 | --- a/target/arm/ptw.c | 69 | float16 e4 = e2; |
65 | +++ b/target/arm/ptw.c | 70 | |
66 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); |
67 | ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | 72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); |
68 | __attribute__((nonnull)); | 73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); |
69 | 74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | |
70 | +/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ | 75 | } |
71 | +static const uint8_t pamax_map[] = { | 76 | } |
72 | + [0] = 32, | 77 | clear_tail(d, opr_sz, simd_maxsz(desc)); |
73 | + [1] = 36, | 78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, |
74 | + [2] = 40, | 79 | uintptr_t opr_sz = simd_oprsz(desc); |
75 | + [3] = 42, | 80 | float32 *d = vd, *n = vn, *m = vm, *a = va; |
76 | + [4] = 44, | 81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
77 | + [5] = 48, | 82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
78 | + [6] = 52, | 83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
79 | +}; | 84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); |
80 | + | 85 | - uint32_t neg_real = flip ^ neg_imag; |
81 | +/* The cpu-specific constant value of PAMax; also used by hw/arm/virt. */ | 86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); |
82 | +unsigned int arm_pamax(ARMCPU *cpu) | 87 | + uint32_t negf_real = flip ^ negf_imag; |
83 | +{ | 88 | intptr_t elements = opr_sz / sizeof(float32); |
84 | + unsigned int parange = | 89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); |
85 | + FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); | 90 | + float32 negx_imag, negx_real; |
86 | + | 91 | intptr_t i, j; |
87 | + /* | 92 | |
88 | + * id_aa64mmfr0 is a read-only register so values outside of the | 93 | - /* Shift boolean to the sign bit so we can xor to negate. */ |
89 | + * supported mappings can be considered an implementation error. | 94 | - neg_real <<= 31; |
90 | + */ | 95 | - neg_imag <<= 31; |
91 | + assert(parange < ARRAY_SIZE(pamax_map)); | 96 | + /* With AH=0, use negx; with AH=1 use negf. */ |
92 | + return pamax_map[parange]; | 97 | + negx_real = (negf_real & ~fpcr_ah) << 31; |
93 | +} | 98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; |
94 | + | 99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); |
95 | static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) | 100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); |
96 | { | 101 | |
97 | return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; | 102 | for (i = 0; i < elements; i += eltspersegment) { |
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
98 | -- | 121 | -- |
99 | 2.25.1 | 122 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | These functions are used for both page table walking and for | 3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is |
4 | deciding the format in which to deliver exception results. | 4 | set. Use the same approach as we did for A64 FCMLA of passing in |
5 | Since ptw.c is only present for system mode, put the functions | 5 | FPCR.AH and using it to select whether to negate by XOR or by the |
6 | into tlb_helper.c. | 6 | muladd negate_product flag. |
7 | 7 | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220604040607.269301-18-richard.henderson@linaro.org | 9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org |
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 12 | --- |
13 | target/arm/helper.c | 24 ------------------------ | 13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- |
14 | target/arm/tlb_helper.c | 26 ++++++++++++++++++++++++++ | 14 | target/arm/tcg/translate-sve.c | 2 +- |
15 | 2 files changed, 26 insertions(+), 24 deletions(-) | 15 | 2 files changed, 43 insertions(+), 28 deletions(-) |
16 | 16 | ||
17 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/helper.c | 19 | --- a/target/arm/tcg/sve_helper.c |
20 | +++ b/target/arm/helper.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
22 | } | 22 | void *vg, float_status *status, uint32_t desc) |
23 | #endif /* !CONFIG_USER_ONLY */ | ||
24 | |||
25 | -/* Return true if the translation regime is using LPAE format page tables */ | ||
26 | -bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
27 | -{ | ||
28 | - int el = regime_el(env, mmu_idx); | ||
29 | - if (el == 2 || arm_el_is_aa64(env, el)) { | ||
30 | - return true; | ||
31 | - } | ||
32 | - if (arm_feature(env, ARM_FEATURE_LPAE) | ||
33 | - && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) { | ||
34 | - return true; | ||
35 | - } | ||
36 | - return false; | ||
37 | -} | ||
38 | - | ||
39 | -/* Returns true if the stage 1 translation regime is using LPAE format page | ||
40 | - * tables. Used when raising alignment exceptions, whose FSR changes depending | ||
41 | - * on whether the long or short descriptor format is in use. */ | ||
42 | -bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
43 | -{ | ||
44 | - mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
45 | - | ||
46 | - return regime_using_lpae_format(env, mmu_idx); | ||
47 | -} | ||
48 | - | ||
49 | #ifndef CONFIG_USER_ONLY | ||
50 | bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
51 | { | 23 | { |
52 | diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c | 24 | intptr_t j, i = simd_oprsz(desc); |
25 | - unsigned rot = simd_data(desc); | ||
26 | - bool flip = rot & 1; | ||
27 | - float16 neg_imag, neg_real; | ||
28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | 163 | index XXXXXXX..XXXXXXX 100644 |
54 | --- a/target/arm/tlb_helper.c | 164 | --- a/target/arm/tcg/translate-sve.c |
55 | +++ b/target/arm/tlb_helper.c | 165 | +++ b/target/arm/tcg/translate-sve.c |
56 | @@ -XXX,XX +XXX,XX @@ | 166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { |
57 | #include "exec/exec-all.h" | 167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, |
58 | #include "exec/helper-proto.h" | 168 | }; |
59 | 169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | |
60 | + | 170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, |
61 | +/* Return true if the translation regime is using LPAE format page tables */ | 171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), |
62 | +bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | 172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
63 | +{ | 173 | |
64 | + int el = regime_el(env, mmu_idx); | 174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { |
65 | + if (el == 2 || arm_el_is_aa64(env, el)) { | ||
66 | + return true; | ||
67 | + } | ||
68 | + if (arm_feature(env, ARM_FEATURE_LPAE) | ||
69 | + && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) { | ||
70 | + return true; | ||
71 | + } | ||
72 | + return false; | ||
73 | +} | ||
74 | + | ||
75 | +/* | ||
76 | + * Returns true if the stage 1 translation regime is using LPAE format page | ||
77 | + * tables. Used when raising alignment exceptions, whose FSR changes depending | ||
78 | + * on whether the long or short descriptor format is in use. | ||
79 | + */ | ||
80 | +bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
81 | +{ | ||
82 | + mmu_idx = stage_1_mmu_idx(mmu_idx); | ||
83 | + return regime_using_lpae_format(env, mmu_idx); | ||
84 | +} | ||
85 | + | ||
86 | static inline uint32_t merge_syn_data_abort(uint32_t template_syn, | ||
87 | unsigned int target_el, | ||
88 | bool same_el, bool ea, | ||
89 | -- | 175 | -- |
90 | 2.25.1 | 176 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This is the final user of get_phys_addr_pmsav7_default | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN |
4 | within helper.c, so make it static within ptw.c. | 4 | in FMLSL by element and vector, using the usual trick of |
5 | negating by XOR when AH=0 and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
5 | 10 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20220604040607.269301-10-richard.henderson@linaro.org | 12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org |
13 | [PMM: commit message tweaked] | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 16 | --- |
11 | target/arm/ptw.h | 3 - | 17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- |
12 | target/arm/helper.c | 136 ----------------------------------------- | 18 | 1 file changed, 46 insertions(+), 25 deletions(-) |
13 | target/arm/ptw.c | 146 +++++++++++++++++++++++++++++++++++++++++++- | ||
14 | 3 files changed, 143 insertions(+), 142 deletions(-) | ||
15 | 19 | ||
16 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/ptw.h | 22 | --- a/target/arm/tcg/vec_helper.c |
19 | +++ b/target/arm/ptw.h | 23 | +++ b/target/arm/tcg/vec_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
21 | bool m_is_ppb_region(CPUARMState *env, uint32_t address); | 25 | */ |
22 | bool m_is_system_region(CPUARMState *env, uint32_t address); | 26 | |
23 | 27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | |
24 | -void get_phys_addr_pmsav7_default(CPUARMState *env, | 28 | - uint32_t desc, bool fz16) |
25 | - ARMMMUIdx mmu_idx, | 29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) |
26 | - int32_t address, int *prot); | 30 | { |
27 | bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user); | 31 | intptr_t i, oprsz = simd_oprsz(desc); |
28 | 32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | |
29 | bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | 33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
30 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 34 | int is_q = oprsz == 16; |
31 | index XXXXXXX..XXXXXXX 100644 | 35 | uint64_t n_4, m_4; |
32 | --- a/target/arm/helper.c | 36 | |
33 | +++ b/target/arm/helper.c | 37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ |
34 | @@ -XXX,XX +XXX,XX @@ void v8m_security_lookup(CPUARMState *env, uint32_t address, | 38 | - n_4 = load4_f16(vn, is_q, is_2); |
35 | } | 39 | + /* |
36 | } | 40 | + * Pre-load all of the f16 data, avoiding overlap issues. |
37 | 41 | + * Negate all inputs for AH=0 FMLSL at once. | |
38 | -bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | 42 | + */ |
39 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
40 | - hwaddr *phys_ptr, MemTxAttrs *txattrs, | 44 | m_4 = load4_f16(vm, is_q, is_2); |
41 | - int *prot, bool *is_subpage, | 45 | |
42 | - ARMMMUFaultInfo *fi, uint32_t *mregion) | 46 | - /* Negate all inputs for FMLSL at once. */ |
43 | -{ | 47 | - if (is_s) { |
44 | - /* Perform a PMSAv8 MPU lookup (without also doing the SAU check | 48 | - n_4 ^= 0x8000800080008000ull; |
45 | - * that a full phys-to-virt translation does). | ||
46 | - * mregion is (if not NULL) set to the region number which matched, | ||
47 | - * or -1 if no region number is returned (MPU off, address did not | ||
48 | - * hit a region, address hit in multiple regions). | ||
49 | - * We set is_subpage to true if the region hit doesn't cover the | ||
50 | - * entire TARGET_PAGE the address is within. | ||
51 | - */ | ||
52 | - ARMCPU *cpu = env_archcpu(env); | ||
53 | - bool is_user = regime_is_user(env, mmu_idx); | ||
54 | - uint32_t secure = regime_is_secure(env, mmu_idx); | ||
55 | - int n; | ||
56 | - int matchregion = -1; | ||
57 | - bool hit = false; | ||
58 | - uint32_t addr_page_base = address & TARGET_PAGE_MASK; | ||
59 | - uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); | ||
60 | - | ||
61 | - *is_subpage = false; | ||
62 | - *phys_ptr = address; | ||
63 | - *prot = 0; | ||
64 | - if (mregion) { | ||
65 | - *mregion = -1; | ||
66 | - } | 49 | - } |
67 | - | 50 | - |
68 | - /* Unlike the ARM ARM pseudocode, we don't need to check whether this | 51 | for (i = 0; i < oprsz / 4; i++) { |
69 | - * was an exception vector read from the vector table (which is always | 52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); |
70 | - * done using the default system address map), because those accesses | 53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); |
71 | - * are done in arm_v7m_load_vector(), which always does a direct | 54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); |
72 | - * read using address_space_ldl(), rather than going via this function. | 55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); |
73 | - */ | 56 | } |
74 | - if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */ | 57 | clear_tail(d, oprsz, simd_maxsz(desc)); |
75 | - hit = true; | 58 | } |
76 | - } else if (m_is_ppb_region(env, address)) { | 59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
77 | - hit = true; | 60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
78 | - } else { | 61 | CPUARMState *env, uint32_t desc) |
79 | - if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) { | 62 | { |
80 | - hit = true; | 63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, |
81 | - } | 64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
66 | + | ||
67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
69 | } | ||
70 | |||
71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
72 | CPUARMState *env, uint32_t desc) | ||
73 | { | ||
74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint64_t negx = 0; | ||
77 | + int negf = 0; | ||
78 | + | ||
79 | + if (is_s) { | ||
80 | + if (env->vfp.fpcr & FPCR_AH) { | ||
81 | + negf = float_muladd_negate_product; | ||
82 | + } else { | ||
83 | + negx = 0x8000800080008000ull; | ||
84 | + } | ||
85 | + } | ||
86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | } | ||
92 | |||
93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
94 | - uint32_t desc, bool fz16) | ||
95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
96 | { | ||
97 | intptr_t i, oprsz = simd_oprsz(desc); | ||
98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
101 | int is_q = oprsz == 16; | ||
102 | uint64_t n_4; | ||
103 | float32 m_1; | ||
104 | |||
105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
106 | - n_4 = load4_f16(vn, is_q, is_2); | ||
82 | - | 107 | - |
83 | - for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { | 108 | - /* Negate all inputs for FMLSL at once. */ |
84 | - /* region search */ | 109 | - if (is_s) { |
85 | - /* Note that the base address is bits [31:5] from the register | 110 | - n_4 ^= 0x8000800080008000ull; |
86 | - * with bits [4:0] all zeroes, but the limit address is bits | ||
87 | - * [31:5] from the register with bits [4:0] all ones. | ||
88 | - */ | ||
89 | - uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f; | ||
90 | - uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f; | ||
91 | - | ||
92 | - if (!(env->pmsav8.rlar[secure][n] & 0x1)) { | ||
93 | - /* Region disabled */ | ||
94 | - continue; | ||
95 | - } | ||
96 | - | ||
97 | - if (address < base || address > limit) { | ||
98 | - /* | ||
99 | - * Address not in this region. We must check whether the | ||
100 | - * region covers addresses in the same page as our address. | ||
101 | - * In that case we must not report a size that covers the | ||
102 | - * whole page for a subsequent hit against a different MPU | ||
103 | - * region or the background region, because it would result in | ||
104 | - * incorrect TLB hits for subsequent accesses to addresses that | ||
105 | - * are in this MPU region. | ||
106 | - */ | ||
107 | - if (limit >= base && | ||
108 | - ranges_overlap(base, limit - base + 1, | ||
109 | - addr_page_base, | ||
110 | - TARGET_PAGE_SIZE)) { | ||
111 | - *is_subpage = true; | ||
112 | - } | ||
113 | - continue; | ||
114 | - } | ||
115 | - | ||
116 | - if (base > addr_page_base || limit < addr_page_limit) { | ||
117 | - *is_subpage = true; | ||
118 | - } | ||
119 | - | ||
120 | - if (matchregion != -1) { | ||
121 | - /* Multiple regions match -- always a failure (unlike | ||
122 | - * PMSAv7 where highest-numbered-region wins) | ||
123 | - */ | ||
124 | - fi->type = ARMFault_Permission; | ||
125 | - fi->level = 1; | ||
126 | - return true; | ||
127 | - } | ||
128 | - | ||
129 | - matchregion = n; | ||
130 | - hit = true; | ||
131 | - } | ||
132 | - } | 111 | - } |
133 | - | 112 | - |
134 | - if (!hit) { | 113 | + /* |
135 | - /* background fault */ | 114 | + * Pre-load all of the f16 data, avoiding overlap issues. |
136 | - fi->type = ARMFault_Background; | 115 | + * Negate all inputs for AH=0 FMLSL at once. |
137 | - return true; | 116 | + */ |
138 | - } | 117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
139 | - | 118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); |
140 | - if (matchregion == -1) { | 119 | |
141 | - /* hit using the background region */ | 120 | for (i = 0; i < oprsz / 4; i++) { |
142 | - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | 121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); |
143 | - } else { | 122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); |
144 | - uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2); | 123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); |
145 | - uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1); | 124 | } |
146 | - bool pxn = false; | 125 | clear_tail(d, oprsz, simd_maxsz(desc)); |
147 | - | ||
148 | - if (arm_feature(env, ARM_FEATURE_V8_1M)) { | ||
149 | - pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1); | ||
150 | - } | ||
151 | - | ||
152 | - if (m_is_system_region(env, address)) { | ||
153 | - /* System space is always execute never */ | ||
154 | - xn = 1; | ||
155 | - } | ||
156 | - | ||
157 | - *prot = simple_ap_to_rw_prot(env, mmu_idx, ap); | ||
158 | - if (*prot && !xn && !(pxn && !is_user)) { | ||
159 | - *prot |= PAGE_EXEC; | ||
160 | - } | ||
161 | - /* We don't need to look the attribute up in the MAIR0/MAIR1 | ||
162 | - * registers because that only tells us about cacheability. | ||
163 | - */ | ||
164 | - if (mregion) { | ||
165 | - *mregion = matchregion; | ||
166 | - } | ||
167 | - } | ||
168 | - | ||
169 | - fi->type = ARMFault_Permission; | ||
170 | - fi->level = 1; | ||
171 | - return !(*prot & (1 << access_type)); | ||
172 | -} | ||
173 | - | ||
174 | /* Combine either inner or outer cacheability attributes for normal | ||
175 | * memory, according to table D4-42 and pseudocode procedure | ||
176 | * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). | ||
177 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
178 | index XXXXXXX..XXXXXXX 100644 | ||
179 | --- a/target/arm/ptw.c | ||
180 | +++ b/target/arm/ptw.c | ||
181 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
182 | return false; | ||
183 | } | 126 | } |
184 | 127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | |
185 | -void get_phys_addr_pmsav7_default(CPUARMState *env, | 128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
186 | - ARMMMUIdx mmu_idx, | 129 | CPUARMState *env, uint32_t desc) |
187 | - int32_t address, int *prot) | ||
188 | +static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
189 | + int32_t address, int *prot) | ||
190 | { | 130 | { |
191 | if (!arm_feature(env, ARM_FEATURE_M)) { | 131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, |
192 | *prot = PAGE_READ | PAGE_WRITE; | 132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
193 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
194 | return !(*prot & (1 << access_type)); | 134 | + |
135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
195 | } | 137 | } |
196 | 138 | ||
197 | +bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, | 139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
198 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | 140 | CPUARMState *env, uint32_t desc) |
199 | + hwaddr *phys_ptr, MemTxAttrs *txattrs, | 141 | { |
200 | + int *prot, bool *is_subpage, | 142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, |
201 | + ARMMMUFaultInfo *fi, uint32_t *mregion) | 143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
202 | +{ | 144 | + uint64_t negx = 0; |
203 | + /* | 145 | + int negf = 0; |
204 | + * Perform a PMSAv8 MPU lookup (without also doing the SAU check | ||
205 | + * that a full phys-to-virt translation does). | ||
206 | + * mregion is (if not NULL) set to the region number which matched, | ||
207 | + * or -1 if no region number is returned (MPU off, address did not | ||
208 | + * hit a region, address hit in multiple regions). | ||
209 | + * We set is_subpage to true if the region hit doesn't cover the | ||
210 | + * entire TARGET_PAGE the address is within. | ||
211 | + */ | ||
212 | + ARMCPU *cpu = env_archcpu(env); | ||
213 | + bool is_user = regime_is_user(env, mmu_idx); | ||
214 | + uint32_t secure = regime_is_secure(env, mmu_idx); | ||
215 | + int n; | ||
216 | + int matchregion = -1; | ||
217 | + bool hit = false; | ||
218 | + uint32_t addr_page_base = address & TARGET_PAGE_MASK; | ||
219 | + uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); | ||
220 | + | 146 | + |
221 | + *is_subpage = false; | 147 | + if (is_s) { |
222 | + *phys_ptr = address; | 148 | + if (env->vfp.fpcr & FPCR_AH) { |
223 | + *prot = 0; | 149 | + negf = float_muladd_negate_product; |
224 | + if (mregion) { | 150 | + } else { |
225 | + *mregion = -1; | 151 | + negx = 0x8000800080008000ull; |
226 | + } | ||
227 | + | ||
228 | + /* | ||
229 | + * Unlike the ARM ARM pseudocode, we don't need to check whether this | ||
230 | + * was an exception vector read from the vector table (which is always | ||
231 | + * done using the default system address map), because those accesses | ||
232 | + * are done in arm_v7m_load_vector(), which always does a direct | ||
233 | + * read using address_space_ldl(), rather than going via this function. | ||
234 | + */ | ||
235 | + if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */ | ||
236 | + hit = true; | ||
237 | + } else if (m_is_ppb_region(env, address)) { | ||
238 | + hit = true; | ||
239 | + } else { | ||
240 | + if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) { | ||
241 | + hit = true; | ||
242 | + } | ||
243 | + | ||
244 | + for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { | ||
245 | + /* region search */ | ||
246 | + /* | ||
247 | + * Note that the base address is bits [31:5] from the register | ||
248 | + * with bits [4:0] all zeroes, but the limit address is bits | ||
249 | + * [31:5] from the register with bits [4:0] all ones. | ||
250 | + */ | ||
251 | + uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f; | ||
252 | + uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f; | ||
253 | + | ||
254 | + if (!(env->pmsav8.rlar[secure][n] & 0x1)) { | ||
255 | + /* Region disabled */ | ||
256 | + continue; | ||
257 | + } | ||
258 | + | ||
259 | + if (address < base || address > limit) { | ||
260 | + /* | ||
261 | + * Address not in this region. We must check whether the | ||
262 | + * region covers addresses in the same page as our address. | ||
263 | + * In that case we must not report a size that covers the | ||
264 | + * whole page for a subsequent hit against a different MPU | ||
265 | + * region or the background region, because it would result in | ||
266 | + * incorrect TLB hits for subsequent accesses to addresses that | ||
267 | + * are in this MPU region. | ||
268 | + */ | ||
269 | + if (limit >= base && | ||
270 | + ranges_overlap(base, limit - base + 1, | ||
271 | + addr_page_base, | ||
272 | + TARGET_PAGE_SIZE)) { | ||
273 | + *is_subpage = true; | ||
274 | + } | ||
275 | + continue; | ||
276 | + } | ||
277 | + | ||
278 | + if (base > addr_page_base || limit < addr_page_limit) { | ||
279 | + *is_subpage = true; | ||
280 | + } | ||
281 | + | ||
282 | + if (matchregion != -1) { | ||
283 | + /* | ||
284 | + * Multiple regions match -- always a failure (unlike | ||
285 | + * PMSAv7 where highest-numbered-region wins) | ||
286 | + */ | ||
287 | + fi->type = ARMFault_Permission; | ||
288 | + fi->level = 1; | ||
289 | + return true; | ||
290 | + } | ||
291 | + | ||
292 | + matchregion = n; | ||
293 | + hit = true; | ||
294 | + } | 152 | + } |
295 | + } | 153 | + } |
296 | + | 154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
297 | + if (!hit) { | 155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
298 | + /* background fault */ | 156 | } |
299 | + fi->type = ARMFault_Background; | 157 | |
300 | + return true; | ||
301 | + } | ||
302 | + | ||
303 | + if (matchregion == -1) { | ||
304 | + /* hit using the background region */ | ||
305 | + get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | ||
306 | + } else { | ||
307 | + uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2); | ||
308 | + uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1); | ||
309 | + bool pxn = false; | ||
310 | + | ||
311 | + if (arm_feature(env, ARM_FEATURE_V8_1M)) { | ||
312 | + pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1); | ||
313 | + } | ||
314 | + | ||
315 | + if (m_is_system_region(env, address)) { | ||
316 | + /* System space is always execute never */ | ||
317 | + xn = 1; | ||
318 | + } | ||
319 | + | ||
320 | + *prot = simple_ap_to_rw_prot(env, mmu_idx, ap); | ||
321 | + if (*prot && !xn && !(pxn && !is_user)) { | ||
322 | + *prot |= PAGE_EXEC; | ||
323 | + } | ||
324 | + /* | ||
325 | + * We don't need to look the attribute up in the MAIR0/MAIR1 | ||
326 | + * registers because that only tells us about cacheability. | ||
327 | + */ | ||
328 | + if (mregion) { | ||
329 | + *mregion = matchregion; | ||
330 | + } | ||
331 | + } | ||
332 | + | ||
333 | + fi->type = ARMFault_Permission; | ||
334 | + fi->level = 1; | ||
335 | + return !(*prot & (1 << access_type)); | ||
336 | +} | ||
337 | + | ||
338 | static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
339 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
340 | hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
341 | -- | 158 | -- |
342 | 2.25.1 | 159 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE | ||
4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 | ||
5 | and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-27-richard.henderson@linaro.org | 12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org |
13 | [PMM: commit message tweaked] | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 16 | --- |
8 | target/arm/helper.c | 26 -------------------------- | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
9 | target/arm/ptw.c | 24 ++++++++++++++++++++++++ | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
10 | 2 files changed, 24 insertions(+), 26 deletions(-) | ||
11 | 19 | ||
12 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
13 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/arm/helper.c | 22 | --- a/target/arm/tcg/vec_helper.c |
15 | +++ b/target/arm/helper.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
16 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
17 | }; | 25 | CPUARMState *env, uint32_t desc) |
18 | } | 26 | { |
19 | 27 | intptr_t i, j, oprsz = simd_oprsz(desc); | |
20 | -#ifndef CONFIG_USER_ONLY | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
21 | -hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
22 | - MemTxAttrs *attrs) | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
23 | -{ | 31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); |
24 | - ARMCPU *cpu = ARM_CPU(cs); | 32 | float_status *status = &env->vfp.fp_status_a64; |
25 | - CPUARMState *env = &cpu->env; | 33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
26 | - hwaddr phys_addr; | 34 | + int negx = 0, negf = 0; |
27 | - target_ulong page_size; | 35 | + |
28 | - int prot; | 36 | + if (is_s) { |
29 | - bool ret; | 37 | + if (env->vfp.fpcr & FPCR_AH) { |
30 | - ARMMMUFaultInfo fi = {}; | 38 | + negf = float_muladd_negate_product; |
31 | - ARMMMUIdx mmu_idx = arm_mmu_idx(env); | 39 | + } else { |
32 | - ARMCacheAttrs cacheattrs = {}; | 40 | + negx = 0x8000; |
33 | - | 41 | + } |
34 | - *attrs = (MemTxAttrs) {}; | 42 | + } |
35 | - | 43 | |
36 | - ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &phys_addr, | 44 | for (i = 0; i < oprsz; i += 16) { |
37 | - attrs, &prot, &page_size, &fi, &cacheattrs); | 45 | float16 mm_16 = *(float16 *)(vm + i + idx); |
38 | - | 46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); |
39 | - if (ret) { | 47 | |
40 | - return -1; | 48 | for (j = 0; j < 16; j += sizeof(float32)) { |
41 | - } | 49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; |
42 | - return phys_addr; | 50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; |
43 | -} | 51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); |
44 | -#endif | 52 | float32 aa = *(float32 *)(va + H1_4(i + j)); |
45 | - | 53 | |
46 | /* Note that signed overflow is undefined in C. The following routines are | 54 | *(float32 *)(vd + H1_4(i + j)) = |
47 | careful to use unsigned types where modulo arithmetic is required. | 55 | - float32_muladd(nn, mm, aa, 0, status); |
48 | Failure to do so _will_ break on newer gcc. */ | 56 | + float32_muladd(nn, mm, aa, negf, status); |
49 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 57 | } |
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/ptw.c | ||
52 | +++ b/target/arm/ptw.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address, | ||
54 | phys_ptr, prot, page_size, fi); | ||
55 | } | 58 | } |
56 | } | 59 | } |
57 | + | ||
58 | +hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, | ||
59 | + MemTxAttrs *attrs) | ||
60 | +{ | ||
61 | + ARMCPU *cpu = ARM_CPU(cs); | ||
62 | + CPUARMState *env = &cpu->env; | ||
63 | + hwaddr phys_addr; | ||
64 | + target_ulong page_size; | ||
65 | + int prot; | ||
66 | + bool ret; | ||
67 | + ARMMMUFaultInfo fi = {}; | ||
68 | + ARMMMUIdx mmu_idx = arm_mmu_idx(env); | ||
69 | + ARMCacheAttrs cacheattrs = {}; | ||
70 | + | ||
71 | + *attrs = (MemTxAttrs) {}; | ||
72 | + | ||
73 | + ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &phys_addr, | ||
74 | + attrs, &prot, &page_size, &fi, &cacheattrs); | ||
75 | + | ||
76 | + if (ret) { | ||
77 | + return -1; | ||
78 | + } | ||
79 | + return phys_addr; | ||
80 | +} | ||
81 | -- | 60 | -- |
82 | 2.25.1 | 61 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Move the decl from ptw.h to internals.h. Provide an inline | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | version for user-only, just as we do for arm_stage1_mmu_idx. | 4 | FMLSL (vectors), using the usual trick of negating by XOR when AH=0 |
5 | Move an endif down to make the definition in helper.c be | 5 | and by muladd flags when AH=1. |
6 | system only. | 6 | |
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
7 | 10 | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220604040607.269301-2-richard.henderson@linaro.org | 12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org |
13 | [PMM: tweaked commit message] | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 16 | --- |
13 | target/arm/internals.h | 5 +++++ | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
14 | target/arm/helper.c | 5 ++--- | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
15 | 2 files changed, 7 insertions(+), 3 deletions(-) | ||
16 | 19 | ||
17 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/internals.h | 22 | --- a/target/arm/tcg/vec_helper.c |
20 | +++ b/target/arm/internals.h | 23 | +++ b/target/arm/tcg/vec_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env); | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
22 | * Return the ARMMMUIdx for the stage1 traversal for the current regime. | 25 | CPUARMState *env, uint32_t desc) |
23 | */ | ||
24 | #ifdef CONFIG_USER_ONLY | ||
25 | +static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
26 | +{ | ||
27 | + return ARMMMUIdx_Stage1_E0; | ||
28 | +} | ||
29 | static inline ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) | ||
30 | { | 26 | { |
31 | return ARMMMUIdx_Stage1_E0; | 27 | intptr_t i, oprsz = simd_oprsz(desc); |
32 | } | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
33 | #else | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
34 | +ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx); | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
35 | ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env); | 31 | float_status *status = &env->vfp.fp_status_a64; |
36 | #endif | 32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
37 | 33 | + int negx = 0, negf = 0; | |
38 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 34 | + |
39 | index XXXXXXX..XXXXXXX 100644 | 35 | + if (is_s) { |
40 | --- a/target/arm/helper.c | 36 | + if (env->vfp.fpcr & FPCR_AH) { |
41 | +++ b/target/arm/helper.c | 37 | + negf = float_muladd_negate_product; |
42 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, | 38 | + } else { |
39 | + negx = 0x8000; | ||
40 | + } | ||
41 | + } | ||
42 | |||
43 | for (i = 0; i < oprsz; i += sizeof(float32)) { | ||
44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; | ||
45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
49 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
50 | |||
51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
43 | } | 53 | } |
44 | } | 54 | } |
45 | 55 | ||
46 | -#endif /* !CONFIG_USER_ONLY */ | ||
47 | - | ||
48 | /* Convert a possible stage1+2 MMU index into the appropriate | ||
49 | * stage 1 MMU index | ||
50 | */ | ||
51 | -static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
52 | +ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
53 | { | ||
54 | switch (mmu_idx) { | ||
55 | case ARMMMUIdx_SE10_0: | ||
56 | @@ -XXX,XX +XXX,XX @@ static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | ||
57 | return mmu_idx; | ||
58 | } | ||
59 | } | ||
60 | +#endif /* !CONFIG_USER_ONLY */ | ||
61 | |||
62 | /* Return true if the translation regime is using LPAE format page tables */ | ||
63 | static inline bool regime_using_lpae_format(CPUARMState *env, | ||
64 | -- | 56 | -- |
65 | 2.25.1 | 57 | 2.34.1 | diff view generated by jsdifflib |
1 | The architectural feature RASv1p1 introduces the following new | 1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we |
---|---|---|---|
2 | features: | 2 | can enable FEAT_AFP for '-cpu max', and document that we support it. |
3 | * new registers ERXPFGCDN_EL1, ERXPFGCTL_EL1 and ERXPFGF_EL1 | ||
4 | * new bits in the fine-grained trap registers that control traps | ||
5 | for these new registers | ||
6 | * new trap bits HCR_EL2.FIEN and SCR_EL3.FIEN that control traps | ||
7 | for ERXPFGCDN_EL1, ERXPFGCTL_EL1, ERXPFGP_EL1 | ||
8 | * a larger number of the ERXMISC<n>_EL1 registers | ||
9 | * the format of ERR<n>STATUS registers changes | ||
10 | |||
11 | The architecture permits that if ERRIDR_EL1.NUM is 0 (as it is for | ||
12 | QEMU) then all these new registers may UNDEF, and the HCR_EL2.FIEN | ||
13 | and SCR_EL3.FIEN bits may be RES0. We don't have any ERR<n>STATUS | ||
14 | registers (again, because ERRIDR_EL1.NUM is 0). QEMU does not yet | ||
15 | implement the fine-grained-trap extension. So there is nothing we | ||
16 | need to implement to be compliant with the feature spec. Make the | ||
17 | 'max' CPU report the feature in its ID registers, and document it. | ||
18 | 3 | ||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
21 | Message-id: 20220531114258.855804-1-peter.maydell@linaro.org | ||
22 | --- | 6 | --- |
23 | docs/system/arm/emulation.rst | 1 + | 7 | docs/system/arm/emulation.rst | 1 + |
24 | target/arm/cpu64.c | 1 + | 8 | target/arm/tcg/cpu64.c | 1 + |
25 | 2 files changed, 2 insertions(+) | 9 | 2 files changed, 2 insertions(+) |
26 | 10 | ||
27 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
28 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/docs/system/arm/emulation.rst | 13 | --- a/docs/system/arm/emulation.rst |
30 | +++ b/docs/system/arm/emulation.rst | 14 | +++ b/docs/system/arm/emulation.rst |
31 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
32 | - FEAT_PMUv3p1 (PMU Extensions v3.1) | 16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) |
33 | - FEAT_PMUv3p4 (PMU Extensions v3.4) | 17 | - FEAT_AdvSIMD (Advanced SIMD Extension) |
34 | - FEAT_RAS (Reliability, availability, and serviceability) | 18 | - FEAT_AES (AESD and AESE instructions) |
35 | +- FEAT_RASv1p1 (RAS Extension v1.1) | 19 | +- FEAT_AFP (Alternate floating-point behavior) |
36 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) | 20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) |
37 | - FEAT_RNG (Random number generator) | 21 | - FEAT_ASID16 (16 bit ASID) |
38 | - FEAT_S2FWB (Stage 2 forced Write-Back) | 22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) |
39 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
40 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/target/arm/cpu64.c | 25 | --- a/target/arm/tcg/cpu64.c |
42 | +++ b/target/arm/cpu64.c | 26 | +++ b/target/arm/tcg/cpu64.c |
43 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
44 | * we do for EL2 with the virtualization=on property. | 28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ |
45 | */ | 29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ |
46 | t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ | 30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ |
47 | + t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 1); /* FEAT_RASv1p1 */ | 31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ |
48 | t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */ | 32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ |
49 | cpu->isar.id_aa64pfr1 = t; | 33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ |
50 | 34 | cpu->isar.id_aa64mmfr1 = t; | |
51 | -- | 35 | -- |
52 | 2.25.1 | 36 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Frederic Konrad <fkonrad@amd.com> | 1 | FEAT_RPRES implements an "increased precision" variant of the single |
---|---|---|---|
2 | precision FRECPE and FRSQRTE instructions, going from an 8 bit to a 12 | ||
3 | bit mantissa. This applies only when FPCR.AH == 1. Note that the | ||
4 | halfprec and double versions of these insns retain the 8 bit | ||
5 | precision regardless. | ||
2 | 6 | ||
3 | When the display port has been initially implemented the device | 7 | In this commit we add all the plumbing to make these instructions |
4 | driver wasn't using interrupts. Now that the display port driver | 8 | call a new helper function when the increased-precision is in |
5 | waits for vblank interrupt it has been noticed that the irq mapping | 9 | effect. In the following commit we will provide the actual change |
6 | is wrong. So use the value from the linux device tree and the | 10 | in behaviour in the helpers. |
7 | ultrascale+ reference manual. | ||
8 | 11 | ||
9 | Signed-off-by: Frederic Konrad <fkonrad@amd.com> | ||
10 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@amd.com> | ||
11 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
12 | Message-id: 20220601172353.3220232-5-fkonrad@xilinx.com | ||
13 | [PMM: refold lines in commit message] | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | 14 | --- |
16 | hw/arm/xlnx-zynqmp.c | 4 ++-- | 15 | target/arm/cpu-features.h | 5 +++++ |
17 | 1 file changed, 2 insertions(+), 2 deletions(-) | 16 | target/arm/helper.h | 4 ++++ |
17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- | ||
18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- | ||
19 | target/arm/tcg/vec_helper.c | 2 ++ | ||
20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- | ||
21 | 6 files changed, 85 insertions(+), 8 deletions(-) | ||
18 | 22 | ||
19 | diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c | 23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
20 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/arm/xlnx-zynqmp.c | 25 | --- a/target/arm/cpu-features.h |
22 | +++ b/hw/arm/xlnx-zynqmp.c | 26 | +++ b/target/arm/cpu-features.h |
23 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) |
24 | #define SERDES_SIZE 0x20000 | 28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); |
25 | 29 | } | |
26 | #define DP_ADDR 0xfd4a0000 | 30 | |
27 | -#define DP_IRQ 113 | 31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) |
28 | +#define DP_IRQ 0x77 | 32 | +{ |
29 | 33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); | |
30 | #define DPDMA_ADDR 0xfd4c0000 | 34 | +} |
31 | -#define DPDMA_IRQ 116 | 35 | + |
32 | +#define DPDMA_IRQ 0x7a | 36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) |
33 | 37 | { | |
34 | #define APU_ADDR 0xfd5c0000 | 38 | /* We always set the AdvSIMD and FP fields identically. */ |
35 | #define APU_IRQ 153 | 39 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/helper.h | ||
42 | +++ b/target/arm/helper.h | ||
43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) | ||
44 | |||
45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) | ||
54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) | ||
55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
56 | |||
57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
61 | |||
62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
66 | |||
67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | ||
68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/translate-a64.c | ||
71 | +++ b/target/arm/tcg/translate-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
73 | gen_helper_recpe_f32, | ||
74 | gen_helper_recpe_f64, | ||
75 | }; | ||
76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
77 | +static const FPScalar1 f_scalar_frecpe_rpres = { | ||
78 | + gen_helper_recpe_f16, | ||
79 | + gen_helper_recpe_rpres_f32, | ||
80 | + gen_helper_recpe_f64, | ||
81 | +}; | ||
82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, | ||
83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) | ||
85 | |||
86 | static const FPScalar1 f_scalar_frecpx = { | ||
87 | gen_helper_frecpx_f16, | ||
88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { | ||
89 | gen_helper_rsqrte_f32, | ||
90 | gen_helper_rsqrte_f64, | ||
91 | }; | ||
92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { | ||
94 | + gen_helper_rsqrte_f16, | ||
95 | + gen_helper_rsqrte_rpres_f32, | ||
96 | + gen_helper_rsqrte_f64, | ||
97 | +}; | ||
98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, | ||
99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) | ||
101 | |||
102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
103 | { | ||
104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
105 | gen_helper_gvec_frecpe_s, | ||
106 | gen_helper_gvec_frecpe_d, | ||
107 | }; | ||
108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { | ||
110 | + gen_helper_gvec_frecpe_h, | ||
111 | + gen_helper_gvec_frecpe_rpres_s, | ||
112 | + gen_helper_gvec_frecpe_d, | ||
113 | +}; | ||
114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) | ||
116 | |||
117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
118 | gen_helper_gvec_frsqrte_h, | ||
119 | gen_helper_gvec_frsqrte_s, | ||
120 | gen_helper_gvec_frsqrte_d, | ||
121 | }; | ||
122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { | ||
124 | + gen_helper_gvec_frsqrte_h, | ||
125 | + gen_helper_gvec_frsqrte_rpres_s, | ||
126 | + gen_helper_gvec_frsqrte_d, | ||
127 | +}; | ||
128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) | ||
130 | |||
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
162 | |||
163 | /* | ||
164 | *** SVE Floating Point Compare with Zero Group | ||
165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/arm/tcg/vec_helper.c | ||
168 | +++ b/target/arm/tcg/vec_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ | ||
170 | |||
171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) | ||
172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) | ||
173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) | ||
174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | ||
175 | |||
176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) | ||
177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) | ||
178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) | ||
179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) | ||
180 | |||
181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) | ||
182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/vfp_helper.c | ||
185 | +++ b/target/arm/vfp_helper.c | ||
186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
187 | return make_float16(f16_val); | ||
188 | } | ||
189 | |||
190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
191 | +/* | ||
192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant | ||
193 | + * which is used when FPCR.AH == 1. | ||
194 | + */ | ||
195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
196 | { | ||
197 | float32 f32 = float32_squash_input_denormal(input, fpst); | ||
198 | uint32_t f32_val = float32_val(f32); | ||
199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
200 | return make_float32(f32_val); | ||
201 | } | ||
202 | |||
203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
204 | +{ | ||
205 | + return do_recpe_f32(input, fpst, false); | ||
206 | +} | ||
207 | + | ||
208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) | ||
209 | +{ | ||
210 | + return do_recpe_f32(input, fpst, true); | ||
211 | +} | ||
212 | + | ||
213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
214 | { | ||
215 | float64 f64 = float64_squash_input_denormal(input, fpst); | ||
216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
217 | return make_float16(val); | ||
218 | } | ||
219 | |||
220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
221 | +/* | ||
222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant | ||
223 | + * which is used when FPCR.AH == 1. | ||
224 | + */ | ||
225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
226 | { | ||
227 | float32 f32 = float32_squash_input_denormal(input, s); | ||
228 | uint32_t val = float32_val(f32); | ||
229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
230 | return make_float32(val); | ||
231 | } | ||
232 | |||
233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
234 | +{ | ||
235 | + return do_rsqrte_f32(input, s, false); | ||
236 | +} | ||
237 | + | ||
238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | ||
239 | +{ | ||
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
36 | -- | 246 | -- |
37 | 2.25.1 | 247 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the increased precision variation of FRECPE. In the |
---|---|---|---|
2 | pseudocode this corresponds to the handling of the | ||
3 | "increasedprecision" boolean in the FPRecipEstimate() and | ||
4 | RecipEstimate() functions. | ||
2 | 5 | ||
3 | This check is buried within arm_hcr_el2_eff(), but since we | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | have to have the explicit check for CPTR_EL2.TZ, we might as | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | well just check it once at the beginning of the block. | 8 | --- |
9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ | ||
10 | 1 file changed, 46 insertions(+), 8 deletions(-) | ||
6 | 11 | ||
7 | Once this is done, we can test HCR_EL2.{E2H,TGE} directly, | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
8 | rather than going through arm_hcr_el2_eff(). | ||
9 | |||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20220607203306.657998-9-richard.henderson@linaro.org | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | target/arm/helper.c | 13 +++++-------- | ||
16 | 1 file changed, 5 insertions(+), 8 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/arm/helper.c | 14 | --- a/target/arm/vfp_helper.c |
21 | +++ b/target/arm/helper.c | 15 | +++ b/target/arm/vfp_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
17 | return r; | ||
18 | } | ||
19 | |||
20 | +/* | ||
21 | + * Increased precision version: | ||
22 | + * input is a 12 bit fixed point number | ||
23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. | ||
24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 | ||
25 | + */ | ||
26 | +static int recip_estimate_incprec(int input) | ||
27 | +{ | ||
28 | + int a, b, r; | ||
29 | + assert(2048 <= input && input < 4096); | ||
30 | + a = (input * 2) + 1; | ||
31 | + /* | ||
32 | + * The pseudocode expresses this as an operation on infinite | ||
33 | + * precision reals where it calculates 2^25 / a and then looks | ||
34 | + * at the error between that and the rounded-down-to-integer | ||
35 | + * value to see if it should instead round up. We instead | ||
36 | + * follow the same approach as the pseudocode for the 8-bit | ||
37 | + * precision version, and calculate (2 * (2^25 / a)) as an | ||
38 | + * integer so we can do the "add one and halve" to round it. | ||
39 | + * So the 1 << 26 here is correct. | ||
40 | + */ | ||
41 | + b = (1 << 26) / a; | ||
42 | + r = (b + 1) >> 1; | ||
43 | + assert(4096 <= r && r < 8192); | ||
44 | + return r; | ||
45 | +} | ||
46 | + | ||
47 | /* | ||
48 | * Common wrapper to call recip_estimate | ||
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
51 | * callee. | ||
52 | */ | ||
53 | |||
54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, | ||
56 | + bool increasedprecision) | ||
57 | { | ||
58 | uint32_t scaled, estimate; | ||
59 | uint64_t result_frac; | ||
60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
23 | } | 61 | } |
24 | } | 62 | } |
25 | 63 | ||
26 | - /* | 64 | - /* scaled = UInt('1':fraction<51:44>) */ |
27 | - * CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). | 65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); |
28 | - */ | 66 | - estimate = recip_estimate(scaled); |
29 | - if (el <= 2) { | 67 | + if (increasedprecision) { |
30 | - uint64_t hcr_el2 = arm_hcr_el2_eff(env); | 68 | + /* scaled = UInt('1':fraction<51:41>) */ |
31 | - if (hcr_el2 & HCR_E2H) { | 69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); |
32 | + if (el <= 2 && arm_is_el2_enabled(env)) { | 70 | + estimate = recip_estimate_incprec(scaled); |
33 | + /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */ | 71 | + } else { |
34 | + if (env->cp15.hcr_el2 & HCR_E2H) { | 72 | + /* scaled = UInt('1':fraction<51:44>) */ |
35 | switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) { | 73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); |
36 | case 1: | 74 | + estimate = recip_estimate(scaled); |
37 | - if (el != 0 || !(hcr_el2 & HCR_TGE)) { | 75 | + } |
38 | + if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) { | 76 | |
39 | break; | 77 | result_exp = exp_off - *exp; |
40 | } | 78 | - result_frac = deposit64(0, 44, 8, estimate); |
41 | /* fall through */ | 79 | + if (increasedprecision) { |
42 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 80 | + result_frac = deposit64(0, 40, 12, estimate); |
43 | case 2: | 81 | + } else { |
44 | return 2; | 82 | + result_frac = deposit64(0, 44, 8, estimate); |
45 | } | 83 | + } |
46 | - } else if (arm_is_el2_enabled(env)) { | 84 | if (result_exp == 0) { |
47 | + } else { | 85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); |
48 | if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) { | 86 | } else if (result_exp == -1) { |
49 | return 2; | 87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) |
50 | } | 88 | } |
89 | |||
90 | f64_frac = call_recip_estimate(&f16_exp, 29, | ||
91 | - ((uint64_t) f16_frac) << (52 - 10)); | ||
92 | + ((uint64_t) f16_frac) << (52 - 10), false); | ||
93 | |||
94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ | ||
95 | f16_val = deposit32(0, 15, 1, f16_sign); | ||
96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
97 | } | ||
98 | |||
99 | f64_frac = call_recip_estimate(&f32_exp, 253, | ||
100 | - ((uint64_t) f32_frac) << (52 - 23)); | ||
101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); | ||
102 | |||
103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ | ||
104 | f32_val = deposit32(0, 31, 1, f32_sign); | ||
105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); | ||
107 | } | ||
108 | |||
109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); | ||
110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); | ||
111 | |||
112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ | ||
113 | f64_val = deposit64(0, 63, 1, f64_sign); | ||
51 | -- | 114 | -- |
52 | 2.25.1 | 115 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Implement the increased precision variation of FRSQRTE. In the |
---|---|---|---|
2 | pseudocode this corresponds to the handling of the | ||
3 | "increasedprecision" boolean in the FPRSqrtEstimate() and | ||
4 | RecipSqrtEstimate() functions. | ||
2 | 5 | ||
3 | Move the data to vec_helper.c and the inline to vec_internal.h. | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- | ||
10 | 1 file changed, 64 insertions(+), 13 deletions(-) | ||
4 | 11 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220607203306.657998-18-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/vec_internal.h | 7 +++++++ | ||
11 | target/arm/sve_helper.c | 29 ----------------------------- | ||
12 | target/arm/vec_helper.c | 26 ++++++++++++++++++++++++++ | ||
13 | 3 files changed, 33 insertions(+), 29 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/vec_internal.h | 14 | --- a/target/arm/vfp_helper.c |
18 | +++ b/target/arm/vec_internal.h | 15 | +++ b/target/arm/vfp_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t expand_pred_b(uint8_t byte) | 16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) |
20 | return expand_pred_b_data[byte]; | 17 | return estimate; |
21 | } | 18 | } |
22 | 19 | ||
23 | +/* Similarly for half-word elements. */ | 20 | +static int do_recip_sqrt_estimate_incprec(int a) |
24 | +extern const uint64_t expand_pred_h_data[0x55 + 1]; | ||
25 | +static inline uint64_t expand_pred_h(uint8_t byte) | ||
26 | +{ | 21 | +{ |
27 | + return expand_pred_h_data[byte & 0x55]; | 22 | + /* |
23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate | ||
24 | + * in terms of an infinite-precision floating point calculation of a | ||
25 | + * square root. We implement this using the same kind of pure integer | ||
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
27 | + */ | ||
28 | + int64_t b, estimate; | ||
29 | |||
30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
31 | + assert(1024 <= a && a < 4096); | ||
32 | + if (a < 2048) { | ||
33 | + a = a * 2 + 1; | ||
34 | + } else { | ||
35 | + a = (a >> 1) << 1; | ||
36 | + a = (a + 1) * 2; | ||
37 | + } | ||
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
43 | + | ||
44 | + assert(4096 <= estimate && estimate < 8192); | ||
45 | + | ||
46 | + return estimate; | ||
28 | +} | 47 | +} |
29 | + | 48 | + |
30 | static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) | 49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, |
50 | + bool increasedprecision) | ||
31 | { | 51 | { |
32 | uint64_t *d = vd + opr_sz; | 52 | int estimate; |
33 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | 53 | uint32_t scaled; |
34 | index XXXXXXX..XXXXXXX 100644 | 54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) |
35 | --- a/target/arm/sve_helper.c | 55 | frac = extract64(frac, 0, 51) << 1; |
36 | +++ b/target/arm/sve_helper.c | 56 | } |
37 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) | 57 | |
38 | return flags; | 58 | - if (*exp & 1) { |
59 | - /* scaled = UInt('01':fraction<51:45>) */ | ||
60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
61 | + if (increasedprecision) { | ||
62 | + if (*exp & 1) { | ||
63 | + /* scaled = UInt('01':fraction<51:42>) */ | ||
64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); | ||
65 | + } else { | ||
66 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
90 | + } | ||
39 | } | 91 | } |
40 | 92 | ||
41 | -/* Similarly for half-word elements. | 93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) |
42 | - * for (i = 0; i < 256; ++i) { | 94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) |
43 | - * unsigned long m = 0; | 95 | |
44 | - * if (i & 0xaa) { | 96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); |
45 | - * continue; | 97 | |
46 | - * } | 98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); |
47 | - * for (j = 0; j < 8; j += 2) { | 99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); |
48 | - * if ((i >> j) & 1) { | 100 | |
49 | - * m |= 0xfffful << (j << 3); | 101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ |
50 | - * } | 102 | val = deposit32(0, 15, 1, f16_sign); |
51 | - * } | 103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) |
52 | - * printf("[0x%x] = 0x%016lx,\n", i, m); | 104 | |
53 | - * } | 105 | f64_frac = ((uint64_t) f32_frac) << 29; |
54 | - */ | 106 | |
55 | -static inline uint64_t expand_pred_h(uint8_t byte) | 107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); |
56 | -{ | 108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); |
57 | - static const uint64_t word[] = { | 109 | |
58 | - [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, | 110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ |
59 | - [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, | 111 | + /* |
60 | - [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, | 112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) |
61 | - [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, | 113 | + * or for increased precision |
62 | - [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, | 114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) |
63 | - [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, | 115 | + */ |
64 | - [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, | 116 | val = deposit32(0, 31, 1, f32_sign); |
65 | - [0x55] = 0xffffffffffffffff, | 117 | val = deposit32(val, 23, 8, f32_exp); |
66 | - }; | 118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); |
67 | - return word[byte & 0x55]; | 119 | + if (rpres) { |
68 | -} | 120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); |
69 | - | 121 | + } else { |
70 | /* Similarly for single word elements. */ | 122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); |
71 | static inline uint64_t expand_pred_s(uint8_t byte) | 123 | + } |
72 | { | 124 | return make_float32(val); |
73 | diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c | 125 | } |
74 | index XXXXXXX..XXXXXXX 100644 | 126 | |
75 | --- a/target/arm/vec_helper.c | 127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) |
76 | +++ b/target/arm/vec_helper.c | 128 | return float64_zero; |
77 | @@ -XXX,XX +XXX,XX @@ const uint64_t expand_pred_b_data[256] = { | 129 | } |
78 | 0xffffffffffffffff, | 130 | |
79 | }; | 131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); |
80 | 132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | |
81 | +/* | 133 | |
82 | + * Similarly for half-word elements. | 134 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ |
83 | + * for (i = 0; i < 256; ++i) { | 135 | val = deposit64(0, 61, 1, f64_sign); |
84 | + * unsigned long m = 0; | ||
85 | + * if (i & 0xaa) { | ||
86 | + * continue; | ||
87 | + * } | ||
88 | + * for (j = 0; j < 8; j += 2) { | ||
89 | + * if ((i >> j) & 1) { | ||
90 | + * m |= 0xfffful << (j << 3); | ||
91 | + * } | ||
92 | + * } | ||
93 | + * printf("[0x%x] = 0x%016lx,\n", i, m); | ||
94 | + * } | ||
95 | + */ | ||
96 | +const uint64_t expand_pred_h_data[0x55 + 1] = { | ||
97 | + [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, | ||
98 | + [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, | ||
99 | + [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, | ||
100 | + [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, | ||
101 | + [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, | ||
102 | + [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, | ||
103 | + [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, | ||
104 | + [0x55] = 0xffffffffffffffff, | ||
105 | +}; | ||
106 | + | ||
107 | /* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */ | ||
108 | int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3, | ||
109 | bool neg, bool round) | ||
110 | -- | 136 | -- |
111 | 2.25.1 | 137 | 2.34.1 | diff view generated by jsdifflib |
1 | The FEAT_DoubleFault extension adds the following: | 1 | Now that the emulation is complete, we can enable FEAT_RPRES for the 'max' |
---|---|---|---|
2 | 2 | CPU type. | |
3 | * All external aborts on instruction fetches and translation table | ||
4 | walks for instruction fetches must be synchronous. For QEMU this | ||
5 | is already true. | ||
6 | |||
7 | * SCR_EL3 has a new bit NMEA which disables the masking of SError | ||
8 | interrupts by PSTATE.A when the SError interrupt is taken to EL3. | ||
9 | For QEMU we only need to make the bit writable, because we have no | ||
10 | sources of SError interrupts. | ||
11 | |||
12 | * SCR_EL3 has a new bit EASE which causes synchronous external | ||
13 | aborts taken to EL3 to be taken at the same entry point as SError. | ||
14 | (Note that this does not mean that they are SErrors for purposes | ||
15 | of PSTATE.A masking or that the syndrome register reports them as | ||
16 | SErrors: it just means that the vector offset is different.) | ||
17 | |||
18 | * The existing SCTLR_EL3.IESB has an effective value of 1 when | ||
19 | SCR_EL3.NMEA is 1. For QEMU this is a no-op because we don't need | ||
20 | different behaviour based on IESB (we don't need to do anything to | ||
21 | ensure that error exceptions are synchronized). | ||
22 | |||
23 | So for QEMU the things we need to change are: | ||
24 | * Make SCR_EL3.{NMEA,EASE} writable | ||
25 | * When taking a synchronous external abort at EL3, adjust the | ||
26 | vector entry point if SCR_EL3.EASE is set | ||
27 | * Advertise the feature in the ID registers | ||
28 | 3 | ||
29 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
30 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
31 | Message-id: 20220531151431.949322-1-peter.maydell@linaro.org | ||
32 | --- | 6 | --- |
33 | docs/system/arm/emulation.rst | 1 + | 7 | docs/system/arm/emulation.rst | 1 + |
34 | target/arm/cpu.h | 5 +++++ | 8 | target/arm/tcg/cpu64.c | 1 + |
35 | target/arm/cpu64.c | 4 ++-- | 9 | 2 files changed, 2 insertions(+) |
36 | target/arm/helper.c | 36 +++++++++++++++++++++++++++++++++++ | ||
37 | 4 files changed, 44 insertions(+), 2 deletions(-) | ||
38 | 10 | ||
39 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
40 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/docs/system/arm/emulation.rst | 13 | --- a/docs/system/arm/emulation.rst |
42 | +++ b/docs/system/arm/emulation.rst | 14 | +++ b/docs/system/arm/emulation.rst |
43 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
44 | - FEAT_Debugv8p2 (Debug changes for v8.2) | 16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) |
45 | - FEAT_Debugv8p4 (Debug changes for v8.4) | 17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) |
46 | - FEAT_DotProd (Advanced SIMD dot product instructions) | 18 | - FEAT_RNG (Random number generator) |
47 | +- FEAT_DoubleFault (Double Fault Extension) | 19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) |
48 | - FEAT_FCMA (Floating-point complex number instructions) | 20 | - FEAT_S2FWB (Stage 2 forced Write-Back) |
49 | - FEAT_FHM (Floating-point half-precision multiplication instructions) | 21 | - FEAT_SB (Speculation Barrier) |
50 | - FEAT_FP16 (Half-precision floating-point data processing) | 22 | - FEAT_SEL2 (Secure EL2) |
51 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
52 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
53 | --- a/target/arm/cpu.h | 25 | --- a/target/arm/tcg/cpu64.c |
54 | +++ b/target/arm/cpu.h | 26 | +++ b/target/arm/tcg/cpu64.c |
55 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_ras(const ARMISARegisters *id) | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
56 | return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0; | 28 | cpu->isar.id_aa64isar1 = t; |
57 | } | 29 | |
58 | 30 | t = cpu->isar.id_aa64isar2; | |
59 | +static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id) | 31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ |
60 | +{ | 32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ |
61 | + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2; | 33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ |
62 | +} | 34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ |
63 | + | ||
64 | static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) | ||
65 | { | ||
66 | return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; | ||
67 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/arm/cpu64.c | ||
70 | +++ b/target/arm/cpu64.c | ||
71 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | ||
72 | t = cpu->isar.id_aa64pfr0; | ||
73 | t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ | ||
74 | t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ | ||
75 | - t = FIELD_DP64(t, ID_AA64PFR0, RAS, 1); /* FEAT_RAS */ | ||
76 | + t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */ | ||
77 | t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); | ||
78 | t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); /* FEAT_SEL2 */ | ||
79 | t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ | ||
80 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | ||
81 | * we do for EL2 with the virtualization=on property. | ||
82 | */ | ||
83 | t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ | ||
84 | - t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 1); /* FEAT_RASv1p1 */ | ||
85 | + t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ | ||
86 | t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */ | ||
87 | cpu->isar.id_aa64pfr1 = t; | ||
88 | |||
89 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/helper.c | ||
92 | +++ b/target/arm/helper.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | ||
94 | if (cpu_isar_feature(aa64_scxtnum, cpu)) { | ||
95 | valid_mask |= SCR_ENSCXT; | ||
96 | } | ||
97 | + if (cpu_isar_feature(aa64_doublefault, cpu)) { | ||
98 | + valid_mask |= SCR_EASE | SCR_NMEA; | ||
99 | + } | ||
100 | } else { | ||
101 | valid_mask &= ~(SCR_RW | SCR_ST); | ||
102 | if (cpu_isar_feature(aa32_ras, cpu)) { | ||
103 | @@ -XXX,XX +XXX,XX @@ static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) | ||
104 | return ret; | ||
105 | } | ||
106 | |||
107 | +static bool syndrome_is_sync_extabt(uint32_t syndrome) | ||
108 | +{ | ||
109 | + /* Return true if this syndrome value is a synchronous external abort */ | ||
110 | + switch (syn_get_ec(syndrome)) { | ||
111 | + case EC_INSNABORT: | ||
112 | + case EC_INSNABORT_SAME_EL: | ||
113 | + case EC_DATAABORT: | ||
114 | + case EC_DATAABORT_SAME_EL: | ||
115 | + /* Look at fault status code for all the synchronous ext abort cases */ | ||
116 | + switch (syndrome & 0x3f) { | ||
117 | + case 0x10: | ||
118 | + case 0x13: | ||
119 | + case 0x14: | ||
120 | + case 0x15: | ||
121 | + case 0x16: | ||
122 | + case 0x17: | ||
123 | + return true; | ||
124 | + default: | ||
125 | + return false; | ||
126 | + } | ||
127 | + default: | ||
128 | + return false; | ||
129 | + } | ||
130 | +} | ||
131 | + | ||
132 | /* Handle exception entry to a target EL which is using AArch64 */ | ||
133 | static void arm_cpu_do_interrupt_aarch64(CPUState *cs) | ||
134 | { | ||
135 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) | ||
136 | switch (cs->exception_index) { | ||
137 | case EXCP_PREFETCH_ABORT: | ||
138 | case EXCP_DATA_ABORT: | ||
139 | + /* | ||
140 | + * FEAT_DoubleFault allows synchronous external aborts taken to EL3 | ||
141 | + * to be taken to the SError vector entrypoint. | ||
142 | + */ | ||
143 | + if (new_el == 3 && (env->cp15.scr_el3 & SCR_EASE) && | ||
144 | + syndrome_is_sync_extabt(env->exception.syndrome)) { | ||
145 | + addr += 0x180; | ||
146 | + } | ||
147 | env->cp15.far_el[new_el] = env->exception.vaddress; | ||
148 | qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n", | ||
149 | env->cp15.far_el[new_el]); | ||
150 | -- | 35 | -- |
151 | 2.25.1 | 36 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Move ARMFPStatusFlavour to cpu.h so that it can be used to index | ||
4 | the fp_status[] array. For now, place the array in an anonymous | ||
5 | union with the existing structures. Adjust the order | ||
6 | of the existing structures to match the enum. | ||
7 | |||
8 | Simplify fpstatus_ptr() using the new array. | ||
9 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-24-richard.henderson@linaro.org | 11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 14 | --- |
8 | target/arm/ptw.h | 1 - | 15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- |
9 | target/arm/helper.c | 24 ------------------------ | 16 | target/arm/tcg/translate.h | 64 +------------------- |
10 | target/arm/ptw.c | 22 ++++++++++++++++++++++ | 17 | 2 files changed, 70 insertions(+), 113 deletions(-) |
11 | 3 files changed, 22 insertions(+), 25 deletions(-) | 18 | |
12 | 19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | |
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 21 | --- a/target/arm/cpu.h |
16 | +++ b/target/arm/ptw.h | 22 | +++ b/target/arm/cpu.h |
17 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; |
18 | 24 | ||
19 | #ifndef CONFIG_USER_ONLY | 25 | typedef struct NVICState NVICState; |
20 | 26 | ||
21 | -bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | 27 | +/* |
22 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 28 | + * Enum for indexing vfp.fp_status[]. |
23 | uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 29 | + * |
24 | 30 | + * FPST_A32: is the "normal" fp status for AArch32 insns | |
25 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 31 | + * FPST_A64: is the "normal" fp status for AArch64 insns |
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest and is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | 157 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/target/arm/helper.c | 158 | --- a/target/arm/tcg/translate.h |
28 | +++ b/target/arm/helper.c | 159 | +++ b/target/arm/tcg/translate.h |
29 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) | 160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) |
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
30 | } | 162 | } |
31 | #endif /* !CONFIG_USER_ONLY */ | 163 | |
32 | 164 | -/* | |
33 | -#ifndef CONFIG_USER_ONLY | 165 | - * Enum for argument to fpstatus_ptr(). |
34 | -bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | 166 | - */ |
35 | -{ | 167 | -typedef enum ARMFPStatusFlavour { |
36 | - switch (mmu_idx) { | 168 | - FPST_A32, |
37 | - case ARMMMUIdx_SE10_0: | 169 | - FPST_A64, |
38 | - case ARMMMUIdx_E20_0: | 170 | - FPST_A32_F16, |
39 | - case ARMMMUIdx_SE20_0: | 171 | - FPST_A64_F16, |
40 | - case ARMMMUIdx_Stage1_E0: | 172 | - FPST_AH, |
41 | - case ARMMMUIdx_Stage1_SE0: | 173 | - FPST_AH_F16, |
42 | - case ARMMMUIdx_MUser: | 174 | - FPST_STD, |
43 | - case ARMMMUIdx_MSUser: | 175 | - FPST_STD_F16, |
44 | - case ARMMMUIdx_MUserNegPri: | 176 | -} ARMFPStatusFlavour; |
45 | - case ARMMMUIdx_MSUserNegPri: | 177 | - |
46 | - return true; | 178 | /** |
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
180 | * | ||
181 | * We have multiple softfloat float_status fields in the Arm CPU state struct | ||
182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | ||
183 | * been set up to point to the requested field in the CPU state struct. | ||
184 | - * The options are: | ||
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
204 | */ | ||
205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
206 | { | ||
207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | ||
208 | - int offset; | ||
209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | ||
210 | |||
211 | - switch (flavour) { | ||
212 | - case FPST_A32: | ||
213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | ||
214 | - break; | ||
215 | - case FPST_A64: | ||
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
47 | - default: | 236 | - default: |
48 | - return false; | ||
49 | - case ARMMMUIdx_E10_0: | ||
50 | - case ARMMMUIdx_E10_1: | ||
51 | - case ARMMMUIdx_E10_1_PAN: | ||
52 | - g_assert_not_reached(); | 237 | - g_assert_not_reached(); |
53 | - } | 238 | - } |
54 | -} | 239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); |
55 | -#endif /* !CONFIG_USER_ONLY */ | 240 | return statusptr; |
56 | - | ||
57 | int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) | ||
58 | { | ||
59 | if (regime_has_2_ranges(mmu_idx)) { | ||
60 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/ptw.c | ||
63 | +++ b/target/arm/ptw.c | ||
64 | @@ -XXX,XX +XXX,XX @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
65 | return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; | ||
66 | } | 241 | } |
67 | |||
68 | +static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
69 | +{ | ||
70 | + switch (mmu_idx) { | ||
71 | + case ARMMMUIdx_SE10_0: | ||
72 | + case ARMMMUIdx_E20_0: | ||
73 | + case ARMMMUIdx_SE20_0: | ||
74 | + case ARMMMUIdx_Stage1_E0: | ||
75 | + case ARMMMUIdx_Stage1_SE0: | ||
76 | + case ARMMMUIdx_MUser: | ||
77 | + case ARMMMUIdx_MSUser: | ||
78 | + case ARMMMUIdx_MUserNegPri: | ||
79 | + case ARMMMUIdx_MSUserNegPri: | ||
80 | + return true; | ||
81 | + default: | ||
82 | + return false; | ||
83 | + case ARMMMUIdx_E10_0: | ||
84 | + case ARMMMUIdx_E10_1: | ||
85 | + case ARMMMUIdx_E10_1_PAN: | ||
86 | + g_assert_not_reached(); | ||
87 | + } | ||
88 | +} | ||
89 | + | ||
90 | static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | ||
91 | { | ||
92 | /* | ||
93 | -- | 242 | -- |
94 | 2.25.1 | 243 | 2.34.1 |
244 | |||
245 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This will be used for both Normal and Streaming SVE, and the value | 3 | Replace with fp_status[FPST_STD_F16]. |
4 | does not necessarily come from ZCR_ELx. While we're at it, emphasize | ||
5 | the units in which the value is returned. | ||
6 | 4 | ||
7 | Patch produced by | ||
8 | git grep -l sve_zcr_len_for_el | \ | ||
9 | xargs -n1 sed -i 's/sve_zcr_len_for_el/sve_vqm1_for_el/g' | ||
10 | |||
11 | and then adding a function comment. | ||
12 | |||
13 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
15 | Message-id: 20220607203306.657998-13-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | --- | 9 | --- |
18 | target/arm/cpu.h | 11 ++++++++++- | 10 | target/arm/cpu.h | 1 - |
19 | target/arm/arch_dump.c | 2 +- | 11 | target/arm/cpu.c | 4 ++-- |
20 | target/arm/cpu.c | 2 +- | 12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ |
21 | target/arm/gdbstub64.c | 2 +- | 13 | target/arm/vfp_helper.c | 8 ++++---- |
22 | target/arm/helper.c | 12 ++++++------ | 14 | 4 files changed, 18 insertions(+), 19 deletions(-) |
23 | 5 files changed, 19 insertions(+), 10 deletions(-) | ||
24 | 15 | ||
25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
26 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/target/arm/cpu.h | 18 | --- a/target/arm/cpu.h |
28 | +++ b/target/arm/cpu.h | 19 | +++ b/target/arm/cpu.h |
29 | @@ -XXX,XX +XXX,XX @@ void aarch64_sync_64_to_32(CPUARMState *env); | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
30 | 21 | float_status ah_fp_status; | |
31 | int fp_exception_el(CPUARMState *env, int cur_el); | 22 | float_status ah_fp_status_f16; |
32 | int sve_exception_el(CPUARMState *env, int cur_el); | 23 | float_status standard_fp_status; |
33 | -uint32_t sve_zcr_len_for_el(CPUARMState *env, int el); | 24 | - float_status standard_fp_status_f16; |
34 | + | 25 | }; |
35 | +/** | 26 | }; |
36 | + * sve_vqm1_for_el: | 27 | |
37 | + * @env: CPUARMState | ||
38 | + * @el: exception level | ||
39 | + * | ||
40 | + * Compute the current SVE vector length for @el, in units of | ||
41 | + * Quadwords Minus 1 -- the same scale used for ZCR_ELx.LEN. | ||
42 | + */ | ||
43 | +uint32_t sve_vqm1_for_el(CPUARMState *env, int el); | ||
44 | |||
45 | static inline bool is_a64(CPUARMState *env) | ||
46 | { | ||
47 | diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/target/arm/arch_dump.c | ||
50 | +++ b/target/arm/arch_dump.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static off_t sve_fpcr_offset(uint32_t vq) | ||
52 | |||
53 | static uint32_t sve_current_vq(CPUARMState *env) | ||
54 | { | ||
55 | - return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; | ||
56 | + return sve_vqm1_for_el(env, arm_current_el(env)) + 1; | ||
57 | } | ||
58 | |||
59 | static size_t sve_size_vq(uint32_t vq) | ||
60 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
61 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
62 | --- a/target/arm/cpu.c | 30 | --- a/target/arm/cpu.c |
63 | +++ b/target/arm/cpu.c | 31 | +++ b/target/arm/cpu.c |
64 | @@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
65 | vfp_get_fpcr(env), vfp_get_fpsr(env)); | 33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); |
66 | 34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | |
67 | if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { | 35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); |
68 | - int j, zcr_len = sve_zcr_len_for_el(env, el); | 36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); |
69 | + int j, zcr_len = sve_vqm1_for_el(env, el); | 37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
70 | 38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | |
71 | for (i = 0; i <= FFR_PRED_NUM; i++) { | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
72 | bool eol; | 40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
73 | diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
75 | --- a/target/arm/gdbstub64.c | 50 | --- a/target/arm/tcg/mve_helper.c |
76 | +++ b/target/arm/gdbstub64.c | 51 | +++ b/target/arm/tcg/mve_helper.c |
77 | @@ -XXX,XX +XXX,XX @@ int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg) | 52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
78 | * We report in Vector Granules (VG) which is 64bit in a Z reg | 53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
79 | * while the ZCR works in Vector Quads (VQ) which is 128bit chunks. | 54 | continue; \ |
80 | */ | 55 | } \ |
81 | - int vq = sve_zcr_len_for_el(env, arm_current_el(env)) + 1; | 56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ |
82 | + int vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1; | 57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
83 | return gdb_get_reg64(buf, vq * 2); | 58 | &env->vfp.standard_fp_status; \ |
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
84 | } | 194 | } |
85 | default: | 195 | if (changed & FPCR_FZ) { |
86 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/target/arm/helper.c | ||
89 | +++ b/target/arm/helper.c | ||
90 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | ||
91 | /* | ||
92 | * Given that SVE is enabled, return the vector length for EL. | ||
93 | */ | ||
94 | -uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) | ||
95 | +uint32_t sve_vqm1_for_el(CPUARMState *env, int el) | ||
96 | { | ||
97 | ARMCPU *cpu = env_archcpu(env); | ||
98 | uint32_t len = cpu->sve_max_vq - 1; | ||
99 | @@ -XXX,XX +XXX,XX @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
100 | uint64_t value) | ||
101 | { | ||
102 | int cur_el = arm_current_el(env); | ||
103 | - int old_len = sve_zcr_len_for_el(env, cur_el); | ||
104 | + int old_len = sve_vqm1_for_el(env, cur_el); | ||
105 | int new_len; | ||
106 | |||
107 | /* Bits other than [3:0] are RAZ/WI. */ | ||
108 | @@ -XXX,XX +XXX,XX @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, | ||
109 | * Because we arrived here, we know both FP and SVE are enabled; | ||
110 | * otherwise we would have trapped access to the ZCR_ELn register. | ||
111 | */ | ||
112 | - new_len = sve_zcr_len_for_el(env, cur_el); | ||
113 | + new_len = sve_vqm1_for_el(env, cur_el); | ||
114 | if (new_len < old_len) { | ||
115 | aarch64_sve_narrow_vq(env, new_len + 1); | ||
116 | } | ||
117 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
118 | sve_el = 0; | ||
119 | } | ||
120 | } else if (sve_el == 0) { | ||
121 | - DP_TBFLAG_A64(flags, VL, sve_zcr_len_for_el(env, el)); | ||
122 | + DP_TBFLAG_A64(flags, VL, sve_vqm1_for_el(env, el)); | ||
123 | } | ||
124 | DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); | ||
125 | } | ||
126 | @@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, | ||
127 | */ | ||
128 | old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; | ||
129 | old_len = (old_a64 && !sve_exception_el(env, old_el) | ||
130 | - ? sve_zcr_len_for_el(env, old_el) : 0); | ||
131 | + ? sve_vqm1_for_el(env, old_el) : 0); | ||
132 | new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; | ||
133 | new_len = (new_a64 && !sve_exception_el(env, new_el) | ||
134 | - ? sve_zcr_len_for_el(env, new_el) : 0); | ||
135 | + ? sve_vqm1_for_el(env, new_el) : 0); | ||
136 | |||
137 | /* When changing vector length, clear inaccessible state. */ | ||
138 | if (new_len < old_len) { | ||
139 | -- | 196 | -- |
140 | 2.25.1 | 197 | 2.34.1 |
198 | |||
199 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | With SME, the vector length does not only come from ZCR_ELx. | 3 | Replace with fp_status[FPST_STD]. |
4 | Comment that this is either NVL or SVL, like the pseudocode. | 4 | |
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220607203306.657998-2-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | target/arm/cpu.h | 3 ++- | 10 | target/arm/cpu.h | 1 - |
12 | target/arm/translate-a64.h | 2 +- | 11 | target/arm/cpu.c | 8 ++++---- |
13 | target/arm/translate.h | 2 +- | 12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- |
14 | target/arm/helper.c | 2 +- | 13 | target/arm/tcg/vec_helper.c | 4 ++-- |
15 | target/arm/translate-a64.c | 2 +- | 14 | target/arm/vfp_helper.c | 4 ++-- |
16 | target/arm/translate-sve.c | 2 +- | 15 | 5 files changed, 22 insertions(+), 23 deletions(-) |
17 | 6 files changed, 7 insertions(+), 6 deletions(-) | ||
18 | 16 | ||
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
20 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/cpu.h | 19 | --- a/target/arm/cpu.h |
22 | +++ b/target/arm/cpu.h | 20 | +++ b/target/arm/cpu.h |
23 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */ | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
24 | */ | 22 | float_status fp_status_f16_a64; |
25 | FIELD(TBFLAG_A64, TBII, 0, 2) | 23 | float_status ah_fp_status; |
26 | FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2) | 24 | float_status ah_fp_status_f16; |
27 | -FIELD(TBFLAG_A64, ZCR_LEN, 4, 4) | 25 | - float_status standard_fp_status; |
28 | +/* The current vector length, either NVL or SVL. */ | 26 | }; |
29 | +FIELD(TBFLAG_A64, VL, 4, 4) | 27 | }; |
30 | FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1) | 28 | |
31 | FIELD(TBFLAG_A64, BT, 9, 1) | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
32 | FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ | 30 | index XXXXXXX..XXXXXXX 100644 |
33 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | 31 | --- a/target/arm/cpu.c |
34 | index XXXXXXX..XXXXXXX 100644 | 32 | +++ b/target/arm/cpu.c |
35 | --- a/target/arm/translate-a64.h | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
36 | +++ b/target/arm/translate-a64.h | 34 | env->sau.ctrl = 0; |
37 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno) | 35 | } |
38 | /* Return the byte size of the "whole" vector register, VL / 8. */ | 36 | |
39 | static inline int vec_full_reg_size(DisasContext *s) | 37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); |
40 | { | 38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
41 | - return s->sve_len; | 39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
42 | + return s->vl; | 40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/mve_helper.c | ||
54 | +++ b/target/arm/tcg/mve_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
56 | continue; \ | ||
57 | } \ | ||
58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
59 | - &env->vfp.standard_fp_status; \ | ||
60 | + &env->vfp.fp_status[FPST_STD]; \ | ||
61 | if (!(mask & 1)) { \ | ||
62 | /* We need the result but without updating flags */ \ | ||
63 | scratch_fpst = *fpst; \ | ||
64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
65 | continue; \ | ||
66 | } \ | ||
67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
43 | } | 192 | } |
44 | 193 | ||
45 | bool disas_sve(DisasContext *, uint32_t); | 194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
46 | diff --git a/target/arm/translate.h b/target/arm/translate.h | 195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
47 | index XXXXXXX..XXXXXXX 100644 | 196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
48 | --- a/target/arm/translate.h | 197 | |
49 | +++ b/target/arm/translate.h | 198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
50 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | 199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
51 | bool ns; /* Use non-secure CPREG bank on access */ | 200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
52 | int fp_excp_el; /* FP exception EL or 0 if enabled */ | ||
53 | int sve_excp_el; /* SVE exception EL or 0 if enabled */ | ||
54 | - int sve_len; /* SVE vector length in bytes */ | ||
55 | + int vl; /* current vector length in bytes */ | ||
56 | /* Flag indicating that exceptions from secure mode are routed to EL3. */ | ||
57 | bool secure_routed_to_el3; | ||
58 | bool vfp_enabled; /* FP enabled via FPSCR.EN */ | ||
59 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/arm/helper.c | ||
62 | +++ b/target/arm/helper.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
64 | zcr_len = sve_zcr_len_for_el(env, el); | ||
65 | } | ||
66 | DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); | ||
67 | - DP_TBFLAG_A64(flags, ZCR_LEN, zcr_len); | ||
68 | + DP_TBFLAG_A64(flags, VL, zcr_len); | ||
69 | } | ||
70 | |||
71 | sctlr = regime_sctlr(env, stage1); | ||
72 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/arm/translate-a64.c | ||
75 | +++ b/target/arm/translate-a64.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
77 | dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); | ||
78 | dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); | ||
79 | dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); | ||
80 | - dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16; | ||
81 | + dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; | ||
82 | dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); | ||
83 | dc->bt = EX_TBFLAG_A64(tb_flags, BT); | ||
84 | dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); | ||
85 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/translate-sve.c | ||
88 | +++ b/target/arm/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_offset(DisasContext *s, int regno) | ||
90 | /* Return the byte size of the whole predicate register, VL / 64. */ | ||
91 | static inline int pred_full_reg_size(DisasContext *s) | ||
92 | { | ||
93 | - return s->sve_len >> 3; | ||
94 | + return s->vl >> 3; | ||
95 | } | 201 | } |
96 | 202 | ||
97 | /* Round up the size of a register to a size allowed by | 203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/target/arm/vfp_helper.c | ||
206 | +++ b/target/arm/vfp_helper.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
208 | uint32_t a32_flags = 0, a64_flags = 0; | ||
209 | |||
210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
213 | /* FZ16 does not generate an input denormal exception. */ | ||
214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
215 | & ~float_flag_input_denormal_flushed); | ||
216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
98 | -- | 225 | -- |
99 | 2.25.1 | 226 | 2.34.1 |
227 | |||
228 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This register is allocated from the existing block of id registers, | 3 | Replace with fp_status[FPST_AH_F16]. |
4 | so it is already RES0 for cpus that do not implement SME. | ||
5 | 4 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220607203306.657998-21-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | target/arm/cpu.h | 25 +++++++++++++++++++++++++ | 10 | target/arm/cpu.h | 3 +-- |
12 | target/arm/helper.c | 4 ++-- | 11 | target/arm/cpu.c | 2 +- |
13 | target/arm/kvm64.c | 11 +++++++---- | 12 | target/arm/vfp_helper.c | 10 +++++----- |
14 | 3 files changed, 34 insertions(+), 6 deletions(-) | 13 | 3 files changed, 7 insertions(+), 8 deletions(-) |
15 | 14 | ||
16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
17 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/cpu.h | 17 | --- a/target/arm/cpu.h |
19 | +++ b/target/arm/cpu.h | 18 | +++ b/target/arm/cpu.h |
20 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
21 | uint64_t id_aa64dfr0; | 20 | * behaviour when FPCR.AH == 1: they don't update cumulative |
22 | uint64_t id_aa64dfr1; | 21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
23 | uint64_t id_aa64zfr0; | 22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
24 | + uint64_t id_aa64smfr0; | 23 | - * which means we need an ah_fp_status_f16 as well. |
25 | uint64_t reset_pmcr_el0; | 24 | + * which means we need an FPST_AH_F16 as well. |
26 | } isar; | 25 | * |
27 | uint64_t midr; | 26 | * To avoid having to transfer exception bits around, we simply |
28 | @@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64ZFR0, I8MM, 44, 4) | 27 | * say that the FPSCR cumulative exception flags are the logical |
29 | FIELD(ID_AA64ZFR0, F32MM, 52, 4) | 28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
30 | FIELD(ID_AA64ZFR0, F64MM, 56, 4) | 29 | float_status fp_status_f16_a32; |
31 | 30 | float_status fp_status_f16_a64; | |
32 | +FIELD(ID_AA64SMFR0, F32F32, 32, 1) | 31 | float_status ah_fp_status; |
33 | +FIELD(ID_AA64SMFR0, B16F32, 34, 1) | 32 | - float_status ah_fp_status_f16; |
34 | +FIELD(ID_AA64SMFR0, F16F32, 35, 1) | 33 | }; |
35 | +FIELD(ID_AA64SMFR0, I8I32, 36, 4) | 34 | }; |
36 | +FIELD(ID_AA64SMFR0, F64F64, 48, 1) | 35 | |
37 | +FIELD(ID_AA64SMFR0, I16I64, 52, 4) | 36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
38 | +FIELD(ID_AA64SMFR0, SMEVER, 56, 4) | 37 | index XXXXXXX..XXXXXXX 100644 |
39 | +FIELD(ID_AA64SMFR0, FA64, 63, 1) | 38 | --- a/target/arm/cpu.c |
40 | + | 39 | +++ b/target/arm/cpu.c |
41 | FIELD(ID_DFR0, COPDBG, 0, 4) | 40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
42 | FIELD(ID_DFR0, COPSDBG, 4, 4) | 41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); |
43 | FIELD(ID_DFR0, MMAPDBG, 8, 4) | 42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); |
44 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) | 43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); |
45 | return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; | 44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); |
45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
46 | |||
47 | #ifndef CONFIG_USER_ONLY | ||
48 | if (kvm_enabled()) { | ||
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/vfp_helper.c | ||
52 | +++ b/target/arm/vfp_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
56 | /* | ||
57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
59 | * they are used for insns that must not set the cumulative exception bits. | ||
60 | */ | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
46 | } | 68 | } |
47 | 69 | ||
48 | +static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id) | 70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
49 | +{ | 71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
50 | + return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64); | 72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
51 | +} | 73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
52 | + | 74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
53 | +static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id) | 75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
54 | +{ | 76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
55 | + return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf; | 77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
56 | +} | 78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
57 | + | 79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
58 | +static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id) | 80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
59 | +{ | 81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
60 | + return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64); | 82 | } |
61 | +} | 83 | if (changed & FPCR_FZ) { |
62 | + | 84 | bool ftz_enabled = val & FPCR_FZ; |
63 | /* | 85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
64 | * Feature tests for "does this exist in either 32-bit or 64-bit?" | 86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
65 | */ | 87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
66 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); |
67 | index XXXXXXX..XXXXXXX 100644 | 89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); |
68 | --- a/target/arm/helper.c | 90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
69 | +++ b/target/arm/helper.c | 91 | } |
70 | @@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu) | 92 | if (changed & FPCR_AH) { |
71 | .access = PL1_R, .type = ARM_CP_CONST, | 93 | bool ah_enabled = val & FPCR_AH; |
72 | .accessfn = access_aa64_tid3, | ||
73 | .resetvalue = cpu->isar.id_aa64zfr0 }, | ||
74 | - { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64, | ||
75 | + { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64, | ||
76 | .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, | ||
77 | .access = PL1_R, .type = ARM_CP_CONST, | ||
78 | .accessfn = access_aa64_tid3, | ||
79 | - .resetvalue = 0 }, | ||
80 | + .resetvalue = cpu->isar.id_aa64smfr0 }, | ||
81 | { .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, | ||
82 | .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6, | ||
83 | .access = PL1_R, .type = ARM_CP_CONST, | ||
84 | diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/target/arm/kvm64.c | ||
87 | +++ b/target/arm/kvm64.c | ||
88 | @@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) | ||
89 | } else { | ||
90 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, | ||
91 | ARM64_SYS_REG(3, 0, 0, 4, 1)); | ||
92 | + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, | ||
93 | + ARM64_SYS_REG(3, 0, 0, 4, 5)); | ||
94 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, | ||
95 | ARM64_SYS_REG(3, 0, 0, 5, 0)); | ||
96 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, | ||
97 | @@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) | ||
98 | ahcf->isar.id_aa64pfr0 = t; | ||
99 | |||
100 | /* | ||
101 | - * Before v5.1, KVM did not support SVE and did not expose | ||
102 | - * ID_AA64ZFR0_EL1 even as RAZ. After v5.1, KVM still does | ||
103 | - * not expose the register to "user" requests like this | ||
104 | - * unless the host supports SVE. | ||
105 | + * There is a range of kernels between kernel commit 73433762fcae | ||
106 | + * and f81cb2c3ad41 which have a bug where the kernel doesn't expose | ||
107 | + * SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has enabled | ||
108 | + * SVE support, so we only read it here, rather than together with all | ||
109 | + * the other ID registers earlier. | ||
110 | */ | ||
111 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, | ||
112 | ARM64_SYS_REG(3, 0, 0, 4, 4)); | ||
113 | -- | 94 | -- |
114 | 2.25.1 | 95 | 2.34.1 |
96 | |||
97 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This will be used for implementing FEAT_SME. | 3 | Replace with fp_status[FPST_AH]. |
4 | 4 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20220607203306.657998-20-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | --- | 9 | --- |
10 | target/arm/cpu.h | 5 +++++ | 10 | target/arm/cpu.h | 3 +-- |
11 | 1 file changed, 5 insertions(+) | 11 | target/arm/cpu.c | 6 +++--- |
12 | target/arm/vfp_helper.c | 6 +++--- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
12 | 14 | ||
13 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/cpu.h | 17 | --- a/target/arm/cpu.h |
16 | +++ b/target/arm/cpu.h | 18 | +++ b/target/arm/cpu.h |
17 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
18 | return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; | 20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
21 | * using a fixed value for it. | ||
22 | * | ||
23 | - * The ah_fp_status is needed because some insns have different | ||
24 | + * FPST_AH is needed because some insns have different | ||
25 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_a64; | ||
30 | float_status fp_status_f16_a32; | ||
31 | float_status fp_status_f16_a64; | ||
32 | - float_status ah_fp_status; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
19 | } | 73 | } |
20 | 74 | ||
21 | +static inline bool isar_feature_aa64_sme(const ARMISARegisters *id) | 75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
22 | +{ | 76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
23 | + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0; | 77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
24 | +} | 78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
25 | + | 79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); |
26 | static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) | 80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); |
27 | { | 81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
28 | return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && | 82 | } |
83 | if (changed & FPCR_AH) { | ||
29 | -- | 84 | -- |
30 | 2.25.1 | 85 | 2.34.1 |
86 | |||
87 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Replace with fp_status[FPST_A64_F16]. | ||
4 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-25-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 9 | --- |
8 | target/arm/ptw.h | 1 - | 10 | target/arm/cpu.h | 1 - |
9 | target/arm/helper.c | 16 ---------------- | 11 | target/arm/cpu.c | 2 +- |
10 | target/arm/ptw.c | 16 ++++++++++++++++ | 12 | target/arm/tcg/sme_helper.c | 2 +- |
11 | 3 files changed, 16 insertions(+), 17 deletions(-) | 13 | target/arm/tcg/vec_helper.c | 9 ++++----- |
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 14 insertions(+), 16 deletions(-) | ||
12 | 16 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
14 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 19 | --- a/target/arm/cpu.h |
16 | +++ b/target/arm/ptw.h | 20 | +++ b/target/arm/cpu.h |
17 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
18 | #ifndef CONFIG_USER_ONLY | 22 | float_status fp_status_a32; |
19 | 23 | float_status fp_status_a64; | |
20 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | 24 | float_status fp_status_f16_a32; |
21 | -uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn); | 25 | - float_status fp_status_f16_a64; |
22 | 26 | }; | |
23 | #endif /* !CONFIG_USER_ONLY */ | 27 | }; |
24 | #endif /* TARGET_ARM_PTW_H */ | 28 | |
25 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
26 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/target/arm/helper.c | 31 | --- a/target/arm/cpu.c |
28 | +++ b/target/arm/helper.c | 32 | +++ b/target/arm/cpu.c |
29 | @@ -XXX,XX +XXX,XX @@ bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
30 | return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; | 34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | } | ||
61 | } | ||
62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
31 | } | 65 | } |
32 | 66 | ||
33 | -/* Return the TTBR associated with this translation regime */ | 67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
34 | -uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
35 | -{ | 69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
36 | - if (mmu_idx == ARMMMUIdx_Stage2) { | 70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
37 | - return env->cp15.vttbr_el2; | 71 | float_status *status = &env->vfp.fp_status_a64; |
38 | - } | 72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
39 | - if (mmu_idx == ARMMMUIdx_Stage2_S) { | 73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
40 | - return env->cp15.vsttbr_el2; | 74 | int negx = 0, negf = 0; |
41 | - } | 75 | |
42 | - if (ttbrn == 0) { | 76 | if (is_s) { |
43 | - return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
44 | - } else { | 78 | } |
45 | - return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; | 79 | } |
46 | - } | 80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
47 | -} | 81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
48 | - | 99 | - |
49 | /* Convert a possible stage1+2 MMU index into the appropriate | 100 | for (i = 0; i < oprsz; i += 16) { |
50 | * stage 1 MMU index | 101 | float16 mm_16 = *(float16 *)(vm + i + idx); |
51 | */ | 102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); |
52 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
53 | index XXXXXXX..XXXXXXX 100644 | 104 | index XXXXXXX..XXXXXXX 100644 |
54 | --- a/target/arm/ptw.c | 105 | --- a/target/arm/vfp_helper.c |
55 | +++ b/target/arm/ptw.c | 106 | +++ b/target/arm/vfp_helper.c |
56 | @@ -XXX,XX +XXX,XX @@ static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | 107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
108 | & ~float_flag_input_denormal_flushed); | ||
109 | |||
110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
57 | } | 131 | } |
58 | } | 132 | if (changed & FPCR_FZ16) { |
59 | 133 | bool ftz_enabled = val & FPCR_FZ16; | |
60 | +/* Return the TTBR associated with this translation regime */ | 134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
61 | +static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) | 135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
62 | +{ | 136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
63 | + if (mmu_idx == ARMMMUIdx_Stage2) { | 137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
64 | + return env->cp15.vttbr_el2; | 138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
65 | + } | 139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
66 | + if (mmu_idx == ARMMMUIdx_Stage2_S) { | 140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
67 | + return env->cp15.vsttbr_el2; | 141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
68 | + } | 142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
69 | + if (ttbrn == 0) { | 143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
70 | + return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; | 144 | } |
71 | + } else { | 145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
72 | + return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; | 146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); |
73 | + } | 147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
74 | +} | 148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
75 | + | 149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
76 | static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) | 150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
77 | { | 151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); |
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
165 | } | ||
78 | /* | 166 | /* |
79 | -- | 167 | -- |
80 | 2.25.1 | 168 | 2.34.1 |
169 | |||
170 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The use of ARM_CPU to recover env from cs calls | 3 | Replace with fp_status[FPST_A32_F16]. |
4 | object_class_dynamic_cast, which shows up on the profile. | ||
5 | This is pointless, because all callers already have env, and | ||
6 | the reverse operation, env_cpu, is only pointer arithmetic. | ||
7 | 4 | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220604040607.269301-29-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 9 | --- |
13 | target/arm/ptw.c | 23 +++++++++-------------- | 10 | target/arm/cpu.h | 1 - |
14 | 1 file changed, 9 insertions(+), 14 deletions(-) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/vec_helper.c | 4 ++-- | ||
13 | target/arm/vfp_helper.c | 14 +++++++------- | ||
14 | 4 files changed, 10 insertions(+), 11 deletions(-) | ||
15 | 15 | ||
16 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/ptw.c | 18 | --- a/target/arm/cpu.h |
19 | +++ b/target/arm/ptw.c | 19 | +++ b/target/arm/cpu.h |
20 | @@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
21 | struct { | ||
22 | float_status fp_status_a32; | ||
23 | float_status fp_status_a64; | ||
24 | - float_status fp_status_f16_a32; | ||
25 | }; | ||
26 | }; | ||
27 | |||
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
21 | } | 51 | } |
22 | 52 | ||
23 | /* All loads done in the course of a page table walk go through here. */ | 53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
24 | -static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
25 | +static uint32_t arm_ldl_ptw(CPUARMState *env, hwaddr addr, bool is_secure, | 55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
26 | ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 56 | |
27 | { | 57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
28 | - ARMCPU *cpu = ARM_CPU(cs); | 58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
29 | - CPUARMState *env = &cpu->env; | 59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
30 | + CPUState *cs = env_cpu(env); | ||
31 | MemTxAttrs attrs = {}; | ||
32 | MemTxResult result = MEMTX_OK; | ||
33 | AddressSpace *as; | ||
34 | @@ -XXX,XX +XXX,XX @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
35 | return 0; | ||
36 | } | 60 | } |
37 | 61 | ||
38 | -static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
39 | +static uint64_t arm_ldq_ptw(CPUARMState *env, hwaddr addr, bool is_secure, | 63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
40 | ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 64 | index XXXXXXX..XXXXXXX 100644 |
41 | { | 65 | --- a/target/arm/vfp_helper.c |
42 | - ARMCPU *cpu = ARM_CPU(cs); | 66 | +++ b/target/arm/vfp_helper.c |
43 | - CPUARMState *env = &cpu->env; | 67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
44 | + CPUState *cs = env_cpu(env); | 68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
45 | MemTxAttrs attrs = {}; | 69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
46 | MemTxResult result = MEMTX_OK; | 70 | /* FZ16 does not generate an input denormal exception. */ |
47 | AddressSpace *as; | 71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
48 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) |
49 | target_ulong *page_size, | 73 | & ~float_flag_input_denormal_flushed); |
50 | ARMMMUFaultInfo *fi) | 74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) |
51 | { | 75 | & ~float_flag_input_denormal_flushed); |
52 | - CPUState *cs = env_cpu(env); | 76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
53 | int level = 1; | 77 | */ |
54 | uint32_t table; | 78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); |
55 | uint32_t desc; | 79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); |
56 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
57 | fi->type = ARMFault_Translation; | 81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); |
58 | goto do_fault; | 82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); |
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | } | ||
87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
59 | } | 92 | } |
60 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | 93 | if (changed & FPCR_FZ16) { |
61 | + desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx), | 94 | bool ftz_enabled = val & FPCR_FZ16; |
62 | mmu_idx, fi); | 95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
63 | if (fi->type != ARMFault_None) { | 96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
64 | goto do_fault; | 97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
65 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
66 | /* Fine pagetable. */ | 99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
67 | table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); | 100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
68 | } | 101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
69 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | 102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
70 | + desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx), | 103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
71 | mmu_idx, fi); | 104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
72 | if (fi->type != ARMFault_None) { | 105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
73 | goto do_fault; | 106 | bool dnan_enabled = val & FPCR_DN; |
74 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | 107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); |
75 | hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | 108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
76 | target_ulong *page_size, ARMMMUFaultInfo *fi) | 109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
77 | { | 110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
78 | - CPUState *cs = env_cpu(env); | 111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
79 | ARMCPU *cpu = env_archcpu(env); | 112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); |
80 | int level = 1; | 113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
81 | uint32_t table; | 114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
82 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | 115 | softfloat_to_vfp_compare(env, \ |
83 | fi->type = ARMFault_Translation; | 116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ |
84 | goto do_fault; | 117 | } |
85 | } | 118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) |
86 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | 119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
87 | + desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx), | 120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) |
88 | mmu_idx, fi); | 121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) |
89 | if (fi->type != ARMFault_None) { | 122 | #undef DO_VFP_cmp |
90 | goto do_fault; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
92 | ns = extract32(desc, 3, 1); | ||
93 | /* Lookup l2 entry. */ | ||
94 | table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); | ||
95 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
96 | + desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx), | ||
97 | mmu_idx, fi); | ||
98 | if (fi->type != ARMFault_None) { | ||
99 | goto do_fault; | ||
100 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
101 | ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | ||
102 | { | ||
103 | ARMCPU *cpu = env_archcpu(env); | ||
104 | - CPUState *cs = CPU(cpu); | ||
105 | /* Read an LPAE long-descriptor translation table. */ | ||
106 | ARMFaultType fault_type = ARMFault_Translation; | ||
107 | uint32_t level; | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, | ||
109 | descaddr |= (address >> (stride * (4 - level))) & indexmask; | ||
110 | descaddr &= ~7ULL; | ||
111 | nstable = extract32(tableattrs, 4, 1); | ||
112 | - descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi); | ||
113 | + descriptor = arm_ldq_ptw(env, descaddr, !nstable, mmu_idx, fi); | ||
114 | if (fi->type != ARMFault_None) { | ||
115 | goto do_fault; | ||
116 | } | ||
117 | -- | 123 | -- |
118 | 2.25.1 | 124 | 2.34.1 |
125 | |||
126 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add an interface function to extract the digested vector length | 3 | Replace with fp_status[FPST_A64]. |
4 | rather than the raw zcr_el[1] value. This fixes an incorrect | ||
5 | return from do_prctl_set_vl where we didn't take into account | ||
6 | the set of vector lengths supported by the cpu. | ||
7 | 4 | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20220607203306.657998-3-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 9 | --- |
13 | linux-user/aarch64/target_prctl.h | 20 +++++++++++++------- | 10 | target/arm/cpu.h | 1 - |
14 | target/arm/cpu.h | 11 +++++++++++ | 11 | target/arm/cpu.c | 2 +- |
15 | linux-user/aarch64/signal.c | 4 ++-- | 12 | target/arm/tcg/sme_helper.c | 2 +- |
16 | 3 files changed, 26 insertions(+), 9 deletions(-) | 13 | target/arm/tcg/vec_helper.c | 10 +++++----- |
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 15 insertions(+), 16 deletions(-) | ||
17 | 16 | ||
18 | diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/linux-user/aarch64/target_prctl.h | ||
21 | +++ b/linux-user/aarch64/target_prctl.h | ||
22 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env) | ||
23 | { | ||
24 | ARMCPU *cpu = env_archcpu(env); | ||
25 | if (cpu_isar_feature(aa64_sve, cpu)) { | ||
26 | - return ((cpu->env.vfp.zcr_el[1] & 0xf) + 1) * 16; | ||
27 | + return sve_vq(env) * 16; | ||
28 | } | ||
29 | return -TARGET_EINVAL; | ||
30 | } | ||
31 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2) | ||
32 | */ | ||
33 | if (cpu_isar_feature(aa64_sve, env_archcpu(env)) | ||
34 | && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) { | ||
35 | - ARMCPU *cpu = env_archcpu(env); | ||
36 | uint32_t vq, old_vq; | ||
37 | |||
38 | - old_vq = (env->vfp.zcr_el[1] & 0xf) + 1; | ||
39 | - vq = MAX(arg2 / 16, 1); | ||
40 | - vq = MIN(vq, cpu->sve_max_vq); | ||
41 | + old_vq = sve_vq(env); | ||
42 | |||
43 | + /* | ||
44 | + * Bound the value of arg2, so that we know that it fits into | ||
45 | + * the 4-bit field in ZCR_EL1. Rely on the hflags rebuild to | ||
46 | + * sort out the length supported by the cpu. | ||
47 | + */ | ||
48 | + vq = MAX(arg2 / 16, 1); | ||
49 | + vq = MIN(vq, ARM_MAX_VQ); | ||
50 | + env->vfp.zcr_el[1] = vq - 1; | ||
51 | + arm_rebuild_hflags(env); | ||
52 | + | ||
53 | + vq = sve_vq(env); | ||
54 | if (vq < old_vq) { | ||
55 | aarch64_sve_narrow_vq(env, vq); | ||
56 | } | ||
57 | - env->vfp.zcr_el[1] = vq - 1; | ||
58 | - arm_rebuild_hflags(env); | ||
59 | return vq * 16; | ||
60 | } | ||
61 | return -TARGET_EINVAL; | ||
62 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
63 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
64 | --- a/target/arm/cpu.h | 19 | --- a/target/arm/cpu.h |
65 | +++ b/target/arm/cpu.h | 20 | +++ b/target/arm/cpu.h |
66 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUARMState *env, bool ifetch) | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
67 | return EX_TBFLAG_ANY(env->hflags, MMUIDX); | 22 | float_status fp_status[FPST_COUNT]; |
23 | struct { | ||
24 | float_status fp_status_a32; | ||
25 | - float_status fp_status_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/cpu.c | ||
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * round-to-odd -- see above. | ||
48 | */ | ||
49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
50 | - fpst_std = env->vfp.fp_status_a64; | ||
51 | + fpst_std = env->vfp.fp_status[FPST_A64]; | ||
52 | set_default_nan_mode(true, &fpst_std); | ||
53 | set_default_nan_mode(true, &fpst_f16); | ||
54 | fpst_odd = fpst_std; | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
62 | } | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
68 | } | 66 | } |
69 | 67 | ||
70 | +/** | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
71 | + * sve_vq | 69 | intptr_t i, oprsz = simd_oprsz(desc); |
72 | + * @env: the cpu context | 70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
73 | + * | 71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
74 | + * Return the VL cached within env->hflags, in units of quadwords. | 72 | - float_status *status = &env->vfp.fp_status_a64; |
75 | + */ | 73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; |
76 | +static inline int sve_vq(CPUARMState *env) | 74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
77 | +{ | 75 | int negx = 0, negf = 0; |
78 | + return EX_TBFLAG_A64(env->hflags, VL) + 1; | 76 | |
79 | +} | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
80 | + | 78 | negx = 0x8000800080008000ull; |
81 | static inline bool bswap_code(bool sctlr_b) | 79 | } |
82 | { | 80 | } |
83 | #ifdef CONFIG_USER_ONLY | 81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
84 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | 82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
96 | */ | ||
97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
98 | |||
99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; | ||
100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
101 | set_default_nan_mode(true, statusp); | ||
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | 105 | index XXXXXXX..XXXXXXX 100644 |
86 | --- a/linux-user/aarch64/signal.c | 106 | --- a/target/arm/vfp_helper.c |
87 | +++ b/linux-user/aarch64/signal.c | 107 | +++ b/target/arm/vfp_helper.c |
88 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
89 | 109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | |
90 | case TARGET_SVE_MAGIC: | 110 | & ~float_flag_input_denormal_flushed); |
91 | if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 111 | |
92 | - vq = (env->vfp.zcr_el[1] & 0xf) + 1; | 112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); |
93 | + vq = sve_vq(env); | 113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); |
94 | sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); | 114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) |
95 | if (!sve && size == sve_size) { | 115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); |
96 | sve = (struct target_sve_context *)ctx; | 116 | /* |
97 | @@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka, | 117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
98 | 118 | * be the architecturally up-to-date exception flag information first. | |
99 | /* SVE state needs saving only if it exists. */ | 119 | */ |
100 | if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); |
101 | - vq = (env->vfp.zcr_el[1] & 0xf) + 1; | 121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); |
102 | + vq = sve_vq(env); | 122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); |
103 | sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); | 123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); |
104 | sve_ofs = alloc_sigframe_space(sve_size, &layout); | 124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); |
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
127 | break; | ||
128 | } | ||
129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
134 | } | ||
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
166 | } else { | ||
167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
170 | } | ||
105 | } | 171 | } |
106 | -- | 172 | -- |
107 | 2.25.1 | 173 | 2.34.1 |
174 | |||
175 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | We don't need to constrain the value set in zcr_el[1], | 3 | Replace with fp_status[FPST_A32]. As this was the last of the |
4 | because it will be done by sve_zcr_len_for_el. | 4 | old structures, we can remove the anonymous union and struct. |
5 | 5 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220607203306.657998-10-richard.henderson@linaro.org | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org | ||
9 | [PMM: tweak to account for change to is_ebf()] | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 11 | --- |
11 | target/arm/cpu.c | 3 +-- | 12 | target/arm/cpu.h | 7 +------ |
12 | 1 file changed, 1 insertion(+), 2 deletions(-) | 13 | target/arm/cpu.c | 2 +- |
14 | target/arm/tcg/vec_helper.c | 2 +- | ||
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
13 | 17 | ||
18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/cpu.h | ||
21 | +++ b/target/arm/cpu.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
23 | uint32_t scratch[8]; | ||
24 | |||
25 | /* There are a number of distinct float control structures. */ | ||
26 | - union { | ||
27 | - float_status fp_status[FPST_COUNT]; | ||
28 | - struct { | ||
29 | - float_status fp_status_a32; | ||
30 | - }; | ||
31 | - }; | ||
32 | + float_status fp_status[FPST_COUNT]; | ||
33 | |||
34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
14 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
15 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/cpu.c | 38 | --- a/target/arm/cpu.c |
17 | +++ b/target/arm/cpu.c | 39 | +++ b/target/arm/cpu.c |
18 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev) | 40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
19 | CPACR_EL1, ZEN, 3); | 41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
20 | /* with reasonable vector length */ | 42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
21 | if (cpu_isar_feature(aa64_sve, cpu)) { | 43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
22 | - env->vfp.zcr_el[1] = | 44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
23 | - aarch64_sve_zcr_get_valid_len(cpu, cpu->sve_default_vq - 1); | 45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); |
24 | + env->vfp.zcr_el[1] = cpu->sve_default_vq - 1; | 46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
54 | */ | ||
55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
56 | |||
57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; | ||
59 | set_default_nan_mode(true, statusp); | ||
60 | |||
61 | if (ebf) { | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
25 | } | 87 | } |
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
94 | } | ||
95 | if (changed & FPCR_FZ) { | ||
96 | bool ftz_enabled = val & FPCR_FZ; | ||
97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
26 | /* | 105 | /* |
27 | * Enable 48-bit address space (TODO: take reserved_va into account). | 106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
134 | |||
28 | -- | 135 | -- |
29 | 2.25.1 | 136 | 2.34.1 |
137 | |||
138 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The ARM pseudocode function NVL uses this predicate now, | 3 | Select on index instead of pointer. |
4 | and I think it's a bit clearer. Simplify the pseudocode | 4 | No functional change. |
5 | condition by noting that IsInHost is always false for EL1. | ||
6 | 5 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220607203306.657998-7-richard.henderson@linaro.org | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 10 | --- |
12 | target/arm/helper.c | 3 +-- | 11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ |
13 | 1 file changed, 1 insertion(+), 2 deletions(-) | 12 | 1 file changed, 14 insertions(+), 26 deletions(-) |
14 | 13 | ||
15 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/helper.c | 16 | --- a/target/arm/tcg/mve_helper.c |
18 | +++ b/target/arm/helper.c | 17 | +++ b/target/arm/tcg/mve_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) | 18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
20 | ARMCPU *cpu = env_archcpu(env); | 19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
21 | uint32_t zcr_len = cpu->sve_max_vq - 1; | 20 | continue; \ |
22 | 21 | } \ | |
23 | - if (el <= 1 && | 22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
24 | - (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { | 23 | - &env->vfp.fp_status[FPST_STD]; \ |
25 | + if (el <= 1 && !el_is_in_host(env, el)) { | 24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
26 | zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); | 25 | if (!(mask & 1)) { \ |
27 | } | 26 | /* We need the result but without updating flags */ \ |
28 | if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) { | 27 | scratch_fpst = *fpst; \ |
28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
29 | r[e] = 0; \ | ||
30 | continue; \ | ||
31 | } \ | ||
32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
33 | - &env->vfp.fp_status[FPST_STD]; \ | ||
34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
35 | if (!(tm & 1)) { \ | ||
36 | /* We need the result but without updating flags */ \ | ||
37 | scratch_fpst = *fpst; \ | ||
38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
40 | continue; \ | ||
41 | } \ | ||
42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
43 | - &env->vfp.fp_status[FPST_STD]; \ | ||
44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
45 | if (!(mask & 1)) { \ | ||
46 | /* We need the result but without updating flags */ \ | ||
47 | scratch_fpst = *fpst; \ | ||
48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
50 | continue; \ | ||
51 | } \ | ||
52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
53 | - &env->vfp.fp_status[FPST_STD]; \ | ||
54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
55 | fpst1 = fpst0; \ | ||
56 | if (!(mask & 1)) { \ | ||
57 | scratch_fpst = *fpst0; \ | ||
58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
60 | continue; \ | ||
61 | } \ | ||
62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
63 | - &env->vfp.fp_status[FPST_STD]; \ | ||
64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
65 | if (!(mask & 1)) { \ | ||
66 | /* We need the result but without updating flags */ \ | ||
67 | scratch_fpst = *fpst; \ | ||
68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
70 | continue; \ | ||
71 | } \ | ||
72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
73 | - &env->vfp.fp_status[FPST_STD]; \ | ||
74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
75 | if (!(mask & 1)) { \ | ||
76 | /* We need the result but without updating flags */ \ | ||
77 | scratch_fpst = *fpst; \ | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
79 | unsigned e; \ | ||
80 | TYPE *m = vm; \ | ||
81 | TYPE ra = (TYPE)ra_in; \ | ||
82 | - float_status *fpst = (ESIZE == 2) ? \ | ||
83 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
84 | - &env->vfp.fp_status[FPST_STD]; \ | ||
85 | + float_status *fpst = \ | ||
86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
88 | if (mask & 1) { \ | ||
89 | TYPE v = m[H##ESIZE(e)]; \ | ||
90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
91 | if ((mask & emask) == 0) { \ | ||
92 | continue; \ | ||
93 | } \ | ||
94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.fp_status[FPST_STD]; \ | ||
96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
97 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
29 | -- | 142 | -- |
30 | 2.25.1 | 143 | 2.34.1 |
144 | |||
145 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | We handle this routing in raise_exception. Promoting the value early | 3 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. |
4 | means that we can't directly compare FPEXC_EL and SVEEXC_EL. | ||
5 | 4 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220607203306.657998-4-richard.henderson@linaro.org | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | target/arm/helper.c | 3 +-- | 10 | target/arm/vfp_helper.c | 10 +++++----- |
12 | 1 file changed, 1 insertion(+), 2 deletions(-) | 11 | 1 file changed, 5 insertions(+), 5 deletions(-) |
13 | 12 | ||
14 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper.c | 15 | --- a/target/arm/vfp_helper.c |
17 | +++ b/target/arm/helper.c | 16 | +++ b/target/arm/vfp_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el) | 17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
19 | /* fall through */ | 18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
20 | case 0: | 19 | { \ |
21 | case 2: | 20 | softfloat_to_vfp_compare(env, \ |
22 | - /* route_to_el2 */ | 21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
23 | - return hcr_el2 & HCR_TGE ? 2 : 1; | 22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
24 | + return 1; | 23 | } \ |
25 | } | 24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
26 | 25 | { \ | |
27 | /* Check CPACR.FPEN. */ | 26 | softfloat_to_vfp_compare(env, \ |
27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | ||
29 | } | ||
30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) | ||
34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) | ||
35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) | ||
36 | #undef DO_VFP_cmp | ||
37 | |||
38 | /* Integer to float and float to integer conversions */ | ||
28 | -- | 39 | -- |
29 | 2.25.1 | 40 | 2.34.1 |
41 | |||
42 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Read the bit from the source, rather than from the proxy via | ||
4 | get_flush_inputs_to_zero. This makes it clear that it does | ||
5 | not matter which of the float_status structures is used. | ||
6 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-4-richard.henderson@linaro.org | 8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 11 | --- |
8 | target/arm/ptw.h | 15 +++-- | 12 | target/arm/tcg/vec_helper.c | 12 ++++++------ |
9 | target/arm/helper.c | 137 +++----------------------------------------- | 13 | 1 file changed, 6 insertions(+), 6 deletions(-) |
10 | target/arm/ptw.c | 123 +++++++++++++++++++++++++++++++++++++++ | ||
11 | 3 files changed, 140 insertions(+), 135 deletions(-) | ||
12 | 14 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 17 | --- a/target/arm/tcg/vec_helper.c |
16 | +++ b/target/arm/ptw.h | 18 | +++ b/target/arm/tcg/vec_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
18 | 20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | |
19 | #ifndef CONFIG_USER_ONLY | 21 | |
20 | 22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | |
21 | +uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
22 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi); | 24 | + env->vfp.fpcr & FPCR_FZ16); |
23 | +uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
24 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi); | ||
25 | + | ||
26 | bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx); | ||
27 | bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx); | ||
28 | ARMCacheAttrs combine_cacheattrs(CPUARMState *env, | ||
29 | ARMCacheAttrs s1, ARMCacheAttrs s2); | ||
30 | |||
31 | -bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | ||
32 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
33 | - hwaddr *phys_ptr, int *prot, | ||
34 | - target_ulong *page_size, | ||
35 | - ARMMMUFaultInfo *fi); | ||
36 | +bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
37 | + uint32_t *table, uint32_t address); | ||
38 | +int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
39 | + int ap, int domain_prot); | ||
40 | + | ||
41 | bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, | ||
42 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
43 | hwaddr *phys_ptr, int *prot, | ||
44 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/target/arm/helper.c | ||
47 | +++ b/target/arm/helper.c | ||
48 | @@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
49 | * @ap: The 3-bit access permissions (AP[2:0]) | ||
50 | * @domain_prot: The 2-bit domain access permissions | ||
51 | */ | ||
52 | -static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
53 | - int ap, int domain_prot) | ||
54 | +int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot) | ||
55 | { | ||
56 | bool is_user = regime_is_user(env, mmu_idx); | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, | ||
59 | return prot_rw | PAGE_EXEC; | ||
60 | } | 25 | } |
61 | 26 | ||
62 | -static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
63 | - uint32_t *table, uint32_t address) | 28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
64 | +bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, | 29 | } |
65 | + uint32_t *table, uint32_t address) | 30 | } |
66 | { | 31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
67 | /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ | 32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
68 | TCR *tcr = regime_tcr(env, mmu_idx); | 33 | + env->vfp.fpcr & FPCR_FZ16); |
69 | @@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
70 | } | 34 | } |
71 | 35 | ||
72 | /* All loads done in the course of a page table walk go through here. */ | 36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
73 | -static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
74 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
75 | +uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
76 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 40 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
77 | { | 41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
78 | ARMCPU *cpu = ARM_CPU(cs); | 42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
79 | CPUARMState *env = &cpu->env; | 43 | int negx = 0, negf = 0; |
80 | @@ -XXX,XX +XXX,XX @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 44 | |
81 | return 0; | 45 | if (is_s) { |
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
48 | |||
49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
51 | + env->vfp.fpcr & FPCR_FZ16); | ||
82 | } | 52 | } |
83 | 53 | ||
84 | -static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
85 | - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
86 | +uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | 56 | } |
87 | + ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) | 57 | } |
88 | { | 58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
89 | ARMCPU *cpu = ARM_CPU(cs); | 59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
90 | CPUARMState *env = &cpu->env; | 60 | + env->vfp.fpcr & FPCR_FZ16); |
91 | @@ -XXX,XX +XXX,XX @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, | ||
92 | return 0; | ||
93 | } | 61 | } |
94 | 62 | ||
95 | -bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | 63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
96 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
97 | - hwaddr *phys_ptr, int *prot, | 65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
98 | - target_ulong *page_size, | 66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); |
99 | - ARMMMUFaultInfo *fi) | 67 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
100 | -{ | 68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
101 | - CPUState *cs = env_cpu(env); | 69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
102 | - int level = 1; | 70 | int negx = 0, negf = 0; |
103 | - uint32_t table; | 71 | |
104 | - uint32_t desc; | 72 | if (is_s) { |
105 | - int type; | ||
106 | - int ap; | ||
107 | - int domain = 0; | ||
108 | - int domain_prot; | ||
109 | - hwaddr phys_addr; | ||
110 | - uint32_t dacr; | ||
111 | - | ||
112 | - /* Pagetable walk. */ | ||
113 | - /* Lookup l1 descriptor. */ | ||
114 | - if (!get_level1_table_address(env, mmu_idx, &table, address)) { | ||
115 | - /* Section translation fault if page walk is disabled by PD0 or PD1 */ | ||
116 | - fi->type = ARMFault_Translation; | ||
117 | - goto do_fault; | ||
118 | - } | ||
119 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
120 | - mmu_idx, fi); | ||
121 | - if (fi->type != ARMFault_None) { | ||
122 | - goto do_fault; | ||
123 | - } | ||
124 | - type = (desc & 3); | ||
125 | - domain = (desc >> 5) & 0x0f; | ||
126 | - if (regime_el(env, mmu_idx) == 1) { | ||
127 | - dacr = env->cp15.dacr_ns; | ||
128 | - } else { | ||
129 | - dacr = env->cp15.dacr_s; | ||
130 | - } | ||
131 | - domain_prot = (dacr >> (domain * 2)) & 3; | ||
132 | - if (type == 0) { | ||
133 | - /* Section translation fault. */ | ||
134 | - fi->type = ARMFault_Translation; | ||
135 | - goto do_fault; | ||
136 | - } | ||
137 | - if (type != 2) { | ||
138 | - level = 2; | ||
139 | - } | ||
140 | - if (domain_prot == 0 || domain_prot == 2) { | ||
141 | - fi->type = ARMFault_Domain; | ||
142 | - goto do_fault; | ||
143 | - } | ||
144 | - if (type == 2) { | ||
145 | - /* 1Mb section. */ | ||
146 | - phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); | ||
147 | - ap = (desc >> 10) & 3; | ||
148 | - *page_size = 1024 * 1024; | ||
149 | - } else { | ||
150 | - /* Lookup l2 entry. */ | ||
151 | - if (type == 1) { | ||
152 | - /* Coarse pagetable. */ | ||
153 | - table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); | ||
154 | - } else { | ||
155 | - /* Fine pagetable. */ | ||
156 | - table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); | ||
157 | - } | ||
158 | - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
159 | - mmu_idx, fi); | ||
160 | - if (fi->type != ARMFault_None) { | ||
161 | - goto do_fault; | ||
162 | - } | ||
163 | - switch (desc & 3) { | ||
164 | - case 0: /* Page translation fault. */ | ||
165 | - fi->type = ARMFault_Translation; | ||
166 | - goto do_fault; | ||
167 | - case 1: /* 64k page. */ | ||
168 | - phys_addr = (desc & 0xffff0000) | (address & 0xffff); | ||
169 | - ap = (desc >> (4 + ((address >> 13) & 6))) & 3; | ||
170 | - *page_size = 0x10000; | ||
171 | - break; | ||
172 | - case 2: /* 4k page. */ | ||
173 | - phys_addr = (desc & 0xfffff000) | (address & 0xfff); | ||
174 | - ap = (desc >> (4 + ((address >> 9) & 6))) & 3; | ||
175 | - *page_size = 0x1000; | ||
176 | - break; | ||
177 | - case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ | ||
178 | - if (type == 1) { | ||
179 | - /* ARMv6/XScale extended small page format */ | ||
180 | - if (arm_feature(env, ARM_FEATURE_XSCALE) | ||
181 | - || arm_feature(env, ARM_FEATURE_V6)) { | ||
182 | - phys_addr = (desc & 0xfffff000) | (address & 0xfff); | ||
183 | - *page_size = 0x1000; | ||
184 | - } else { | ||
185 | - /* UNPREDICTABLE in ARMv5; we choose to take a | ||
186 | - * page translation fault. | ||
187 | - */ | ||
188 | - fi->type = ARMFault_Translation; | ||
189 | - goto do_fault; | ||
190 | - } | ||
191 | - } else { | ||
192 | - phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); | ||
193 | - *page_size = 0x400; | ||
194 | - } | ||
195 | - ap = (desc >> 4) & 3; | ||
196 | - break; | ||
197 | - default: | ||
198 | - /* Never happens, but compiler isn't smart enough to tell. */ | ||
199 | - g_assert_not_reached(); | ||
200 | - } | ||
201 | - } | ||
202 | - *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); | ||
203 | - *prot |= *prot ? PAGE_EXEC : 0; | ||
204 | - if (!(*prot & (1 << access_type))) { | ||
205 | - /* Access permission fault. */ | ||
206 | - fi->type = ARMFault_Permission; | ||
207 | - goto do_fault; | ||
208 | - } | ||
209 | - *phys_ptr = phys_addr; | ||
210 | - return false; | ||
211 | -do_fault: | ||
212 | - fi->domain = domain; | ||
213 | - fi->level = level; | ||
214 | - return true; | ||
215 | -} | ||
216 | - | ||
217 | bool get_phys_addr_v6(CPUARMState *env, uint32_t address, | ||
218 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
219 | hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, | ||
220 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
221 | index XXXXXXX..XXXXXXX 100644 | ||
222 | --- a/target/arm/ptw.c | ||
223 | +++ b/target/arm/ptw.c | ||
224 | @@ -XXX,XX +XXX,XX @@ | ||
225 | #include "ptw.h" | ||
226 | |||
227 | |||
228 | +static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, | ||
229 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
230 | + hwaddr *phys_ptr, int *prot, | ||
231 | + target_ulong *page_size, | ||
232 | + ARMMMUFaultInfo *fi) | ||
233 | +{ | ||
234 | + CPUState *cs = env_cpu(env); | ||
235 | + int level = 1; | ||
236 | + uint32_t table; | ||
237 | + uint32_t desc; | ||
238 | + int type; | ||
239 | + int ap; | ||
240 | + int domain = 0; | ||
241 | + int domain_prot; | ||
242 | + hwaddr phys_addr; | ||
243 | + uint32_t dacr; | ||
244 | + | ||
245 | + /* Pagetable walk. */ | ||
246 | + /* Lookup l1 descriptor. */ | ||
247 | + if (!get_level1_table_address(env, mmu_idx, &table, address)) { | ||
248 | + /* Section translation fault if page walk is disabled by PD0 or PD1 */ | ||
249 | + fi->type = ARMFault_Translation; | ||
250 | + goto do_fault; | ||
251 | + } | ||
252 | + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
253 | + mmu_idx, fi); | ||
254 | + if (fi->type != ARMFault_None) { | ||
255 | + goto do_fault; | ||
256 | + } | ||
257 | + type = (desc & 3); | ||
258 | + domain = (desc >> 5) & 0x0f; | ||
259 | + if (regime_el(env, mmu_idx) == 1) { | ||
260 | + dacr = env->cp15.dacr_ns; | ||
261 | + } else { | ||
262 | + dacr = env->cp15.dacr_s; | ||
263 | + } | ||
264 | + domain_prot = (dacr >> (domain * 2)) & 3; | ||
265 | + if (type == 0) { | ||
266 | + /* Section translation fault. */ | ||
267 | + fi->type = ARMFault_Translation; | ||
268 | + goto do_fault; | ||
269 | + } | ||
270 | + if (type != 2) { | ||
271 | + level = 2; | ||
272 | + } | ||
273 | + if (domain_prot == 0 || domain_prot == 2) { | ||
274 | + fi->type = ARMFault_Domain; | ||
275 | + goto do_fault; | ||
276 | + } | ||
277 | + if (type == 2) { | ||
278 | + /* 1Mb section. */ | ||
279 | + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); | ||
280 | + ap = (desc >> 10) & 3; | ||
281 | + *page_size = 1024 * 1024; | ||
282 | + } else { | ||
283 | + /* Lookup l2 entry. */ | ||
284 | + if (type == 1) { | ||
285 | + /* Coarse pagetable. */ | ||
286 | + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); | ||
287 | + } else { | ||
288 | + /* Fine pagetable. */ | ||
289 | + table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); | ||
290 | + } | ||
291 | + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), | ||
292 | + mmu_idx, fi); | ||
293 | + if (fi->type != ARMFault_None) { | ||
294 | + goto do_fault; | ||
295 | + } | ||
296 | + switch (desc & 3) { | ||
297 | + case 0: /* Page translation fault. */ | ||
298 | + fi->type = ARMFault_Translation; | ||
299 | + goto do_fault; | ||
300 | + case 1: /* 64k page. */ | ||
301 | + phys_addr = (desc & 0xffff0000) | (address & 0xffff); | ||
302 | + ap = (desc >> (4 + ((address >> 13) & 6))) & 3; | ||
303 | + *page_size = 0x10000; | ||
304 | + break; | ||
305 | + case 2: /* 4k page. */ | ||
306 | + phys_addr = (desc & 0xfffff000) | (address & 0xfff); | ||
307 | + ap = (desc >> (4 + ((address >> 9) & 6))) & 3; | ||
308 | + *page_size = 0x1000; | ||
309 | + break; | ||
310 | + case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ | ||
311 | + if (type == 1) { | ||
312 | + /* ARMv6/XScale extended small page format */ | ||
313 | + if (arm_feature(env, ARM_FEATURE_XSCALE) | ||
314 | + || arm_feature(env, ARM_FEATURE_V6)) { | ||
315 | + phys_addr = (desc & 0xfffff000) | (address & 0xfff); | ||
316 | + *page_size = 0x1000; | ||
317 | + } else { | ||
318 | + /* | ||
319 | + * UNPREDICTABLE in ARMv5; we choose to take a | ||
320 | + * page translation fault. | ||
321 | + */ | ||
322 | + fi->type = ARMFault_Translation; | ||
323 | + goto do_fault; | ||
324 | + } | ||
325 | + } else { | ||
326 | + phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); | ||
327 | + *page_size = 0x400; | ||
328 | + } | ||
329 | + ap = (desc >> 4) & 3; | ||
330 | + break; | ||
331 | + default: | ||
332 | + /* Never happens, but compiler isn't smart enough to tell. */ | ||
333 | + g_assert_not_reached(); | ||
334 | + } | ||
335 | + } | ||
336 | + *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); | ||
337 | + *prot |= *prot ? PAGE_EXEC : 0; | ||
338 | + if (!(*prot & (1 << access_type))) { | ||
339 | + /* Access permission fault. */ | ||
340 | + fi->type = ARMFault_Permission; | ||
341 | + goto do_fault; | ||
342 | + } | ||
343 | + *phys_ptr = phys_addr; | ||
344 | + return false; | ||
345 | +do_fault: | ||
346 | + fi->domain = domain; | ||
347 | + fi->level = level; | ||
348 | + return true; | ||
349 | +} | ||
350 | + | ||
351 | /** | ||
352 | * get_phys_addr - get the physical address for this virtual address | ||
353 | * | ||
354 | -- | 73 | -- |
355 | 2.25.1 | 74 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Sink common code from the callers into do_fmlal | ||
4 | and do_fmlal_idx. Reorder the arguments to minimize | ||
5 | the re-sorting from the caller's arguments. | ||
6 | |||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Message-id: 20220604040607.269301-8-richard.henderson@linaro.org | 8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 11 | --- |
8 | target/arm/ptw.h | 10 +-- | 12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ |
9 | target/arm/helper.c | 194 +------------------------------------------- | 13 | 1 file changed, 16 insertions(+), 12 deletions(-) |
10 | target/arm/ptw.c | 190 +++++++++++++++++++++++++++++++++++++++++++ | ||
11 | 3 files changed, 198 insertions(+), 196 deletions(-) | ||
12 | 14 | ||
13 | diff --git a/target/arm/ptw.h b/target/arm/ptw.h | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/ptw.h | 17 | --- a/target/arm/tcg/vec_helper.c |
16 | +++ b/target/arm/ptw.h | 18 | +++ b/target/arm/tcg/vec_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) | 19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
18 | return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); | 20 | * as there is not yet SVE versions that might use blocking. |
21 | */ | ||
22 | |||
23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
25 | +static void do_fmlal(float32 *d, void *vn, void *vm, | ||
26 | + CPUARMState *env, uint32_t desc, | ||
27 | + ARMFPStatusFlavour fpst_idx, | ||
28 | + uint64_t negx, int negf) | ||
29 | { | ||
30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
19 | } | 42 | } |
20 | 43 | ||
21 | +bool m_is_ppb_region(CPUARMState *env, uint32_t address); | 44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
22 | +bool m_is_system_region(CPUARMState *env, uint32_t address); | 45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
23 | + | 46 | negx = 0x8000800080008000ull; |
24 | void get_phys_addr_pmsav7_default(CPUARMState *env, | 47 | } |
25 | ARMMMUIdx mmu_idx, | 48 | } |
26 | int32_t address, int *prot); | 49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
27 | -bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 50 | - env->vfp.fpcr & FPCR_FZ16); |
28 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | 51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); |
29 | - hwaddr *phys_ptr, int *prot, | ||
30 | - target_ulong *page_size, | ||
31 | - ARMMMUFaultInfo *fi); | ||
32 | +bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user); | ||
33 | + | ||
34 | bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, | ||
35 | MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
36 | hwaddr *phys_ptr, MemTxAttrs *txattrs, | ||
37 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/helper.c | ||
40 | +++ b/target/arm/helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ do_fault: | ||
42 | return true; | ||
43 | } | 52 | } |
44 | 53 | ||
45 | -static bool pmsav7_use_background_region(ARMCPU *cpu, | 54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
46 | - ARMMMUIdx mmu_idx, bool is_user) | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
47 | +bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user) | ||
48 | { | ||
49 | /* Return true if we should use the default memory map as a | ||
50 | * "background" region if there are no hits against any MPU regions. | ||
51 | @@ -XXX,XX +XXX,XX @@ static bool pmsav7_use_background_region(ARMCPU *cpu, | ||
52 | } | 56 | } |
53 | } | 57 | } |
54 | 58 | ||
55 | -static inline bool m_is_ppb_region(CPUARMState *env, uint32_t address) | 59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
56 | +bool m_is_ppb_region(CPUARMState *env, uint32_t address) | 60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, | ||
62 | + CPUARMState *env, uint32_t desc, | ||
63 | + ARMFPStatusFlavour fpst_idx, | ||
64 | + uint64_t negx, int negf) | ||
57 | { | 65 | { |
58 | /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ | 66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; |
59 | return arm_feature(env, ARM_FEATURE_M) && | 67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
60 | extract32(address, 20, 12) == 0xe00; | 68 | intptr_t i, oprsz = simd_oprsz(desc); |
69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
74 | |||
75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
76 | - env->vfp.fpcr & FPCR_FZ16); | ||
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
61 | } | 78 | } |
62 | 79 | ||
63 | -static inline bool m_is_system_region(CPUARMState *env, uint32_t address) | 80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
64 | +bool m_is_system_region(CPUARMState *env, uint32_t address) | 81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
65 | { | 82 | negx = 0x8000800080008000ull; |
66 | /* True if address is in the M profile system region | 83 | } |
67 | * 0xe0000000 - 0xffffffff | 84 | } |
68 | @@ -XXX,XX +XXX,XX @@ static inline bool m_is_system_region(CPUARMState *env, uint32_t address) | 85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
69 | return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; | 86 | - env->vfp.fpcr & FPCR_FZ16); |
87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
70 | } | 88 | } |
71 | 89 | ||
72 | -bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | 90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
73 | - MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
74 | - hwaddr *phys_ptr, int *prot, | ||
75 | - target_ulong *page_size, | ||
76 | - ARMMMUFaultInfo *fi) | ||
77 | -{ | ||
78 | - ARMCPU *cpu = env_archcpu(env); | ||
79 | - int n; | ||
80 | - bool is_user = regime_is_user(env, mmu_idx); | ||
81 | - | ||
82 | - *phys_ptr = address; | ||
83 | - *page_size = TARGET_PAGE_SIZE; | ||
84 | - *prot = 0; | ||
85 | - | ||
86 | - if (regime_translation_disabled(env, mmu_idx) || | ||
87 | - m_is_ppb_region(env, address)) { | ||
88 | - /* MPU disabled or M profile PPB access: use default memory map. | ||
89 | - * The other case which uses the default memory map in the | ||
90 | - * v7M ARM ARM pseudocode is exception vector reads from the vector | ||
91 | - * table. In QEMU those accesses are done in arm_v7m_load_vector(), | ||
92 | - * which always does a direct read using address_space_ldl(), rather | ||
93 | - * than going via this function, so we don't need to check that here. | ||
94 | - */ | ||
95 | - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | ||
96 | - } else { /* MPU enabled */ | ||
97 | - for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { | ||
98 | - /* region search */ | ||
99 | - uint32_t base = env->pmsav7.drbar[n]; | ||
100 | - uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5); | ||
101 | - uint32_t rmask; | ||
102 | - bool srdis = false; | ||
103 | - | ||
104 | - if (!(env->pmsav7.drsr[n] & 0x1)) { | ||
105 | - continue; | ||
106 | - } | ||
107 | - | ||
108 | - if (!rsize) { | ||
109 | - qemu_log_mask(LOG_GUEST_ERROR, | ||
110 | - "DRSR[%d]: Rsize field cannot be 0\n", n); | ||
111 | - continue; | ||
112 | - } | ||
113 | - rsize++; | ||
114 | - rmask = (1ull << rsize) - 1; | ||
115 | - | ||
116 | - if (base & rmask) { | ||
117 | - qemu_log_mask(LOG_GUEST_ERROR, | ||
118 | - "DRBAR[%d]: 0x%" PRIx32 " misaligned " | ||
119 | - "to DRSR region size, mask = 0x%" PRIx32 "\n", | ||
120 | - n, base, rmask); | ||
121 | - continue; | ||
122 | - } | ||
123 | - | ||
124 | - if (address < base || address > base + rmask) { | ||
125 | - /* | ||
126 | - * Address not in this region. We must check whether the | ||
127 | - * region covers addresses in the same page as our address. | ||
128 | - * In that case we must not report a size that covers the | ||
129 | - * whole page for a subsequent hit against a different MPU | ||
130 | - * region or the background region, because it would result in | ||
131 | - * incorrect TLB hits for subsequent accesses to addresses that | ||
132 | - * are in this MPU region. | ||
133 | - */ | ||
134 | - if (ranges_overlap(base, rmask, | ||
135 | - address & TARGET_PAGE_MASK, | ||
136 | - TARGET_PAGE_SIZE)) { | ||
137 | - *page_size = 1; | ||
138 | - } | ||
139 | - continue; | ||
140 | - } | ||
141 | - | ||
142 | - /* Region matched */ | ||
143 | - | ||
144 | - if (rsize >= 8) { /* no subregions for regions < 256 bytes */ | ||
145 | - int i, snd; | ||
146 | - uint32_t srdis_mask; | ||
147 | - | ||
148 | - rsize -= 3; /* sub region size (power of 2) */ | ||
149 | - snd = ((address - base) >> rsize) & 0x7; | ||
150 | - srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1); | ||
151 | - | ||
152 | - srdis_mask = srdis ? 0x3 : 0x0; | ||
153 | - for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) { | ||
154 | - /* This will check in groups of 2, 4 and then 8, whether | ||
155 | - * the subregion bits are consistent. rsize is incremented | ||
156 | - * back up to give the region size, considering consistent | ||
157 | - * adjacent subregions as one region. Stop testing if rsize | ||
158 | - * is already big enough for an entire QEMU page. | ||
159 | - */ | ||
160 | - int snd_rounded = snd & ~(i - 1); | ||
161 | - uint32_t srdis_multi = extract32(env->pmsav7.drsr[n], | ||
162 | - snd_rounded + 8, i); | ||
163 | - if (srdis_mask ^ srdis_multi) { | ||
164 | - break; | ||
165 | - } | ||
166 | - srdis_mask = (srdis_mask << i) | srdis_mask; | ||
167 | - rsize++; | ||
168 | - } | ||
169 | - } | ||
170 | - if (srdis) { | ||
171 | - continue; | ||
172 | - } | ||
173 | - if (rsize < TARGET_PAGE_BITS) { | ||
174 | - *page_size = 1 << rsize; | ||
175 | - } | ||
176 | - break; | ||
177 | - } | ||
178 | - | ||
179 | - if (n == -1) { /* no hits */ | ||
180 | - if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) { | ||
181 | - /* background fault */ | ||
182 | - fi->type = ARMFault_Background; | ||
183 | - return true; | ||
184 | - } | ||
185 | - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | ||
186 | - } else { /* a MPU hit! */ | ||
187 | - uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); | ||
188 | - uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1); | ||
189 | - | ||
190 | - if (m_is_system_region(env, address)) { | ||
191 | - /* System space is always execute never */ | ||
192 | - xn = 1; | ||
193 | - } | ||
194 | - | ||
195 | - if (is_user) { /* User mode AP bit decoding */ | ||
196 | - switch (ap) { | ||
197 | - case 0: | ||
198 | - case 1: | ||
199 | - case 5: | ||
200 | - break; /* no access */ | ||
201 | - case 3: | ||
202 | - *prot |= PAGE_WRITE; | ||
203 | - /* fall through */ | ||
204 | - case 2: | ||
205 | - case 6: | ||
206 | - *prot |= PAGE_READ | PAGE_EXEC; | ||
207 | - break; | ||
208 | - case 7: | ||
209 | - /* for v7M, same as 6; for R profile a reserved value */ | ||
210 | - if (arm_feature(env, ARM_FEATURE_M)) { | ||
211 | - *prot |= PAGE_READ | PAGE_EXEC; | ||
212 | - break; | ||
213 | - } | ||
214 | - /* fall through */ | ||
215 | - default: | ||
216 | - qemu_log_mask(LOG_GUEST_ERROR, | ||
217 | - "DRACR[%d]: Bad value for AP bits: 0x%" | ||
218 | - PRIx32 "\n", n, ap); | ||
219 | - } | ||
220 | - } else { /* Priv. mode AP bits decoding */ | ||
221 | - switch (ap) { | ||
222 | - case 0: | ||
223 | - break; /* no access */ | ||
224 | - case 1: | ||
225 | - case 2: | ||
226 | - case 3: | ||
227 | - *prot |= PAGE_WRITE; | ||
228 | - /* fall through */ | ||
229 | - case 5: | ||
230 | - case 6: | ||
231 | - *prot |= PAGE_READ | PAGE_EXEC; | ||
232 | - break; | ||
233 | - case 7: | ||
234 | - /* for v7M, same as 6; for R profile a reserved value */ | ||
235 | - if (arm_feature(env, ARM_FEATURE_M)) { | ||
236 | - *prot |= PAGE_READ | PAGE_EXEC; | ||
237 | - break; | ||
238 | - } | ||
239 | - /* fall through */ | ||
240 | - default: | ||
241 | - qemu_log_mask(LOG_GUEST_ERROR, | ||
242 | - "DRACR[%d]: Bad value for AP bits: 0x%" | ||
243 | - PRIx32 "\n", n, ap); | ||
244 | - } | ||
245 | - } | ||
246 | - | ||
247 | - /* execute never */ | ||
248 | - if (xn) { | ||
249 | - *prot &= ~PAGE_EXEC; | ||
250 | - } | ||
251 | - } | ||
252 | - } | ||
253 | - | ||
254 | - fi->type = ARMFault_Permission; | ||
255 | - fi->level = 1; | ||
256 | - return !(*prot & (1 << access_type)); | ||
257 | -} | ||
258 | - | ||
259 | static bool v8m_is_sau_exempt(CPUARMState *env, | ||
260 | uint32_t address, MMUAccessType access_type) | ||
261 | { | ||
262 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
263 | index XXXXXXX..XXXXXXX 100644 | ||
264 | --- a/target/arm/ptw.c | ||
265 | +++ b/target/arm/ptw.c | ||
266 | @@ -XXX,XX +XXX,XX @@ | ||
267 | |||
268 | #include "qemu/osdep.h" | ||
269 | #include "qemu/log.h" | ||
270 | +#include "qemu/range.h" | ||
271 | #include "cpu.h" | ||
272 | #include "internals.h" | ||
273 | #include "ptw.h" | ||
274 | @@ -XXX,XX +XXX,XX @@ void get_phys_addr_pmsav7_default(CPUARMState *env, | ||
275 | } | ||
276 | } | ||
277 | |||
278 | +static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, | ||
279 | + MMUAccessType access_type, ARMMMUIdx mmu_idx, | ||
280 | + hwaddr *phys_ptr, int *prot, | ||
281 | + target_ulong *page_size, | ||
282 | + ARMMMUFaultInfo *fi) | ||
283 | +{ | ||
284 | + ARMCPU *cpu = env_archcpu(env); | ||
285 | + int n; | ||
286 | + bool is_user = regime_is_user(env, mmu_idx); | ||
287 | + | ||
288 | + *phys_ptr = address; | ||
289 | + *page_size = TARGET_PAGE_SIZE; | ||
290 | + *prot = 0; | ||
291 | + | ||
292 | + if (regime_translation_disabled(env, mmu_idx) || | ||
293 | + m_is_ppb_region(env, address)) { | ||
294 | + /* | ||
295 | + * MPU disabled or M profile PPB access: use default memory map. | ||
296 | + * The other case which uses the default memory map in the | ||
297 | + * v7M ARM ARM pseudocode is exception vector reads from the vector | ||
298 | + * table. In QEMU those accesses are done in arm_v7m_load_vector(), | ||
299 | + * which always does a direct read using address_space_ldl(), rather | ||
300 | + * than going via this function, so we don't need to check that here. | ||
301 | + */ | ||
302 | + get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | ||
303 | + } else { /* MPU enabled */ | ||
304 | + for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { | ||
305 | + /* region search */ | ||
306 | + uint32_t base = env->pmsav7.drbar[n]; | ||
307 | + uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5); | ||
308 | + uint32_t rmask; | ||
309 | + bool srdis = false; | ||
310 | + | ||
311 | + if (!(env->pmsav7.drsr[n] & 0x1)) { | ||
312 | + continue; | ||
313 | + } | ||
314 | + | ||
315 | + if (!rsize) { | ||
316 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
317 | + "DRSR[%d]: Rsize field cannot be 0\n", n); | ||
318 | + continue; | ||
319 | + } | ||
320 | + rsize++; | ||
321 | + rmask = (1ull << rsize) - 1; | ||
322 | + | ||
323 | + if (base & rmask) { | ||
324 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
325 | + "DRBAR[%d]: 0x%" PRIx32 " misaligned " | ||
326 | + "to DRSR region size, mask = 0x%" PRIx32 "\n", | ||
327 | + n, base, rmask); | ||
328 | + continue; | ||
329 | + } | ||
330 | + | ||
331 | + if (address < base || address > base + rmask) { | ||
332 | + /* | ||
333 | + * Address not in this region. We must check whether the | ||
334 | + * region covers addresses in the same page as our address. | ||
335 | + * In that case we must not report a size that covers the | ||
336 | + * whole page for a subsequent hit against a different MPU | ||
337 | + * region or the background region, because it would result in | ||
338 | + * incorrect TLB hits for subsequent accesses to addresses that | ||
339 | + * are in this MPU region. | ||
340 | + */ | ||
341 | + if (ranges_overlap(base, rmask, | ||
342 | + address & TARGET_PAGE_MASK, | ||
343 | + TARGET_PAGE_SIZE)) { | ||
344 | + *page_size = 1; | ||
345 | + } | ||
346 | + continue; | ||
347 | + } | ||
348 | + | ||
349 | + /* Region matched */ | ||
350 | + | ||
351 | + if (rsize >= 8) { /* no subregions for regions < 256 bytes */ | ||
352 | + int i, snd; | ||
353 | + uint32_t srdis_mask; | ||
354 | + | ||
355 | + rsize -= 3; /* sub region size (power of 2) */ | ||
356 | + snd = ((address - base) >> rsize) & 0x7; | ||
357 | + srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1); | ||
358 | + | ||
359 | + srdis_mask = srdis ? 0x3 : 0x0; | ||
360 | + for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) { | ||
361 | + /* | ||
362 | + * This will check in groups of 2, 4 and then 8, whether | ||
363 | + * the subregion bits are consistent. rsize is incremented | ||
364 | + * back up to give the region size, considering consistent | ||
365 | + * adjacent subregions as one region. Stop testing if rsize | ||
366 | + * is already big enough for an entire QEMU page. | ||
367 | + */ | ||
368 | + int snd_rounded = snd & ~(i - 1); | ||
369 | + uint32_t srdis_multi = extract32(env->pmsav7.drsr[n], | ||
370 | + snd_rounded + 8, i); | ||
371 | + if (srdis_mask ^ srdis_multi) { | ||
372 | + break; | ||
373 | + } | ||
374 | + srdis_mask = (srdis_mask << i) | srdis_mask; | ||
375 | + rsize++; | ||
376 | + } | ||
377 | + } | ||
378 | + if (srdis) { | ||
379 | + continue; | ||
380 | + } | ||
381 | + if (rsize < TARGET_PAGE_BITS) { | ||
382 | + *page_size = 1 << rsize; | ||
383 | + } | ||
384 | + break; | ||
385 | + } | ||
386 | + | ||
387 | + if (n == -1) { /* no hits */ | ||
388 | + if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) { | ||
389 | + /* background fault */ | ||
390 | + fi->type = ARMFault_Background; | ||
391 | + return true; | ||
392 | + } | ||
393 | + get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); | ||
394 | + } else { /* a MPU hit! */ | ||
395 | + uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); | ||
396 | + uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1); | ||
397 | + | ||
398 | + if (m_is_system_region(env, address)) { | ||
399 | + /* System space is always execute never */ | ||
400 | + xn = 1; | ||
401 | + } | ||
402 | + | ||
403 | + if (is_user) { /* User mode AP bit decoding */ | ||
404 | + switch (ap) { | ||
405 | + case 0: | ||
406 | + case 1: | ||
407 | + case 5: | ||
408 | + break; /* no access */ | ||
409 | + case 3: | ||
410 | + *prot |= PAGE_WRITE; | ||
411 | + /* fall through */ | ||
412 | + case 2: | ||
413 | + case 6: | ||
414 | + *prot |= PAGE_READ | PAGE_EXEC; | ||
415 | + break; | ||
416 | + case 7: | ||
417 | + /* for v7M, same as 6; for R profile a reserved value */ | ||
418 | + if (arm_feature(env, ARM_FEATURE_M)) { | ||
419 | + *prot |= PAGE_READ | PAGE_EXEC; | ||
420 | + break; | ||
421 | + } | ||
422 | + /* fall through */ | ||
423 | + default: | ||
424 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
425 | + "DRACR[%d]: Bad value for AP bits: 0x%" | ||
426 | + PRIx32 "\n", n, ap); | ||
427 | + } | ||
428 | + } else { /* Priv. mode AP bits decoding */ | ||
429 | + switch (ap) { | ||
430 | + case 0: | ||
431 | + break; /* no access */ | ||
432 | + case 1: | ||
433 | + case 2: | ||
434 | + case 3: | ||
435 | + *prot |= PAGE_WRITE; | ||
436 | + /* fall through */ | ||
437 | + case 5: | ||
438 | + case 6: | ||
439 | + *prot |= PAGE_READ | PAGE_EXEC; | ||
440 | + break; | ||
441 | + case 7: | ||
442 | + /* for v7M, same as 6; for R profile a reserved value */ | ||
443 | + if (arm_feature(env, ARM_FEATURE_M)) { | ||
444 | + *prot |= PAGE_READ | PAGE_EXEC; | ||
445 | + break; | ||
446 | + } | ||
447 | + /* fall through */ | ||
448 | + default: | ||
449 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
450 | + "DRACR[%d]: Bad value for AP bits: 0x%" | ||
451 | + PRIx32 "\n", n, ap); | ||
452 | + } | ||
453 | + } | ||
454 | + | ||
455 | + /* execute never */ | ||
456 | + if (xn) { | ||
457 | + *prot &= ~PAGE_EXEC; | ||
458 | + } | ||
459 | + } | ||
460 | + } | ||
461 | + | ||
462 | + fi->type = ARMFault_Permission; | ||
463 | + fi->level = 1; | ||
464 | + return !(*prot & (1 << access_type)); | ||
465 | +} | ||
466 | + | ||
467 | /** | ||
468 | * get_phys_addr - get the physical address for this virtual address | ||
469 | * | ||
470 | -- | 91 | -- |
471 | 2.25.1 | 92 | 2.34.1 | diff view generated by jsdifflib |