1
The following changes since commit 6d940eff4734bcb40b1a25f62d7cec5a396f994a:
1
Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches
2
(plus a fix for a target/alpha latent bug that would otherwise
3
be revealed by the fpu changes), because 68 patches is already
4
longer than I prefer to send in at one time...
2
5
3
Merge tag 'pull-tpm-2022-06-07-1' of https://github.com/stefanberger/qemu-tpm into staging (2022-06-07 19:22:18 -0700)
6
thanks
7
-- PMM
8
9
The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e:
10
11
Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500)
4
12
5
are available in the Git repository at:
13
are available in the Git repository at:
6
14
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220609
15
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211
8
16
9
for you to fetch changes up to 414c54d515dba16bfaef643a8acec200c05f229a:
17
for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690:
10
18
11
target/arm: Add ID_AA64SMFR0_EL1 (2022-06-08 19:38:59 +0100)
19
target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000)
12
20
13
----------------------------------------------------------------
21
----------------------------------------------------------------
14
target-arm queue:
22
target-arm queue:
15
* target/arm: Declare support for FEAT_RASv1p1
23
* target/alpha: Don't corrupt error_code with unknown softfloat flags
16
* target/arm: Implement FEAT_DoubleFault
24
* target/arm: Implement FEAT_AFP and FEAT_RPRES
17
* Fix 'writeable' typos
18
* xlnx_dp: Implement vblank interrupt
19
* target/arm: Move page-table-walk code to ptw.c
20
* target/arm: Preparatory patches for SME support
21
25
22
----------------------------------------------------------------
26
----------------------------------------------------------------
23
Frederic Konrad (2):
27
Peter Maydell (49):
24
xlnx_dp: fix the wrong register size
28
target/alpha: Don't corrupt error_code with unknown softfloat flags
25
xlnx-zynqmp: fix the irq mapping for the display port and its dma
29
fpu: Add float_class_denormal
30
fpu: Implement float_flag_input_denormal_used
31
fpu: allow flushing of output denormals to be after rounding
32
target/arm: Define FPCR AH, FIZ, NEP bits
33
target/arm: Implement FPCR.FIZ handling
34
target/arm: Adjust FP behaviour for FPCR.AH = 1
35
target/arm: Adjust exception flag handling for AH = 1
36
target/arm: Add FPCR.AH to tbflags
37
target/arm: Set up float_status to use for FPCR.AH=1 behaviour
38
target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS
39
target/arm: Use FPST_FPCR_AH for BFCVT* insns
40
target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns
41
target/arm: Add FPCR.NEP to TBFLAGS
42
target/arm: Define and use new write_fp_*reg_merging() functions
43
target/arm: Handle FPCR.NEP for 3-input scalar operations
44
target/arm: Handle FPCR.NEP for BFCVT scalar
45
target/arm: Handle FPCR.NEP for 1-input scalar operations
46
target/arm: Handle FPCR.NEP in do_cvtf_scalar()
47
target/arm: Handle FPCR.NEP for scalar FABS and FNEG
48
target/arm: Handle FPCR.NEP for FCVTXN (scalar)
49
target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element
50
target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX
51
target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX
52
target/arm: Implement FPCR.AH semantics for FMAXV and FMINV
53
target/arm: Implement FPCR.AH semantics for FMINP and FMAXP
54
target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV
55
target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate
56
target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector
57
target/arm: Implement FPCR.AH handling of negation of NaN
58
target/arm: Implement FPCR.AH handling for scalar FABS and FABD
59
target/arm: Handle FPCR.AH in vector FABD
60
target/arm: Handle FPCR.AH in SVE FNEG
61
target/arm: Handle FPCR.AH in SVE FABS
62
target/arm: Handle FPCR.AH in SVE FABD
63
target/arm: Handle FPCR.AH in negation steps in SVE FCADD
64
target/arm: Handle FPCR.AH in negation steps in FCADD
65
target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns
66
target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns
67
target/arm: Handle FPCR.AH in negation step in FMLS (indexed)
68
target/arm: Handle FPCR.AH in negation in FMLS (vector)
69
target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector)
70
target/arm: Handle FPCR.AH in SVE FTSSEL
71
target/arm: Handle FPCR.AH in SVE FTMAD
72
target/arm: Enable FEAT_AFP for '-cpu max'
73
target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper
74
target/arm: Implement increased precision FRECPE
75
target/arm: Implement increased precision FRSQRTE
76
target/arm: Enable FEAT_RPRES for -cpu max
26
77
27
Peter Maydell (3):
78
Richard Henderson (19):
28
target/arm: Declare support for FEAT_RASv1p1
79
target/arm: Handle FPCR.AH in vector FCMLA
29
target/arm: Implement FEAT_DoubleFault
80
target/arm: Handle FPCR.AH in FCMLA by index
30
Fix 'writeable' typos
81
target/arm: Handle FPCR.AH in SVE FCMLA
82
target/arm: Handle FPCR.AH in FMLSL (by element and vector)
83
target/arm: Handle FPCR.AH in SVE FMLSL (indexed)
84
target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors)
85
target/arm: Introduce CPUARMState.vfp.fp_status[]
86
target/arm: Remove standard_fp_status_f16
87
target/arm: Remove standard_fp_status
88
target/arm: Remove ah_fp_status_f16
89
target/arm: Remove ah_fp_status
90
target/arm: Remove fp_status_f16_a64
91
target/arm: Remove fp_status_f16_a32
92
target/arm: Remove fp_status_a64
93
target/arm: Remove fp_status_a32
94
target/arm: Simplify fp_status indexing in mve_helper.c
95
target/arm: Simplify DO_VFP_cmp in vfp_helper.c
96
target/arm: Read fz16 from env->vfp.fpcr
97
target/arm: Sink fp_status and fpcr access into do_fmlal*
31
98
32
Richard Henderson (48):
99
docs/system/arm/emulation.rst | 2 +
33
target/arm: Move stage_1_mmu_idx decl to internals.h
100
include/fpu/softfloat-helpers.h | 11 +
34
target/arm: Move get_phys_addr to ptw.c
101
include/fpu/softfloat-types.h | 25 ++
35
target/arm: Move get_phys_addr_v5 to ptw.c
102
target/arm/cpu-features.h | 10 +
36
target/arm: Move get_phys_addr_v6 to ptw.c
103
target/arm/cpu.h | 97 +++--
37
target/arm: Move get_phys_addr_pmsav5 to ptw.c
104
target/arm/helper.h | 26 ++
38
target/arm: Move get_phys_addr_pmsav7_default to ptw.c
105
target/arm/internals.h | 6 +
39
target/arm: Move get_phys_addr_pmsav7 to ptw.c
106
target/arm/tcg/helper-a64.h | 13 +
40
target/arm: Move get_phys_addr_pmsav8 to ptw.c
107
target/arm/tcg/helper-sve.h | 120 ++++++
41
target/arm: Move pmsav8_mpu_lookup to ptw.c
108
target/arm/tcg/translate-a64.h | 13 +
42
target/arm: Move pmsav7_use_background_region to ptw.c
109
target/arm/tcg/translate.h | 54 +--
43
target/arm: Move v8m_security_lookup to ptw.c
110
target/arm/tcg/vec_internal.h | 35 ++
44
target/arm: Move m_is_{ppb,system}_region to ptw.c
111
target/mips/fpu_helper.h | 6 +
45
target/arm: Move get_level1_table_address to ptw.c
112
fpu/softfloat.c | 66 +++-
46
target/arm: Move combine_cacheattrs and subroutines to ptw.c
113
target/alpha/cpu.c | 7 +
47
target/arm: Move get_phys_addr_lpae to ptw.c
114
target/alpha/fpu_helper.c | 2 +
48
target/arm: Move arm_{ldl,ldq}_ptw to ptw.c
115
target/arm/cpu.c | 46 +--
49
target/arm: Move {arm_s1_, }regime_using_lpae_format to tlb_helper.c
116
target/arm/helper.c | 2 +-
50
target/arm: Move arm_pamax, pamax_map into ptw.c
117
target/arm/tcg/cpu64.c | 2 +
51
target/arm: Move get_S1prot, get_S2prot to ptw.c
118
target/arm/tcg/helper-a64.c | 151 ++++----
52
target/arm: Move check_s2_mmu_setup to ptw.c
119
target/arm/tcg/hflags.c | 13 +
53
target/arm: Move aa32_va_parameters to ptw.c
120
target/arm/tcg/mve_helper.c | 44 +--
54
target/arm: Move ap_to_tw_prot etc to ptw.c
121
target/arm/tcg/sme_helper.c | 4 +-
55
target/arm: Move regime_is_user to ptw.c
122
target/arm/tcg/sve_helper.c | 367 ++++++++++++++-----
56
target/arm: Move regime_ttbr to ptw.c
123
target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++--------
57
target/arm: Move regime_translation_disabled to ptw.c
124
target/arm/tcg/translate-sve.c | 193 +++++++---
58
target/arm: Move arm_cpu_get_phys_page_attrs_debug to ptw.c
125
target/arm/tcg/vec_helper.c | 387 ++++++++++++++------
59
target/arm: Move stage_1_mmu_idx, arm_stage1_mmu_idx to ptw.c
126
target/arm/vfp_helper.c | 374 +++++++++++++++----
60
target/arm: Pass CPUARMState to arm_ld[lq]_ptw
127
target/hppa/fpu_helper.c | 11 +
61
target/arm: Rename TBFLAG_A64 ZCR_LEN to VL
128
target/i386/tcg/fpu_helper.c | 8 +
62
linux-user/aarch64: Introduce sve_vq
129
target/mips/msa.c | 9 +
63
target/arm: Remove route_to_el2 check from sve_exception_el
130
target/ppc/cpu_init.c | 3 +
64
target/arm: Remove fp checks from sve_exception_el
131
target/rx/cpu.c | 8 +
65
target/arm: Add el_is_in_host
132
target/sh4/cpu.c | 8 +
66
target/arm: Use el_is_in_host for sve_zcr_len_for_el
133
target/tricore/helper.c | 1 +
67
target/arm: Use el_is_in_host for sve_exception_el
134
tests/fp/fp-bench.c | 1 +
68
target/arm: Hoist arm_is_el2_enabled check in sve_exception_el
135
fpu/softfloat-parts.c.inc | 127 +++++--
69
target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset
136
37 files changed, 2325 insertions(+), 709 deletions(-)
70
target/arm: Merge aarch64_sve_zcr_get_valid_len into caller
71
target/arm: Use uint32_t instead of bitmap for sve vq's
72
target/arm: Rename sve_zcr_len_for_el to sve_vqm1_for_el
73
target/arm: Split out load/store primitives to sve_ldst_internal.h
74
target/arm: Export sve contiguous ldst support functions
75
target/arm: Move expand_pred_b to vec_internal.h
76
target/arm: Use expand_pred_b in mve_helper.c
77
target/arm: Move expand_pred_h to vec_internal.h
78
target/arm: Export bfdotadd from vec_helper.c
79
target/arm: Add isar_feature_aa64_sme
80
target/arm: Add ID_AA64SMFR0_EL1
81
82
Sai Pavan Boddu (2):
83
xlnx_dp: Introduce a vblank signal
84
xlnx_dp: Fix the interrupt disable logic
85
86
docs/interop/vhost-user.rst | 2 +-
87
docs/specs/vmgenid.txt | 4 +-
88
docs/system/arm/emulation.rst | 2 +
89
hw/scsi/mfi.h | 2 +-
90
include/hw/display/xlnx_dp.h | 12 +-
91
linux-user/aarch64/target_prctl.h | 20 +-
92
target/arm/cpu.h | 66 +-
93
target/arm/internals.h | 45 +-
94
target/arm/kvm_arm.h | 7 +-
95
target/arm/sve_ldst_internal.h | 221 +++
96
target/arm/translate-a64.h | 2 +-
97
target/arm/translate.h | 2 +-
98
target/arm/vec_internal.h | 28 +-
99
target/i386/hvf/vmcs.h | 2 +-
100
target/i386/hvf/vmx.h | 2 +-
101
accel/hvf/hvf-accel-ops.c | 4 +-
102
accel/kvm/kvm-all.c | 4 +-
103
accel/tcg/user-exec.c | 6 +-
104
hw/acpi/ghes.c | 2 +-
105
hw/arm/xlnx-zynqmp.c | 4 +-
106
hw/display/xlnx_dp.c | 49 +-
107
hw/intc/arm_gicv3_cpuif.c | 2 +-
108
hw/intc/arm_gicv3_dist.c | 2 +-
109
hw/intc/arm_gicv3_redist.c | 4 +-
110
hw/intc/riscv_aclint.c | 2 +-
111
hw/intc/riscv_aplic.c | 2 +-
112
hw/pci/shpc.c | 2 +-
113
hw/sparc64/sun4u_iommu.c | 2 +-
114
hw/timer/sse-timer.c | 2 +-
115
linux-user/aarch64/signal.c | 4 +-
116
target/arm/arch_dump.c | 2 +-
117
target/arm/cpu.c | 5 +-
118
target/arm/cpu64.c | 120 +-
119
target/arm/gdbstub.c | 2 +-
120
target/arm/gdbstub64.c | 2 +-
121
target/arm/helper.c | 2742 ++-----------------------------------
122
target/arm/hvf/hvf.c | 4 +-
123
target/arm/kvm64.c | 47 +-
124
target/arm/mve_helper.c | 6 +-
125
target/arm/ptw.c | 2540 ++++++++++++++++++++++++++++++++++
126
target/arm/sve_helper.c | 232 +---
127
target/arm/tlb_helper.c | 26 +
128
target/arm/translate-a64.c | 2 +-
129
target/arm/translate-sve.c | 2 +-
130
target/arm/vec_helper.c | 28 +-
131
target/i386/cpu-sysemu.c | 2 +-
132
target/s390x/ioinst.c | 2 +-
133
python/qemu/machine/machine.py | 2 +-
134
target/arm/meson.build | 1 +
135
tests/tcg/x86_64/system/boot.S | 2 +-
136
50 files changed, 3240 insertions(+), 3037 deletions(-)
137
create mode 100644 target/arm/sve_ldst_internal.h
138
create mode 100644 target/arm/ptw.c
diff view generated by jsdifflib
New patch
1
In do_cvttq() we set env->error_code with what is supposed to be a
2
set of FPCR exception bit values. However, if the set of float
3
exception flags we get back from softfloat for the conversion
4
includes a flag which is not one of the three we expect here
5
(invalid_cvti, invalid, inexact) then we will fall through the
6
if-ladder and set env->error_code to the unconverted softfloat
7
exception_flag value. This will then cause us to take a spurious
8
exception.
1
9
10
This is harmless now, but when we add new floating point exception
11
flags to softfloat it will cause problems. Add an else clause to the
12
if-ladder to make it ignore any float exception flags it doesn't care
13
about.
14
15
Specifically, without this fix, 'make check-tcg' will fail for Alpha
16
when the commit adding float_flag_input_denormal_used lands.
17
18
19
Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ")
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
23
---
24
target/alpha/fpu_helper.c | 2 ++
25
1 file changed, 2 insertions(+)
26
27
diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/alpha/fpu_helper.c
30
+++ b/target/alpha/fpu_helper.c
31
@@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode)
32
exc = FPCR_INV;
33
} else if (exc & float_flag_inexact) {
34
exc = FPCR_INE;
35
+ } else {
36
+ exc = 0;
37
}
38
}
39
env->error_code = exc;
40
--
41
2.34.1
42
43
diff view generated by jsdifflib
1
From: Sai Pavan Boddu <sai.pavan.boddu@xilinx.com>
1
Currently in softfloat we canonicalize input denormals and so the
2
code that implements floating point operations does not need to care
3
whether the input value was originally normal or denormal. However,
4
both x86 and Arm FEAT_AFP require that an exception flag is set if:
5
* an input is denormal
6
* that input is not squashed to zero
7
* that input is actually used in the calculation (e.g. we
8
did not find the other input was a NaN)
2
9
3
Add a periodic timer which raises vblank at a frequency of 30Hz.
10
So we need to track that the input was a non-squashed denormal. To
11
do this we add a new value to the FloatClass enum. In this commit we
12
add the value and adjust the code everywhere that looks at FloatClass
13
values so that the new float_class_denormal behaves identically to
14
float_class_normal. We will add the code that does the "raise a new
15
float exception flag if an input was an unsquashed denormal and we
16
used it" in a subsequent commit.
4
17
5
Note that this is a migration compatibility break for the
18
There should be no behavioural change in this commit.
6
xlnx-zcu102 board type.
7
19
8
Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
9
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
10
Signed-off-by: Frederic Konrad <fkonrad@amd.com>
11
Acked-by: Alistair Francis <alistair.francis@wdc.com>
12
Message-id: 20220601172353.3220232-3-fkonrad@xilinx.com
13
Changes by fkonrad:
14
- Switched to transaction-based ptimer API.
15
- Added the DP_INT_VBLNK_START macro.
16
Signed-off-by: Frederic Konrad <fkonrad@amd.com>
17
[PMM: bump vmstate version, add commit message note about
18
compat break]
19
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
---
22
---
22
include/hw/display/xlnx_dp.h | 3 +++
23
fpu/softfloat.c | 32 ++++++++++++++++++++++++++++---
23
hw/display/xlnx_dp.c | 30 ++++++++++++++++++++++++++----
24
fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++---------------
24
2 files changed, 29 insertions(+), 4 deletions(-)
25
2 files changed, 54 insertions(+), 18 deletions(-)
25
26
26
diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h
27
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
27
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/display/xlnx_dp.h
29
--- a/fpu/softfloat.c
29
+++ b/include/hw/display/xlnx_dp.h
30
+++ b/fpu/softfloat.c
30
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s,
31
#include "hw/dma/xlnx_dpdma.h"
32
/*
32
#include "audio/audio.h"
33
* Classify a floating point number. Everything above float_class_qnan
33
#include "qom/object.h"
34
* is a NaN so cls >= float_class_qnan is any NaN.
34
+#include "hw/ptimer.h"
35
+ *
35
36
+ * Note that we canonicalize denormals, so most code should treat
36
#define AUD_CHBUF_MAX_DEPTH (32 * KiB)
37
+ * class_normal and class_denormal identically.
37
#define MAX_QEMU_BUFFER_SIZE (4 * KiB)
38
*/
38
@@ -XXX,XX +XXX,XX @@ struct XlnxDPState {
39
39
*/
40
typedef enum __attribute__ ((__packed__)) {
40
DPCDState *dpcd;
41
float_class_unclassified,
41
I2CDDCState *edid;
42
float_class_zero,
42
+
43
float_class_normal,
43
+ ptimer_state *vblank;
44
+ float_class_denormal, /* input was a non-squashed denormal */
45
float_class_inf,
46
float_class_qnan, /* all NaNs from here */
47
float_class_snan,
48
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) {
49
enum {
50
float_cmask_zero = float_cmask(float_class_zero),
51
float_cmask_normal = float_cmask(float_class_normal),
52
+ float_cmask_denormal = float_cmask(float_class_denormal),
53
float_cmask_inf = float_cmask(float_class_inf),
54
float_cmask_qnan = float_cmask(float_class_qnan),
55
float_cmask_snan = float_cmask(float_class_snan),
56
57
float_cmask_infzero = float_cmask_zero | float_cmask_inf,
58
float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
59
+ float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
44
};
60
};
45
61
46
#define TYPE_XLNX_DP "xlnx.v-dp"
62
/* Flags for parts_minmax. */
47
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
63
@@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c)
48
index XXXXXXX..XXXXXXX 100644
64
return c == float_class_qnan;
49
--- a/hw/display/xlnx_dp.c
50
+++ b/hw/display/xlnx_dp.c
51
@@ -XXX,XX +XXX,XX @@
52
#define DP_TX_N_AUD (0x032C >> 2)
53
#define DP_TX_AUDIO_EXT_DATA(n) ((0x0330 + 4 * n) >> 2)
54
#define DP_INT_STATUS (0x03A0 >> 2)
55
+#define DP_INT_VBLNK_START (1 << 13)
56
#define DP_INT_MASK (0x03A4 >> 2)
57
#define DP_INT_EN (0x03A8 >> 2)
58
#define DP_INT_DS (0x03AC >> 2)
59
@@ -XXX,XX +XXX,XX @@ typedef enum DPVideoFmt DPVideoFmt;
60
61
static const VMStateDescription vmstate_dp = {
62
.name = TYPE_XLNX_DP,
63
- .version_id = 1,
64
+ .version_id = 2,
65
.fields = (VMStateField[]){
66
VMSTATE_UINT32_ARRAY(core_registers, XlnxDPState,
67
DP_CORE_REG_ARRAY_SIZE),
68
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_dp = {
69
DP_VBLEND_REG_ARRAY_SIZE),
70
VMSTATE_UINT32_ARRAY(audio_registers, XlnxDPState,
71
DP_AUDIO_REG_ARRAY_SIZE),
72
+ VMSTATE_PTIMER(vblank, XlnxDPState),
73
VMSTATE_END_OF_LIST()
74
}
75
};
76
77
+#define DP_VBLANK_PTIMER_POLICY (PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD | \
78
+ PTIMER_POLICY_CONTINUOUS_TRIGGER | \
79
+ PTIMER_POLICY_NO_IMMEDIATE_TRIGGER)
80
+
81
static void xlnx_dp_update_irq(XlnxDPState *s);
82
83
static uint64_t xlnx_dp_audio_read(void *opaque, hwaddr offset, unsigned size)
84
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_write(void *opaque, hwaddr offset, uint64_t value,
85
break;
86
case DP_TRANSMITTER_ENABLE:
87
s->core_registers[offset] = value & 0x01;
88
+ ptimer_transaction_begin(s->vblank);
89
+ if (value & 0x1) {
90
+ ptimer_run(s->vblank, 0);
91
+ } else {
92
+ ptimer_stop(s->vblank);
93
+ }
94
+ ptimer_transaction_commit(s->vblank);
95
break;
96
case DP_FORCE_SCRAMBLER_RESET:
97
/*
98
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_update_display(void *opaque)
99
return;
100
}
101
102
- s->core_registers[DP_INT_STATUS] |= (1 << 13);
103
- xlnx_dp_update_irq(s);
104
-
105
xlnx_dpdma_trigger_vsync_irq(s->dpdma);
106
107
/*
108
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_finalize(Object *obj)
109
fifo8_destroy(&s->rx_fifo);
110
}
65
}
111
66
112
+static void vblank_hit(void *opaque)
67
+/*
68
+ * Return true if the float_cmask has only normals in it
69
+ * (including input denormals that were canonicalized)
70
+ */
71
+static inline bool cmask_is_only_normals(int cmask)
113
+{
72
+{
114
+ XlnxDPState *s = XLNX_DP(opaque);
73
+ return !(cmask & ~float_cmask_anynorm);
115
+
116
+ s->core_registers[DP_INT_STATUS] |= DP_INT_VBLNK_START;
117
+ xlnx_dp_update_irq(s);
118
+}
74
+}
119
+
75
+
120
static void xlnx_dp_realize(DeviceState *dev, Error **errp)
76
+static inline bool is_anynorm(FloatClass c)
77
+{
78
+ return float_cmask(c) & float_cmask_anynorm;
79
+}
80
+
81
/*
82
* Structure holding all of the decomposed parts of a float.
83
* The exponent is unbiased and the fraction is normalized.
84
@@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p,
85
*/
86
switch (p->cls) {
87
case float_class_normal:
88
+ case float_class_denormal:
89
if (unlikely(p->exp == 0)) {
90
/*
91
* The result is denormal for float32, but can be represented
92
@@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
93
94
switch (p->cls) {
95
case float_class_normal:
96
+ case float_class_denormal:
97
if (s->floatx80_rounding_precision == floatx80_precision_x) {
98
parts_uncanon_normal(p, s, fmt);
99
frac = p->frac_hi;
100
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
101
break;
102
103
case float_class_normal:
104
+ case float_class_denormal:
105
case float_class_zero:
106
break;
107
108
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
109
a->sign = b->sign;
110
a->exp = b->exp;
111
112
- if (a->cls == float_class_normal) {
113
+ if (is_anynorm(a->cls)) {
114
frac_truncjam(a, b);
115
} else if (is_nan(a->cls)) {
116
/* Discard the low bits of the NaN. */
117
@@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
118
return int128_zero();
119
120
case float_class_normal:
121
+ case float_class_denormal:
122
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
123
flags = float_flag_inexact;
124
}
125
@@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
126
return int128_zero();
127
128
case float_class_normal:
129
+ case float_class_denormal:
130
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
131
flags = float_flag_inexact;
132
if (p.cls == float_class_zero) {
133
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
134
float32_unpack_canonical(&xp, a, status);
135
if (unlikely(xp.cls != float_class_normal)) {
136
switch (xp.cls) {
137
+ case float_class_denormal:
138
+ break;
139
case float_class_snan:
140
case float_class_qnan:
141
parts_return_nan(&xp, status);
142
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
143
case float_class_zero:
144
return float32_one;
145
default:
146
- break;
147
+ g_assert_not_reached();
148
}
149
- g_assert_not_reached();
150
}
151
152
float_raise(float_flag_inexact, status);
153
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
154
index XXXXXXX..XXXXXXX 100644
155
--- a/fpu/softfloat-parts.c.inc
156
+++ b/fpu/softfloat-parts.c.inc
157
@@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
158
frac_clear(p);
159
} else {
160
int shift = frac_normalize(p);
161
- p->cls = float_class_normal;
162
+ p->cls = float_class_denormal;
163
p->exp = fmt->frac_shift - fmt->exp_bias
164
- shift + !fmt->m68k_denormal;
165
}
166
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
167
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
168
const FloatFmt *fmt)
121
{
169
{
122
XlnxDPState *s = XLNX_DP(dev);
170
- if (likely(p->cls == float_class_normal)) {
123
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_realize(DeviceState *dev, Error **errp)
171
+ if (likely(is_anynorm(p->cls))) {
124
&as);
172
parts_uncanon_normal(p, s, fmt);
125
AUD_set_volume_out(s->amixer_output_stream, 0, 255, 255);
173
} else {
126
xlnx_dp_audio_activate(s);
174
switch (p->cls) {
127
+ s->vblank = ptimer_init(vblank_hit, s, DP_VBLANK_PTIMER_POLICY);
175
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
128
+ ptimer_transaction_begin(s->vblank);
176
129
+ ptimer_set_freq(s->vblank, 30);
177
if (a->sign != b_sign) {
130
+ ptimer_transaction_commit(s->vblank);
178
/* Subtraction */
131
}
179
- if (likely(ab_mask == float_cmask_normal)) {
132
180
+ if (likely(cmask_is_only_normals(ab_mask))) {
133
static void xlnx_dp_reset(DeviceState *dev)
181
if (parts_sub_normal(a, b)) {
182
return a;
183
}
184
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
185
}
186
} else {
187
/* Addition */
188
- if (likely(ab_mask == float_cmask_normal)) {
189
+ if (likely(cmask_is_only_normals(ab_mask))) {
190
parts_add_normal(a, b);
191
return a;
192
}
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
194
}
195
196
if (b->cls == float_class_zero) {
197
- g_assert(a->cls == float_class_normal);
198
+ g_assert(is_anynorm(a->cls));
199
return a;
200
}
201
202
g_assert(a->cls == float_class_zero);
203
- g_assert(b->cls == float_class_normal);
204
+ g_assert(is_anynorm(b->cls));
205
return_b:
206
b->sign = b_sign;
207
return b;
208
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
209
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
210
bool sign = a->sign ^ b->sign;
211
212
- if (likely(ab_mask == float_cmask_normal)) {
213
+ if (likely(cmask_is_only_normals(ab_mask))) {
214
FloatPartsW tmp;
215
216
frac_mulw(&tmp, a, b);
217
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
218
a->sign ^= 1;
219
}
220
221
- if (unlikely(ab_mask != float_cmask_normal)) {
222
+ if (unlikely(!cmask_is_only_normals(ab_mask))) {
223
if (unlikely(ab_mask == float_cmask_infzero)) {
224
float_raise(float_flag_invalid | float_flag_invalid_imz, s);
225
goto d_nan;
226
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
227
}
228
229
g_assert(ab_mask & float_cmask_zero);
230
- if (c->cls == float_class_normal) {
231
+ if (is_anynorm(c->cls)) {
232
*a = *c;
233
goto return_normal;
234
}
235
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
236
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
237
bool sign = a->sign ^ b->sign;
238
239
- if (likely(ab_mask == float_cmask_normal)) {
240
+ if (likely(cmask_is_only_normals(ab_mask))) {
241
a->sign = sign;
242
a->exp -= b->exp + frac_div(a, b);
243
return a;
244
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
245
{
246
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
247
248
- if (likely(ab_mask == float_cmask_normal)) {
249
+ if (likely(cmask_is_only_normals(ab_mask))) {
250
frac_modrem(a, b, mod_quot);
251
return a;
252
}
253
@@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status,
254
255
if (unlikely(a->cls != float_class_normal)) {
256
switch (a->cls) {
257
+ case float_class_denormal:
258
+ break;
259
case float_class_snan:
260
case float_class_qnan:
261
parts_return_nan(a, status);
262
@@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
263
case float_class_inf:
264
break;
265
case float_class_normal:
266
+ case float_class_denormal:
267
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
268
float_raise(float_flag_inexact, s);
269
}
270
@@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
271
return 0;
272
273
case float_class_normal:
274
+ case float_class_denormal:
275
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
276
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
277
flags = float_flag_inexact;
278
@@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
279
return 0;
280
281
case float_class_normal:
282
+ case float_class_denormal:
283
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
284
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
285
flags = float_flag_inexact;
286
@@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
287
return 0;
288
289
case float_class_normal:
290
+ case float_class_denormal:
291
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
292
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
293
flags = float_flag_inexact;
294
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
295
a_exp = a->exp;
296
b_exp = b->exp;
297
298
- if (unlikely(ab_mask != float_cmask_normal)) {
299
+ if (unlikely(!cmask_is_only_normals(ab_mask))) {
300
switch (a->cls) {
301
case float_class_normal:
302
+ case float_class_denormal:
303
break;
304
case float_class_inf:
305
a_exp = INT16_MAX;
306
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
307
}
308
switch (b->cls) {
309
case float_class_normal:
310
+ case float_class_denormal:
311
break;
312
case float_class_inf:
313
b_exp = INT16_MAX;
314
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
315
{
316
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
317
318
- if (likely(ab_mask == float_cmask_normal)) {
319
+ if (likely(cmask_is_only_normals(ab_mask))) {
320
FloatRelation cmp;
321
322
if (a->sign != b->sign) {
323
@@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
324
case float_class_inf:
325
break;
326
case float_class_normal:
327
+ case float_class_denormal:
328
a->exp += MIN(MAX(n, -0x10000), 0x10000);
329
break;
330
default:
331
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
332
333
if (unlikely(a->cls != float_class_normal)) {
334
switch (a->cls) {
335
+ case float_class_denormal:
336
+ break;
337
case float_class_snan:
338
case float_class_qnan:
339
parts_return_nan(a, s);
340
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
341
}
342
return;
343
default:
344
- break;
345
+ g_assert_not_reached();
346
}
347
- g_assert_not_reached();
348
}
349
if (unlikely(a->sign)) {
350
goto d_nan;
134
--
351
--
135
2.25.1
352
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
For the x86 and the Arm FEAT_AFP semantics, we need to be able to
2
tell the target code that the FPU operation has used an input
3
denormal. Implement this; when it happens we set the new
4
float_flag_input_denormal_used.
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Note that we only set this when an input denormal is actually used by
4
Message-id: 20220604040607.269301-16-richard.henderson@linaro.org
7
the operation: if the operation results in Invalid Operation or
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Divide By Zero or the result is a NaN because some other input was a
9
NaN then we never needed to look at the input denormal and do not set
10
float_flag_input_denormal_used.
11
12
We mostly do not need to adjust the hardfloat codepaths to deal with
13
this flag, because almost all hardfloat operations are already gated
14
on the input not being a denormal, and will fall back to softfloat
15
for a denormal input. The only exception is the comparison
16
operations, where we need to add the check for input denormals, which
17
must now fall back to softfloat where they did not before.
18
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
21
---
8
target/arm/ptw.h | 10 ++
22
include/fpu/softfloat-types.h | 7 ++++
9
target/arm/helper.c | 416 +-------------------------------------------
23
fpu/softfloat.c | 38 +++++++++++++++++---
10
target/arm/ptw.c | 411 +++++++++++++++++++++++++++++++++++++++++++
24
fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++-
11
3 files changed, 429 insertions(+), 408 deletions(-)
25
3 files changed, 107 insertions(+), 6 deletions(-)
12
26
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
27
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
14
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
29
--- a/include/fpu/softfloat-types.h
16
+++ b/target/arm/ptw.h
30
+++ b/include/fpu/softfloat-types.h
17
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ enum {
18
32
float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */
19
#ifndef CONFIG_USER_ONLY
33
float_flag_invalid_cvti = 0x1000, /* non-nan to integer */
20
34
float_flag_invalid_snan = 0x2000, /* any operand was snan */
21
+extern const uint8_t pamax_map[7];
35
+ /*
22
+
36
+ * An input was denormal and we used it (without flushing it to zero).
23
uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
37
+ * Not set if we do not actually use the denormal input (e.g.
24
ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi);
38
+ * because some other input was a NaN, or because the operation
25
uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
39
+ * wasn't actually carried out (divide-by-zero; invalid))
26
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
40
+ */
27
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
41
+ float_flag_input_denormal_used = 0x4000,
42
};
43
44
/*
45
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/fpu/softfloat.c
48
+++ b/fpu/softfloat.c
49
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
50
float16_params_ahp.frac_size + 1);
51
break;
52
53
- case float_class_normal:
54
case float_class_denormal:
55
+ float_raise(float_flag_input_denormal_used, s);
56
+ break;
57
+ case float_class_normal:
58
case float_class_zero:
59
break;
60
61
@@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s)
62
if (is_nan(a->cls)) {
63
parts_return_nan(a, s);
64
}
65
+ if (a->cls == float_class_denormal) {
66
+ float_raise(float_flag_input_denormal_used, s);
67
+ }
28
}
68
}
29
69
30
+ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
70
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
31
+ ARMMMUIdx mmu_idx);
71
@@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s)
32
+bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
72
if (is_nan(a->cls)) {
33
+ int inputsize, int stride, int outputsize);
73
parts_return_nan(a, s);
34
+int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0);
74
}
35
+int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
75
+ if (a->cls == float_class_denormal) {
36
+ int ap, int ns, int xn, int pxn);
76
+ float_raise(float_flag_input_denormal_used, s);
37
+
77
+ }
38
bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
78
}
39
MMUAccessType access_type, ARMMMUIdx mmu_idx,
79
40
bool s1_is_el0,
80
#define parts_float_to_float(P, S) \
41
diff --git a/target/arm/helper.c b/target/arm/helper.c
81
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
82
a->sign = b->sign;
83
a->exp = b->exp;
84
85
- if (is_anynorm(a->cls)) {
86
+ switch (a->cls) {
87
+ case float_class_denormal:
88
+ float_raise(float_flag_input_denormal_used, s);
89
+ /* fall through */
90
+ case float_class_normal:
91
frac_truncjam(a, b);
92
- } else if (is_nan(a->cls)) {
93
+ break;
94
+ case float_class_snan:
95
+ case float_class_qnan:
96
/* Discard the low bits of the NaN. */
97
a->frac = b->frac_hi;
98
parts_return_nan(a, s);
99
+ break;
100
+ default:
101
+ break;
102
}
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
106
if (is_nan(a->cls)) {
107
parts_return_nan(a, s);
108
}
109
+ if (a->cls == float_class_denormal) {
110
+ float_raise(float_flag_input_denormal_used, s);
111
+ }
112
}
113
114
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
115
@@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
116
goto soft;
117
}
118
119
- float32_input_flush2(&ua.s, &ub.s, s);
120
+ if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
121
+ /* We may need to set the input_denormal_used flag */
122
+ goto soft;
123
+ }
124
+
125
if (isgreaterequal(ua.h, ub.h)) {
126
if (isgreater(ua.h, ub.h)) {
127
return float_relation_greater;
128
@@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
129
goto soft;
130
}
131
132
- float64_input_flush2(&ua.s, &ub.s, s);
133
+ if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
134
+ /* We may need to set the input_denormal_used flag */
135
+ goto soft;
136
+ }
137
+
138
if (isgreaterequal(ua.h, ub.h)) {
139
if (isgreater(ua.h, ub.h)) {
140
return float_relation_greater;
141
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
42
index XXXXXXX..XXXXXXX 100644
142
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/helper.c
143
--- a/fpu/softfloat-parts.c.inc
44
+++ b/target/arm/helper.c
144
+++ b/fpu/softfloat-parts.c.inc
45
@@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
145
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
46
* @xn: XN (execute-never) bits
146
bool b_sign = b->sign ^ subtract;
47
* @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
147
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
48
*/
148
49
-static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
149
+ /*
50
+int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
150
+ * For addition and subtraction, we will consume an
51
{
151
+ * input denormal unless the other input is a NaN.
52
int prot = 0;
152
+ */
53
153
+ if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
54
@@ -XXX,XX +XXX,XX @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
154
+ float_cmask_denormal) {
55
* @xn: XN (execute-never) bit
155
+ float_raise(float_flag_input_denormal_used, s);
56
* @pxn: PXN (privileged execute-never) bit
156
+ }
57
*/
157
+
58
-static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
158
if (a->sign != b_sign) {
59
- int ap, int ns, int xn, int pxn)
159
/* Subtraction */
60
+int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
160
if (likely(cmask_is_only_normals(ab_mask))) {
61
+ int ap, int ns, int xn, int pxn)
161
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
62
{
162
if (likely(cmask_is_only_normals(ab_mask))) {
63
bool is_user = regime_is_user(env, mmu_idx);
163
FloatPartsW tmp;
64
int prot_rw, user_rw;
164
65
@@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
165
+ if (ab_mask & float_cmask_denormal) {
66
* Returns true if the suggested S2 translation parameters are OK and
166
+ float_raise(float_flag_input_denormal_used, s);
67
* false otherwise.
68
*/
69
-static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
70
- int inputsize, int stride, int outputsize)
71
+bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
72
+ int inputsize, int stride, int outputsize)
73
{
74
const int grainsize = stride + 3;
75
int startsizecheck;
76
@@ -XXX,XX +XXX,XX @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
77
#endif /* !CONFIG_USER_ONLY */
78
79
/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
80
-static const uint8_t pamax_map[] = {
81
+const uint8_t pamax_map[] = {
82
[0] = 32,
83
[1] = 36,
84
[2] = 40,
85
@@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
86
}
87
88
#ifndef CONFIG_USER_ONLY
89
-static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
90
- ARMMMUIdx mmu_idx)
91
+ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
92
+ ARMMMUIdx mmu_idx)
93
{
94
uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
95
uint32_t el = regime_el(env, mmu_idx);
96
@@ -XXX,XX +XXX,XX @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
97
};
98
}
99
100
-/**
101
- * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
102
- *
103
- * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
104
- * prot and page_size may not be filled in, and the populated fsr value provides
105
- * information on why the translation aborted, in the format of a long-format
106
- * DFSR/IFSR fault register, with the following caveats:
107
- * * the WnR bit is never set (the caller must do this).
108
- *
109
- * @env: CPUARMState
110
- * @address: virtual address to get physical address for
111
- * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
112
- * @mmu_idx: MMU index indicating required translation regime
113
- * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table
114
- * walk), must be true if this is stage 2 of a stage 1+2 walk for an
115
- * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored.
116
- * @phys_ptr: set to the physical address corresponding to the virtual address
117
- * @attrs: set to the memory transaction attributes to use
118
- * @prot: set to the permissions for the page containing phys_ptr
119
- * @page_size_ptr: set to the size of the page containing phys_ptr
120
- * @fi: set to fault info if the translation fails
121
- * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
122
- */
123
-bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
124
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
125
- bool s1_is_el0,
126
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
127
- target_ulong *page_size_ptr,
128
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
129
-{
130
- ARMCPU *cpu = env_archcpu(env);
131
- CPUState *cs = CPU(cpu);
132
- /* Read an LPAE long-descriptor translation table. */
133
- ARMFaultType fault_type = ARMFault_Translation;
134
- uint32_t level;
135
- ARMVAParameters param;
136
- uint64_t ttbr;
137
- hwaddr descaddr, indexmask, indexmask_grainsize;
138
- uint32_t tableattrs;
139
- target_ulong page_size;
140
- uint32_t attrs;
141
- int32_t stride;
142
- int addrsize, inputsize, outputsize;
143
- TCR *tcr = regime_tcr(env, mmu_idx);
144
- int ap, ns, xn, pxn;
145
- uint32_t el = regime_el(env, mmu_idx);
146
- uint64_t descaddrmask;
147
- bool aarch64 = arm_el_is_aa64(env, el);
148
- bool guarded = false;
149
-
150
- /* TODO: This code does not support shareability levels. */
151
- if (aarch64) {
152
- int ps;
153
-
154
- param = aa64_va_parameters(env, address, mmu_idx,
155
- access_type != MMU_INST_FETCH);
156
- level = 0;
157
-
158
- /*
159
- * If TxSZ is programmed to a value larger than the maximum,
160
- * or smaller than the effective minimum, it is IMPLEMENTATION
161
- * DEFINED whether we behave as if the field were programmed
162
- * within bounds, or if a level 0 Translation fault is generated.
163
- *
164
- * With FEAT_LVA, fault on less than minimum becomes required,
165
- * so our choice is to always raise the fault.
166
- */
167
- if (param.tsz_oob) {
168
- fault_type = ARMFault_Translation;
169
- goto do_fault;
170
- }
171
-
172
- addrsize = 64 - 8 * param.tbi;
173
- inputsize = 64 - param.tsz;
174
-
175
- /*
176
- * Bound PS by PARANGE to find the effective output address size.
177
- * ID_AA64MMFR0 is a read-only register so values outside of the
178
- * supported mappings can be considered an implementation error.
179
- */
180
- ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
181
- ps = MIN(ps, param.ps);
182
- assert(ps < ARRAY_SIZE(pamax_map));
183
- outputsize = pamax_map[ps];
184
- } else {
185
- param = aa32_va_parameters(env, address, mmu_idx);
186
- level = 1;
187
- addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32);
188
- inputsize = addrsize - param.tsz;
189
- outputsize = 40;
190
- }
191
-
192
- /*
193
- * We determined the region when collecting the parameters, but we
194
- * have not yet validated that the address is valid for the region.
195
- * Extract the top bits and verify that they all match select.
196
- *
197
- * For aa32, if inputsize == addrsize, then we have selected the
198
- * region by exclusion in aa32_va_parameters and there is no more
199
- * validation to do here.
200
- */
201
- if (inputsize < addrsize) {
202
- target_ulong top_bits = sextract64(address, inputsize,
203
- addrsize - inputsize);
204
- if (-top_bits != param.select) {
205
- /* The gap between the two regions is a Translation fault */
206
- fault_type = ARMFault_Translation;
207
- goto do_fault;
208
- }
209
- }
210
-
211
- if (param.using64k) {
212
- stride = 13;
213
- } else if (param.using16k) {
214
- stride = 11;
215
- } else {
216
- stride = 9;
217
- }
218
-
219
- /* Note that QEMU ignores shareability and cacheability attributes,
220
- * so we don't need to do anything with the SH, ORGN, IRGN fields
221
- * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the
222
- * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently
223
- * implement any ASID-like capability so we can ignore it (instead
224
- * we will always flush the TLB any time the ASID is changed).
225
- */
226
- ttbr = regime_ttbr(env, mmu_idx, param.select);
227
-
228
- /* Here we should have set up all the parameters for the translation:
229
- * inputsize, ttbr, epd, stride, tbi
230
- */
231
-
232
- if (param.epd) {
233
- /* Translation table walk disabled => Translation fault on TLB miss
234
- * Note: This is always 0 on 64-bit EL2 and EL3.
235
- */
236
- goto do_fault;
237
- }
238
-
239
- if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) {
240
- /* The starting level depends on the virtual address size (which can
241
- * be up to 48 bits) and the translation granule size. It indicates
242
- * the number of strides (stride bits at a time) needed to
243
- * consume the bits of the input address. In the pseudocode this is:
244
- * level = 4 - RoundUp((inputsize - grainsize) / stride)
245
- * where their 'inputsize' is our 'inputsize', 'grainsize' is
246
- * our 'stride + 3' and 'stride' is our 'stride'.
247
- * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
248
- * = 4 - (inputsize - stride - 3 + stride - 1) / stride
249
- * = 4 - (inputsize - 4) / stride;
250
- */
251
- level = 4 - (inputsize - 4) / stride;
252
- } else {
253
- /* For stage 2 translations the starting level is specified by the
254
- * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
255
- */
256
- uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2);
257
- uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1);
258
- uint32_t startlevel;
259
- bool ok;
260
-
261
- /* SL2 is RES0 unless DS=1 & 4kb granule. */
262
- if (param.ds && stride == 9 && sl2) {
263
- if (sl0 != 0) {
264
- level = 0;
265
- fault_type = ARMFault_Translation;
266
- goto do_fault;
267
- }
268
- startlevel = -1;
269
- } else if (!aarch64 || stride == 9) {
270
- /* AArch32 or 4KB pages */
271
- startlevel = 2 - sl0;
272
-
273
- if (cpu_isar_feature(aa64_st, cpu)) {
274
- startlevel &= 3;
275
- }
276
- } else {
277
- /* 16KB or 64KB pages */
278
- startlevel = 3 - sl0;
279
- }
280
-
281
- /* Check that the starting level is valid. */
282
- ok = check_s2_mmu_setup(cpu, aarch64, startlevel,
283
- inputsize, stride, outputsize);
284
- if (!ok) {
285
- fault_type = ARMFault_Translation;
286
- goto do_fault;
287
- }
288
- level = startlevel;
289
- }
290
-
291
- indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3);
292
- indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level)));
293
-
294
- /* Now we can extract the actual base address from the TTBR */
295
- descaddr = extract64(ttbr, 0, 48);
296
-
297
- /*
298
- * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR.
299
- *
300
- * Otherwise, if the base address is out of range, raise AddressSizeFault.
301
- * In the pseudocode, this is !IsZero(baseregister<47:outputsize>),
302
- * but we've just cleared the bits above 47, so simplify the test.
303
- */
304
- if (outputsize > 48) {
305
- descaddr |= extract64(ttbr, 2, 4) << 48;
306
- } else if (descaddr >> outputsize) {
307
- level = 0;
308
- fault_type = ARMFault_AddressSize;
309
- goto do_fault;
310
- }
311
-
312
- /*
313
- * We rely on this masking to clear the RES0 bits at the bottom of the TTBR
314
- * and also to mask out CnP (bit 0) which could validly be non-zero.
315
- */
316
- descaddr &= ~indexmask;
317
-
318
- /*
319
- * For AArch32, the address field in the descriptor goes up to bit 39
320
- * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0
321
- * or an AddressSize fault is raised. So for v8 we extract those SBZ
322
- * bits as part of the address, which will be checked via outputsize.
323
- * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2;
324
- * the highest bits of a 52-bit output are placed elsewhere.
325
- */
326
- if (param.ds) {
327
- descaddrmask = MAKE_64BIT_MASK(0, 50);
328
- } else if (arm_feature(env, ARM_FEATURE_V8)) {
329
- descaddrmask = MAKE_64BIT_MASK(0, 48);
330
- } else {
331
- descaddrmask = MAKE_64BIT_MASK(0, 40);
332
- }
333
- descaddrmask &= ~indexmask_grainsize;
334
-
335
- /* Secure accesses start with the page table in secure memory and
336
- * can be downgraded to non-secure at any step. Non-secure accesses
337
- * remain non-secure. We implement this by just ORing in the NSTable/NS
338
- * bits at each step.
339
- */
340
- tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4);
341
- for (;;) {
342
- uint64_t descriptor;
343
- bool nstable;
344
-
345
- descaddr |= (address >> (stride * (4 - level))) & indexmask;
346
- descaddr &= ~7ULL;
347
- nstable = extract32(tableattrs, 4, 1);
348
- descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi);
349
- if (fi->type != ARMFault_None) {
350
- goto do_fault;
351
- }
352
-
353
- if (!(descriptor & 1) ||
354
- (!(descriptor & 2) && (level == 3))) {
355
- /* Invalid, or the Reserved level 3 encoding */
356
- goto do_fault;
357
- }
358
-
359
- descaddr = descriptor & descaddrmask;
360
-
361
- /*
362
- * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
363
- * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of
364
- * descaddr are in [9:8]. Otherwise, if descaddr is out of range,
365
- * raise AddressSizeFault.
366
- */
367
- if (outputsize > 48) {
368
- if (param.ds) {
369
- descaddr |= extract64(descriptor, 8, 2) << 50;
370
- } else {
371
- descaddr |= extract64(descriptor, 12, 4) << 48;
372
- }
373
- } else if (descaddr >> outputsize) {
374
- fault_type = ARMFault_AddressSize;
375
- goto do_fault;
376
- }
377
-
378
- if ((descriptor & 2) && (level < 3)) {
379
- /* Table entry. The top five bits are attributes which may
380
- * propagate down through lower levels of the table (and
381
- * which are all arranged so that 0 means "no effect", so
382
- * we can gather them up by ORing in the bits at each level).
383
- */
384
- tableattrs |= extract64(descriptor, 59, 5);
385
- level++;
386
- indexmask = indexmask_grainsize;
387
- continue;
388
- }
389
- /*
390
- * Block entry at level 1 or 2, or page entry at level 3.
391
- * These are basically the same thing, although the number
392
- * of bits we pull in from the vaddr varies. Note that although
393
- * descaddrmask masks enough of the low bits of the descriptor
394
- * to give a correct page or table address, the address field
395
- * in a block descriptor is smaller; so we need to explicitly
396
- * clear the lower bits here before ORing in the low vaddr bits.
397
- */
398
- page_size = (1ULL << ((stride * (4 - level)) + 3));
399
- descaddr &= ~(page_size - 1);
400
- descaddr |= (address & (page_size - 1));
401
- /* Extract attributes from the descriptor */
402
- attrs = extract64(descriptor, 2, 10)
403
- | (extract64(descriptor, 52, 12) << 10);
404
-
405
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
406
- /* Stage 2 table descriptors do not include any attribute fields */
407
- break;
408
- }
409
- /* Merge in attributes from table descriptors */
410
- attrs |= nstable << 3; /* NS */
411
- guarded = extract64(descriptor, 50, 1); /* GP */
412
- if (param.hpd) {
413
- /* HPD disables all the table attributes except NSTable. */
414
- break;
415
- }
416
- attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
417
- /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
418
- * means "force PL1 access only", which means forcing AP[1] to 0.
419
- */
420
- attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */
421
- attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */
422
- break;
423
- }
424
- /* Here descaddr is the final physical address, and attributes
425
- * are all in attrs.
426
- */
427
- fault_type = ARMFault_AccessFlag;
428
- if ((attrs & (1 << 8)) == 0) {
429
- /* Access flag */
430
- goto do_fault;
431
- }
432
-
433
- ap = extract32(attrs, 4, 2);
434
-
435
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
436
- ns = mmu_idx == ARMMMUIdx_Stage2;
437
- xn = extract32(attrs, 11, 2);
438
- *prot = get_S2prot(env, ap, xn, s1_is_el0);
439
- } else {
440
- ns = extract32(attrs, 3, 1);
441
- xn = extract32(attrs, 12, 1);
442
- pxn = extract32(attrs, 11, 1);
443
- *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
444
- }
445
-
446
- fault_type = ARMFault_Permission;
447
- if (!(*prot & (1 << access_type))) {
448
- goto do_fault;
449
- }
450
-
451
- if (ns) {
452
- /* The NS bit will (as required by the architecture) have no effect if
453
- * the CPU doesn't support TZ or this is a non-secure translation
454
- * regime, because the attribute will already be non-secure.
455
- */
456
- txattrs->secure = false;
457
- }
458
- /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */
459
- if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) {
460
- arm_tlb_bti_gp(txattrs) = true;
461
- }
462
-
463
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
464
- cacheattrs->is_s2_format = true;
465
- cacheattrs->attrs = extract32(attrs, 0, 4);
466
- } else {
467
- /* Index into MAIR registers for cache attributes */
468
- uint8_t attrindx = extract32(attrs, 0, 3);
469
- uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
470
- assert(attrindx <= 7);
471
- cacheattrs->is_s2_format = false;
472
- cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
473
- }
474
-
475
- /*
476
- * For FEAT_LPA2 and effective DS, the SH field in the attributes
477
- * was re-purposed for output address bits. The SH attribute in
478
- * that case comes from TCR_ELx, which we extracted earlier.
479
- */
480
- if (param.ds) {
481
- cacheattrs->shareability = param.sh;
482
- } else {
483
- cacheattrs->shareability = extract32(attrs, 6, 2);
484
- }
485
-
486
- *phys_ptr = descaddr;
487
- *page_size_ptr = page_size;
488
- return false;
489
-
490
-do_fault:
491
- fi->type = fault_type;
492
- fi->level = level;
493
- /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */
494
- fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 ||
495
- mmu_idx == ARMMMUIdx_Stage2_S);
496
- fi->s1ns = mmu_idx == ARMMMUIdx_Stage2;
497
- return true;
498
-}
499
-
500
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
501
MemTxAttrs *attrs)
502
{
503
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
504
index XXXXXXX..XXXXXXX 100644
505
--- a/target/arm/ptw.c
506
+++ b/target/arm/ptw.c
507
@@ -XXX,XX +XXX,XX @@ do_fault:
508
return true;
509
}
510
511
+/**
512
+ * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
513
+ *
514
+ * Returns false if the translation was successful. Otherwise, phys_ptr,
515
+ * attrs, prot and page_size may not be filled in, and the populated fsr
516
+ * value provides information on why the translation aborted, in the format
517
+ * of a long-format DFSR/IFSR fault register, with the following caveat:
518
+ * the WnR bit is never set (the caller must do this).
519
+ *
520
+ * @env: CPUARMState
521
+ * @address: virtual address to get physical address for
522
+ * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
523
+ * @mmu_idx: MMU index indicating required translation regime
524
+ * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page
525
+ * table walk), must be true if this is stage 2 of a stage 1+2
526
+ * walk for an EL0 access. If @mmu_idx is anything else,
527
+ * @s1_is_el0 is ignored.
528
+ * @phys_ptr: set to the physical address corresponding to the virtual address
529
+ * @attrs: set to the memory transaction attributes to use
530
+ * @prot: set to the permissions for the page containing phys_ptr
531
+ * @page_size_ptr: set to the size of the page containing phys_ptr
532
+ * @fi: set to fault info if the translation fails
533
+ * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
534
+ */
535
+bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
536
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
537
+ bool s1_is_el0,
538
+ hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
539
+ target_ulong *page_size_ptr,
540
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
541
+{
542
+ ARMCPU *cpu = env_archcpu(env);
543
+ CPUState *cs = CPU(cpu);
544
+ /* Read an LPAE long-descriptor translation table. */
545
+ ARMFaultType fault_type = ARMFault_Translation;
546
+ uint32_t level;
547
+ ARMVAParameters param;
548
+ uint64_t ttbr;
549
+ hwaddr descaddr, indexmask, indexmask_grainsize;
550
+ uint32_t tableattrs;
551
+ target_ulong page_size;
552
+ uint32_t attrs;
553
+ int32_t stride;
554
+ int addrsize, inputsize, outputsize;
555
+ TCR *tcr = regime_tcr(env, mmu_idx);
556
+ int ap, ns, xn, pxn;
557
+ uint32_t el = regime_el(env, mmu_idx);
558
+ uint64_t descaddrmask;
559
+ bool aarch64 = arm_el_is_aa64(env, el);
560
+ bool guarded = false;
561
+
562
+ /* TODO: This code does not support shareability levels. */
563
+ if (aarch64) {
564
+ int ps;
565
+
566
+ param = aa64_va_parameters(env, address, mmu_idx,
567
+ access_type != MMU_INST_FETCH);
568
+ level = 0;
569
+
570
+ /*
571
+ * If TxSZ is programmed to a value larger than the maximum,
572
+ * or smaller than the effective minimum, it is IMPLEMENTATION
573
+ * DEFINED whether we behave as if the field were programmed
574
+ * within bounds, or if a level 0 Translation fault is generated.
575
+ *
576
+ * With FEAT_LVA, fault on less than minimum becomes required,
577
+ * so our choice is to always raise the fault.
578
+ */
579
+ if (param.tsz_oob) {
580
+ fault_type = ARMFault_Translation;
581
+ goto do_fault;
582
+ }
167
+ }
583
+
168
+
584
+ addrsize = 64 - 8 * param.tbi;
169
frac_mulw(&tmp, a, b);
585
+ inputsize = 64 - param.tsz;
170
frac_truncjam(a, &tmp);
586
+
171
587
+ /*
172
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
588
+ * Bound PS by PARANGE to find the effective output address size.
173
}
589
+ * ID_AA64MMFR0 is a read-only register so values outside of the
174
590
+ * supported mappings can be considered an implementation error.
175
/* Multiply by 0 or Inf */
591
+ */
176
+ if (ab_mask & float_cmask_denormal) {
592
+ ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
177
+ float_raise(float_flag_input_denormal_used, s);
593
+ ps = MIN(ps, param.ps);
178
+ }
594
+ assert(ps < ARRAY_SIZE(pamax_map));
179
+
595
+ outputsize = pamax_map[ps];
180
if (ab_mask & float_cmask_inf) {
596
+ } else {
181
a->cls = float_class_inf;
597
+ param = aa32_va_parameters(env, address, mmu_idx);
182
a->sign = sign;
598
+ level = 1;
183
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
599
+ addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32);
184
if (flags & float_muladd_negate_result) {
600
+ inputsize = addrsize - param.tsz;
185
a->sign ^= 1;
601
+ outputsize = 40;
186
}
602
+ }
603
+
187
+
604
+ /*
188
+ /*
605
+ * We determined the region when collecting the parameters, but we
189
+ * All result types except for "return the default NaN
606
+ * have not yet validated that the address is valid for the region.
190
+ * because this is an Invalid Operation" go through here;
607
+ * Extract the top bits and verify that they all match select.
191
+ * this matches the set of cases where we consumed a
608
+ *
192
+ * denormal input.
609
+ * For aa32, if inputsize == addrsize, then we have selected the
610
+ * region by exclusion in aa32_va_parameters and there is no more
611
+ * validation to do here.
612
+ */
193
+ */
613
+ if (inputsize < addrsize) {
194
+ if (abc_mask & float_cmask_denormal) {
614
+ target_ulong top_bits = sextract64(address, inputsize,
195
+ float_raise(float_flag_input_denormal_used, s);
615
+ addrsize - inputsize);
196
+ }
616
+ if (-top_bits != param.select) {
197
return a;
617
+ /* The gap between the two regions is a Translation fault */
198
618
+ fault_type = ARMFault_Translation;
199
return_sub_zero:
619
+ goto do_fault;
200
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
201
bool sign = a->sign ^ b->sign;
202
203
if (likely(cmask_is_only_normals(ab_mask))) {
204
+ if (ab_mask & float_cmask_denormal) {
205
+ float_raise(float_flag_input_denormal_used, s);
620
+ }
206
+ }
621
+ }
207
a->sign = sign;
622
+
208
a->exp -= b->exp + frac_div(a, b);
623
+ if (param.using64k) {
209
return a;
624
+ stride = 13;
210
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
625
+ } else if (param.using16k) {
211
return parts_pick_nan(a, b, s);
626
+ stride = 11;
212
}
627
+ } else {
213
628
+ stride = 9;
214
+ if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
629
+ }
215
+ float_raise(float_flag_input_denormal_used, s);
630
+
216
+ }
631
+ /*
217
+
632
+ * Note that QEMU ignores shareability and cacheability attributes,
218
a->sign = sign;
633
+ * so we don't need to do anything with the SH, ORGN, IRGN fields
219
634
+ * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the
220
/* Inf / X */
635
+ * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently
221
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
636
+ * implement any ASID-like capability so we can ignore it (instead
222
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
637
+ * we will always flush the TLB any time the ASID is changed).
223
638
+ */
224
if (likely(cmask_is_only_normals(ab_mask))) {
639
+ ttbr = regime_ttbr(env, mmu_idx, param.select);
225
+ if (ab_mask & float_cmask_denormal) {
640
+
226
+ float_raise(float_flag_input_denormal_used, s);
641
+ /*
227
+ }
642
+ * Here we should have set up all the parameters for the translation:
228
frac_modrem(a, b, mod_quot);
643
+ * inputsize, ttbr, epd, stride, tbi
229
return a;
644
+ */
230
}
645
+
231
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
646
+ if (param.epd) {
232
return a;
647
+ /*
233
}
648
+ * Translation table walk disabled => Translation fault on TLB miss
234
649
+ * Note: This is always 0 on 64-bit EL2 and EL3.
235
+ if (ab_mask & float_cmask_denormal) {
650
+ */
236
+ float_raise(float_flag_input_denormal_used, s);
651
+ goto do_fault;
237
+ }
652
+ }
238
+
653
+
239
/* N % Inf; 0 % N */
654
+ if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) {
240
g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
655
+ /*
241
return a;
656
+ * The starting level depends on the virtual address size (which can
242
@@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status,
657
+ * be up to 48 bits) and the translation granule size. It indicates
243
if (unlikely(a->cls != float_class_normal)) {
658
+ * the number of strides (stride bits at a time) needed to
244
switch (a->cls) {
659
+ * consume the bits of the input address. In the pseudocode this is:
245
case float_class_denormal:
660
+ * level = 4 - RoundUp((inputsize - grainsize) / stride)
246
+ if (!a->sign) {
661
+ * where their 'inputsize' is our 'inputsize', 'grainsize' is
247
+ /* -ve denormal will be InvalidOperation */
662
+ * our 'stride + 3' and 'stride' is our 'stride'.
248
+ float_raise(float_flag_input_denormal_used, status);
663
+ * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
664
+ * = 4 - (inputsize - stride - 3 + stride - 1) / stride
665
+ * = 4 - (inputsize - 4) / stride;
666
+ */
667
+ level = 4 - (inputsize - 4) / stride;
668
+ } else {
669
+ /*
670
+ * For stage 2 translations the starting level is specified by the
671
+ * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
672
+ */
673
+ uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2);
674
+ uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1);
675
+ uint32_t startlevel;
676
+ bool ok;
677
+
678
+ /* SL2 is RES0 unless DS=1 & 4kb granule. */
679
+ if (param.ds && stride == 9 && sl2) {
680
+ if (sl0 != 0) {
681
+ level = 0;
682
+ fault_type = ARMFault_Translation;
683
+ goto do_fault;
684
+ }
249
+ }
685
+ startlevel = -1;
250
break;
686
+ } else if (!aarch64 || stride == 9) {
251
case float_class_snan:
687
+ /* AArch32 or 4KB pages */
252
case float_class_qnan:
688
+ startlevel = 2 - sl0;
253
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
689
+
254
if ((flags & (minmax_isnum | minmax_isnumber))
690
+ if (cpu_isar_feature(aa64_st, cpu)) {
255
&& !(ab_mask & float_cmask_snan)
691
+ startlevel &= 3;
256
&& (ab_mask & ~float_cmask_qnan)) {
257
+ if (ab_mask & float_cmask_denormal) {
258
+ float_raise(float_flag_input_denormal_used, s);
692
+ }
259
+ }
693
+ } else {
260
return is_nan(a->cls) ? b : a;
694
+ /* 16KB or 64KB pages */
261
}
695
+ startlevel = 3 - sl0;
262
263
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
264
return parts_pick_nan(a, b, s);
265
}
266
267
+ if (ab_mask & float_cmask_denormal) {
268
+ float_raise(float_flag_input_denormal_used, s);
269
+ }
270
+
271
a_exp = a->exp;
272
b_exp = b->exp;
273
274
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
275
if (likely(cmask_is_only_normals(ab_mask))) {
276
FloatRelation cmp;
277
278
+ if (ab_mask & float_cmask_denormal) {
279
+ float_raise(float_flag_input_denormal_used, s);
696
+ }
280
+ }
697
+
281
+
698
+ /* Check that the starting level is valid. */
282
if (a->sign != b->sign) {
699
+ ok = check_s2_mmu_setup(cpu, aarch64, startlevel,
283
goto a_sign;
700
+ inputsize, stride, outputsize);
284
}
701
+ if (!ok) {
285
@@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
702
+ fault_type = ARMFault_Translation;
286
return float_relation_unordered;
703
+ goto do_fault;
287
}
704
+ }
288
705
+ level = startlevel;
289
+ if (ab_mask & float_cmask_denormal) {
706
+ }
290
+ float_raise(float_flag_input_denormal_used, s);
707
+
291
+ }
708
+ indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3);
292
+
709
+ indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level)));
293
if (ab_mask & float_cmask_zero) {
710
+
294
if (ab_mask == float_cmask_zero) {
711
+ /* Now we can extract the actual base address from the TTBR */
295
return float_relation_equal;
712
+ descaddr = extract64(ttbr, 0, 48);
296
@@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
713
+
297
case float_class_zero:
714
+ /*
298
case float_class_inf:
715
+ * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR.
299
break;
716
+ *
300
- case float_class_normal:
717
+ * Otherwise, if the base address is out of range, raise AddressSizeFault.
301
case float_class_denormal:
718
+ * In the pseudocode, this is !IsZero(baseregister<47:outputsize>),
302
+ float_raise(float_flag_input_denormal_used, s);
719
+ * but we've just cleared the bits above 47, so simplify the test.
303
+ /* fall through */
720
+ */
304
+ case float_class_normal:
721
+ if (outputsize > 48) {
305
a->exp += MIN(MAX(n, -0x10000), 0x10000);
722
+ descaddr |= extract64(ttbr, 2, 4) << 48;
306
break;
723
+ } else if (descaddr >> outputsize) {
307
default:
724
+ level = 0;
308
@@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
725
+ fault_type = ARMFault_AddressSize;
309
if (unlikely(a->cls != float_class_normal)) {
726
+ goto do_fault;
310
switch (a->cls) {
727
+ }
311
case float_class_denormal:
728
+
312
+ if (!a->sign) {
729
+ /*
313
+ /* -ve denormal will be InvalidOperation */
730
+ * We rely on this masking to clear the RES0 bits at the bottom of the TTBR
314
+ float_raise(float_flag_input_denormal_used, s);
731
+ * and also to mask out CnP (bit 0) which could validly be non-zero.
732
+ */
733
+ descaddr &= ~indexmask;
734
+
735
+ /*
736
+ * For AArch32, the address field in the descriptor goes up to bit 39
737
+ * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0
738
+ * or an AddressSize fault is raised. So for v8 we extract those SBZ
739
+ * bits as part of the address, which will be checked via outputsize.
740
+ * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2;
741
+ * the highest bits of a 52-bit output are placed elsewhere.
742
+ */
743
+ if (param.ds) {
744
+ descaddrmask = MAKE_64BIT_MASK(0, 50);
745
+ } else if (arm_feature(env, ARM_FEATURE_V8)) {
746
+ descaddrmask = MAKE_64BIT_MASK(0, 48);
747
+ } else {
748
+ descaddrmask = MAKE_64BIT_MASK(0, 40);
749
+ }
750
+ descaddrmask &= ~indexmask_grainsize;
751
+
752
+ /*
753
+ * Secure accesses start with the page table in secure memory and
754
+ * can be downgraded to non-secure at any step. Non-secure accesses
755
+ * remain non-secure. We implement this by just ORing in the NSTable/NS
756
+ * bits at each step.
757
+ */
758
+ tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4);
759
+ for (;;) {
760
+ uint64_t descriptor;
761
+ bool nstable;
762
+
763
+ descaddr |= (address >> (stride * (4 - level))) & indexmask;
764
+ descaddr &= ~7ULL;
765
+ nstable = extract32(tableattrs, 4, 1);
766
+ descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi);
767
+ if (fi->type != ARMFault_None) {
768
+ goto do_fault;
769
+ }
770
+
771
+ if (!(descriptor & 1) ||
772
+ (!(descriptor & 2) && (level == 3))) {
773
+ /* Invalid, or the Reserved level 3 encoding */
774
+ goto do_fault;
775
+ }
776
+
777
+ descaddr = descriptor & descaddrmask;
778
+
779
+ /*
780
+ * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
781
+ * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of
782
+ * descaddr are in [9:8]. Otherwise, if descaddr is out of range,
783
+ * raise AddressSizeFault.
784
+ */
785
+ if (outputsize > 48) {
786
+ if (param.ds) {
787
+ descaddr |= extract64(descriptor, 8, 2) << 50;
788
+ } else {
789
+ descaddr |= extract64(descriptor, 12, 4) << 48;
790
+ }
315
+ }
791
+ } else if (descaddr >> outputsize) {
316
break;
792
+ fault_type = ARMFault_AddressSize;
317
case float_class_snan:
793
+ goto do_fault;
318
case float_class_qnan:
794
+ }
795
+
796
+ if ((descriptor & 2) && (level < 3)) {
797
+ /*
798
+ * Table entry. The top five bits are attributes which may
799
+ * propagate down through lower levels of the table (and
800
+ * which are all arranged so that 0 means "no effect", so
801
+ * we can gather them up by ORing in the bits at each level).
802
+ */
803
+ tableattrs |= extract64(descriptor, 59, 5);
804
+ level++;
805
+ indexmask = indexmask_grainsize;
806
+ continue;
807
+ }
808
+ /*
809
+ * Block entry at level 1 or 2, or page entry at level 3.
810
+ * These are basically the same thing, although the number
811
+ * of bits we pull in from the vaddr varies. Note that although
812
+ * descaddrmask masks enough of the low bits of the descriptor
813
+ * to give a correct page or table address, the address field
814
+ * in a block descriptor is smaller; so we need to explicitly
815
+ * clear the lower bits here before ORing in the low vaddr bits.
816
+ */
817
+ page_size = (1ULL << ((stride * (4 - level)) + 3));
818
+ descaddr &= ~(page_size - 1);
819
+ descaddr |= (address & (page_size - 1));
820
+ /* Extract attributes from the descriptor */
821
+ attrs = extract64(descriptor, 2, 10)
822
+ | (extract64(descriptor, 52, 12) << 10);
823
+
824
+ if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
825
+ /* Stage 2 table descriptors do not include any attribute fields */
826
+ break;
827
+ }
828
+ /* Merge in attributes from table descriptors */
829
+ attrs |= nstable << 3; /* NS */
830
+ guarded = extract64(descriptor, 50, 1); /* GP */
831
+ if (param.hpd) {
832
+ /* HPD disables all the table attributes except NSTable. */
833
+ break;
834
+ }
835
+ attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
836
+ /*
837
+ * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
838
+ * means "force PL1 access only", which means forcing AP[1] to 0.
839
+ */
840
+ attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */
841
+ attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */
842
+ break;
843
+ }
844
+ /*
845
+ * Here descaddr is the final physical address, and attributes
846
+ * are all in attrs.
847
+ */
848
+ fault_type = ARMFault_AccessFlag;
849
+ if ((attrs & (1 << 8)) == 0) {
850
+ /* Access flag */
851
+ goto do_fault;
852
+ }
853
+
854
+ ap = extract32(attrs, 4, 2);
855
+
856
+ if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
857
+ ns = mmu_idx == ARMMMUIdx_Stage2;
858
+ xn = extract32(attrs, 11, 2);
859
+ *prot = get_S2prot(env, ap, xn, s1_is_el0);
860
+ } else {
861
+ ns = extract32(attrs, 3, 1);
862
+ xn = extract32(attrs, 12, 1);
863
+ pxn = extract32(attrs, 11, 1);
864
+ *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
865
+ }
866
+
867
+ fault_type = ARMFault_Permission;
868
+ if (!(*prot & (1 << access_type))) {
869
+ goto do_fault;
870
+ }
871
+
872
+ if (ns) {
873
+ /*
874
+ * The NS bit will (as required by the architecture) have no effect if
875
+ * the CPU doesn't support TZ or this is a non-secure translation
876
+ * regime, because the attribute will already be non-secure.
877
+ */
878
+ txattrs->secure = false;
879
+ }
880
+ /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */
881
+ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) {
882
+ arm_tlb_bti_gp(txattrs) = true;
883
+ }
884
+
885
+ if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
886
+ cacheattrs->is_s2_format = true;
887
+ cacheattrs->attrs = extract32(attrs, 0, 4);
888
+ } else {
889
+ /* Index into MAIR registers for cache attributes */
890
+ uint8_t attrindx = extract32(attrs, 0, 3);
891
+ uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
892
+ assert(attrindx <= 7);
893
+ cacheattrs->is_s2_format = false;
894
+ cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
895
+ }
896
+
897
+ /*
898
+ * For FEAT_LPA2 and effective DS, the SH field in the attributes
899
+ * was re-purposed for output address bits. The SH attribute in
900
+ * that case comes from TCR_ELx, which we extracted earlier.
901
+ */
902
+ if (param.ds) {
903
+ cacheattrs->shareability = param.sh;
904
+ } else {
905
+ cacheattrs->shareability = extract32(attrs, 6, 2);
906
+ }
907
+
908
+ *phys_ptr = descaddr;
909
+ *page_size_ptr = page_size;
910
+ return false;
911
+
912
+do_fault:
913
+ fi->type = fault_type;
914
+ fi->level = level;
915
+ /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */
916
+ fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 ||
917
+ mmu_idx == ARMMMUIdx_Stage2_S);
918
+ fi->s1ns = mmu_idx == ARMMMUIdx_Stage2;
919
+ return true;
920
+}
921
+
922
static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
923
MMUAccessType access_type, ARMMMUIdx mmu_idx,
924
hwaddr *phys_ptr, int *prot,
925
--
319
--
926
2.25.1
320
2.34.1
diff view generated by jsdifflib
1
We have about 30 instances of the typo/variant spelling 'writeable',
1
Currently we handle flushing of output denormals in uncanon_normal
2
and over 500 of the more common 'writable'. Standardize on the
2
always before we deal with rounding. This works for architectures
3
latter.
3
that detect tininess before rounding, but is usually not the right
4
4
place when the architecture detects tininess after rounding. For
5
Change produced with:
5
example, for x86 the SDM states that the MXCSR FTZ control bit causes
6
6
outputs to be flushed to zero "when it detects a floating-point
7
sed -i -e 's/\([Ww][Rr][Ii][Tt]\)[Ee]\([Aa][Bb][Ll][Ee]\)/\1\2/g' $(git grep -il writeable)
7
underflow condition". This means that we mustn't flush to zero if
8
8
the input is such that after rounding it is no longer tiny.
9
and then hand-undoing the instance in linux-headers/linux/kvm.h.
9
10
10
At least one of our guest architectures does underflow detection
11
Most of these changes are in comments or documentation; the
11
after rounding but flushing of denormals before rounding (MIPS MSA);
12
exceptions are:
12
this means we need to have a config knob for this that is separate
13
* a local variable in accel/hvf/hvf-accel-ops.c
13
from our existing tininess_before_rounding setting.
14
* a local variable in accel/kvm/kvm-all.c
14
15
* the PMCR_WRITABLE_MASK macro in target/arm/internals.h
15
Add an ftz_detection flag. For consistency with
16
* the EPT_VIOLATION_GPA_WRITABLE macro in target/i386/hvf/vmcs.h
16
tininess_before_rounding, we make it default to "detect ftz after
17
(which is never used anywhere)
17
rounding"; this means that we need to explicitly set the flag to
18
* the AR_TYPE_WRITABLE_MASK macro in target/i386/hvf/vmx.h
18
"detect ftz before rounding" on every existing architecture that sets
19
(which is never used anywhere)
19
flush_to_zero, so that this commit has no behaviour change.
20
(This means more code change here but for the long term a less
21
confusing API.)
22
23
For several architectures the current behaviour is either
24
definitely or possibly wrong; annotate those with TODO comments.
25
These architectures are definitely wrong (and should detect
26
ftz after rounding):
27
* x86
28
* Alpha
29
30
For these architectures the spec is unclear:
31
* MIPS (for non-MSA)
32
* RX
33
* SH4
34
35
PA-RISC makes ftz detection IMPDEF, but we aren't setting the
36
"tininess before rounding" setting that we ought to.
20
37
21
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
38
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
22
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
39
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
23
Reviewed-by: Stefan Weil <sw@weilnetz.de>
24
Message-id: 20220505095015.2714666-1-peter.maydell@linaro.org
25
---
40
---
26
docs/interop/vhost-user.rst | 2 +-
41
include/fpu/softfloat-helpers.h | 11 +++++++++++
27
docs/specs/vmgenid.txt | 4 ++--
42
include/fpu/softfloat-types.h | 18 ++++++++++++++++++
28
hw/scsi/mfi.h | 2 +-
43
target/mips/fpu_helper.h | 6 ++++++
29
target/arm/internals.h | 4 ++--
44
target/alpha/cpu.c | 7 +++++++
30
target/i386/hvf/vmcs.h | 2 +-
45
target/arm/cpu.c | 1 +
31
target/i386/hvf/vmx.h | 2 +-
46
target/hppa/fpu_helper.c | 11 +++++++++++
32
accel/hvf/hvf-accel-ops.c | 4 ++--
47
target/i386/tcg/fpu_helper.c | 8 ++++++++
33
accel/kvm/kvm-all.c | 4 ++--
48
target/mips/msa.c | 9 +++++++++
34
accel/tcg/user-exec.c | 6 +++---
49
target/ppc/cpu_init.c | 3 +++
35
hw/acpi/ghes.c | 2 +-
50
target/rx/cpu.c | 8 ++++++++
36
hw/intc/arm_gicv3_cpuif.c | 2 +-
51
target/sh4/cpu.c | 8 ++++++++
37
hw/intc/arm_gicv3_dist.c | 2 +-
52
target/tricore/helper.c | 1 +
38
hw/intc/arm_gicv3_redist.c | 4 ++--
53
tests/fp/fp-bench.c | 1 +
39
hw/intc/riscv_aclint.c | 2 +-
54
fpu/softfloat-parts.c.inc | 21 +++++++++++++++------
40
hw/intc/riscv_aplic.c | 2 +-
55
14 files changed, 107 insertions(+), 6 deletions(-)
41
hw/pci/shpc.c | 2 +-
56
42
hw/sparc64/sun4u_iommu.c | 2 +-
57
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
43
hw/timer/sse-timer.c | 2 +-
58
index XXXXXXX..XXXXXXX 100644
44
target/arm/gdbstub.c | 2 +-
59
--- a/include/fpu/softfloat-helpers.h
45
target/arm/helper.c | 4 ++--
60
+++ b/include/fpu/softfloat-helpers.h
46
target/arm/hvf/hvf.c | 4 ++--
61
@@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status)
47
target/i386/cpu-sysemu.c | 2 +-
62
status->flush_inputs_to_zero = val;
48
target/s390x/ioinst.c | 2 +-
63
}
49
python/qemu/machine/machine.py | 2 +-
64
50
tests/tcg/x86_64/system/boot.S | 2 +-
65
+static inline void set_float_ftz_detection(FloatFTZDetection d,
51
25 files changed, 34 insertions(+), 34 deletions(-)
66
+ float_status *status)
52
67
+{
53
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
68
+ status->ftz_detection = d;
54
index XXXXXXX..XXXXXXX 100644
69
+}
55
--- a/docs/interop/vhost-user.rst
70
+
56
+++ b/docs/interop/vhost-user.rst
71
static inline void set_default_nan_mode(bool val, float_status *status)
57
@@ -XXX,XX +XXX,XX @@ Virtio device config space
72
{
58
:size: a 32-bit configuration space access size in bytes
73
status->default_nan_mode = val;
59
74
@@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status)
60
:flags: a 32-bit value:
75
return status->default_nan_mode;
61
- - 0: Vhost front-end messages used for writeable fields
76
}
62
+ - 0: Vhost front-end messages used for writable fields
77
63
- 1: Vhost front-end messages used for live migration
78
+static inline FloatFTZDetection get_float_ftz_detection(const float_status *status)
64
79
+{
65
:payload: Size bytes array holding the contents of the virtio
80
+ return status->ftz_detection;
66
diff --git a/docs/specs/vmgenid.txt b/docs/specs/vmgenid.txt
81
+}
67
index XXXXXXX..XXXXXXX 100644
82
+
68
--- a/docs/specs/vmgenid.txt
83
#endif /* SOFTFLOAT_HELPERS_H */
69
+++ b/docs/specs/vmgenid.txt
84
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
70
@@ -XXX,XX +XXX,XX @@ change the contents of the memory at runtime, specifically when starting a
85
index XXXXXXX..XXXXXXX 100644
71
backed-up or snapshotted image. In order to do this, QEMU must know the
86
--- a/include/fpu/softfloat-types.h
72
address that has been allocated.
87
+++ b/include/fpu/softfloat-types.h
73
88
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
74
-The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
89
float_infzeronan_suppress_invalid = (1 << 7),
75
+The mechanism chosen for this memory sharing is writable fw_cfg blobs.
90
} FloatInfZeroNaNRule;
76
These are data objects that are visible to both QEMU and guests, and are
91
77
addressable as sequential files.
92
+/*
78
93
+ * When flush_to_zero is set, should we detect denormal results to
79
@@ -XXX,XX +XXX,XX @@ Two fw_cfg blobs are used in this case:
94
+ * be flushed before or after rounding? For most architectures this
80
/etc/vmgenid_guid - contains the actual VM Generation ID GUID
95
+ * should be set to match the tininess_before_rounding setting,
81
- read-only to the guest
96
+ * but a few architectures, e.g. MIPS MSA, detect FTZ before
82
/etc/vmgenid_addr - contains the address of the downloaded vmgenid blob
97
+ * rounding but tininess after rounding.
83
- - writeable by the guest
98
+ *
84
+ - writable by the guest
99
+ * This enum is arranged so that the default if the target doesn't
85
100
+ * configure it matches the default for tininess_before_rounding
86
101
+ * (i.e. "after rounding").
87
QEMU sends the following commands to the guest at startup:
102
+ */
88
diff --git a/hw/scsi/mfi.h b/hw/scsi/mfi.h
103
+typedef enum __attribute__((__packed__)) {
89
index XXXXXXX..XXXXXXX 100644
104
+ float_ftz_after_rounding = 0,
90
--- a/hw/scsi/mfi.h
105
+ float_ftz_before_rounding = 1,
91
+++ b/hw/scsi/mfi.h
106
+} FloatFTZDetection;
92
@@ -XXX,XX +XXX,XX @@ struct mfi_ctrl_props {
107
+
93
* metadata and user data
94
* 1=5%, 2=10%, 3=15% and so on
95
*/
96
- uint8_t viewSpace; /* snapshot writeable VIEWs
97
+ uint8_t viewSpace; /* snapshot writable VIEWs
98
* capacity as a % of source LD
99
* capacity. 0=READ only
100
* 1=5%, 2=10%, 3=15% and so on
101
diff --git a/target/arm/internals.h b/target/arm/internals.h
102
index XXXXXXX..XXXXXXX 100644
103
--- a/target/arm/internals.h
104
+++ b/target/arm/internals.h
105
@@ -XXX,XX +XXX,XX @@ enum MVEECIState {
106
#define PMCRP 0x2
107
#define PMCRE 0x1
108
/*
108
/*
109
- * Mask of PMCR bits writeable by guest (not including WO bits like C, P,
109
* Floating Point Status. Individual architectures may maintain
110
+ * Mask of PMCR bits writable by guest (not including WO bits like C, P,
110
* several versions of float_status for different functions. The
111
* which can be written as 1 to trigger behaviour but which stay RAZ).
111
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
112
*/
112
bool tininess_before_rounding;
113
-#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
113
/* should denormalised results go to zero and set output_denormal_flushed? */
114
+#define PMCR_WRITABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
114
bool flush_to_zero;
115
115
+ /* do we detect and flush denormal results before or after rounding? */
116
#define PMXEVTYPER_P 0x80000000
116
+ FloatFTZDetection ftz_detection;
117
#define PMXEVTYPER_U 0x40000000
117
/* should denormalised inputs go to zero and set input_denormal_flushed? */
118
diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h
118
bool flush_inputs_to_zero;
119
index XXXXXXX..XXXXXXX 100644
119
bool default_nan_mode;
120
--- a/target/i386/hvf/vmcs.h
120
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
121
+++ b/target/i386/hvf/vmcs.h
121
index XXXXXXX..XXXXXXX 100644
122
@@ -XXX,XX +XXX,XX @@
122
--- a/target/mips/fpu_helper.h
123
#define EPT_VIOLATION_DATA_WRITE (1UL << 1)
123
+++ b/target/mips/fpu_helper.h
124
#define EPT_VIOLATION_INST_FETCH (1UL << 2)
124
@@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env)
125
#define EPT_VIOLATION_GPA_READABLE (1UL << 3)
125
*/
126
-#define EPT_VIOLATION_GPA_WRITEABLE (1UL << 4)
126
set_float_2nan_prop_rule(float_2nan_prop_s_ab,
127
+#define EPT_VIOLATION_GPA_WRITABLE (1UL << 4)
127
&env->active_fpu.fp_status);
128
#define EPT_VIOLATION_GPA_EXECUTABLE (1UL << 5)
128
+ /*
129
#define EPT_VIOLATION_GLA_VALID (1UL << 7)
129
+ * TODO: the spec doesn't say clearly whether FTZ happens before
130
#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
130
+ * or after rounding for normal FPU operations.
131
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
131
+ */
132
index XXXXXXX..XXXXXXX 100644
132
+ set_float_ftz_detection(float_ftz_before_rounding,
133
--- a/target/i386/hvf/vmx.h
133
+ &env->active_fpu.fp_status);
134
+++ b/target/i386/hvf/vmx.h
134
}
135
@@ -XXX,XX +XXX,XX @@ static inline uint64_t cap2ctrl(uint64_t cap, uint64_t ctrl)
135
136
136
/* MSA */
137
#define AR_TYPE_ACCESSES_MASK 1
137
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
138
#define AR_TYPE_READABLE_MASK (1 << 1)
138
index XXXXXXX..XXXXXXX 100644
139
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
139
--- a/target/alpha/cpu.c
140
+#define AR_TYPE_WRITABLE_MASK (1 << 2)
140
+++ b/target/alpha/cpu.c
141
#define AR_TYPE_CODE_MASK (1 << 3)
141
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj)
142
#define AR_TYPE_MASK 0x0f
142
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
143
#define AR_TYPE_BUSY_64_TSS 11
143
/* Default NaN: sign bit clear, msb frac bit set */
144
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
144
set_float_default_nan_pattern(0b01000000, &env->fp_status);
145
index XXXXXXX..XXXXXXX 100644
145
+ /*
146
--- a/accel/hvf/hvf-accel-ops.c
146
+ * TODO: this is incorrect. The Alpha Architecture Handbook version 4
147
+++ b/accel/hvf/hvf-accel-ops.c
147
+ * section 4.7.7.11 says that we flush to zero for underflow cases, so
148
@@ -XXX,XX +XXX,XX @@ static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
148
+ * this should be float_ftz_after_rounding to match the
149
+ * tininess_after_rounding (which is specified in section 4.7.5).
150
+ */
151
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
152
#if defined(CONFIG_USER_ONLY)
153
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
154
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
155
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
156
index XXXXXXX..XXXXXXX 100644
157
--- a/target/arm/cpu.c
158
+++ b/target/arm/cpu.c
159
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
160
static void arm_set_default_fp_behaviours(float_status *s)
149
{
161
{
150
hvf_slot *mem;
162
set_float_detect_tininess(float_tininess_before_rounding, s);
151
MemoryRegion *area = section->mr;
163
+ set_float_ftz_detection(float_ftz_before_rounding, s);
152
- bool writeable = !area->readonly && !area->rom_device;
164
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
153
+ bool writable = !area->readonly && !area->rom_device;
165
set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
154
hv_memory_flags_t flags;
166
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
155
uint64_t page_size = qemu_real_host_page_size();
167
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
156
168
index XXXXXXX..XXXXXXX 100644
157
if (!memory_region_is_ram(area)) {
169
--- a/target/hppa/fpu_helper.c
158
- if (writeable) {
170
+++ b/target/hppa/fpu_helper.c
159
+ if (writable) {
171
@@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
160
return;
172
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
161
} else if (!memory_region_is_romd(area)) {
173
/* Default NaN: sign bit clear, msb-1 frac bit set */
162
/*
174
set_float_default_nan_pattern(0b00100000, &env->fp_status);
163
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
175
+ /*
164
index XXXXXXX..XXXXXXX 100644
176
+ * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
165
--- a/accel/kvm/kvm-all.c
177
+ * enabled by FPSR.D happens before or after rounding. We pick "before"
166
+++ b/accel/kvm/kvm-all.c
178
+ * for consistency with tininess detection.
167
@@ -XXX,XX +XXX,XX @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
179
+ */
168
KVMSlot *mem;
180
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
169
int err;
181
+ /*
170
MemoryRegion *mr = section->mr;
182
+ * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should
171
- bool writeable = !mr->readonly && !mr->rom_device;
183
+ * detect tininess before rounding, but we don't set that here so we
172
+ bool writable = !mr->readonly && !mr->rom_device;
184
+ * get the default tininess after rounding.
173
hwaddr start_addr, size, slot_size, mr_offset;
185
+ */
174
ram_addr_t ram_start_offset;
186
}
175
void *ram;
187
176
188
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
177
if (!memory_region_is_ram(mr)) {
189
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
178
- if (writeable || !kvm_readonly_mem_allowed) {
190
index XXXXXXX..XXXXXXX 100644
179
+ if (writable || !kvm_readonly_mem_allowed) {
191
--- a/target/i386/tcg/fpu_helper.c
180
return;
192
+++ b/target/i386/tcg/fpu_helper.c
181
} else if (!mr->romd_mode) {
193
@@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env)
182
/* If the memory device is not in romd_mode, then we actually want
194
set_float_default_nan_pattern(0b11000000, &env->fp_status);
183
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
195
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
184
index XXXXXXX..XXXXXXX 100644
196
set_float_default_nan_pattern(0b11000000, &env->sse_status);
185
--- a/accel/tcg/user-exec.c
197
+ /*
186
+++ b/accel/tcg/user-exec.c
198
+ * TODO: x86 does flush-to-zero detection after rounding (the SDM
187
@@ -XXX,XX +XXX,XX @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
199
+ * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
188
* Return true if the write fault has been handled, and should be re-tried.
200
+ * when we detect underflow, which x86 does after rounding).
189
*
201
+ */
190
* Note that it is important that we don't call page_unprotect() unless
202
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
191
- * this is really a "write to nonwriteable page" fault, because
203
+ set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
192
+ * this is really a "write to nonwritable page" fault, because
204
+ set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
193
* page_unprotect() assumes that if it is called for an access to
205
}
194
- * a page that's writeable this means we had two threads racing and
206
195
- * another thread got there first and already made the page writeable;
207
static inline uint8_t save_exception_flags(CPUX86State *env)
196
+ * a page that's writable this means we had two threads racing and
208
diff --git a/target/mips/msa.c b/target/mips/msa.c
197
+ * another thread got there first and already made the page writable;
209
index XXXXXXX..XXXXXXX 100644
198
* so we will retry the access. If we were to call page_unprotect()
210
--- a/target/mips/msa.c
199
* for some other kind of fault that should really be passed to the
211
+++ b/target/mips/msa.c
200
* guest, we'd end up in an infinite loop of retrying the faulting access.
212
@@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env)
201
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
213
/* tininess detected after rounding.*/
202
index XXXXXXX..XXXXXXX 100644
214
set_float_detect_tininess(float_tininess_after_rounding,
203
--- a/hw/acpi/ghes.c
215
&env->active_tc.msa_fp_status);
204
+++ b/hw/acpi/ghes.c
216
+ /*
205
@@ -XXX,XX +XXX,XX @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
217
+ * MSACSR.FS detects tiny results to flush to zero before rounding
206
for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
218
+ * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD
207
/*
219
+ * Architecture Module, Revision 1.1" section 3.5.4), even though it
208
* Initialize the value of read_ack_register to 1, so GHES can be
220
+ * detects tininess after rounding for underflow purposes (section 3.4.2
209
- * writeable after (re)boot.
221
+ * table 3.3).
210
+ * writable after (re)boot.
222
+ */
211
* ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
223
+ set_float_ftz_detection(float_ftz_before_rounding,
212
* (GHESv2 - Type 10)
224
+ &env->active_tc.msa_fp_status);
213
*/
214
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
215
index XXXXXXX..XXXXXXX 100644
216
--- a/hw/intc/arm_gicv3_cpuif.c
217
+++ b/hw/intc/arm_gicv3_cpuif.c
218
@@ -XXX,XX +XXX,XX @@ static void icc_ctlr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri,
219
cs->icc_ctlr_el1[GICV3_S] |= ICC_CTLR_EL1_CBPR;
220
}
221
222
- /* The only bit stored in icc_ctlr_el3 which is writeable is EOIMODE_EL3: */
223
+ /* The only bit stored in icc_ctlr_el3 which is writable is EOIMODE_EL3: */
224
mask = ICC_CTLR_EL3_EOIMODE_EL3;
225
226
cs->icc_ctlr_el3 &= ~mask;
227
diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c
228
index XXXXXXX..XXXXXXX 100644
229
--- a/hw/intc/arm_gicv3_dist.c
230
+++ b/hw/intc/arm_gicv3_dist.c
231
@@ -XXX,XX +XXX,XX @@ static bool gicd_writel(GICv3State *s, hwaddr offset,
232
if (value & mask & GICD_CTLR_DS) {
233
/* We just set DS, so the ARE_NS and EnG1S bits are now RES0.
234
* Note that this is a one-way transition because if DS is set
235
- * then it's not writeable, so it can only go back to 0 with a
236
+ * then it's not writable, so it can only go back to 0 with a
237
* hardware reset.
238
*/
239
s->gicd_ctlr &= ~(GICD_CTLR_EN_GRP1S | GICD_CTLR_ARE_NS);
240
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
241
index XXXXXXX..XXXXXXX 100644
242
--- a/hw/intc/arm_gicv3_redist.c
243
+++ b/hw/intc/arm_gicv3_redist.c
244
@@ -XXX,XX +XXX,XX @@ static void gicr_write_vpendbaser(GICv3CPUState *cs, uint64_t newval)
245
225
246
/*
226
/*
247
* The DIRTY bit is read-only and for us is always zero;
227
* According to MIPS specifications, if one of the two operands is
248
- * other fields are writeable.
228
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
249
+ * other fields are writable.
229
index XXXXXXX..XXXXXXX 100644
250
*/
230
--- a/target/ppc/cpu_init.c
251
newval &= R_GICR_VPENDBASER_INNERCACHE_MASK |
231
+++ b/target/ppc/cpu_init.c
252
R_GICR_VPENDBASER_SHAREABILITY_MASK |
232
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
253
@@ -XXX,XX +XXX,XX @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
233
/* tininess for underflow is detected before rounding */
254
/* RAZ/WI for our implementation */
234
set_float_detect_tininess(float_tininess_before_rounding,
255
return MEMTX_OK;
235
&env->fp_status);
256
case GICR_WAKER:
236
+ /* Similarly for flush-to-zero */
257
- /* Only the ProcessorSleep bit is writeable. When the guest sets
237
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
258
+ /* Only the ProcessorSleep bit is writable. When the guest sets
238
+
259
* it it requests that we transition the channel between the
239
/*
260
* redistributor and the cpu interface to quiescent, and that
240
* PowerPC propagation rules:
261
* we set the ChildrenAsleep bit once the inteface has reached the
241
* 1. A if it sNaN or qNaN
262
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
242
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
263
index XXXXXXX..XXXXXXX 100644
243
index XXXXXXX..XXXXXXX 100644
264
--- a/hw/intc/riscv_aclint.c
244
--- a/target/rx/cpu.c
265
+++ b/hw/intc/riscv_aclint.c
245
+++ b/target/rx/cpu.c
266
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp)
246
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
267
/* Claim software interrupt bits */
247
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
268
for (i = 0; i < swi->num_harts; i++) {
248
/* Default NaN value: sign bit clear, set frac msb */
269
RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i));
249
set_float_default_nan_pattern(0b01000000, &env->fp_status);
270
- /* We don't claim mip.SSIP because it is writeable by software */
250
+ /*
271
+ /* We don't claim mip.SSIP because it is writable by software */
251
+ * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear
272
if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) {
252
+ * on whether flush-to-zero should happen before or after rounding, but
273
error_report("MSIP already claimed");
253
+ * section 1.3.2 says that it happens when underflow is detected, and
274
exit(1);
254
+ * implies that underflow is detected after rounding. So this may not
275
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
255
+ * be the correct setting.
276
index XXXXXXX..XXXXXXX 100644
256
+ */
277
--- a/hw/intc/riscv_aplic.c
257
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
278
+++ b/hw/intc/riscv_aplic.c
258
}
279
@@ -XXX,XX +XXX,XX @@ static void riscv_aplic_write(void *opaque, hwaddr addr, uint64_t value,
259
280
}
260
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
281
261
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
282
if (addr == APLIC_DOMAINCFG) {
262
index XXXXXXX..XXXXXXX 100644
283
- /* Only IE bit writeable at the moment */
263
--- a/target/sh4/cpu.c
284
+ /* Only IE bit writable at the moment */
264
+++ b/target/sh4/cpu.c
285
value &= APLIC_DOMAINCFG_IE;
265
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
286
aplic->domaincfg = value;
266
set_default_nan_mode(1, &env->fp_status);
287
} else if ((APLIC_SOURCECFG_BASE <= addr) &&
267
/* sign bit clear, set all frac bits other than msb */
288
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
268
set_float_default_nan_pattern(0b00111111, &env->fp_status);
289
index XXXXXXX..XXXXXXX 100644
269
+ /*
290
--- a/hw/pci/shpc.c
270
+ * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether
291
+++ b/hw/pci/shpc.c
271
+ * it detects tininess before or after rounding. Section 6.4 is clear
292
@@ -XXX,XX +XXX,XX @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp)
272
+ * that flush-to-zero happens when the result underflows, though, so
293
pci_set_byte(config + SHPC_CAP_CxP, 0);
273
+ * either this should be "detect ftz after rounding" or else we should
294
pci_set_long(config + SHPC_CAP_DWORD_DATA, 0);
274
+ * be setting "detect tininess before rounding".
295
d->shpc->cap = config_offset;
275
+ */
296
- /* Make dword select and data writeable. */
276
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
297
+ /* Make dword select and data writable. */
277
}
298
pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff);
278
299
pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff);
279
static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
300
return 0;
280
diff --git a/target/tricore/helper.c b/target/tricore/helper.c
301
diff --git a/hw/sparc64/sun4u_iommu.c b/hw/sparc64/sun4u_iommu.c
281
index XXXXXXX..XXXXXXX 100644
302
index XXXXXXX..XXXXXXX 100644
282
--- a/target/tricore/helper.c
303
--- a/hw/sparc64/sun4u_iommu.c
283
+++ b/target/tricore/helper.c
304
+++ b/hw/sparc64/sun4u_iommu.c
284
@@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env)
305
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry sun4u_translate_iommu(IOMMUMemoryRegion *iommu,
285
set_flush_inputs_to_zero(1, &env->fp_status);
306
}
286
set_flush_to_zero(1, &env->fp_status);
307
287
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
308
if (tte & IOMMU_TTE_DATA_W) {
288
+ set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
309
- /* Writeable */
289
set_default_nan_mode(1, &env->fp_status);
310
+ /* Writable */
290
/* Default NaN pattern: sign bit clear, frac msb set */
311
ret.perm = IOMMU_RW;
291
set_float_default_nan_pattern(0b01000000, &env->fp_status);
312
} else {
292
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
313
ret.perm = IOMMU_RO;
293
index XXXXXXX..XXXXXXX 100644
314
diff --git a/hw/timer/sse-timer.c b/hw/timer/sse-timer.c
294
--- a/tests/fp/fp-bench.c
315
index XXXXXXX..XXXXXXX 100644
295
+++ b/tests/fp/fp-bench.c
316
--- a/hw/timer/sse-timer.c
296
@@ -XXX,XX +XXX,XX @@ static void run_bench(void)
317
+++ b/hw/timer/sse-timer.c
297
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
318
@@ -XXX,XX +XXX,XX @@ static void sse_timer_write(void *opaque, hwaddr offset, uint64_t value,
298
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
319
{
299
set_float_default_nan_pattern(0b01000000, &soft_status);
320
uint32_t old_ctl = s->cntp_aival_ctl;
300
+ set_float_ftz_detection(float_ftz_before_rounding, &soft_status);
321
301
322
- /* EN bit is writeable; CLR bit is write-0-to-clear, write-1-ignored */
302
f = bench_funcs[operation][precision];
323
+ /* EN bit is writable; CLR bit is write-0-to-clear, write-1-ignored */
303
g_assert(f);
324
s->cntp_aival_ctl &= ~R_CNTP_AIVAL_CTL_EN_MASK;
304
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
325
s->cntp_aival_ctl |= value & R_CNTP_AIVAL_CTL_EN_MASK;
305
index XXXXXXX..XXXXXXX 100644
326
if (!(value & R_CNTP_AIVAL_CTL_CLR_MASK)) {
306
--- a/fpu/softfloat-parts.c.inc
327
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
307
+++ b/fpu/softfloat-parts.c.inc
328
index XXXXXXX..XXXXXXX 100644
308
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
329
--- a/target/arm/gdbstub.c
309
p->frac_lo &= ~round_mask;
330
+++ b/target/arm/gdbstub.c
310
}
331
@@ -XXX,XX +XXX,XX @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
311
frac_shr(p, frac_shift);
332
/*
312
- } else if (s->flush_to_zero) {
333
* Don't allow writing to XPSR.Exception as it can cause
313
+ } else if (s->flush_to_zero &&
334
* a transition into or out of handler mode (it's not
314
+ s->ftz_detection == float_ftz_before_rounding) {
335
- * writeable via the MSR insn so this is a reasonable
315
flags |= float_flag_output_denormal_flushed;
336
+ * writable via the MSR insn so this is a reasonable
316
p->cls = float_class_zero;
337
* restriction). Other fields are safe to update.
317
exp = 0;
338
*/
318
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
339
xpsr_write(env, tmp, ~XPSR_EXCP);
319
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
340
diff --git a/target/arm/helper.c b/target/arm/helper.c
320
frac_shr(p, frac_shift);
341
index XXXXXXX..XXXXXXX 100644
321
342
--- a/target/arm/helper.c
322
- if (is_tiny && (flags & float_flag_inexact)) {
343
+++ b/target/arm/helper.c
323
- flags |= float_flag_underflow;
344
@@ -XXX,XX +XXX,XX @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
324
- }
325
- if (exp == 0 && frac_eqz(p)) {
326
- p->cls = float_class_zero;
327
+ if (is_tiny) {
328
+ if (s->flush_to_zero) {
329
+ assert(s->ftz_detection == float_ftz_after_rounding);
330
+ flags |= float_flag_output_denormal_flushed;
331
+ p->cls = float_class_zero;
332
+ exp = 0;
333
+ frac_clear(p);
334
+ } else if (flags & float_flag_inexact) {
335
+ flags |= float_flag_underflow;
336
+ }
337
+ if (exp == 0 && frac_eqz(p)) {
338
+ p->cls = float_class_zero;
339
+ }
345
}
340
}
346
}
341
}
347
342
p->exp = exp;
348
- env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK;
349
- env->cp15.c9_pmcr |= (value & PMCR_WRITEABLE_MASK);
350
+ env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
351
+ env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK);
352
353
pmu_op_finish(env);
354
}
355
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
356
index XXXXXXX..XXXXXXX 100644
357
--- a/target/arm/hvf/hvf.c
358
+++ b/target/arm/hvf/hvf.c
359
@@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
360
}
361
}
362
363
- env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK;
364
- env->cp15.c9_pmcr |= (val & PMCR_WRITEABLE_MASK);
365
+ env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
366
+ env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK);
367
368
pmu_op_finish(env);
369
break;
370
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
371
index XXXXXXX..XXXXXXX 100644
372
--- a/target/i386/cpu-sysemu.c
373
+++ b/target/i386/cpu-sysemu.c
374
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_to_dict(X86CPU *cpu, QDict *props)
375
376
/* Convert CPU model data from X86CPU object to a property dictionary
377
* that can recreate exactly the same CPU model, including every
378
- * writeable QOM property.
379
+ * writable QOM property.
380
*/
381
static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props)
382
{
383
diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c
384
index XXXXXXX..XXXXXXX 100644
385
--- a/target/s390x/ioinst.c
386
+++ b/target/s390x/ioinst.c
387
@@ -XXX,XX +XXX,XX @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb,
388
g_assert(!s390_is_pv());
389
/*
390
* As operand exceptions have a lower priority than access exceptions,
391
- * we check whether the memory area is writeable (injecting the
392
+ * we check whether the memory area is writable (injecting the
393
* access execption if it is not) first.
394
*/
395
if (!s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib))) {
396
diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
397
index XXXXXXX..XXXXXXX 100644
398
--- a/python/qemu/machine/machine.py
399
+++ b/python/qemu/machine/machine.py
400
@@ -XXX,XX +XXX,XX @@ def _early_cleanup(self) -> None:
401
"""
402
# If we keep the console socket open, we may deadlock waiting
403
# for QEMU to exit, while QEMU is waiting for the socket to
404
- # become writeable.
405
+ # become writable.
406
if self._console_socket is not None:
407
self._console_socket.close()
408
self._console_socket = None
409
diff --git a/tests/tcg/x86_64/system/boot.S b/tests/tcg/x86_64/system/boot.S
410
index XXXXXXX..XXXXXXX 100644
411
--- a/tests/tcg/x86_64/system/boot.S
412
+++ b/tests/tcg/x86_64/system/boot.S
413
@@ -XXX,XX +XXX,XX @@
414
    *
415
    * - `ebx`: contains the physical memory address where the loader has placed
416
    * the boot start info structure.
417
-    * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
418
+    * - `cr0`: bit 0 (PE) must be set. All the other writable bits are cleared.
419
    * - `cr4`: all bits are cleared.
420
    * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
421
    * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
422
--
343
--
423
2.25.1
344
2.34.1
424
425
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The Armv8.7 FEAT_AFP feature defines three new control bits in
2
the FPCR:
3
* FPCR.AH: "alternate floating point mode"; this changes floating
4
point behaviour in a variety of ways, including:
5
- the sign of a default NaN is 1, not 0
6
- if FPCR.FZ is also 1, denormals detected after rounding
7
with an unbounded exponent has been applied are flushed to zero
8
- FPCR.FZ does not cause denormalized inputs to be flushed to zero
9
- miscellaneous other corner-case behaviour changes
10
* FPCR.FIZ: flush denormalized numbers to zero on input for
11
most instructions
12
* FPCR.NEP: makes scalar SIMD operations merge the result with
13
higher vector elements in one of the source registers, instead
14
of zeroing the higher elements of the destination
2
15
3
Begin creation of sve_ldst_internal.h by moving the primitives
16
This commit defines the new bits in the FPCR, and allows them to be
4
that access host and tlb memory.
17
read or written when FEAT_AFP is implemented. Actual behaviour
18
changes will be implemented in subsequent commits.
5
19
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
20
Note that these are the first FPCR bits which don't appear in the
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
21
AArch32 FPSCR view of the register, and which share bit positions
8
Message-id: 20220607203306.657998-14-richard.henderson@linaro.org
22
with FPSR bits.
23
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
26
---
11
target/arm/sve_ldst_internal.h | 127 +++++++++++++++++++++++++++++++++
27
target/arm/cpu-features.h | 5 +++++
12
target/arm/sve_helper.c | 107 +--------------------------
28
target/arm/cpu.h | 3 +++
13
2 files changed, 128 insertions(+), 106 deletions(-)
29
target/arm/vfp_helper.c | 11 ++++++++---
14
create mode 100644 target/arm/sve_ldst_internal.h
30
3 files changed, 16 insertions(+), 3 deletions(-)
15
31
16
diff --git a/target/arm/sve_ldst_internal.h b/target/arm/sve_ldst_internal.h
32
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
17
new file mode 100644
33
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX
34
--- a/target/arm/cpu-features.h
19
--- /dev/null
35
+++ b/target/arm/cpu-features.h
20
+++ b/target/arm/sve_ldst_internal.h
36
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
21
@@ -XXX,XX +XXX,XX @@
37
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
22
+/*
38
}
23
+ * ARM SVE Load/Store Helpers
39
24
+ *
40
+static inline bool isar_feature_aa64_afp(const ARMISARegisters *id)
25
+ * Copyright (c) 2018-2022 Linaro
41
+{
26
+ *
42
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0;
27
+ * This library is free software; you can redistribute it and/or
28
+ * modify it under the terms of the GNU Lesser General Public
29
+ * License as published by the Free Software Foundation; either
30
+ * version 2.1 of the License, or (at your option) any later version.
31
+ *
32
+ * This library is distributed in the hope that it will be useful,
33
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
+ * Lesser General Public License for more details.
36
+ *
37
+ * You should have received a copy of the GNU Lesser General Public
38
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
39
+ */
40
+
41
+#ifndef TARGET_ARM_SVE_LDST_INTERNAL_H
42
+#define TARGET_ARM_SVE_LDST_INTERNAL_H
43
+
44
+#include "exec/cpu_ldst.h"
45
+
46
+/*
47
+ * Load one element into @vd + @reg_off from @host.
48
+ * The controlling predicate is known to be true.
49
+ */
50
+typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host);
51
+
52
+/*
53
+ * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
54
+ * The controlling predicate is known to be true.
55
+ */
56
+typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
57
+ target_ulong vaddr, uintptr_t retaddr);
58
+
59
+/*
60
+ * Generate the above primitives.
61
+ */
62
+
63
+#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
64
+static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
65
+{ TYPEM val = HOST(host); *(TYPEE *)(vd + H(reg_off)) = val; }
66
+
67
+#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \
68
+static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
69
+{ TYPEM val = *(TYPEE *)(vd + H(reg_off)); HOST(host, val); }
70
+
71
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \
72
+static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
73
+ intptr_t reg_off, target_ulong addr, uintptr_t ra) \
74
+{ \
75
+ TYPEM val = TLB(env, useronly_clean_ptr(addr), ra); \
76
+ *(TYPEE *)(vd + H(reg_off)) = val; \
77
+}
43
+}
78
+
44
+
79
+#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \
45
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
80
+static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
46
{
81
+ intptr_t reg_off, target_ulong addr, uintptr_t ra) \
47
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
82
+{ \
48
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
83
+ TYPEM val = *(TYPEE *)(vd + H(reg_off)); \
84
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
85
+}
86
+
87
+#define DO_LD_PRIM_1(NAME, H, TE, TM) \
88
+ DO_LD_HOST(NAME, H, TE, TM, ldub_p) \
89
+ DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra)
90
+
91
+DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t)
92
+DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
93
+DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
94
+DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
95
+DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
96
+DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t)
97
+DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t)
98
+
99
+#define DO_ST_PRIM_1(NAME, H, TE, TM) \
100
+ DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \
101
+ DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra)
102
+
103
+DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t)
104
+DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t)
105
+DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t)
106
+DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t)
107
+
108
+#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \
109
+ DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \
110
+ DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \
111
+ DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \
112
+ DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra)
113
+
114
+#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \
115
+ DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \
116
+ DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \
117
+ DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \
118
+ DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra)
119
+
120
+DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw)
121
+DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw)
122
+DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw)
123
+DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw)
124
+DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw)
125
+
126
+DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw)
127
+DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw)
128
+DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw)
129
+
130
+DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl)
131
+DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl)
132
+DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl)
133
+
134
+DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl)
135
+DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
136
+
137
+DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
138
+DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
139
+
140
+#undef DO_LD_TLB
141
+#undef DO_ST_TLB
142
+#undef DO_LD_HOST
143
+#undef DO_LD_PRIM_1
144
+#undef DO_ST_PRIM_1
145
+#undef DO_LD_PRIM_2
146
+#undef DO_ST_PRIM_2
147
+
148
+#endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */
149
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
150
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
151
--- a/target/arm/sve_helper.c
50
--- a/target/arm/cpu.h
152
+++ b/target/arm/sve_helper.c
51
+++ b/target/arm/cpu.h
153
@@ -XXX,XX +XXX,XX @@
52
@@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
154
#include "cpu.h"
155
#include "internals.h"
156
#include "exec/exec-all.h"
157
-#include "exec/cpu_ldst.h"
158
#include "exec/helper-proto.h"
159
#include "tcg/tcg-gvec-desc.h"
160
#include "fpu/softfloat.h"
161
#include "tcg/tcg.h"
162
#include "vec_internal.h"
163
+#include "sve_ldst_internal.h"
164
165
166
/* Return a value for NZCV as per the ARM PredTest pseudofunction.
167
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
168
* Load contiguous data, protected by a governing predicate.
169
*/
53
*/
170
54
171
-/*
55
/* FPCR bits */
172
- * Load one element into @vd + @reg_off from @host.
56
+#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */
173
- * The controlling predicate is known to be true.
57
+#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */
174
- */
58
+#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */
175
-typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host);
59
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
176
-
60
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
177
-/*
61
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
178
- * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
62
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
179
- * The controlling predicate is known to be true.
63
index XXXXXXX..XXXXXXX 100644
180
- */
64
--- a/target/arm/vfp_helper.c
181
-typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
65
+++ b/target/arm/vfp_helper.c
182
- target_ulong vaddr, uintptr_t retaddr);
66
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
183
-
67
if (!cpu_isar_feature(any_fp16, cpu)) {
184
-/*
68
val &= ~FPCR_FZ16;
185
- * Generate the above primitives.
69
}
186
- */
70
+ if (!cpu_isar_feature(aa64_afp, cpu)) {
187
-
71
+ val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
188
-#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
72
+ }
189
-static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
73
190
-{ \
74
if (!cpu_isar_feature(aa64_ebf16, cpu)) {
191
- TYPEM val = HOST(host); \
75
val &= ~FPCR_EBF;
192
- *(TYPEE *)(vd + H(reg_off)) = val; \
76
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
193
-}
77
* We don't implement trapped exception handling, so the
194
-
78
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
195
-#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \
79
*
196
-static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
80
- * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF
197
-{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); }
81
- * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
198
-
82
+ * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
199
-#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \
83
+ * FIZ, AH, and NEP.
200
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
84
+ * Len, Stride and LTPSIZE we just handled. Store those bits
201
- target_ulong addr, uintptr_t ra) \
85
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
202
-{ \
86
* bits.
203
- *(TYPEE *)(vd + H(reg_off)) = \
87
*/
204
- (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \
88
- val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF;
205
-}
89
+ val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
206
-
90
+ FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
207
-#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \
91
env->vfp.fpcr &= ~mask;
208
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
92
env->vfp.fpcr |= val;
209
- target_ulong addr, uintptr_t ra) \
93
}
210
-{ \
211
- TLB(env, useronly_clean_ptr(addr), \
212
- (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \
213
-}
214
-
215
-#define DO_LD_PRIM_1(NAME, H, TE, TM) \
216
- DO_LD_HOST(NAME, H, TE, TM, ldub_p) \
217
- DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra)
218
-
219
-DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t)
220
-DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
221
-DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
222
-DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
223
-DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
224
-DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t)
225
-DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t)
226
-
227
-#define DO_ST_PRIM_1(NAME, H, TE, TM) \
228
- DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \
229
- DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra)
230
-
231
-DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t)
232
-DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t)
233
-DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t)
234
-DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t)
235
-
236
-#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \
237
- DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \
238
- DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \
239
- DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \
240
- DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra)
241
-
242
-#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \
243
- DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \
244
- DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \
245
- DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \
246
- DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra)
247
-
248
-DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw)
249
-DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw)
250
-DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw)
251
-DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw)
252
-DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw)
253
-
254
-DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw)
255
-DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw)
256
-DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw)
257
-
258
-DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl)
259
-DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl)
260
-DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl)
261
-
262
-DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl)
263
-DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
264
-
265
-DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
266
-DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
267
-
268
-#undef DO_LD_TLB
269
-#undef DO_ST_TLB
270
-#undef DO_LD_HOST
271
-#undef DO_LD_PRIM_1
272
-#undef DO_ST_PRIM_1
273
-#undef DO_LD_PRIM_2
274
-#undef DO_ST_PRIM_2
275
-
276
/*
277
* Skip through a sequence of inactive elements in the guarding predicate @vg,
278
* beginning at @reg_off bounded by @reg_max. Return the offset of the active
279
--
94
--
280
2.25.1
95
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects
2
flushing of single and double precision denormal inputs to zero for
3
AArch64 floating point instructions. (For half-precision, the
4
existing FPCR.FZ16 control remains the only one.)
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal
4
Message-id: 20220604040607.269301-11-richard.henderson@linaro.org
7
only because of FPCR.FIZ then we should *not* set the cumulative
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
exception bit FPSR.IDC.
9
10
FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only
11
applies when FPCR.AH is 0.
12
13
We can implement this by setting the "flush inputs to zero" state
14
appropriately when FPCR is written, and by not reflecting the
15
float_flag_input_denormal status flag into FPSR reads when it is the
16
result only of FPSR.FIZ.
17
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
20
---
8
target/arm/ptw.h | 2 --
21
target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++-------
9
target/arm/helper.c | 19 -------------------
22
1 file changed, 50 insertions(+), 10 deletions(-)
10
target/arm/ptw.c | 21 +++++++++++++++++++++
11
3 files changed, 21 insertions(+), 21 deletions(-)
12
23
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
24
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
14
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
26
--- a/target/arm/vfp_helper.c
16
+++ b/target/arm/ptw.h
27
+++ b/target/arm/vfp_helper.c
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
28
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
18
bool m_is_ppb_region(CPUARMState *env, uint32_t address);
29
19
bool m_is_system_region(CPUARMState *env, uint32_t address);
30
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
20
31
{
21
-bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user);
32
- uint32_t i = 0;
22
-
33
+ uint32_t a32_flags = 0, a64_flags = 0;
23
bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
34
24
MMUAccessType access_type, ARMMMUIdx mmu_idx,
35
- i |= get_float_exception_flags(&env->vfp.fp_status_a32);
25
bool s1_is_el0,
36
- i |= get_float_exception_flags(&env->vfp.fp_status_a64);
26
diff --git a/target/arm/helper.c b/target/arm/helper.c
37
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
27
index XXXXXXX..XXXXXXX 100644
38
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
28
--- a/target/arm/helper.c
39
+ a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
29
+++ b/target/arm/helper.c
40
/* FZ16 does not generate an input denormal exception. */
30
@@ -XXX,XX +XXX,XX @@ do_fault:
41
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
31
return true;
42
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
43
& ~float_flag_input_denormal_flushed);
44
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
45
+ a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
46
& ~float_flag_input_denormal_flushed);
47
- i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
48
+
49
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
50
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
51
& ~float_flag_input_denormal_flushed);
52
- return vfp_exceptbits_from_host(i);
53
+ /*
54
+ * Flushing an input denormal *only* because FPCR.FIZ == 1 does
55
+ * not set FPSR.IDC; if FPCR.FZ is also set then this takes
56
+ * precedence and IDC is set (see the FPUnpackBase pseudocode).
57
+ * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
58
+ * We only do this for the a64 flags because FIZ has no effect
59
+ * on AArch32 even if it is set.
60
+ */
61
+ if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
62
+ a64_flags &= ~float_flag_input_denormal_flushed;
63
+ }
64
+ return vfp_exceptbits_from_host(a32_flags | a64_flags);
32
}
65
}
33
66
34
-bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user)
67
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
35
-{
68
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
36
- /* Return true if we should use the default memory map as a
69
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
37
- * "background" region if there are no hits against any MPU regions.
38
- */
39
- CPUARMState *env = &cpu->env;
40
-
41
- if (is_user) {
42
- return false;
43
- }
44
-
45
- if (arm_feature(env, ARM_FEATURE_M)) {
46
- return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)]
47
- & R_V7M_MPU_CTRL_PRIVDEFENA_MASK;
48
- } else {
49
- return regime_sctlr(env, mmu_idx) & SCTLR_BR;
50
- }
51
-}
52
-
53
bool m_is_ppb_region(CPUARMState *env, uint32_t address)
54
{
55
/* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
56
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/ptw.c
59
+++ b/target/arm/ptw.c
60
@@ -XXX,XX +XXX,XX @@ static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx,
61
}
62
}
70
}
63
71
64
+static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx,
72
+static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
65
+ bool is_user)
66
+{
73
+{
67
+ /*
74
+ /*
68
+ * Return true if we should use the default memory map as a
75
+ * Synchronize any pending exception-flag information in the
69
+ * "background" region if there are no hits against any MPU regions.
76
+ * float_status values into env->vfp.fpsr, and then clear out
77
+ * the float_status data.
70
+ */
78
+ */
71
+ CPUARMState *env = &cpu->env;
79
+ env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
72
+
80
+ vfp_clear_float_status_exc_flags(env);
73
+ if (is_user) {
74
+ return false;
75
+ }
76
+
77
+ if (arm_feature(env, ARM_FEATURE_M)) {
78
+ return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)]
79
+ & R_V7M_MPU_CTRL_PRIVDEFENA_MASK;
80
+ } else {
81
+ return regime_sctlr(env, mmu_idx) & SCTLR_BR;
82
+ }
83
+}
81
+}
84
+
82
+
85
static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
83
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
86
MMUAccessType access_type, ARMMMUIdx mmu_idx,
84
{
87
hwaddr *phys_ptr, int *prot,
85
uint64_t changed = env->vfp.fpcr;
86
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
87
if (changed & FPCR_FZ) {
88
bool ftz_enabled = val & FPCR_FZ;
89
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
90
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
91
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
92
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
93
+ /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
94
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
95
+ }
96
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
97
+ /*
98
+ * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
99
+ * both FPCR.AH = 0 and FPCR.FZ = 1.
100
+ */
101
+ bool fitz_enabled = (val & FPCR_FIZ) ||
102
+ (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
103
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
104
}
105
if (changed & FPCR_DN) {
106
bool dnan_enabled = val & FPCR_DN;
107
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
108
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
109
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
110
}
111
+ /*
112
+ * If any bits changed that we look at in vfp_get_fpsr_from_host(),
113
+ * we must sync the float_status flags into vfp.fpsr now (under the
114
+ * old regime) before we update vfp.fpcr.
115
+ */
116
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
117
+ vfp_sync_and_clear_float_status_exc_flags(env);
118
+ }
119
}
120
121
#else
88
--
122
--
89
2.25.1
123
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
When FPCR.AH is set, various behaviours of AArch64 floating point
2
operations which are controlled by softfloat config settings change:
3
* tininess and ftz detection before/after rounding
4
* NaN propagation order
5
* result of 0 * Inf + NaN
6
* default NaN value
2
7
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
When the guest changes the value of the AH bit, switch these config
4
Message-id: 20220604040607.269301-20-richard.henderson@linaro.org
9
settings on the fp_status_a64 and fp_status_f16_a64 float_status
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
fields.
11
12
This requires us to make the arm_set_default_fp_behaviours() function
13
global, since we now need to call it from cpu.c and vfp_helper.c; we
14
move it to vfp_helper.c so it can be next to the new
15
arm_set_ah_fp_behaviours().
16
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
19
---
8
target/arm/ptw.h | 3 --
20
target/arm/internals.h | 4 +++
9
target/arm/helper.c | 128 --------------------------------------------
21
target/arm/cpu.c | 23 ----------------
10
target/arm/ptw.c | 128 ++++++++++++++++++++++++++++++++++++++++++++
22
target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++-
11
3 files changed, 128 insertions(+), 131 deletions(-)
23
3 files changed, 61 insertions(+), 24 deletions(-)
12
24
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
25
diff --git a/target/arm/internals.h b/target/arm/internals.h
14
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
27
--- a/target/arm/internals.h
16
+++ b/target/arm/ptw.h
28
+++ b/target/arm/internals.h
17
@@ -XXX,XX +XXX,XX @@ ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
29
@@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env);
18
ARMMMUIdx mmu_idx);
30
* all EL1" scope; this covers stage 1 and stage 2.
19
bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
31
*/
20
int inputsize, int stride, int outputsize);
32
int alle1_tlbmask(CPUARMState *env);
21
-int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0);
33
+
22
-int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
34
+/* Set the float_status behaviour to match the Arm defaults */
23
- int ap, int ns, int xn, int pxn);
35
+void arm_set_default_fp_behaviours(float_status *s);
24
36
+
25
#endif /* !CONFIG_USER_ONLY */
37
#endif
26
#endif /* TARGET_ARM_PTW_H */
38
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
27
diff --git a/target/arm/helper.c b/target/arm/helper.c
28
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/helper.c
40
--- a/target/arm/cpu.c
30
+++ b/target/arm/helper.c
41
+++ b/target/arm/cpu.c
31
@@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
42
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
32
}
43
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
33
}
44
}
34
45
35
-/* Translate S2 section/page access permissions to protection flags
46
-/*
36
- *
47
- * Set the float_status behaviour to match the Arm defaults:
37
- * @env: CPUARMState
48
- * * tininess-before-rounding
38
- * @s2ap: The 2-bit stage2 access permissions (S2AP)
49
- * * 2-input NaN propagation prefers SNaN over QNaN, and then
39
- * @xn: XN (execute-never) bits
50
- * operand A over operand B (see FPProcessNaNs() pseudocode)
40
- * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
51
- * * 3-input NaN propagation prefers SNaN over QNaN, and then
52
- * operand C over A over B (see FPProcessNaNs3() pseudocode,
53
- * but note that for QEMU muladd is a * b + c, whereas for
54
- * the pseudocode function the arguments are in the order c, a, b.
55
- * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
56
- * and the input NaN if it is signalling
57
- * * Default NaN has sign bit clear, msb frac bit set
41
- */
58
- */
42
-int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
59
-static void arm_set_default_fp_behaviours(float_status *s)
43
-{
60
-{
44
- int prot = 0;
61
- set_float_detect_tininess(float_tininess_before_rounding, s);
45
-
62
- set_float_ftz_detection(float_ftz_before_rounding, s);
46
- if (s2ap & 1) {
63
- set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
47
- prot |= PAGE_READ;
64
- set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
48
- }
65
- set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
49
- if (s2ap & 2) {
66
- set_float_default_nan_pattern(0b01000000, s);
50
- prot |= PAGE_WRITE;
51
- }
52
-
53
- if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
54
- switch (xn) {
55
- case 0:
56
- prot |= PAGE_EXEC;
57
- break;
58
- case 1:
59
- if (s1_is_el0) {
60
- prot |= PAGE_EXEC;
61
- }
62
- break;
63
- case 2:
64
- break;
65
- case 3:
66
- if (!s1_is_el0) {
67
- prot |= PAGE_EXEC;
68
- }
69
- break;
70
- default:
71
- g_assert_not_reached();
72
- }
73
- } else {
74
- if (!extract32(xn, 1, 1)) {
75
- if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
76
- prot |= PAGE_EXEC;
77
- }
78
- }
79
- }
80
- return prot;
81
-}
67
-}
82
-
68
-
83
-/* Translate section/page access permissions to protection flags
69
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
84
- *
70
{
85
- * @env: CPUARMState
71
/* Reset a single ARMCPRegInfo register */
86
- * @mmu_idx: MMU index indicating required translation regime
72
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
87
- * @is_aa64: TRUE if AArch64
88
- * @ap: The 2-bit simple AP (AP[2:1])
89
- * @ns: NS (non-secure) bit
90
- * @xn: XN (execute-never) bit
91
- * @pxn: PXN (privileged execute-never) bit
92
- */
93
-int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
94
- int ap, int ns, int xn, int pxn)
95
-{
96
- bool is_user = regime_is_user(env, mmu_idx);
97
- int prot_rw, user_rw;
98
- bool have_wxn;
99
- int wxn = 0;
100
-
101
- assert(mmu_idx != ARMMMUIdx_Stage2);
102
- assert(mmu_idx != ARMMMUIdx_Stage2_S);
103
-
104
- user_rw = simple_ap_to_rw_prot_is_user(ap, true);
105
- if (is_user) {
106
- prot_rw = user_rw;
107
- } else {
108
- if (user_rw && regime_is_pan(env, mmu_idx)) {
109
- /* PAN forbids data accesses but doesn't affect insn fetch */
110
- prot_rw = 0;
111
- } else {
112
- prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
113
- }
114
- }
115
-
116
- if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) {
117
- return prot_rw;
118
- }
119
-
120
- /* TODO have_wxn should be replaced with
121
- * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2)
122
- * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE
123
- * compatible processors have EL2, which is required for [U]WXN.
124
- */
125
- have_wxn = arm_feature(env, ARM_FEATURE_LPAE);
126
-
127
- if (have_wxn) {
128
- wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN;
129
- }
130
-
131
- if (is_aa64) {
132
- if (regime_has_2_ranges(mmu_idx) && !is_user) {
133
- xn = pxn || (user_rw & PAGE_WRITE);
134
- }
135
- } else if (arm_feature(env, ARM_FEATURE_V7)) {
136
- switch (regime_el(env, mmu_idx)) {
137
- case 1:
138
- case 3:
139
- if (is_user) {
140
- xn = xn || !(user_rw & PAGE_READ);
141
- } else {
142
- int uwxn = 0;
143
- if (have_wxn) {
144
- uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN;
145
- }
146
- xn = xn || !(prot_rw & PAGE_READ) || pxn ||
147
- (uwxn && (user_rw & PAGE_WRITE));
148
- }
149
- break;
150
- case 2:
151
- break;
152
- }
153
- } else {
154
- xn = wxn = 0;
155
- }
156
-
157
- if (xn || (wxn && (prot_rw & PAGE_WRITE))) {
158
- return prot_rw;
159
- }
160
- return prot_rw | PAGE_EXEC;
161
-}
162
-
163
/*
164
* check_s2_mmu_setup
165
* @cpu: ARMCPU
166
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
167
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
168
--- a/target/arm/ptw.c
74
--- a/target/arm/vfp_helper.c
169
+++ b/target/arm/ptw.c
75
+++ b/target/arm/vfp_helper.c
170
@@ -XXX,XX +XXX,XX @@ do_fault:
76
@@ -XXX,XX +XXX,XX @@
171
return true;
77
#include "exec/helper-proto.h"
172
}
78
#include "internals.h"
79
#include "cpu-features.h"
80
+#include "fpu/softfloat.h"
81
#ifdef CONFIG_TCG
82
#include "qemu/log.h"
83
-#include "fpu/softfloat.h"
84
#endif
85
86
/* VFP support. We follow the convention used for VFP instructions:
87
Single precision routines have a "s" suffix, double precision a
88
"d" suffix. */
173
89
174
+/*
90
+/*
175
+ * Translate S2 section/page access permissions to protection flags
91
+ * Set the float_status behaviour to match the Arm defaults:
176
+ * @env: CPUARMState
92
+ * * tininess-before-rounding
177
+ * @s2ap: The 2-bit stage2 access permissions (S2AP)
93
+ * * 2-input NaN propagation prefers SNaN over QNaN, and then
178
+ * @xn: XN (execute-never) bits
94
+ * operand A over operand B (see FPProcessNaNs() pseudocode)
179
+ * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
95
+ * * 3-input NaN propagation prefers SNaN over QNaN, and then
96
+ * operand C over A over B (see FPProcessNaNs3() pseudocode,
97
+ * but note that for QEMU muladd is a * b + c, whereas for
98
+ * the pseudocode function the arguments are in the order c, a, b).
99
+ * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
100
+ * and the input NaN if it is signalling
101
+ * * Default NaN has sign bit clear, msb frac bit set
180
+ */
102
+ */
181
+static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
103
+void arm_set_default_fp_behaviours(float_status *s)
182
+{
104
+{
183
+ int prot = 0;
105
+ set_float_detect_tininess(float_tininess_before_rounding, s);
184
+
106
+ set_float_ftz_detection(float_ftz_before_rounding, s);
185
+ if (s2ap & 1) {
107
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
186
+ prot |= PAGE_READ;
108
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
187
+ }
109
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
188
+ if (s2ap & 2) {
110
+ set_float_default_nan_pattern(0b01000000, s);
189
+ prot |= PAGE_WRITE;
190
+ }
191
+
192
+ if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
193
+ switch (xn) {
194
+ case 0:
195
+ prot |= PAGE_EXEC;
196
+ break;
197
+ case 1:
198
+ if (s1_is_el0) {
199
+ prot |= PAGE_EXEC;
200
+ }
201
+ break;
202
+ case 2:
203
+ break;
204
+ case 3:
205
+ if (!s1_is_el0) {
206
+ prot |= PAGE_EXEC;
207
+ }
208
+ break;
209
+ default:
210
+ g_assert_not_reached();
211
+ }
212
+ } else {
213
+ if (!extract32(xn, 1, 1)) {
214
+ if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
215
+ prot |= PAGE_EXEC;
216
+ }
217
+ }
218
+ }
219
+ return prot;
220
+}
111
+}
221
+
112
+
222
+/*
113
+/*
223
+ * Translate section/page access permissions to protection flags
114
+ * Set the float_status behaviour to match the FEAT_AFP
224
+ * @env: CPUARMState
115
+ * FPCR.AH=1 requirements:
225
+ * @mmu_idx: MMU index indicating required translation regime
116
+ * * tininess-after-rounding
226
+ * @is_aa64: TRUE if AArch64
117
+ * * 2-input NaN propagation prefers the first NaN
227
+ * @ap: The 2-bit simple AP (AP[2:1])
118
+ * * 3-input NaN propagation prefers a over b over c
228
+ * @ns: NS (non-secure) bit
119
+ * * 0 * Inf + NaN always returns the input NaN and doesn't
229
+ * @xn: XN (execute-never) bit
120
+ * set Invalid for a QNaN
230
+ * @pxn: PXN (privileged execute-never) bit
121
+ * * default NaN has sign bit set, msb frac bit set
231
+ */
122
+ */
232
+static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
123
+static void arm_set_ah_fp_behaviours(float_status *s)
233
+ int ap, int ns, int xn, int pxn)
234
+{
124
+{
235
+ bool is_user = regime_is_user(env, mmu_idx);
125
+ set_float_detect_tininess(float_tininess_after_rounding, s);
236
+ int prot_rw, user_rw;
126
+ set_float_ftz_detection(float_ftz_after_rounding, s);
237
+ bool have_wxn;
127
+ set_float_2nan_prop_rule(float_2nan_prop_ab, s);
238
+ int wxn = 0;
128
+ set_float_3nan_prop_rule(float_3nan_prop_abc, s);
129
+ set_float_infzeronan_rule(float_infzeronan_dnan_never |
130
+ float_infzeronan_suppress_invalid, s);
131
+ set_float_default_nan_pattern(0b11000000, s);
132
+}
239
+
133
+
240
+ assert(mmu_idx != ARMMMUIdx_Stage2);
134
#ifdef CONFIG_TCG
241
+ assert(mmu_idx != ARMMMUIdx_Stage2_S);
135
136
/* Convert host exception flags to vfp form. */
137
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
138
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
139
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
140
}
141
+ if (changed & FPCR_AH) {
142
+ bool ah_enabled = val & FPCR_AH;
242
+
143
+
243
+ user_rw = simple_ap_to_rw_prot_is_user(ap, true);
144
+ if (ah_enabled) {
244
+ if (is_user) {
145
+ /* Change behaviours for A64 FP operations */
245
+ prot_rw = user_rw;
146
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
246
+ } else {
147
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
247
+ if (user_rw && regime_is_pan(env, mmu_idx)) {
248
+ /* PAN forbids data accesses but doesn't affect insn fetch */
249
+ prot_rw = 0;
250
+ } else {
148
+ } else {
251
+ prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
149
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
150
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
252
+ }
151
+ }
253
+ }
152
+ }
254
+
153
/*
255
+ if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) {
154
* If any bits changed that we look at in vfp_get_fpsr_from_host(),
256
+ return prot_rw;
155
* we must sync the float_status flags into vfp.fpsr now (under the
257
+ }
258
+
259
+ /* TODO have_wxn should be replaced with
260
+ * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2)
261
+ * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE
262
+ * compatible processors have EL2, which is required for [U]WXN.
263
+ */
264
+ have_wxn = arm_feature(env, ARM_FEATURE_LPAE);
265
+
266
+ if (have_wxn) {
267
+ wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN;
268
+ }
269
+
270
+ if (is_aa64) {
271
+ if (regime_has_2_ranges(mmu_idx) && !is_user) {
272
+ xn = pxn || (user_rw & PAGE_WRITE);
273
+ }
274
+ } else if (arm_feature(env, ARM_FEATURE_V7)) {
275
+ switch (regime_el(env, mmu_idx)) {
276
+ case 1:
277
+ case 3:
278
+ if (is_user) {
279
+ xn = xn || !(user_rw & PAGE_READ);
280
+ } else {
281
+ int uwxn = 0;
282
+ if (have_wxn) {
283
+ uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN;
284
+ }
285
+ xn = xn || !(prot_rw & PAGE_READ) || pxn ||
286
+ (uwxn && (user_rw & PAGE_WRITE));
287
+ }
288
+ break;
289
+ case 2:
290
+ break;
291
+ }
292
+ } else {
293
+ xn = wxn = 0;
294
+ }
295
+
296
+ if (xn || (wxn && (prot_rw & PAGE_WRITE))) {
297
+ return prot_rw;
298
+ }
299
+ return prot_rw | PAGE_EXEC;
300
+}
301
+
302
/**
303
* get_phys_addr_lpae: perform one stage of page table walk, LPAE format
304
*
305
--
156
--
306
2.25.1
157
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH = 1, some of the cumulative exception flags in the FPSR
2
behave slightly differently for A64 operations:
3
* IDC is set when a denormal input is used without flushing
4
* IXC (Inexact) is set when an output denormal is flushed to zero
1
5
6
Update vfp_get_fpsr_from_host() to do this.
7
8
Note that because half-precision operations never set IDC, we now
9
need to add float_flag_input_denormal_used to the set we mask out of
10
fp_status_f16_a64.
11
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
target/arm/vfp_helper.c | 17 ++++++++++++++---
16
1 file changed, 14 insertions(+), 3 deletions(-)
17
18
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/vfp_helper.c
21
+++ b/target/arm/vfp_helper.c
22
@@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s)
23
#ifdef CONFIG_TCG
24
25
/* Convert host exception flags to vfp form. */
26
-static inline uint32_t vfp_exceptbits_from_host(int host_bits)
27
+static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
28
{
29
uint32_t target_bits = 0;
30
31
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
32
if (host_bits & float_flag_input_denormal_flushed) {
33
target_bits |= FPSR_IDC;
34
}
35
+ /*
36
+ * With FPCR.AH, IDC is set when an input denormal is used,
37
+ * and flushing an output denormal to zero sets both IXC and UFC.
38
+ */
39
+ if (ah && (host_bits & float_flag_input_denormal_used)) {
40
+ target_bits |= FPSR_IDC;
41
+ }
42
+ if (ah && (host_bits & float_flag_output_denormal_flushed)) {
43
+ target_bits |= FPSR_IXC;
44
+ }
45
return target_bits;
46
}
47
48
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
49
50
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
51
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
52
- & ~float_flag_input_denormal_flushed);
53
+ & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
54
/*
55
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
56
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
57
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
58
if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
59
a64_flags &= ~float_flag_input_denormal_flushed;
60
}
61
- return vfp_exceptbits_from_host(a32_flags | a64_flags);
62
+ return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
63
+ vfp_exceptbits_from_host(a32_flags, false);
64
}
65
66
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
67
--
68
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
We are going to need to generate different code in some cases when
2
FPCR.AH is 1. For example:
3
* Floating point neg and abs must not flip the sign bit of NaNs
4
* some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various
5
BFCVT and BFM bfloat16 ops) need to use a different float_status
6
to the usual one
2
7
3
The bitmap need only hold 15 bits; bitmap is over-complicated.
8
Encode FPCR.AH into the A64 tbflags, so we can refer to it at
4
We can simplify operations quite a bit with plain logical ops.
9
translate time.
5
10
6
The introduction of SVE_VQ_POW2_MAP eliminates the need for
11
Because we now have a bit in FPCR that affects codegen, we can't mark
7
looping in order to search for powers of two. Simply perform
12
the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes
8
the logical ops and use count leading or trailing zeros as
13
to it will now end the TB and trigger a regeneration of hflags.
9
required to find the result.
10
14
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20220607203306.657998-12-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
17
---
16
target/arm/cpu.h | 6 +--
18
target/arm/cpu.h | 1 +
17
target/arm/internals.h | 5 ++
19
target/arm/tcg/translate.h | 2 ++
18
target/arm/kvm_arm.h | 7 ++-
20
target/arm/helper.c | 2 +-
19
target/arm/cpu64.c | 117 ++++++++++++++++++++---------------------
21
target/arm/tcg/hflags.c | 4 ++++
20
target/arm/helper.c | 9 +---
22
target/arm/tcg/translate-a64.c | 1 +
21
target/arm/kvm64.c | 36 +++----------
23
5 files changed, 9 insertions(+), 1 deletion(-)
22
6 files changed, 75 insertions(+), 105 deletions(-)
23
24
24
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
25
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
25
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/cpu.h
27
--- a/target/arm/cpu.h
27
+++ b/target/arm/cpu.h
28
+++ b/target/arm/cpu.h
28
@@ -XXX,XX +XXX,XX @@ struct ArchCPU {
29
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1)
29
* Bits set in sve_vq_supported represent valid vector lengths for
30
FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
30
* the CPU type.
31
/* Set if FEAT_NV2 RAM accesses are big-endian */
31
*/
32
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
32
- DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ);
33
+FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
33
- DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ);
34
34
- DECLARE_BITMAP(sve_vq_supported, ARM_MAX_VQ);
35
/*
35
+ uint32_t sve_vq_map;
36
* Helpers for using the above. Note that only the A64 accessors use
36
+ uint32_t sve_vq_init;
37
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
37
+ uint32_t sve_vq_supported;
38
39
/* Generic timer counter frequency, in Hz */
40
uint64_t gt_cntfrq_hz;
41
diff --git a/target/arm/internals.h b/target/arm/internals.h
42
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/internals.h
39
--- a/target/arm/tcg/translate.h
44
+++ b/target/arm/internals.h
40
+++ b/target/arm/tcg/translate.h
45
@@ -XXX,XX +XXX,XX @@ bool el_is_in_host(CPUARMState *env, int el);
41
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
46
42
bool nv2_mem_e20;
47
void aa32_max_features(ARMCPU *cpu);
43
/* True if NV2 enabled and NV2 RAM accesses are big-endian */
48
44
bool nv2_mem_be;
49
+/* Powers of 2 for sve_vq_map et al. */
45
+ /* True if FPCR.AH is 1 (alternate floating point handling) */
50
+#define SVE_VQ_POW2_MAP \
46
+ bool fpcr_ah;
51
+ ((1 << (1 - 1)) | (1 << (2 - 1)) | \
52
+ (1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1)))
53
+
54
#endif
55
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/kvm_arm.h
58
+++ b/target/arm/kvm_arm.h
59
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
60
/**
61
* kvm_arm_sve_get_vls:
62
* @cs: CPUState
63
- * @map: bitmap to fill in
64
*
65
* Get all the SVE vector lengths supported by the KVM host, setting
66
* the bits corresponding to their length in quadwords minus one
67
- * (vq - 1) in @map up to ARM_MAX_VQ.
68
+ * (vq - 1) up to ARM_MAX_VQ. Return the resulting map.
69
*/
70
-void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map);
71
+uint32_t kvm_arm_sve_get_vls(CPUState *cs);
72
73
/**
74
* kvm_arm_set_cpu_features_from_host:
75
@@ -XXX,XX +XXX,XX @@ static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
76
g_assert_not_reached();
77
}
78
79
-static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
80
+static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs)
81
{
82
g_assert_not_reached();
83
}
84
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/target/arm/cpu64.c
87
+++ b/target/arm/cpu64.c
88
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
89
* any of the above. Finally, if SVE is not disabled, then at least one
90
* vector length must be enabled.
91
*/
92
- DECLARE_BITMAP(tmp, ARM_MAX_VQ);
93
- uint32_t vq, max_vq = 0;
94
+ uint32_t vq_map = cpu->sve_vq_map;
95
+ uint32_t vq_init = cpu->sve_vq_init;
96
+ uint32_t vq_supported;
97
+ uint32_t vq_mask = 0;
98
+ uint32_t tmp, vq, max_vq = 0;
99
100
/*
47
/*
101
* CPU models specify a set of supported vector lengths which are
48
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
102
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
49
* < 0, set by the current instruction.
103
* in the supported bitmap results in an error. When KVM is enabled we
104
* fetch the supported bitmap from the host.
105
*/
106
- if (kvm_enabled() && kvm_arm_sve_supported()) {
107
- kvm_arm_sve_get_vls(CPU(cpu), cpu->sve_vq_supported);
108
- } else if (kvm_enabled()) {
109
- assert(!cpu_isar_feature(aa64_sve, cpu));
110
+ if (kvm_enabled()) {
111
+ if (kvm_arm_sve_supported()) {
112
+ cpu->sve_vq_supported = kvm_arm_sve_get_vls(CPU(cpu));
113
+ vq_supported = cpu->sve_vq_supported;
114
+ } else {
115
+ assert(!cpu_isar_feature(aa64_sve, cpu));
116
+ vq_supported = 0;
117
+ }
118
+ } else {
119
+ vq_supported = cpu->sve_vq_supported;
120
}
121
122
/*
123
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
124
* From the properties, sve_vq_map<N> implies sve_vq_init<N>.
125
* Check first for any sve<N> enabled.
126
*/
127
- if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) {
128
- max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1;
129
+ if (vq_map != 0) {
130
+ max_vq = 32 - clz32(vq_map);
131
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
132
133
if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) {
134
error_setg(errp, "cannot enable sve%d", max_vq * 128);
135
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
136
* For KVM we have to automatically enable all supported uninitialized
137
* lengths, even when the smaller lengths are not all powers-of-two.
138
*/
139
- bitmap_andnot(tmp, cpu->sve_vq_supported, cpu->sve_vq_init, max_vq);
140
- bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
141
+ vq_map |= vq_supported & ~vq_init & vq_mask;
142
} else {
143
/* Propagate enabled bits down through required powers-of-two. */
144
- for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
145
- if (!test_bit(vq - 1, cpu->sve_vq_init)) {
146
- set_bit(vq - 1, cpu->sve_vq_map);
147
- }
148
- }
149
+ vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask;
150
}
151
} else if (cpu->sve_max_vq == 0) {
152
/*
153
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
154
155
if (kvm_enabled()) {
156
/* Disabling a supported length disables all larger lengths. */
157
- for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
158
- if (test_bit(vq - 1, cpu->sve_vq_init) &&
159
- test_bit(vq - 1, cpu->sve_vq_supported)) {
160
- break;
161
- }
162
- }
163
+ tmp = vq_init & vq_supported;
164
} else {
165
/* Disabling a power-of-two disables all larger lengths. */
166
- for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) {
167
- if (test_bit(vq - 1, cpu->sve_vq_init)) {
168
- break;
169
- }
170
- }
171
+ tmp = vq_init & SVE_VQ_POW2_MAP;
172
}
173
+ vq = ctz32(tmp) + 1;
174
175
max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
176
- bitmap_andnot(cpu->sve_vq_map, cpu->sve_vq_supported,
177
- cpu->sve_vq_init, max_vq);
178
- if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) {
179
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
180
+ vq_map = vq_supported & ~vq_init & vq_mask;
181
+
182
+ if (max_vq == 0 || vq_map == 0) {
183
error_setg(errp, "cannot disable sve%d", vq * 128);
184
error_append_hint(errp, "Disabling sve%d results in all "
185
"vector lengths being disabled.\n",
186
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
187
return;
188
}
189
190
- max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1;
191
+ max_vq = 32 - clz32(vq_map);
192
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
193
}
194
195
/*
196
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
197
*/
198
if (cpu->sve_max_vq != 0) {
199
max_vq = cpu->sve_max_vq;
200
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
201
202
- if (!test_bit(max_vq - 1, cpu->sve_vq_map) &&
203
- test_bit(max_vq - 1, cpu->sve_vq_init)) {
204
+ if (vq_init & ~vq_map & (1 << (max_vq - 1))) {
205
error_setg(errp, "cannot disable sve%d", max_vq * 128);
206
error_append_hint(errp, "The maximum vector length must be "
207
"enabled, sve-max-vq=%d (%d bits)\n",
208
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
209
}
210
211
/* Set all bits not explicitly set within sve-max-vq. */
212
- bitmap_complement(tmp, cpu->sve_vq_init, max_vq);
213
- bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
214
+ vq_map |= ~vq_init & vq_mask;
215
}
216
217
/*
218
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
219
* are clear, just in case anybody looks.
220
*/
221
assert(max_vq != 0);
222
- bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq);
223
+ assert(vq_mask != 0);
224
+ vq_map &= vq_mask;
225
226
/* Ensure the set of lengths matches what is supported. */
227
- bitmap_xor(tmp, cpu->sve_vq_map, cpu->sve_vq_supported, max_vq);
228
- if (!bitmap_empty(tmp, max_vq)) {
229
- vq = find_last_bit(tmp, max_vq) + 1;
230
- if (test_bit(vq - 1, cpu->sve_vq_map)) {
231
+ tmp = vq_map ^ (vq_supported & vq_mask);
232
+ if (tmp) {
233
+ vq = 32 - clz32(tmp);
234
+ if (vq_map & (1 << (vq - 1))) {
235
if (cpu->sve_max_vq) {
236
error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq);
237
error_append_hint(errp, "This CPU does not support "
238
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
239
return;
240
} else {
241
/* Ensure all required powers-of-two are enabled. */
242
- for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
243
- if (!test_bit(vq - 1, cpu->sve_vq_map)) {
244
- error_setg(errp, "cannot disable sve%d", vq * 128);
245
- error_append_hint(errp, "sve%d is required as it "
246
- "is a power-of-two length smaller "
247
- "than the maximum, sve%d\n",
248
- vq * 128, max_vq * 128);
249
- return;
250
- }
251
+ tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map;
252
+ if (tmp) {
253
+ vq = 32 - clz32(tmp);
254
+ error_setg(errp, "cannot disable sve%d", vq * 128);
255
+ error_append_hint(errp, "sve%d is required as it "
256
+ "is a power-of-two length smaller "
257
+ "than the maximum, sve%d\n",
258
+ vq * 128, max_vq * 128);
259
+ return;
260
}
261
}
262
}
263
@@ -XXX,XX +XXX,XX @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
264
265
/* From now on sve_max_vq is the actual maximum supported length. */
266
cpu->sve_max_vq = max_vq;
267
+ cpu->sve_vq_map = vq_map;
268
}
269
270
static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
271
@@ -XXX,XX +XXX,XX @@ static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
272
if (!cpu_isar_feature(aa64_sve, cpu)) {
273
value = false;
274
} else {
275
- value = test_bit(vq - 1, cpu->sve_vq_map);
276
+ value = extract32(cpu->sve_vq_map, vq - 1, 1);
277
}
278
visit_type_bool(v, name, &value, errp);
279
}
280
@@ -XXX,XX +XXX,XX @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
281
return;
282
}
283
284
- if (value) {
285
- set_bit(vq - 1, cpu->sve_vq_map);
286
- } else {
287
- clear_bit(vq - 1, cpu->sve_vq_map);
288
- }
289
- set_bit(vq - 1, cpu->sve_vq_init);
290
+ cpu->sve_vq_map = deposit32(cpu->sve_vq_map, vq - 1, 1, value);
291
+ cpu->sve_vq_init |= 1 << (vq - 1);
292
}
293
294
static bool cpu_arm_get_sve(Object *obj, Error **errp)
295
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
296
cpu->dcz_blocksize = 7; /* 512 bytes */
297
#endif
298
299
- bitmap_fill(cpu->sve_vq_supported, ARM_MAX_VQ);
300
+ cpu->sve_vq_supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ);
301
302
aarch64_add_pauth_properties(obj);
303
aarch64_add_sve_properties(obj);
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_a64fx_initfn(Object *obj)
305
cpu->gic_vprebits = 5;
306
cpu->gic_pribits = 5;
307
308
- /* Suppport of A64FX's vector length are 128,256 and 512bit only */
309
+ /* The A64FX supports only 128, 256 and 512 bit vector lengths */
310
aarch64_add_sve_properties(obj);
311
- bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ);
312
- set_bit(0, cpu->sve_vq_supported); /* 128bit */
313
- set_bit(1, cpu->sve_vq_supported); /* 256bit */
314
- set_bit(3, cpu->sve_vq_supported); /* 512bit */
315
+ cpu->sve_vq_supported = (1 << 0) /* 128bit */
316
+ | (1 << 1) /* 256bit */
317
+ | (1 << 3); /* 512bit */
318
319
cpu->isar.reset_pmcr_el0 = 0x46014040;
320
321
diff --git a/target/arm/helper.c b/target/arm/helper.c
50
diff --git a/target/arm/helper.c b/target/arm/helper.c
322
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
323
--- a/target/arm/helper.c
52
--- a/target/arm/helper.c
324
+++ b/target/arm/helper.c
53
+++ b/target/arm/helper.c
325
@@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
54
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
326
{
55
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
327
ARMCPU *cpu = env_archcpu(env);
56
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
328
uint32_t len = cpu->sve_max_vq - 1;
57
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
329
- uint32_t end_len;
58
- .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
330
59
+ .access = PL0_RW, .type = ARM_CP_FPU,
331
if (el <= 1 && !el_is_in_host(env, el)) {
60
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
332
len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
61
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
333
@@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
62
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
334
len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
63
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/tcg/hflags.c
66
+++ b/target/arm/tcg/hflags.c
67
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
68
DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
335
}
69
}
336
70
337
- end_len = len;
71
+ if (env->vfp.fpcr & FPCR_AH) {
338
- if (!test_bit(len, cpu->sve_vq_map)) {
72
+ DP_TBFLAG_A64(flags, AH, 1);
339
- end_len = find_last_bit(cpu->sve_vq_map, len);
73
+ }
340
- assert(end_len < len);
74
+
341
- }
75
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
342
- return end_len;
343
+ len = 31 - clz32(cpu->sve_vq_map & MAKE_64BIT_MASK(0, len + 1));
344
+ return len;
345
}
76
}
346
77
347
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
78
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
348
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
349
index XXXXXXX..XXXXXXX 100644
79
index XXXXXXX..XXXXXXX 100644
350
--- a/target/arm/kvm64.c
80
--- a/target/arm/tcg/translate-a64.c
351
+++ b/target/arm/kvm64.c
81
+++ b/target/arm/tcg/translate-a64.c
352
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_steal_time_supported(void)
82
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
353
83
dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
354
QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
84
dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
355
85
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
356
-void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
86
+ dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
357
+uint32_t kvm_arm_sve_get_vls(CPUState *cs)
87
dc->vec_len = 0;
358
{
88
dc->vec_stride = 0;
359
/* Only call this function if kvm_arm_sve_supported() returns true. */
89
dc->cp_regs = arm_cpu->cp_regs;
360
static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
361
static bool probed;
362
uint32_t vq = 0;
363
- int i, j;
364
-
365
- bitmap_zero(map, ARM_MAX_VQ);
366
+ int i;
367
368
/*
369
* KVM ensures all host CPUs support the same set of vector lengths.
370
@@ -XXX,XX +XXX,XX @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
371
if (vq > ARM_MAX_VQ) {
372
warn_report("KVM supports vector lengths larger than "
373
"QEMU can enable");
374
+ vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
375
}
376
}
377
378
- for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
379
- if (!vls[i]) {
380
- continue;
381
- }
382
- for (j = 1; j <= 64; ++j) {
383
- vq = j + i * 64;
384
- if (vq > ARM_MAX_VQ) {
385
- return;
386
- }
387
- if (vls[i] & (1UL << (j - 1))) {
388
- set_bit(vq - 1, map);
389
- }
390
- }
391
- }
392
+ return vls[0];
393
}
394
395
static int kvm_arm_sve_set_vls(CPUState *cs)
396
{
397
- uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
398
+ ARMCPU *cpu = ARM_CPU(cs);
399
+ uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq_map };
400
struct kvm_one_reg reg = {
401
.id = KVM_REG_ARM64_SVE_VLS,
402
.addr = (uint64_t)&vls[0],
403
};
404
- ARMCPU *cpu = ARM_CPU(cs);
405
- uint32_t vq;
406
- int i, j;
407
408
assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
409
410
- for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
411
- if (test_bit(vq - 1, cpu->sve_vq_map)) {
412
- i = (vq - 1) / 64;
413
- j = (vq - 1) % 64;
414
- vls[i] |= 1UL << j;
415
- }
416
- }
417
-
418
return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
419
}
420
421
--
90
--
422
2.25.1
91
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
When FPCR.AH is 1, the behaviour of some instructions changes:
2
2
* AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT
3
This function is used only once, and will need modification
3
* SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT
4
for Streaming SVE mode.
4
* SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which
5
5
QEMU does not yet implement)
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
* FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
8
Message-id: 20220607203306.657998-11-richard.henderson@linaro.org
8
The behaviour change is:
9
* the instructions do not update the FPSR cumulative exception flags
10
* trapped floating point exceptions are disabled (a no-op for QEMU,
11
which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE})
12
* rounding is always round-to-nearest-even regardless of FPCR.RMode
13
* denormalized inputs and outputs are always flushed to zero, as if
14
FPCR.{FZ,FIZ} is {1,1}
15
* FPCR.FZ16 is still honoured for half-precision inputs
16
17
(See the Arm ARM DDI0487L.a section A1.5.9.)
18
19
We can provide all these behaviours with another pair of float_status fields
20
which we use only for these insns, when FPCR.AH is 1. These float_status
21
fields will always have:
22
* flush_to_zero and flush_inputs_to_zero set for the non-F16 field
23
* rounding mode set to round-to-nearest-even
24
and so the only FPCR fields they need to honour are DN and FZ16.
25
26
In this commit we only define the new fp_status fields and give them
27
the required behaviour when FPCR is updated. In subsequent commits
28
we will arrange to use this new fp_status field for the instructions
29
that should be affected by FPCR.AH in this way.
30
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
33
---
11
target/arm/internals.h | 11 -----------
34
target/arm/cpu.h | 15 +++++++++++++++
12
target/arm/helper.c | 30 +++++++++++-------------------
35
target/arm/internals.h | 2 ++
13
2 files changed, 11 insertions(+), 30 deletions(-)
36
target/arm/tcg/translate.h | 14 ++++++++++++++
14
37
target/arm/cpu.c | 4 ++++
38
target/arm/vfp_helper.c | 13 ++++++++++++-
39
5 files changed, 47 insertions(+), 1 deletion(-)
40
41
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/cpu.h
44
+++ b/target/arm/cpu.h
45
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
46
* standard_fp_status : the ARM "Standard FPSCR Value"
47
* standard_fp_status_fp16 : used for half-precision
48
* calculations with the ARM "Standard FPSCR Value"
49
+ * ah_fp_status: used for the A64 insns which change behaviour
50
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
51
+ * and the reciprocal and square root estimate/step insns)
52
+ * ah_fp_status_f16: used for the A64 insns which change behaviour
53
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
54
+ * and the reciprocal and square root estimate/step insns);
55
+ * for half-precision
56
*
57
* Half-precision operations are governed by a separate
58
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
59
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
60
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
61
* using a fixed value for it.
62
*
63
+ * The ah_fp_status is needed because some insns have different
64
+ * behaviour when FPCR.AH == 1: they don't update cumulative
65
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
66
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
67
+ * which means we need an ah_fp_status_f16 as well.
68
+ *
69
* To avoid having to transfer exception bits around, we simply
70
* say that the FPSCR cumulative exception flags are the logical
71
* OR of the flags in the four fp statuses. This relies on the
72
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
73
float_status fp_status_f16_a64;
74
float_status standard_fp_status;
75
float_status standard_fp_status_f16;
76
+ float_status ah_fp_status;
77
+ float_status ah_fp_status_f16;
78
79
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
80
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
15
diff --git a/target/arm/internals.h b/target/arm/internals.h
81
diff --git a/target/arm/internals.h b/target/arm/internals.h
16
index XXXXXXX..XXXXXXX 100644
82
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/internals.h
83
--- a/target/arm/internals.h
18
+++ b/target/arm/internals.h
84
+++ b/target/arm/internals.h
19
@@ -XXX,XX +XXX,XX @@ void arm_translate_init(void);
85
@@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env);
20
void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
86
21
#endif /* CONFIG_TCG */
87
/* Set the float_status behaviour to match the Arm defaults */
22
88
void arm_set_default_fp_behaviours(float_status *s);
23
-/**
89
+/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
24
- * aarch64_sve_zcr_get_valid_len:
90
+void arm_set_ah_fp_behaviours(float_status *s);
25
- * @cpu: cpu context
91
26
- * @start_len: maximum len to consider
92
#endif
27
- *
93
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
28
- * Return the maximum supported sve vector length <= @start_len.
94
index XXXXXXX..XXXXXXX 100644
29
- * Note that both @start_len and the return value are in units
95
--- a/target/arm/tcg/translate.h
30
- * of ZCR_ELx.LEN, so the vector bit length is (x + 1) * 128.
96
+++ b/target/arm/tcg/translate.h
31
- */
97
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
32
-uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len);
98
FPST_A64,
33
-
99
FPST_A32_F16,
34
enum arm_fprounding {
100
FPST_A64_F16,
35
FPROUNDING_TIEEVEN,
101
+ FPST_AH,
36
FPROUNDING_POSINF,
102
+ FPST_AH_F16,
37
diff --git a/target/arm/helper.c b/target/arm/helper.c
103
FPST_STD,
38
index XXXXXXX..XXXXXXX 100644
104
FPST_STD_F16,
39
--- a/target/arm/helper.c
105
} ARMFPStatusFlavour;
40
+++ b/target/arm/helper.c
106
@@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour {
41
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
107
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
42
return 0;
108
* FPST_A64_F16
109
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
110
+ * FPST_AH:
111
+ * for AArch64 operations which change behaviour when AH=1 (specifically,
112
+ * bfloat16 conversions and multiplies, and the reciprocal and square root
113
+ * estimate/step insns)
114
+ * FPST_AH_F16:
115
+ * ditto, but for half-precision operations
116
* FPST_STD
117
* for A32/T32 Neon operations using the "standard FPSCR value"
118
* FPST_STD_F16
119
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
120
case FPST_A64_F16:
121
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
122
break;
123
+ case FPST_AH:
124
+ offset = offsetof(CPUARMState, vfp.ah_fp_status);
125
+ break;
126
+ case FPST_AH_F16:
127
+ offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
128
+ break;
129
case FPST_STD:
130
offset = offsetof(CPUARMState, vfp.standard_fp_status);
131
break;
132
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/target/arm/cpu.c
135
+++ b/target/arm/cpu.c
136
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
137
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
138
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
139
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
140
+ arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
141
+ set_flush_to_zero(1, &env->vfp.ah_fp_status);
142
+ set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
143
+ arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
144
145
#ifndef CONFIG_USER_ONLY
146
if (kvm_enabled()) {
147
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/arm/vfp_helper.c
150
+++ b/target/arm/vfp_helper.c
151
@@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s)
152
* set Invalid for a QNaN
153
* * default NaN has sign bit set, msb frac bit set
154
*/
155
-static void arm_set_ah_fp_behaviours(float_status *s)
156
+void arm_set_ah_fp_behaviours(float_status *s)
157
{
158
set_float_detect_tininess(float_tininess_after_rounding, s);
159
set_float_ftz_detection(float_ftz_after_rounding, s);
160
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
161
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
162
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
163
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
164
+ /*
165
+ * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
166
+ * they are used for insns that must not set the cumulative exception bits.
167
+ */
168
+
169
/*
170
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
171
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
172
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
173
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
174
set_float_exception_flags(0, &env->vfp.standard_fp_status);
175
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
176
+ set_float_exception_flags(0, &env->vfp.ah_fp_status);
177
+ set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
43
}
178
}
44
179
45
-uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len)
180
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
46
-{
181
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
47
- uint32_t end_len;
182
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
48
-
183
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
49
- start_len = MIN(start_len, ARM_MAX_VQ - 1);
184
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
50
- end_len = start_len;
185
+ set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
51
-
186
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
52
- if (!test_bit(start_len, cpu->sve_vq_map)) {
187
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
53
- end_len = find_last_bit(cpu->sve_vq_map, start_len);
188
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
54
- assert(end_len < start_len);
189
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
55
- }
56
- return end_len;
57
-}
58
-
59
/*
60
* Given that SVE is enabled, return the vector length for EL.
61
*/
62
uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
63
{
64
ARMCPU *cpu = env_archcpu(env);
65
- uint32_t zcr_len = cpu->sve_max_vq - 1;
66
+ uint32_t len = cpu->sve_max_vq - 1;
67
+ uint32_t end_len;
68
69
if (el <= 1 && !el_is_in_host(env, el)) {
70
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
71
+ len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
72
}
190
}
73
if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
191
if (changed & FPCR_FZ) {
74
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
192
bool ftz_enabled = val & FPCR_FZ;
75
+ len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
193
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
194
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
195
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
196
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
197
+ set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
198
+ set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
76
}
199
}
77
if (arm_feature(env, ARM_FEATURE_EL3)) {
200
if (changed & FPCR_AH) {
78
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
201
bool ah_enabled = val & FPCR_AH;
79
+ len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
80
}
81
82
- return aarch64_sve_zcr_get_valid_len(cpu, zcr_len);
83
+ end_len = len;
84
+ if (!test_bit(len, cpu->sve_vq_map)) {
85
+ end_len = find_last_bit(cpu->sve_vq_map, len);
86
+ assert(end_len < len);
87
+ }
88
+ return end_len;
89
}
90
91
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
92
--
202
--
93
2.25.1
203
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use
2
FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get
3
the required behaviour changes.
2
4
3
Instead of checking these bits in fp_exception_el and
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
also in sve_exception_el, document that we must compare
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
the results. The only place where we have not already
7
---
6
checked that FP EL is zero is in rebuild_hflags_a64.
8
target/arm/tcg/translate-a64.h | 13 ++++
9
target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++--------
10
target/arm/tcg/translate-sve.c | 30 ++++++---
11
3 files changed, 127 insertions(+), 35 deletions(-)
7
12
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20220607203306.657998-5-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
target/arm/helper.c | 58 +++++++++++++++------------------------------
14
1 file changed, 19 insertions(+), 39 deletions(-)
15
16
diff --git a/target/arm/helper.c b/target/arm/helper.c
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper.c
15
--- a/target/arm/tcg/translate-a64.h
19
+++ b/target/arm/helper.c
16
+++ b/target/arm/tcg/translate-a64.h
20
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo minimal_ras_reginfo[] = {
17
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
21
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) },
18
return ret;
22
};
19
}
23
20
24
-/* Return the exception level to which exceptions should be taken
25
- * via SVEAccessTrap. If an exception should be routed through
26
- * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
27
- * take care of raising that exception.
28
- * C.f. the ARM pseudocode function CheckSVEEnabled.
29
+/*
21
+/*
30
+ * Return the exception level to which exceptions should be taken
22
+ * Return the ARMFPStatusFlavour to use based on element size and
31
+ * via SVEAccessTrap. This excludes the check for whether the exception
23
+ * whether FPCR.AH is set.
32
+ * should be routed through AArch64.AdvSIMDFPAccessTrap. That can easily
24
+ */
33
+ * be found by testing 0 < fp_exception_el < sve_exception_el.
25
+static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz)
34
+ *
26
+{
35
+ * C.f. the ARM pseudocode function CheckSVEEnabled. Note that the
27
+ if (s->fpcr_ah) {
36
+ * pseudocode does *not* separate out the FP trap checks, but has them
28
+ return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
37
+ * all in one function.
29
+ } else {
30
+ return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
31
+ }
32
+}
33
+
34
bool disas_sve(DisasContext *, uint32_t);
35
bool disas_sme(DisasContext *, uint32_t);
36
37
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/translate-a64.c
40
+++ b/target/arm/tcg/translate-a64.c
41
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
42
* an out-of-line helper.
38
*/
43
*/
39
int sve_exception_el(CPUARMState *env, int el)
44
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
40
{
45
- int rm, bool is_fp16, int data,
41
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
46
+ int rm, ARMFPStatusFlavour fpsttype, int data,
42
case 2:
47
gen_helper_gvec_3_ptr *fn)
43
return 1;
48
{
49
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
50
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
51
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
52
vec_full_reg_offset(s, rn),
53
vec_full_reg_offset(s, rm), fpst,
54
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar {
55
void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
56
} FPScalar;
57
58
-static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
59
+static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
60
+ const FPScalar *f,
61
+ ARMFPStatusFlavour fpsttype)
62
{
63
switch (a->esz) {
64
case MO_64:
65
if (fp_access_check(s)) {
66
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
67
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
68
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
69
+ f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
70
write_fp_dreg(s, a->rd, t0);
44
}
71
}
45
-
72
break;
46
- /* Check CPACR.FPEN. */
73
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
47
- switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN)) {
74
if (fp_access_check(s)) {
48
- case 1:
75
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
49
- if (el != 0) {
76
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
50
- break;
77
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
51
- }
78
+ f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
52
- /* fall through */
79
write_fp_sreg(s, a->rd, t0);
53
- case 0:
80
}
54
- case 2:
81
break;
55
- return 0;
82
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
56
- }
83
if (fp_access_check(s)) {
84
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
85
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
86
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
87
+ f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
88
write_fp_sreg(s, a->rd, t0);
89
}
90
break;
91
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
92
return true;
93
}
94
95
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
96
+{
97
+ return do_fp3_scalar_with_fpsttype(s, a, f,
98
+ a->esz == MO_16 ?
99
+ FPST_A64_F16 : FPST_A64);
100
+}
101
+
102
+static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
103
+{
104
+ return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz));
105
+}
106
+
107
static const FPScalar f_scalar_fadd = {
108
gen_helper_vfp_addh,
109
gen_helper_vfp_adds,
110
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = {
111
gen_helper_recpsf_f32,
112
gen_helper_recpsf_f64,
113
};
114
-TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
115
+TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps)
116
117
static const FPScalar f_scalar_frsqrts = {
118
gen_helper_rsqrtsf_f16,
119
gen_helper_rsqrtsf_f32,
120
gen_helper_rsqrtsf_f64,
121
};
122
-TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
123
+TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts)
124
125
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
126
const FPScalar *f, bool swap)
127
@@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
128
TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
129
TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
130
131
-static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
132
- gen_helper_gvec_3_ptr * const fns[3])
133
+static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
134
+ int data,
135
+ gen_helper_gvec_3_ptr * const fns[3],
136
+ ARMFPStatusFlavour fpsttype)
137
{
138
MemOp esz = a->esz;
139
int check = fp_access_check_vector_hsd(s, a->q, esz);
140
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
141
return check == 0;
57
}
142
}
58
143
59
/*
144
- gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
60
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
145
- esz == MO_16, data, fns[esz - 1]);
61
case 2:
146
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
62
return 2;
147
+ data, fns[esz - 1]);
63
}
148
return true;
64
-
149
}
65
- switch (FIELD_EX32(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) {
150
66
- case 1:
151
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
67
- if (el == 2 || !(hcr_el2 & HCR_TGE)) {
152
+ gen_helper_gvec_3_ptr * const fns[3])
68
- break;
153
+{
69
- }
154
+ return do_fp3_vector_with_fpsttype(s, a, data, fns,
70
- /* fall through */
155
+ a->esz == MO_16 ?
71
- case 0:
156
+ FPST_A64_F16 : FPST_A64);
72
- case 2:
157
+}
73
- return 0;
158
+
74
- }
159
+static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
75
} else if (arm_is_el2_enabled(env)) {
160
+ gen_helper_gvec_3_ptr * const f[3])
76
if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) {
161
+{
77
return 2;
162
+ return do_fp3_vector_with_fpsttype(s, a, data, f,
78
}
163
+ select_ah_fpst(s, a->esz));
79
- if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) {
164
+}
80
- return 0;
165
+
81
- }
166
static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
82
}
167
gen_helper_gvec_fadd_h,
168
gen_helper_gvec_fadd_s,
169
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
170
gen_helper_gvec_recps_s,
171
gen_helper_gvec_recps_d,
172
};
173
-TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
174
+TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps)
175
176
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
177
gen_helper_gvec_rsqrts_h,
178
gen_helper_gvec_rsqrts_s,
179
gen_helper_gvec_rsqrts_d,
180
};
181
-TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
182
+TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts)
183
184
static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
185
gen_helper_gvec_faddp_h,
186
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
83
}
187
}
84
188
85
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
189
gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
86
190
- esz == MO_16, a->idx, fns[esz - 1]);
87
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
191
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
88
int sve_el = sve_exception_el(env, el);
192
+ a->idx, fns[esz - 1]);
89
- uint32_t zcr_len;
193
return true;
90
194
}
91
/*
195
92
- * If SVE is disabled, but FP is enabled,
196
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 {
93
- * then the effective len is 0.
197
void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
94
+ * If either FP or SVE are disabled, translator does not need len.
198
} FPScalar1;
95
+ * If SVE EL > FP EL, FP exception has precedence, and translator
199
96
+ * does not need SVE EL. Save potential re-translations by forcing
200
-static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
97
+ * the unneeded data to zero.
201
- const FPScalar1 *f, int rmode)
98
*/
202
+static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
99
- if (sve_el != 0 && fp_el == 0) {
203
+ const FPScalar1 *f, int rmode,
100
- zcr_len = 0;
204
+ ARMFPStatusFlavour fpsttype)
101
- } else {
205
{
102
- zcr_len = sve_zcr_len_for_el(env, el);
206
TCGv_i32 tcg_rmode = NULL;
103
+ if (fp_el != 0) {
207
TCGv_ptr fpst;
104
+ if (sve_el > fp_el) {
208
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
105
+ sve_el = 0;
209
return check == 0;
106
+ }
107
+ } else if (sve_el == 0) {
108
+ DP_TBFLAG_A64(flags, VL, sve_zcr_len_for_el(env, el));
109
}
110
DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
111
- DP_TBFLAG_A64(flags, VL, zcr_len);
112
}
210
}
113
211
114
sctlr = regime_sctlr(env, stage1);
212
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
213
+ fpst = fpstatus_ptr(fpsttype);
214
if (rmode >= 0) {
215
tcg_rmode = gen_set_rmode(rmode, fpst);
216
}
217
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
218
return true;
219
}
220
221
+static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
222
+ const FPScalar1 *f, int rmode)
223
+{
224
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
225
+ a->esz == MO_16 ?
226
+ FPST_A64_F16 : FPST_A64);
227
+}
228
+
229
+static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
230
+ const FPScalar1 *f, int rmode)
231
+{
232
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
233
+}
234
+
235
static const FPScalar1 f_scalar_fsqrt = {
236
gen_helper_vfp_sqrth,
237
gen_helper_vfp_sqrts,
238
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = {
239
gen_helper_recpe_f32,
240
gen_helper_recpe_f64,
241
};
242
-TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1)
243
+TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1)
244
245
static const FPScalar1 f_scalar_frecpx = {
246
gen_helper_frecpx_f16,
247
gen_helper_frecpx_f32,
248
gen_helper_frecpx_f64,
249
};
250
-TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1)
251
+TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
252
253
static const FPScalar1 f_scalar_frsqrte = {
254
gen_helper_rsqrte_f16,
255
gen_helper_rsqrte_f32,
256
gen_helper_rsqrte_f64,
257
};
258
-TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1)
259
+TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1)
260
261
static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
262
{
263
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
264
&f_scalar_frint64, FPROUNDING_ZERO)
265
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
266
267
-static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
268
- int rd, int rn, int data,
269
- gen_helper_gvec_2_ptr * const fns[3])
270
+static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
271
+ bool is_q, int rd, int rn, int data,
272
+ gen_helper_gvec_2_ptr * const fns[3],
273
+ ARMFPStatusFlavour fpsttype)
274
{
275
int check = fp_access_check_vector_hsd(s, is_q, esz);
276
TCGv_ptr fpst;
277
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
278
return check == 0;
279
}
280
281
- fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
282
+ fpst = fpstatus_ptr(fpsttype);
283
tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
284
vec_full_reg_offset(s, rn), fpst,
285
is_q ? 16 : 8, vec_full_reg_size(s),
286
@@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
287
return true;
288
}
289
290
+static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
291
+ int rd, int rn, int data,
292
+ gen_helper_gvec_2_ptr * const fns[3])
293
+{
294
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
295
+ esz == MO_16 ? FPST_A64_F16 :
296
+ FPST_A64);
297
+}
298
+
299
+static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
300
+ int rd, int rn, int data,
301
+ gen_helper_gvec_2_ptr * const fns[3])
302
+{
303
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
304
+ fns, select_ah_fpst(s, esz));
305
+}
306
+
307
static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
308
gen_helper_gvec_vcvt_sh,
309
gen_helper_gvec_vcvt_sf,
310
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = {
311
gen_helper_gvec_frecpe_s,
312
gen_helper_gvec_frecpe_d,
313
};
314
-TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
315
+TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
316
317
static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
318
gen_helper_gvec_frsqrte_h,
319
gen_helper_gvec_frsqrte_s,
320
gen_helper_gvec_frsqrte_d,
321
};
322
-TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
323
+TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
324
325
static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
326
{
327
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
328
index XXXXXXX..XXXXXXX 100644
329
--- a/target/arm/tcg/translate-sve.c
330
+++ b/target/arm/tcg/translate-sve.c
331
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
332
return true;
333
}
334
335
-static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
336
- arg_rr_esz *a, int data)
337
+static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
338
+ arg_rr_esz *a, int data)
339
{
340
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
341
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
342
+ select_ah_fpst(s, a->esz));
343
}
344
345
/* Invoke an out-of-line helper on 3 Zregs. */
346
@@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
347
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
348
}
349
350
+static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
351
+ arg_rrr_esz *a, int data)
352
+{
353
+ return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
354
+ select_ah_fpst(s, a->esz));
355
+}
356
+
357
/* Invoke an out-of-line helper on 4 Zregs. */
358
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
359
int rd, int rn, int rm, int ra, int data)
360
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
361
NULL, gen_helper_gvec_frecpe_h,
362
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
363
};
364
-TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
365
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0)
366
367
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
368
NULL, gen_helper_gvec_frsqrte_h,
369
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
370
};
371
-TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
372
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0)
373
374
/*
375
*** SVE Floating Point Compare with Zero Group
376
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
377
}; \
378
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
379
380
+#define DO_FP3_AH(NAME, name) \
381
+ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
382
+ NULL, gen_helper_gvec_##name##_h, \
383
+ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
384
+ }; \
385
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0)
386
+
387
DO_FP3(FADD_zzz, fadd)
388
DO_FP3(FSUB_zzz, fsub)
389
DO_FP3(FMUL_zzz, fmul)
390
-DO_FP3(FRECPS, recps)
391
-DO_FP3(FRSQRTS, rsqrts)
392
+DO_FP3_AH(FRECPS, recps)
393
+DO_FP3_AH(FRSQRTS, rsqrts)
394
395
#undef DO_FP3
396
397
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
398
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
399
};
400
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
401
- a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
402
+ a, 0, select_ah_fpst(s, a->esz))
403
404
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
405
NULL, gen_helper_sve_fsqrt_h,
115
--
406
--
116
2.25.1
407
2.34.1
diff view generated by jsdifflib
New patch
1
When FPCR.AH is 1, use FPST_AH for:
2
* AdvSIMD BFCVT, BFCVTN, BFCVTN2
3
* SVE BFCVT, BFCVTNT
1
4
5
so that they get the required behaviour changes.
6
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------
11
target/arm/tcg/translate-sve.c | 6 ++++--
12
2 files changed, 25 insertions(+), 8 deletions(-)
13
14
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/tcg/translate-a64.c
17
+++ b/target/arm/tcg/translate-a64.c
18
@@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
19
static const FPScalar1 f_scalar_bfcvt = {
20
.gen_s = gen_helper_bfcvt,
21
};
22
-TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1)
23
+TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1)
24
25
static const FPScalar1 f_scalar_frint32 = {
26
NULL,
27
@@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
28
tcg_gen_extu_i32_i64(d, tmp);
29
}
30
31
-static ArithOneOp * const f_vector_bfcvtn[] = {
32
- NULL,
33
- gen_bfcvtn_hs,
34
- NULL,
35
+static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
36
+{
37
+ TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
38
+ TCGv_i32 tmp = tcg_temp_new_i32();
39
+ gen_helper_bfcvt_pair(tmp, n, fpst);
40
+ tcg_gen_extu_i32_i64(d, tmp);
41
+}
42
+
43
+static ArithOneOp * const f_vector_bfcvtn[2][3] = {
44
+ {
45
+ NULL,
46
+ gen_bfcvtn_hs,
47
+ NULL,
48
+ }, {
49
+ NULL,
50
+ gen_bfcvtn_ah_hs,
51
+ NULL,
52
+ }
53
};
54
-TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn)
55
+TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
56
+ f_vector_bfcvtn[s->fpcr_ah])
57
58
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
59
{
60
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/tcg/translate-sve.c
63
+++ b/target/arm/tcg/translate-sve.c
64
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
65
gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
66
67
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
68
- gen_helper_sve_bfcvt, a, 0, FPST_A64)
69
+ gen_helper_sve_bfcvt, a, 0,
70
+ s->fpcr_ah ? FPST_AH : FPST_A64)
71
72
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
73
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
74
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
75
gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
76
77
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
78
- gen_helper_sve_bfcvtnt, a, 0, FPST_A64)
79
+ gen_helper_sve_bfcvtnt, a, 0,
80
+ s->fpcr_ah ? FPST_AH : FPST_A64)
81
82
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
83
gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
84
--
85
2.34.1
diff view generated by jsdifflib
1
From: Frederic Konrad <fkonrad@amd.com>
1
When FPCR.AH is 1, use FPST_AH for:
2
* AdvSIMD BFMLALB, BFMLALT
3
* SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT
2
4
3
The core and the vblend register sizes are wrong; they should respectively be
5
so that they get the required behaviour changes.
4
0x3B0 and 0x1E0 according to:
5
https://www.xilinx.com/htmldocs/registers/ug1087/ug1087-zynq-ultrascale-registers.html.
6
6
7
Let's fix that and use macros when creating the mmio region.
7
We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour
8
rather than a bool is_fp16; existing callsites now select
9
FPST_A64_F16 vs FPST_A64 themselves rather than passing in
10
the boolean.
8
11
9
Fixes: 58ac482a66d ("introduce xlnx-dp")
10
Signed-off-by: Frederic Konrad <fkonrad@amd.com>
11
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
12
Acked-by: Alistair Francis <alistair.francis@wdc.com>
13
Message-id: 20220601172353.3220232-2-fkonrad@xilinx.com
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
14
---
16
include/hw/display/xlnx_dp.h | 9 +++++++--
15
target/arm/tcg/translate-a64.c | 20 +++++++++++++-------
17
hw/display/xlnx_dp.c | 17 ++++++++++-------
16
target/arm/tcg/translate-sve.c | 6 ++++--
18
2 files changed, 17 insertions(+), 9 deletions(-)
17
2 files changed, 17 insertions(+), 9 deletions(-)
19
18
20
diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h
19
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
21
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/display/xlnx_dp.h
21
--- a/target/arm/tcg/translate-a64.c
23
+++ b/include/hw/display/xlnx_dp.h
22
+++ b/target/arm/tcg/translate-a64.c
24
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
25
#define AUD_CHBUF_MAX_DEPTH (32 * KiB)
24
* an out-of-line helper.
26
#define MAX_QEMU_BUFFER_SIZE (4 * KiB)
25
*/
27
26
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
28
-#define DP_CORE_REG_ARRAY_SIZE (0x3AF >> 2)
27
- int rm, int ra, bool is_fp16, int data,
29
+#define DP_CORE_REG_OFFSET (0x0000)
28
+ int rm, int ra, ARMFPStatusFlavour fpsttype,
30
+#define DP_CORE_REG_ARRAY_SIZE (0x3B0 >> 2)
29
+ int data,
31
+#define DP_AVBUF_REG_OFFSET (0xB000)
30
gen_helper_gvec_4_ptr *fn)
32
#define DP_AVBUF_REG_ARRAY_SIZE (0x238 >> 2)
31
{
33
-#define DP_VBLEND_REG_ARRAY_SIZE (0x1DF >> 2)
32
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
34
+#define DP_VBLEND_REG_OFFSET (0xA000)
33
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
35
+#define DP_VBLEND_REG_ARRAY_SIZE (0x1E0 >> 2)
34
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
36
+#define DP_AUDIO_REG_OFFSET (0xC000)
35
vec_full_reg_offset(s, rn),
37
#define DP_AUDIO_REG_ARRAY_SIZE (0x50 >> 2)
36
vec_full_reg_offset(s, rm),
38
+#define DP_CONTAINER_SIZE (0xC050)
37
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
39
38
}
40
struct PixmanPlane {
39
if (fp_access_check(s)) {
41
pixman_format_code_t format;
40
/* Q bit selects BFMLALB vs BFMLALT. */
42
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
41
- gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
42
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
43
+ s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
44
gen_helper_gvec_bfmlal);
45
}
46
return true;
47
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
48
}
49
50
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
51
- a->esz == MO_16, a->rot, fn[a->esz]);
52
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
53
+ a->rot, fn[a->esz]);
54
return true;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
58
}
59
60
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
61
- esz == MO_16, (a->idx << 1) | neg,
62
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
63
+ (a->idx << 1) | neg,
64
fns[esz - 1]);
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
68
}
69
if (fp_access_check(s)) {
70
/* Q bit selects BFMLALB vs BFMLALT. */
71
- gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
72
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
73
+ s->fpcr_ah ? FPST_AH : FPST_A64,
74
(a->idx << 1) | a->q,
75
gen_helper_gvec_bfmlal_idx);
76
}
77
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
78
}
79
if (fp_access_check(s)) {
80
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
81
- a->esz == MO_16, (a->idx << 2) | a->rot, fn);
82
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
83
+ (a->idx << 2) | a->rot, fn);
84
}
85
return true;
86
}
87
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
43
index XXXXXXX..XXXXXXX 100644
88
index XXXXXXX..XXXXXXX 100644
44
--- a/hw/display/xlnx_dp.c
89
--- a/target/arm/tcg/translate-sve.c
45
+++ b/hw/display/xlnx_dp.c
90
+++ b/target/arm/tcg/translate-sve.c
46
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_init(Object *obj)
91
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
47
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
92
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
48
XlnxDPState *s = XLNX_DP(obj);
93
{
49
94
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
50
- memory_region_init(&s->container, obj, TYPE_XLNX_DP, 0xC050);
95
- a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
51
+ memory_region_init(&s->container, obj, TYPE_XLNX_DP, DP_CONTAINER_SIZE);
96
+ a->rd, a->rn, a->rm, a->ra, sel,
52
97
+ s->fpcr_ah ? FPST_AH : FPST_A64);
53
memory_region_init_io(&s->core_iomem, obj, &dp_ops, s, TYPE_XLNX_DP
98
}
54
- ".core", 0x3AF);
99
55
- memory_region_add_subregion(&s->container, 0x0000, &s->core_iomem);
100
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
56
+ ".core", sizeof(s->core_registers));
101
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
57
+ memory_region_add_subregion(&s->container, DP_CORE_REG_OFFSET,
102
{
58
+ &s->core_iomem);
103
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
59
104
a->rd, a->rn, a->rm, a->ra,
60
memory_region_init_io(&s->vblend_iomem, obj, &vblend_ops, s, TYPE_XLNX_DP
105
- (a->index << 1) | sel, FPST_A64);
61
- ".v_blend", 0x1DF);
106
+ (a->index << 1) | sel,
62
- memory_region_add_subregion(&s->container, 0xA000, &s->vblend_iomem);
107
+ s->fpcr_ah ? FPST_AH : FPST_A64);
63
+ ".v_blend", sizeof(s->vblend_registers));
108
}
64
+ memory_region_add_subregion(&s->container, DP_VBLEND_REG_OFFSET,
109
65
+ &s->vblend_iomem);
110
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
66
67
memory_region_init_io(&s->avbufm_iomem, obj, &avbufm_ops, s, TYPE_XLNX_DP
68
- ".av_buffer_manager", 0x238);
69
- memory_region_add_subregion(&s->container, 0xB000, &s->avbufm_iomem);
70
+ ".av_buffer_manager", sizeof(s->avbufm_registers));
71
+ memory_region_add_subregion(&s->container, DP_AVBUF_REG_OFFSET,
72
+ &s->avbufm_iomem);
73
74
memory_region_init_io(&s->audio_iomem, obj, &audio_ops, s, TYPE_XLNX_DP
75
".audio", sizeof(s->audio_registers));
76
--
111
--
77
2.25.1
112
2.34.1
diff view generated by jsdifflib
New patch
1
For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so
2
that instead of zeroing the high elements of a vector register when
3
we write the output of a scalar operation to it, we instead merge in
4
those elements from one of the source registers. Since this affects
5
the generated code, we need to put FPCR.NEP into the TBFLAGS.
1
6
7
FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64
8
is not implemented or not enabled; we can implement this logic in
9
rebuild_hflags_a64().
10
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
target/arm/cpu.h | 1 +
15
target/arm/tcg/translate.h | 2 ++
16
target/arm/tcg/hflags.c | 9 +++++++++
17
target/arm/tcg/translate-a64.c | 1 +
18
4 files changed, 13 insertions(+)
19
20
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/cpu.h
23
+++ b/target/arm/cpu.h
24
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
25
/* Set if FEAT_NV2 RAM accesses are big-endian */
26
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
27
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
28
+FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
29
30
/*
31
* Helpers for using the above. Note that only the A64 accessors use
32
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/tcg/translate.h
35
+++ b/target/arm/tcg/translate.h
36
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
37
bool nv2_mem_be;
38
/* True if FPCR.AH is 1 (alternate floating point handling) */
39
bool fpcr_ah;
40
+ /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */
41
+ bool fpcr_nep;
42
/*
43
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
44
* < 0, set by the current instruction.
45
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/arm/tcg/hflags.c
48
+++ b/target/arm/tcg/hflags.c
49
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
50
if (env->vfp.fpcr & FPCR_AH) {
51
DP_TBFLAG_A64(flags, AH, 1);
52
}
53
+ if (env->vfp.fpcr & FPCR_NEP) {
54
+ /*
55
+ * In streaming-SVE without FA64, NEP behaves as if zero;
56
+ * compare pseudocode IsMerging()
57
+ */
58
+ if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) {
59
+ DP_TBFLAG_A64(flags, NEP, 1);
60
+ }
61
+ }
62
63
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
64
}
65
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/target/arm/tcg/translate-a64.c
68
+++ b/target/arm/tcg/translate-a64.c
69
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
70
dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
71
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
72
dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
73
+ dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
74
dc->vec_len = 0;
75
dc->vec_stride = 0;
76
dc->cp_regs = arm_cpu->cp_regs;
77
--
78
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the
2
2
behaviour of the writeback of the result for most SIMD scalar
3
This function has one private helper, v8m_is_sau_exempt,
3
operations, so that instead of zeroing the upper part of the result
4
so move that at the same time.
4
register it merges the upper elements from one of the input
5
5
registers.
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
7
Message-id: 20220604040607.269301-12-richard.henderson@linaro.org
7
Provide new functions write_fp_*reg_merging() which can be used
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
instead of the existing write_fp_*reg() functions when we want this
9
"merge the result with one of the input registers if FPCR.NEP is
10
enabled" handling, and use them in do_fp3_scalar_with_fpsttype().
11
12
Note that (as documented in the description of the FPCR.NEP bit)
13
which input register to use as the merge source varies by
14
instruction: for these 2-input scalar operations, the comparison
15
instructions take from Rm, not Rn.
16
17
We'll extend this to also provide the merging behaviour for
18
the remaining scalar insns in subsequent commits.
19
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
22
---
11
target/arm/helper.c | 123 ------------------------------------------
23
target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++--------
12
target/arm/ptw.c | 126 ++++++++++++++++++++++++++++++++++++++++++++
24
1 file changed, 91 insertions(+), 26 deletions(-)
13
2 files changed, 126 insertions(+), 123 deletions(-)
25
14
26
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
15
diff --git a/target/arm/helper.c b/target/arm/helper.c
16
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper.c
28
--- a/target/arm/tcg/translate-a64.c
18
+++ b/target/arm/helper.c
29
+++ b/target/arm/tcg/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@
30
@@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
20
#include "qemu/osdep.h"
31
write_fp_dreg(s, reg, tmp);
21
#include "qemu/units.h"
22
#include "qemu/log.h"
23
-#include "target/arm/idau.h"
24
#include "trace.h"
25
#include "cpu.h"
26
#include "internals.h"
27
@@ -XXX,XX +XXX,XX @@ bool m_is_system_region(CPUARMState *env, uint32_t address)
28
return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
29
}
32
}
30
33
31
-static bool v8m_is_sau_exempt(CPUARMState *env,
34
+/*
32
- uint32_t address, MMUAccessType access_type)
35
+ * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
33
-{
36
+ * - if FPCR.NEP == 0, clear the high elements of reg
34
- /* The architecture specifies that certain address ranges are
37
+ * - if FPCR.NEP == 1, set the high elements of reg from mergereg
35
- * exempt from v8M SAU/IDAU checks.
38
+ * (i.e. merge the result with those high elements)
36
- */
39
+ * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
37
- return
40
+ */
38
- (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) ||
41
+static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
39
- (address >= 0xe0000000 && address <= 0xe0002fff) ||
42
+ TCGv_i64 v)
40
- (address >= 0xe000e000 && address <= 0xe000efff) ||
41
- (address >= 0xe002e000 && address <= 0xe002efff) ||
42
- (address >= 0xe0040000 && address <= 0xe0041fff) ||
43
- (address >= 0xe00ff000 && address <= 0xe00fffff);
44
-}
45
-
46
-void v8m_security_lookup(CPUARMState *env, uint32_t address,
47
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
48
- V8M_SAttributes *sattrs)
49
-{
50
- /* Look up the security attributes for this address. Compare the
51
- * pseudocode SecurityCheck() function.
52
- * We assume the caller has zero-initialized *sattrs.
53
- */
54
- ARMCPU *cpu = env_archcpu(env);
55
- int r;
56
- bool idau_exempt = false, idau_ns = true, idau_nsc = true;
57
- int idau_region = IREGION_NOTVALID;
58
- uint32_t addr_page_base = address & TARGET_PAGE_MASK;
59
- uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
60
-
61
- if (cpu->idau) {
62
- IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau);
63
- IDAUInterface *ii = IDAU_INTERFACE(cpu->idau);
64
-
65
- iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns,
66
- &idau_nsc);
67
- }
68
-
69
- if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) {
70
- /* 0xf0000000..0xffffffff is always S for insn fetches */
71
- return;
72
- }
73
-
74
- if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) {
75
- sattrs->ns = !regime_is_secure(env, mmu_idx);
76
- return;
77
- }
78
-
79
- if (idau_region != IREGION_NOTVALID) {
80
- sattrs->irvalid = true;
81
- sattrs->iregion = idau_region;
82
- }
83
-
84
- switch (env->sau.ctrl & 3) {
85
- case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */
86
- break;
87
- case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */
88
- sattrs->ns = true;
89
- break;
90
- default: /* SAU.ENABLE == 1 */
91
- for (r = 0; r < cpu->sau_sregion; r++) {
92
- if (env->sau.rlar[r] & 1) {
93
- uint32_t base = env->sau.rbar[r] & ~0x1f;
94
- uint32_t limit = env->sau.rlar[r] | 0x1f;
95
-
96
- if (base <= address && limit >= address) {
97
- if (base > addr_page_base || limit < addr_page_limit) {
98
- sattrs->subpage = true;
99
- }
100
- if (sattrs->srvalid) {
101
- /* If we hit in more than one region then we must report
102
- * as Secure, not NS-Callable, with no valid region
103
- * number info.
104
- */
105
- sattrs->ns = false;
106
- sattrs->nsc = false;
107
- sattrs->sregion = 0;
108
- sattrs->srvalid = false;
109
- break;
110
- } else {
111
- if (env->sau.rlar[r] & 2) {
112
- sattrs->nsc = true;
113
- } else {
114
- sattrs->ns = true;
115
- }
116
- sattrs->srvalid = true;
117
- sattrs->sregion = r;
118
- }
119
- } else {
120
- /*
121
- * Address not in this region. We must check whether the
122
- * region covers addresses in the same page as our address.
123
- * In that case we must not report a size that covers the
124
- * whole page for a subsequent hit against a different MPU
125
- * region or the background region, because it would result
126
- * in incorrect TLB hits for subsequent accesses to
127
- * addresses that are in this MPU region.
128
- */
129
- if (limit >= base &&
130
- ranges_overlap(base, limit - base + 1,
131
- addr_page_base,
132
- TARGET_PAGE_SIZE)) {
133
- sattrs->subpage = true;
134
- }
135
- }
136
- }
137
- }
138
- break;
139
- }
140
-
141
- /*
142
- * The IDAU will override the SAU lookup results if it specifies
143
- * higher security than the SAU does.
144
- */
145
- if (!idau_ns) {
146
- if (sattrs->ns || (!idau_nsc && sattrs->nsc)) {
147
- sattrs->ns = false;
148
- sattrs->nsc = idau_nsc;
149
- }
150
- }
151
-}
152
-
153
/* Combine either inner or outer cacheability attributes for normal
154
* memory, according to table D4-42 and pseudocode procedure
155
* CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
156
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
157
index XXXXXXX..XXXXXXX 100644
158
--- a/target/arm/ptw.c
159
+++ b/target/arm/ptw.c
160
@@ -XXX,XX +XXX,XX @@
161
#include "qemu/range.h"
162
#include "cpu.h"
163
#include "internals.h"
164
+#include "idau.h"
165
#include "ptw.h"
166
167
168
@@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
169
return !(*prot & (1 << access_type));
170
}
171
172
+static bool v8m_is_sau_exempt(CPUARMState *env,
173
+ uint32_t address, MMUAccessType access_type)
174
+{
43
+{
175
+ /*
44
+ if (!s->fpcr_nep) {
176
+ * The architecture specifies that certain address ranges are
45
+ write_fp_dreg(s, reg, v);
177
+ * exempt from v8M SAU/IDAU checks.
178
+ */
179
+ return
180
+ (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) ||
181
+ (address >= 0xe0000000 && address <= 0xe0002fff) ||
182
+ (address >= 0xe000e000 && address <= 0xe000efff) ||
183
+ (address >= 0xe002e000 && address <= 0xe002efff) ||
184
+ (address >= 0xe0040000 && address <= 0xe0041fff) ||
185
+ (address >= 0xe00ff000 && address <= 0xe00fffff);
186
+}
187
+
188
+void v8m_security_lookup(CPUARMState *env, uint32_t address,
189
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
190
+ V8M_SAttributes *sattrs)
191
+{
192
+ /*
193
+ * Look up the security attributes for this address. Compare the
194
+ * pseudocode SecurityCheck() function.
195
+ * We assume the caller has zero-initialized *sattrs.
196
+ */
197
+ ARMCPU *cpu = env_archcpu(env);
198
+ int r;
199
+ bool idau_exempt = false, idau_ns = true, idau_nsc = true;
200
+ int idau_region = IREGION_NOTVALID;
201
+ uint32_t addr_page_base = address & TARGET_PAGE_MASK;
202
+ uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
203
+
204
+ if (cpu->idau) {
205
+ IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau);
206
+ IDAUInterface *ii = IDAU_INTERFACE(cpu->idau);
207
+
208
+ iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns,
209
+ &idau_nsc);
210
+ }
211
+
212
+ if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) {
213
+ /* 0xf0000000..0xffffffff is always S for insn fetches */
214
+ return;
46
+ return;
215
+ }
47
+ }
216
+
48
+
217
+ if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) {
49
+ /*
218
+ sattrs->ns = !regime_is_secure(env, mmu_idx);
50
+ * Move from mergereg to reg; this sets the high elements and
51
+ * clears the bits above 128 as a side effect.
52
+ */
53
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
54
+ vec_full_reg_offset(s, mergereg),
55
+ 16, vec_full_reg_size(s));
56
+ tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
57
+}
58
+
59
+/*
60
+ * Write a single-prec result, but only clear the higher elements
61
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
62
+ */
63
+static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
64
+ TCGv_i32 v)
65
+{
66
+ if (!s->fpcr_nep) {
67
+ write_fp_sreg(s, reg, v);
219
+ return;
68
+ return;
220
+ }
69
+ }
221
+
70
+
222
+ if (idau_region != IREGION_NOTVALID) {
71
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
223
+ sattrs->irvalid = true;
72
+ vec_full_reg_offset(s, mergereg),
224
+ sattrs->iregion = idau_region;
73
+ 16, vec_full_reg_size(s));
74
+ tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
75
+}
76
+
77
+/*
78
+ * Write a half-prec result, but only clear the higher elements
79
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
80
+ * The caller must ensure that the top 16 bits of v are zero.
81
+ */
82
+static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
83
+ TCGv_i32 v)
84
+{
85
+ if (!s->fpcr_nep) {
86
+ write_fp_sreg(s, reg, v);
87
+ return;
225
+ }
88
+ }
226
+
89
+
227
+ switch (env->sau.ctrl & 3) {
90
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
228
+ case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */
91
+ vec_full_reg_offset(s, mergereg),
229
+ break;
92
+ 16, vec_full_reg_size(s));
230
+ case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */
93
+ tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
231
+ sattrs->ns = true;
232
+ break;
233
+ default: /* SAU.ENABLE == 1 */
234
+ for (r = 0; r < cpu->sau_sregion; r++) {
235
+ if (env->sau.rlar[r] & 1) {
236
+ uint32_t base = env->sau.rbar[r] & ~0x1f;
237
+ uint32_t limit = env->sau.rlar[r] | 0x1f;
238
+
239
+ if (base <= address && limit >= address) {
240
+ if (base > addr_page_base || limit < addr_page_limit) {
241
+ sattrs->subpage = true;
242
+ }
243
+ if (sattrs->srvalid) {
244
+ /*
245
+ * If we hit in more than one region then we must report
246
+ * as Secure, not NS-Callable, with no valid region
247
+ * number info.
248
+ */
249
+ sattrs->ns = false;
250
+ sattrs->nsc = false;
251
+ sattrs->sregion = 0;
252
+ sattrs->srvalid = false;
253
+ break;
254
+ } else {
255
+ if (env->sau.rlar[r] & 2) {
256
+ sattrs->nsc = true;
257
+ } else {
258
+ sattrs->ns = true;
259
+ }
260
+ sattrs->srvalid = true;
261
+ sattrs->sregion = r;
262
+ }
263
+ } else {
264
+ /*
265
+ * Address not in this region. We must check whether the
266
+ * region covers addresses in the same page as our address.
267
+ * In that case we must not report a size that covers the
268
+ * whole page for a subsequent hit against a different MPU
269
+ * region or the background region, because it would result
270
+ * in incorrect TLB hits for subsequent accesses to
271
+ * addresses that are in this MPU region.
272
+ */
273
+ if (limit >= base &&
274
+ ranges_overlap(base, limit - base + 1,
275
+ addr_page_base,
276
+ TARGET_PAGE_SIZE)) {
277
+ sattrs->subpage = true;
278
+ }
279
+ }
280
+ }
281
+ }
282
+ break;
283
+ }
284
+
285
+ /*
286
+ * The IDAU will override the SAU lookup results if it specifies
287
+ * higher security than the SAU does.
288
+ */
289
+ if (!idau_ns) {
290
+ if (sattrs->ns || (!idau_nsc && sattrs->nsc)) {
291
+ sattrs->ns = false;
292
+ sattrs->nsc = idau_nsc;
293
+ }
294
+ }
295
+}
94
+}
296
+
95
+
297
static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
96
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
298
MMUAccessType access_type, ARMMMUIdx mmu_idx,
97
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
299
hwaddr *phys_ptr, MemTxAttrs *txattrs,
98
GVecGen2Fn *gvec_fn, int vece)
99
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar {
100
} FPScalar;
101
102
static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
103
- const FPScalar *f,
104
+ const FPScalar *f, int mergereg,
105
ARMFPStatusFlavour fpsttype)
106
{
107
switch (a->esz) {
108
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
109
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
110
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
111
f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
112
- write_fp_dreg(s, a->rd, t0);
113
+ write_fp_dreg_merging(s, a->rd, mergereg, t0);
114
}
115
break;
116
case MO_32:
117
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
118
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
119
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
120
f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
121
- write_fp_sreg(s, a->rd, t0);
122
+ write_fp_sreg_merging(s, a->rd, mergereg, t0);
123
}
124
break;
125
case MO_16:
126
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
127
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
128
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
129
f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
130
- write_fp_sreg(s, a->rd, t0);
131
+ write_fp_hreg_merging(s, a->rd, mergereg, t0);
132
}
133
break;
134
default:
135
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
136
return true;
137
}
138
139
-static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
140
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
141
+ int mergereg)
142
{
143
- return do_fp3_scalar_with_fpsttype(s, a, f,
144
+ return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
145
a->esz == MO_16 ?
146
FPST_A64_F16 : FPST_A64);
147
}
148
149
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
150
+static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
151
+ int mergereg)
152
{
153
- return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz));
154
+ return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
155
+ select_ah_fpst(s, a->esz));
156
}
157
158
static const FPScalar f_scalar_fadd = {
159
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = {
160
gen_helper_vfp_adds,
161
gen_helper_vfp_addd,
162
};
163
-TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
164
+TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
165
166
static const FPScalar f_scalar_fsub = {
167
gen_helper_vfp_subh,
168
gen_helper_vfp_subs,
169
gen_helper_vfp_subd,
170
};
171
-TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
172
+TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
173
174
static const FPScalar f_scalar_fdiv = {
175
gen_helper_vfp_divh,
176
gen_helper_vfp_divs,
177
gen_helper_vfp_divd,
178
};
179
-TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
180
+TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
181
182
static const FPScalar f_scalar_fmul = {
183
gen_helper_vfp_mulh,
184
gen_helper_vfp_muls,
185
gen_helper_vfp_muld,
186
};
187
-TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
188
+TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
189
190
static const FPScalar f_scalar_fmax = {
191
gen_helper_vfp_maxh,
192
gen_helper_vfp_maxs,
193
gen_helper_vfp_maxd,
194
};
195
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
196
+TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn)
197
198
static const FPScalar f_scalar_fmin = {
199
gen_helper_vfp_minh,
200
gen_helper_vfp_mins,
201
gen_helper_vfp_mind,
202
};
203
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
204
+TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn)
205
206
static const FPScalar f_scalar_fmaxnm = {
207
gen_helper_vfp_maxnumh,
208
gen_helper_vfp_maxnums,
209
gen_helper_vfp_maxnumd,
210
};
211
-TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
212
+TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
213
214
static const FPScalar f_scalar_fminnm = {
215
gen_helper_vfp_minnumh,
216
gen_helper_vfp_minnums,
217
gen_helper_vfp_minnumd,
218
};
219
-TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
220
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
221
222
static const FPScalar f_scalar_fmulx = {
223
gen_helper_advsimd_mulxh,
224
gen_helper_vfp_mulxs,
225
gen_helper_vfp_mulxd,
226
};
227
-TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
228
+TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
229
230
static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
231
{
232
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = {
233
gen_fnmul_s,
234
gen_fnmul_d,
235
};
236
-TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
237
+TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn)
238
239
static const FPScalar f_scalar_fcmeq = {
240
gen_helper_advsimd_ceq_f16,
241
gen_helper_neon_ceq_f32,
242
gen_helper_neon_ceq_f64,
243
};
244
-TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
245
+TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
246
247
static const FPScalar f_scalar_fcmge = {
248
gen_helper_advsimd_cge_f16,
249
gen_helper_neon_cge_f32,
250
gen_helper_neon_cge_f64,
251
};
252
-TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
253
+TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
254
255
static const FPScalar f_scalar_fcmgt = {
256
gen_helper_advsimd_cgt_f16,
257
gen_helper_neon_cgt_f32,
258
gen_helper_neon_cgt_f64,
259
};
260
-TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
261
+TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
262
263
static const FPScalar f_scalar_facge = {
264
gen_helper_advsimd_acge_f16,
265
gen_helper_neon_acge_f32,
266
gen_helper_neon_acge_f64,
267
};
268
-TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
269
+TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
270
271
static const FPScalar f_scalar_facgt = {
272
gen_helper_advsimd_acgt_f16,
273
gen_helper_neon_acgt_f32,
274
gen_helper_neon_acgt_f64,
275
};
276
-TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
277
+TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
278
279
static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
280
{
281
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = {
282
gen_fabd_s,
283
gen_fabd_d,
284
};
285
-TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
286
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn)
287
288
static const FPScalar f_scalar_frecps = {
289
gen_helper_recpsf_f16,
290
gen_helper_recpsf_f32,
291
gen_helper_recpsf_f64,
292
};
293
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps)
294
+TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
295
296
static const FPScalar f_scalar_frsqrts = {
297
gen_helper_rsqrtsf_f16,
298
gen_helper_rsqrtsf_f32,
299
gen_helper_rsqrtsf_f64,
300
};
301
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts)
302
+TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
303
304
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
305
const FPScalar *f, bool swap)
300
--
306
--
301
2.25.1
307
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP for the 3-input scalar operations which use
2
do_fmla_scalar_idx() and do_fmadd(), by making them call the
3
appropriate write_fp_*reg_merging() functions.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 12 ++++++------
9
1 file changed, 6 insertions(+), 6 deletions(-)
10
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/tcg/translate-a64.c
14
+++ b/target/arm/tcg/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
16
gen_vfp_negd(t1, t1);
17
}
18
gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
19
- write_fp_dreg(s, a->rd, t0);
20
+ write_fp_dreg_merging(s, a->rd, a->rd, t0);
21
}
22
break;
23
case MO_32:
24
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
25
gen_vfp_negs(t1, t1);
26
}
27
gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
28
- write_fp_sreg(s, a->rd, t0);
29
+ write_fp_sreg_merging(s, a->rd, a->rd, t0);
30
}
31
break;
32
case MO_16:
33
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
34
}
35
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
36
fpstatus_ptr(FPST_A64_F16));
37
- write_fp_sreg(s, a->rd, t0);
38
+ write_fp_hreg_merging(s, a->rd, a->rd, t0);
39
}
40
break;
41
default:
42
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
43
}
44
fpst = fpstatus_ptr(FPST_A64);
45
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
46
- write_fp_dreg(s, a->rd, ta);
47
+ write_fp_dreg_merging(s, a->rd, a->ra, ta);
48
}
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
52
}
53
fpst = fpstatus_ptr(FPST_A64);
54
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
55
- write_fp_sreg(s, a->rd, ta);
56
+ write_fp_sreg_merging(s, a->rd, a->ra, ta);
57
}
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
61
}
62
fpst = fpstatus_ptr(FPST_A64_F16);
63
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
64
- write_fp_sreg(s, a->rd, ta);
65
+ write_fp_hreg_merging(s, a->rd, a->ra, ta);
66
}
67
break;
68
69
--
70
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Currently we implement BFCVT scalar via do_fp1_scalar(). This works
2
even though BFCVT is a narrowing operation from 32 to 16 bits,
3
because we can use write_fp_sreg() for float16. However, FPCR.NEP
4
support requires that we use write_fp_hreg_merging() for float16
5
outputs, so we can't continue to borrow the non-narrowing
6
do_fp1_scalar() function for this. Split out trans_BFCVT_s()
7
into its own implementation that honours FPCR.NEP.
2
8
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-21-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
11
---
8
target/arm/ptw.h | 2 --
12
target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++----
9
target/arm/helper.c | 70 ---------------------------------------------
13
1 file changed, 21 insertions(+), 4 deletions(-)
10
target/arm/ptw.c | 70 +++++++++++++++++++++++++++++++++++++++++++++
11
3 files changed, 70 insertions(+), 72 deletions(-)
12
14
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
15
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
17
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
18
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
19
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = {
18
20
};
19
ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
21
TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
20
ARMMMUIdx mmu_idx);
22
21
-bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
23
-static const FPScalar1 f_scalar_bfcvt = {
22
- int inputsize, int stride, int outputsize);
24
- .gen_s = gen_helper_bfcvt,
23
25
-};
24
#endif /* !CONFIG_USER_ONLY */
26
-TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1)
25
#endif /* TARGET_ARM_PTW_H */
27
+static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
26
diff --git a/target/arm/helper.c b/target/arm/helper.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/helper.c
29
+++ b/target/arm/helper.c
30
@@ -XXX,XX +XXX,XX @@ int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
31
g_assert_not_reached();
32
}
33
}
34
-
35
-/*
36
- * check_s2_mmu_setup
37
- * @cpu: ARMCPU
38
- * @is_aa64: True if the translation regime is in AArch64 state
39
- * @startlevel: Suggested starting level
40
- * @inputsize: Bitsize of IPAs
41
- * @stride: Page-table stride (See the ARM ARM)
42
- *
43
- * Returns true if the suggested S2 translation parameters are OK and
44
- * false otherwise.
45
- */
46
-bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
47
- int inputsize, int stride, int outputsize)
48
-{
49
- const int grainsize = stride + 3;
50
- int startsizecheck;
51
-
52
- /*
53
- * Negative levels are usually not allowed...
54
- * Except for FEAT_LPA2, 4k page table, 52-bit address space, which
55
- * begins with level -1. Note that previous feature tests will have
56
- * eliminated this combination if it is not enabled.
57
- */
58
- if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) {
59
- return false;
60
- }
61
-
62
- startsizecheck = inputsize - ((3 - level) * stride + grainsize);
63
- if (startsizecheck < 1 || startsizecheck > stride + 4) {
64
- return false;
65
- }
66
-
67
- if (is_aa64) {
68
- switch (stride) {
69
- case 13: /* 64KB Pages. */
70
- if (level == 0 || (level == 1 && outputsize <= 42)) {
71
- return false;
72
- }
73
- break;
74
- case 11: /* 16KB Pages. */
75
- if (level == 0 || (level == 1 && outputsize <= 40)) {
76
- return false;
77
- }
78
- break;
79
- case 9: /* 4KB Pages. */
80
- if (level == 0 && outputsize <= 42) {
81
- return false;
82
- }
83
- break;
84
- default:
85
- g_assert_not_reached();
86
- }
87
-
88
- /* Inputsize checks. */
89
- if (inputsize > outputsize &&
90
- (arm_el_is_aa64(&cpu->env, 1) || inputsize > 40)) {
91
- /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */
92
- return false;
93
- }
94
- } else {
95
- /* AArch32 only supports 4KB pages. Assert on that. */
96
- assert(stride == 9);
97
-
98
- if (level == 0) {
99
- return false;
100
- }
101
- }
102
- return true;
103
-}
104
#endif /* !CONFIG_USER_ONLY */
105
106
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
107
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/ptw.c
110
+++ b/target/arm/ptw.c
111
@@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
112
return prot_rw | PAGE_EXEC;
113
}
114
115
+/*
116
+ * check_s2_mmu_setup
117
+ * @cpu: ARMCPU
118
+ * @is_aa64: True if the translation regime is in AArch64 state
119
+ * @startlevel: Suggested starting level
120
+ * @inputsize: Bitsize of IPAs
121
+ * @stride: Page-table stride (See the ARM ARM)
122
+ *
123
+ * Returns true if the suggested S2 translation parameters are OK and
124
+ * false otherwise.
125
+ */
126
+static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
127
+ int inputsize, int stride, int outputsize)
128
+{
28
+{
129
+ const int grainsize = stride + 3;
29
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
130
+ int startsizecheck;
30
+ TCGv_i32 t32;
31
+ int check;
131
+
32
+
132
+ /*
33
+ if (!dc_isar_feature(aa64_bf16, s)) {
133
+ * Negative levels are usually not allowed...
134
+ * Except for FEAT_LPA2, 4k page table, 52-bit address space, which
135
+ * begins with level -1. Note that previous feature tests will have
136
+ * eliminated this combination if it is not enabled.
137
+ */
138
+ if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) {
139
+ return false;
34
+ return false;
140
+ }
35
+ }
141
+
36
+
142
+ startsizecheck = inputsize - ((3 - level) * stride + grainsize);
37
+ check = fp_access_check_scalar_hsd(s, a->esz);
143
+ if (startsizecheck < 1 || startsizecheck > stride + 4) {
38
+
144
+ return false;
39
+ if (check <= 0) {
40
+ return check == 0;
145
+ }
41
+ }
146
+
42
+
147
+ if (is_aa64) {
43
+ t32 = read_fp_sreg(s, a->rn);
148
+ switch (stride) {
44
+ gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
149
+ case 13: /* 64KB Pages. */
45
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
150
+ if (level == 0 || (level == 1 && outputsize <= 42)) {
151
+ return false;
152
+ }
153
+ break;
154
+ case 11: /* 16KB Pages. */
155
+ if (level == 0 || (level == 1 && outputsize <= 40)) {
156
+ return false;
157
+ }
158
+ break;
159
+ case 9: /* 4KB Pages. */
160
+ if (level == 0 && outputsize <= 42) {
161
+ return false;
162
+ }
163
+ break;
164
+ default:
165
+ g_assert_not_reached();
166
+ }
167
+
168
+ /* Inputsize checks. */
169
+ if (inputsize > outputsize &&
170
+ (arm_el_is_aa64(&cpu->env, 1) || inputsize > 40)) {
171
+ /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */
172
+ return false;
173
+ }
174
+ } else {
175
+ /* AArch32 only supports 4KB pages. Assert on that. */
176
+ assert(stride == 9);
177
+
178
+ if (level == 0) {
179
+ return false;
180
+ }
181
+ }
182
+ return true;
46
+ return true;
183
+}
47
+}
184
+
48
185
/**
49
static const FPScalar1 f_scalar_frint32 = {
186
* get_phys_addr_lpae: perform one stage of page table walk, LPAE format
50
NULL,
187
*
188
--
51
--
189
2.25.1
52
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP for the 1-input scalar operations.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------
7
1 file changed, 14 insertions(+), 12 deletions(-)
8
9
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/tcg/translate-a64.c
12
+++ b/target/arm/tcg/translate-a64.c
13
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
14
case MO_64:
15
t64 = read_fp_dreg(s, a->rn);
16
f->gen_d(t64, t64, fpst);
17
- write_fp_dreg(s, a->rd, t64);
18
+ write_fp_dreg_merging(s, a->rd, a->rd, t64);
19
break;
20
case MO_32:
21
t32 = read_fp_sreg(s, a->rn);
22
f->gen_s(t32, t32, fpst);
23
- write_fp_sreg(s, a->rd, t32);
24
+ write_fp_sreg_merging(s, a->rd, a->rd, t32);
25
break;
26
case MO_16:
27
t32 = read_fp_hreg(s, a->rn);
28
f->gen_h(t32, t32, fpst);
29
- write_fp_sreg(s, a->rd, t32);
30
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
31
break;
32
default:
33
g_assert_not_reached();
34
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
35
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
36
37
gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
38
- write_fp_dreg(s, a->rd, tcg_rd);
39
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
40
}
41
return true;
42
}
43
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
44
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
45
46
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
47
- /* write_fp_sreg is OK here because top half of result is zero */
48
- write_fp_sreg(s, a->rd, tmp);
49
+ /* write_fp_hreg_merging is OK here because top half of result is zero */
50
+ write_fp_hreg_merging(s, a->rd, a->rd, tmp);
51
}
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
55
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
56
57
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
58
- write_fp_sreg(s, a->rd, tcg_rd);
59
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
60
}
61
return true;
62
}
63
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
64
TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
65
66
gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
67
- /* write_fp_sreg is OK here because top half of tcg_rd is zero */
68
- write_fp_sreg(s, a->rd, tcg_rd);
69
+ /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
70
+ write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
71
}
72
return true;
73
}
74
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
75
TCGv_i32 tcg_ahp = get_ahp_flag();
76
77
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
78
- write_fp_sreg(s, a->rd, tcg_rd);
79
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
80
}
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
84
TCGv_i32 tcg_ahp = get_ahp_flag();
85
86
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
87
- write_fp_dreg(s, a->rd, tcg_rd);
88
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
89
}
90
return true;
91
}
92
@@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
93
do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
94
a->esz, tcg_int, a->shift, a->rn, rmode);
95
96
- clear_vec(s, a->rd);
97
+ if (!s->fpcr_nep) {
98
+ clear_vec(s, a->rd);
99
+ }
100
write_vec_element(s, tcg_int, a->rd, 0, a->esz);
101
return true;
102
}
103
--
104
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP in the operations handled by do_cvtf_scalar().
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/arm/tcg/translate-a64.c | 6 +++---
7
1 file changed, 3 insertions(+), 3 deletions(-)
8
9
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/tcg/translate-a64.c
12
+++ b/target/arm/tcg/translate-a64.c
13
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
14
} else {
15
gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
16
}
17
- write_fp_dreg(s, rd, tcg_double);
18
+ write_fp_dreg_merging(s, rd, rd, tcg_double);
19
break;
20
21
case MO_32:
22
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
23
} else {
24
gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
25
}
26
- write_fp_sreg(s, rd, tcg_single);
27
+ write_fp_sreg_merging(s, rd, rd, tcg_single);
28
break;
29
30
case MO_16:
31
@@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
32
} else {
33
gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
34
}
35
- write_fp_sreg(s, rd, tcg_single);
36
+ write_fp_hreg_merging(s, rd, rd, tcg_single);
37
break;
38
39
default:
40
--
41
2.34.1
diff view generated by jsdifflib
New patch
1
Handle FPCR.NEP merging for scalar FABS and FNEG; this requires
2
an extra parameter to do_fp1_scalar_int(), since FMOV scalar
3
does not have the merging behaviour.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++-------
9
1 file changed, 20 insertions(+), 7 deletions(-)
10
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/tcg/translate-a64.c
14
+++ b/target/arm/tcg/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int {
16
} FPScalar1Int;
17
18
static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
19
- const FPScalar1Int *f)
20
+ const FPScalar1Int *f,
21
+ bool merging)
22
{
23
switch (a->esz) {
24
case MO_64:
25
if (fp_access_check(s)) {
26
TCGv_i64 t = read_fp_dreg(s, a->rn);
27
f->gen_d(t, t);
28
- write_fp_dreg(s, a->rd, t);
29
+ if (merging) {
30
+ write_fp_dreg_merging(s, a->rd, a->rd, t);
31
+ } else {
32
+ write_fp_dreg(s, a->rd, t);
33
+ }
34
}
35
break;
36
case MO_32:
37
if (fp_access_check(s)) {
38
TCGv_i32 t = read_fp_sreg(s, a->rn);
39
f->gen_s(t, t);
40
- write_fp_sreg(s, a->rd, t);
41
+ if (merging) {
42
+ write_fp_sreg_merging(s, a->rd, a->rd, t);
43
+ } else {
44
+ write_fp_sreg(s, a->rd, t);
45
+ }
46
}
47
break;
48
case MO_16:
49
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
50
if (fp_access_check(s)) {
51
TCGv_i32 t = read_fp_hreg(s, a->rn);
52
f->gen_h(t, t);
53
- write_fp_sreg(s, a->rd, t);
54
+ if (merging) {
55
+ write_fp_hreg_merging(s, a->rd, a->rd, t);
56
+ } else {
57
+ write_fp_sreg(s, a->rd, t);
58
+ }
59
}
60
break;
61
default:
62
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = {
63
tcg_gen_mov_i32,
64
tcg_gen_mov_i64,
65
};
66
-TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
67
+TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
68
69
static const FPScalar1Int f_scalar_fabs = {
70
gen_vfp_absh,
71
gen_vfp_abss,
72
gen_vfp_absd,
73
};
74
-TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
75
+TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true)
76
77
static const FPScalar1Int f_scalar_fneg = {
78
gen_vfp_negh,
79
gen_vfp_negs,
80
gen_vfp_negd,
81
};
82
-TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
83
+TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true)
84
85
typedef struct FPScalar1 {
86
void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
87
--
88
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar)
2
is always double-to-single and must honour FPCR.NEP. Implement this
3
directly in a trans function rather than using
4
do_2misc_narrow_scalar().
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for
4
Message-id: 20220604040607.269301-9-richard.henderson@linaro.org
7
the FCVTXN (vector) insn, so we move those down in the file to
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
where they are used.
9
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
12
---
8
target/arm/ptw.h | 5 ---
13
target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------
9
target/arm/helper.c | 75 -------------------------------------------
14
1 file changed, 28 insertions(+), 15 deletions(-)
10
target/arm/ptw.c | 77 +++++++++++++++++++++++++++++++++++++++++++++
11
3 files changed, 77 insertions(+), 80 deletions(-)
12
15
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
16
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
14
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
18
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
19
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@ void get_phys_addr_pmsav7_default(CPUARMState *env,
20
@@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = {
18
int32_t address, int *prot);
21
};
19
bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user);
22
TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
20
23
21
-bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
24
-static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
22
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
25
+static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
23
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
26
{
24
- int *prot, target_ulong *page_size,
27
- /*
25
- ARMMMUFaultInfo *fi);
28
- * 64 bit to 32 bit float conversion
26
bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
29
- * with von Neumann rounding (round to odd)
27
MMUAccessType access_type, ARMMMUIdx mmu_idx,
30
- */
28
bool s1_is_el0,
31
- TCGv_i32 tmp = tcg_temp_new_i32();
29
diff --git a/target/arm/helper.c b/target/arm/helper.c
32
- gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
30
index XXXXXXX..XXXXXXX 100644
33
- tcg_gen_extu_i32_i64(d, tmp);
31
--- a/target/arm/helper.c
34
+ if (fp_access_check(s)) {
32
+++ b/target/arm/helper.c
35
+ /*
33
@@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
36
+ * 64 bit to 32 bit float conversion
34
return !(*prot & (1 << access_type));
37
+ * with von Neumann rounding (round to odd)
38
+ */
39
+ TCGv_i64 src = read_fp_dreg(s, a->rn);
40
+ TCGv_i32 dst = tcg_temp_new_i32();
41
+ gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
42
+ write_fp_sreg_merging(s, a->rd, a->rd, dst);
43
+ }
44
+ return true;
35
}
45
}
36
46
47
-static ArithOneOp * const f_scalar_fcvtxn[] = {
48
- NULL,
49
- NULL,
50
- gen_fcvtxn_sd,
51
-};
52
-TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)
37
-
53
-
38
-bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
54
#undef WRAP_ENV
39
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
55
40
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
56
static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
41
- int *prot, target_ulong *page_size,
57
@@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
42
- ARMMMUFaultInfo *fi)
58
tcg_gen_extu_i32_i64(d, tmp);
43
-{
44
- uint32_t secure = regime_is_secure(env, mmu_idx);
45
- V8M_SAttributes sattrs = {};
46
- bool ret;
47
- bool mpu_is_subpage;
48
-
49
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
50
- v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs);
51
- if (access_type == MMU_INST_FETCH) {
52
- /* Instruction fetches always use the MMU bank and the
53
- * transaction attribute determined by the fetch address,
54
- * regardless of CPU state. This is painful for QEMU
55
- * to handle, because it would mean we need to encode
56
- * into the mmu_idx not just the (user, negpri) information
57
- * for the current security state but also that for the
58
- * other security state, which would balloon the number
59
- * of mmu_idx values needed alarmingly.
60
- * Fortunately we can avoid this because it's not actually
61
- * possible to arbitrarily execute code from memory with
62
- * the wrong security attribute: it will always generate
63
- * an exception of some kind or another, apart from the
64
- * special case of an NS CPU executing an SG instruction
65
- * in S&NSC memory. So we always just fail the translation
66
- * here and sort things out in the exception handler
67
- * (including possibly emulating an SG instruction).
68
- */
69
- if (sattrs.ns != !secure) {
70
- if (sattrs.nsc) {
71
- fi->type = ARMFault_QEMU_NSCExec;
72
- } else {
73
- fi->type = ARMFault_QEMU_SFault;
74
- }
75
- *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
76
- *phys_ptr = address;
77
- *prot = 0;
78
- return true;
79
- }
80
- } else {
81
- /* For data accesses we always use the MMU bank indicated
82
- * by the current CPU state, but the security attributes
83
- * might downgrade a secure access to nonsecure.
84
- */
85
- if (sattrs.ns) {
86
- txattrs->secure = false;
87
- } else if (!secure) {
88
- /* NS access to S memory must fault.
89
- * Architecturally we should first check whether the
90
- * MPU information for this address indicates that we
91
- * are doing an unaligned access to Device memory, which
92
- * should generate a UsageFault instead. QEMU does not
93
- * currently check for that kind of unaligned access though.
94
- * If we added it we would need to do so as a special case
95
- * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt().
96
- */
97
- fi->type = ARMFault_QEMU_SFault;
98
- *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
99
- *phys_ptr = address;
100
- *prot = 0;
101
- return true;
102
- }
103
- }
104
- }
105
-
106
- ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr,
107
- txattrs, prot, &mpu_is_subpage, fi, NULL);
108
- *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE;
109
- return ret;
110
-}
111
-
112
/* Combine either inner or outer cacheability attributes for normal
113
* memory, according to table D4-42 and pseudocode procedure
114
* CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
115
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/target/arm/ptw.c
118
+++ b/target/arm/ptw.c
119
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
120
return !(*prot & (1 << access_type));
121
}
59
}
122
60
123
+static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
61
+static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
124
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
125
+ hwaddr *phys_ptr, MemTxAttrs *txattrs,
126
+ int *prot, target_ulong *page_size,
127
+ ARMMMUFaultInfo *fi)
128
+{
62
+{
129
+ uint32_t secure = regime_is_secure(env, mmu_idx);
63
+ /*
130
+ V8M_SAttributes sattrs = {};
64
+ * 64 bit to 32 bit float conversion
131
+ bool ret;
65
+ * with von Neumann rounding (round to odd)
132
+ bool mpu_is_subpage;
66
+ */
133
+
67
+ TCGv_i32 tmp = tcg_temp_new_i32();
134
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
68
+ gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
135
+ v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs);
69
+ tcg_gen_extu_i32_i64(d, tmp);
136
+ if (access_type == MMU_INST_FETCH) {
137
+ /*
138
+ * Instruction fetches always use the MMU bank and the
139
+ * transaction attribute determined by the fetch address,
140
+ * regardless of CPU state. This is painful for QEMU
141
+ * to handle, because it would mean we need to encode
142
+ * into the mmu_idx not just the (user, negpri) information
143
+ * for the current security state but also that for the
144
+ * other security state, which would balloon the number
145
+ * of mmu_idx values needed alarmingly.
146
+ * Fortunately we can avoid this because it's not actually
147
+ * possible to arbitrarily execute code from memory with
148
+ * the wrong security attribute: it will always generate
149
+ * an exception of some kind or another, apart from the
150
+ * special case of an NS CPU executing an SG instruction
151
+ * in S&NSC memory. So we always just fail the translation
152
+ * here and sort things out in the exception handler
153
+ * (including possibly emulating an SG instruction).
154
+ */
155
+ if (sattrs.ns != !secure) {
156
+ if (sattrs.nsc) {
157
+ fi->type = ARMFault_QEMU_NSCExec;
158
+ } else {
159
+ fi->type = ARMFault_QEMU_SFault;
160
+ }
161
+ *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
162
+ *phys_ptr = address;
163
+ *prot = 0;
164
+ return true;
165
+ }
166
+ } else {
167
+ /*
168
+ * For data accesses we always use the MMU bank indicated
169
+ * by the current CPU state, but the security attributes
170
+ * might downgrade a secure access to nonsecure.
171
+ */
172
+ if (sattrs.ns) {
173
+ txattrs->secure = false;
174
+ } else if (!secure) {
175
+ /*
176
+ * NS access to S memory must fault.
177
+ * Architecturally we should first check whether the
178
+ * MPU information for this address indicates that we
179
+ * are doing an unaligned access to Device memory, which
180
+ * should generate a UsageFault instead. QEMU does not
181
+ * currently check for that kind of unaligned access though.
182
+ * If we added it we would need to do so as a special case
183
+ * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt().
184
+ */
185
+ fi->type = ARMFault_QEMU_SFault;
186
+ *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
187
+ *phys_ptr = address;
188
+ *prot = 0;
189
+ return true;
190
+ }
191
+ }
192
+ }
193
+
194
+ ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr,
195
+ txattrs, prot, &mpu_is_subpage, fi, NULL);
196
+ *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE;
197
+ return ret;
198
+}
70
+}
199
+
71
+
200
/**
72
static ArithOneOp * const f_vector_fcvtn[] = {
201
* get_phys_addr - get the physical address for this virtual address
73
NULL,
202
*
74
gen_fcvtn_hs,
75
gen_fcvtn_sd,
76
};
77
+static ArithOneOp * const f_scalar_fcvtxn[] = {
78
+ NULL,
79
+ NULL,
80
+ gen_fcvtxn_sd,
81
+};
82
TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
83
TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
84
203
--
85
--
204
2.25.1
86
2.34.1
diff view generated by jsdifflib
1
From: Sai Pavan Boddu <sai.pavan.boddu@xilinx.com>
1
do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by
2
element instructions; these both need to merge the result with the Rn
3
register when FPCR.NEP is set.
2
4
3
Fix interrupt disable logic. Mask value 1 indicates that interrupts are
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
disabled.
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/translate-a64.c | 6 +++---
9
1 file changed, 3 insertions(+), 3 deletions(-)
5
10
6
Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
11
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
7
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
8
Signed-off-by: Frederic Konrad <fkonrad@amd.com>
9
Acked-by: Alistair Francis <alistair.francis@wdc.com>
10
Message-id: 20220601172353.3220232-4-fkonrad@xilinx.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
hw/display/xlnx_dp.c | 2 +-
14
1 file changed, 1 insertion(+), 1 deletion(-)
15
16
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
17
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/display/xlnx_dp.c
13
--- a/target/arm/tcg/translate-a64.c
19
+++ b/hw/display/xlnx_dp.c
14
+++ b/target/arm/tcg/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ static void xlnx_dp_write(void *opaque, hwaddr offset, uint64_t value,
15
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
21
xlnx_dp_update_irq(s);
16
17
read_vec_element(s, t1, a->rm, a->idx, MO_64);
18
f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
19
- write_fp_dreg(s, a->rd, t0);
20
+ write_fp_dreg_merging(s, a->rd, a->rn, t0);
21
}
22
break;
22
break;
23
case DP_INT_DS:
23
case MO_32:
24
- s->core_registers[DP_INT_MASK] |= ~value;
24
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
25
+ s->core_registers[DP_INT_MASK] |= value;
25
26
xlnx_dp_update_irq(s);
26
read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
27
f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
28
- write_fp_sreg(s, a->rd, t0);
29
+ write_fp_sreg_merging(s, a->rd, a->rn, t0);
30
}
31
break;
32
case MO_16:
33
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
34
35
read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
36
f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
37
- write_fp_sreg(s, a->rd, t0);
38
+ write_fp_hreg_merging(s, a->rd, a->rn, t0);
39
}
27
break;
40
break;
28
default:
41
default:
29
--
42
--
30
2.25.1
43
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
When FPCR.AH == 1, floating point FMIN and FMAX have some odd special
2
cases:
2
3
3
This (newish) ARM pseudocode function is easier to work with
4
* comparing two zeroes (even of different sign) or comparing a NaN
4
than open-coded tests for HCR_E2H etc. Use of the function
5
with anything always returns the second argument (possibly
5
will be staged into the code base in parts.
6
squashed to zero)
7
* denormal outputs are not squashed to zero regardless of FZ or FZ16
6
8
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Implement these semantics in new helper functions and select them at
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns.
9
Message-id: 20220607203306.657998-6-richard.henderson@linaro.org
11
(We will convert the other FMAX and FMIN insns in subsequent
12
commits.)
13
14
Note that FMINNM and FMAXNM are not affected.
15
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
18
---
12
target/arm/internals.h | 2 ++
19
target/arm/tcg/helper-a64.h | 7 +++++++
13
target/arm/helper.c | 28 ++++++++++++++++++++++++++++
20
target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++
14
2 files changed, 30 insertions(+)
21
target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++--
22
3 files changed, 64 insertions(+), 2 deletions(-)
15
23
16
diff --git a/target/arm/internals.h b/target/arm/internals.h
24
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
17
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/internals.h
26
--- a/target/arm/tcg/helper-a64.h
19
+++ b/target/arm/internals.h
27
+++ b/target/arm/tcg/helper-a64.h
20
@@ -XXX,XX +XXX,XX @@ static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { }
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst)
21
void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
29
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
22
#endif
30
DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
23
31
24
+bool el_is_in_host(CPUARMState *env, int el);
32
+DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst)
33
+DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst)
34
+DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst)
35
+DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst)
36
+DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst)
37
+DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst)
25
+
38
+
26
void aa32_max_features(ARMCPU *cpu);
39
DEF_HELPER_2(exception_return, void, env, i64)
27
40
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
28
#endif
41
29
diff --git a/target/arm/helper.c b/target/arm/helper.c
42
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
30
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/helper.c
44
--- a/target/arm/tcg/helper-a64.c
32
+++ b/target/arm/helper.c
45
+++ b/target/arm/tcg/helper-a64.c
33
@@ -XXX,XX +XXX,XX @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
46
@@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
34
return ret;
47
return r;
35
}
48
}
36
49
37
+/*
50
+/*
38
+ * Corresponds to ARM pseudocode function ELIsInHost().
51
+ * AH=1 min/max have some odd special cases:
52
+ * comparing two zeroes (regardless of sign), (NaN, anything),
53
+ * or (anything, NaN) should return the second argument (possibly
54
+ * squashed to zero).
55
+ * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
39
+ */
56
+ */
40
+bool el_is_in_host(CPUARMState *env, int el)
57
+#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
41
+{
58
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
42
+ uint64_t mask;
59
+ { \
43
+
60
+ bool save; \
44
+ /*
61
+ CTYPE r; \
45
+ * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff().
62
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
46
+ * Perform the simplest bit tests first, and validate EL2 afterward.
63
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
47
+ */
64
+ if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
48
+ if (el & 1) {
65
+ return b; \
49
+ return false; /* EL1 or EL3 */
66
+ } \
67
+ if (FLOATTYPE ## _is_any_nan(a) || \
68
+ FLOATTYPE ## _is_any_nan(b)) { \
69
+ float_raise(float_flag_invalid, fpst); \
70
+ return b; \
71
+ } \
72
+ save = get_flush_to_zero(fpst); \
73
+ set_flush_to_zero(false, fpst); \
74
+ r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
75
+ set_flush_to_zero(save, fpst); \
76
+ return r; \
50
+ }
77
+ }
51
+
78
+
52
+ /*
79
+AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
53
+ * Note that hcr_write() checks isar_feature_aa64_vh(),
80
+AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
54
+ * aka HaveVirtHostExt(), in allowing HCR_E2H to be set.
81
+AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
55
+ */
82
+AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
56
+ mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
83
+AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
57
+ if ((env->cp15.hcr_el2 & mask) != mask) {
84
+AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
58
+ return false;
59
+ }
60
+
85
+
61
+ /* TGE and/or E2H set: double check those bits are currently legal. */
86
/* 64-bit versions of the CRC helpers. Note that although the operation
62
+ return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2);
87
* (and the prototypes of crc32c() and crc32() mean that only the bottom
88
* 32 bits of the accumulator and result are used, we pass and return
89
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/arm/tcg/translate-a64.c
92
+++ b/target/arm/tcg/translate-a64.c
93
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
94
select_ah_fpst(s, a->esz));
95
}
96
97
+/* Some insns need to call different helpers when FPCR.AH == 1 */
98
+static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
99
+ const FPScalar *fnormal,
100
+ const FPScalar *fah,
101
+ int mergereg)
102
+{
103
+ return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
63
+}
104
+}
64
+
105
+
65
static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
106
static const FPScalar f_scalar_fadd = {
66
uint64_t value)
107
gen_helper_vfp_addh,
67
{
108
gen_helper_vfp_adds,
109
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = {
110
gen_helper_vfp_maxs,
111
gen_helper_vfp_maxd,
112
};
113
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn)
114
+static const FPScalar f_scalar_fmax_ah = {
115
+ gen_helper_vfp_ah_maxh,
116
+ gen_helper_vfp_ah_maxs,
117
+ gen_helper_vfp_ah_maxd,
118
+};
119
+TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
120
121
static const FPScalar f_scalar_fmin = {
122
gen_helper_vfp_minh,
123
gen_helper_vfp_mins,
124
gen_helper_vfp_mind,
125
};
126
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn)
127
+static const FPScalar f_scalar_fmin_ah = {
128
+ gen_helper_vfp_ah_minh,
129
+ gen_helper_vfp_ah_mins,
130
+ gen_helper_vfp_ah_mind,
131
+};
132
+TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
133
134
static const FPScalar f_scalar_fmaxnm = {
135
gen_helper_vfp_maxnumh,
68
--
136
--
69
2.25.1
137
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by
2
creating new _ah_ versions of the gvec helpers which invoke the
3
scalar fmin_ah and fmax_ah helpers on each element.
2
4
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-26-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
target/arm/ptw.h | 17 ----------------
8
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
9
target/arm/helper.c | 47 ---------------------------------------------
9
target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++--
10
target/arm/ptw.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
10
target/arm/tcg/vec_helper.c | 8 ++++++++
11
3 files changed, 46 insertions(+), 65 deletions(-)
11
3 files changed, 41 insertions(+), 2 deletions(-)
12
delete mode 100644 target/arm/ptw.h
13
12
14
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
13
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
15
deleted file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- a/target/arm/ptw.h
18
+++ /dev/null
19
@@ -XXX,XX +XXX,XX @@
20
-/*
21
- * ARM page table walking.
22
- *
23
- * This code is licensed under the GNU GPL v2 or later.
24
- *
25
- * SPDX-License-Identifier: GPL-2.0-or-later
26
- */
27
-
28
-#ifndef TARGET_ARM_PTW_H
29
-#define TARGET_ARM_PTW_H
30
-
31
-#ifndef CONFIG_USER_ONLY
32
-
33
-bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
34
-
35
-#endif /* !CONFIG_USER_ONLY */
36
-#endif /* TARGET_ARM_PTW_H */
37
diff --git a/target/arm/helper.c b/target/arm/helper.c
38
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/helper.c
15
--- a/target/arm/tcg/helper-sve.h
40
+++ b/target/arm/helper.c
16
+++ b/target/arm/tcg/helper-sve.h
41
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
42
#include "semihosting/common-semi.h"
18
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
43
#endif
19
void, ptr, ptr, ptr, fpst, i32)
44
#include "cpregs.h"
20
45
-#include "ptw.h"
21
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
46
22
+ void, ptr, ptr, ptr, fpst, i32)
47
#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
23
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
48
24
+ void, ptr, ptr, ptr, fpst, i32)
49
@@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el)
25
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, fpst, i32)
27
+
28
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, fpst, i32)
30
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, fpst, i32)
32
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, fpst, i32)
34
+
35
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
36
i64, ptr, ptr, fpst, i32)
37
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
38
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/tcg/translate-a64.c
41
+++ b/target/arm/tcg/translate-a64.c
42
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
43
FPST_A64_F16 : FPST_A64);
50
}
44
}
51
45
52
#ifndef CONFIG_USER_ONLY
46
+static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
53
-
47
+ gen_helper_gvec_3_ptr * const fnormal[3],
54
-/* Return true if the specified stage of address translation is disabled */
48
+ gen_helper_gvec_3_ptr * const fah[3])
55
-bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx)
56
-{
57
- uint64_t hcr_el2;
58
-
59
- if (arm_feature(env, ARM_FEATURE_M)) {
60
- switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] &
61
- (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) {
62
- case R_V7M_MPU_CTRL_ENABLE_MASK:
63
- /* Enabled, but not for HardFault and NMI */
64
- return mmu_idx & ARM_MMU_IDX_M_NEGPRI;
65
- case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK:
66
- /* Enabled for all cases */
67
- return false;
68
- case 0:
69
- default:
70
- /* HFNMIENA set and ENABLE clear is UNPREDICTABLE, but
71
- * we warned about that in armv7m_nvic.c when the guest set it.
72
- */
73
- return true;
74
- }
75
- }
76
-
77
- hcr_el2 = arm_hcr_el2_eff(env);
78
-
79
- if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
80
- /* HCR.DC means HCR.VM behaves as 1 */
81
- return (hcr_el2 & (HCR_DC | HCR_VM)) == 0;
82
- }
83
-
84
- if (hcr_el2 & HCR_TGE) {
85
- /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */
86
- if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) {
87
- return true;
88
- }
89
- }
90
-
91
- if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) {
92
- /* HCR.DC means SCTLR_EL1.M behaves as 0 */
93
- return true;
94
- }
95
-
96
- return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
97
-}
98
-
99
/* Convert a possible stage1+2 MMU index into the appropriate
100
* stage 1 MMU index
101
*/
102
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/arm/ptw.c
105
+++ b/target/arm/ptw.c
106
@@ -XXX,XX +XXX,XX @@
107
#include "cpu.h"
108
#include "internals.h"
109
#include "idau.h"
110
-#include "ptw.h"
111
112
113
static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
114
@@ -XXX,XX +XXX,XX @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
115
}
116
}
117
118
+/* Return true if the specified stage of address translation is disabled */
119
+static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx)
120
+{
49
+{
121
+ uint64_t hcr_el2;
50
+ return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
122
+
123
+ if (arm_feature(env, ARM_FEATURE_M)) {
124
+ switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] &
125
+ (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) {
126
+ case R_V7M_MPU_CTRL_ENABLE_MASK:
127
+ /* Enabled, but not for HardFault and NMI */
128
+ return mmu_idx & ARM_MMU_IDX_M_NEGPRI;
129
+ case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK:
130
+ /* Enabled for all cases */
131
+ return false;
132
+ case 0:
133
+ default:
134
+ /*
135
+ * HFNMIENA set and ENABLE clear is UNPREDICTABLE, but
136
+ * we warned about that in armv7m_nvic.c when the guest set it.
137
+ */
138
+ return true;
139
+ }
140
+ }
141
+
142
+ hcr_el2 = arm_hcr_el2_eff(env);
143
+
144
+ if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) {
145
+ /* HCR.DC means HCR.VM behaves as 1 */
146
+ return (hcr_el2 & (HCR_DC | HCR_VM)) == 0;
147
+ }
148
+
149
+ if (hcr_el2 & HCR_TGE) {
150
+ /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */
151
+ if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) {
152
+ return true;
153
+ }
154
+ }
155
+
156
+ if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) {
157
+ /* HCR.DC means SCTLR_EL1.M behaves as 0 */
158
+ return true;
159
+ }
160
+
161
+ return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
162
+}
51
+}
163
+
52
+
164
static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
53
static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
54
gen_helper_gvec_3_ptr * const f[3])
165
{
55
{
166
/*
56
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
57
gen_helper_gvec_fmax_s,
58
gen_helper_gvec_fmax_d,
59
};
60
-TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
61
+static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
62
+ gen_helper_gvec_ah_fmax_h,
63
+ gen_helper_gvec_ah_fmax_s,
64
+ gen_helper_gvec_ah_fmax_d,
65
+};
66
+TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
67
68
static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
69
gen_helper_gvec_fmin_h,
70
gen_helper_gvec_fmin_s,
71
gen_helper_gvec_fmin_d,
72
};
73
-TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
74
+static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
75
+ gen_helper_gvec_ah_fmin_h,
76
+ gen_helper_gvec_ah_fmin_s,
77
+ gen_helper_gvec_ah_fmin_d,
78
+};
79
+TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
80
81
static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
82
gen_helper_gvec_fmaxnum_h,
83
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/target/arm/tcg/vec_helper.c
86
+++ b/target/arm/tcg/vec_helper.c
87
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
88
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
89
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
90
91
+DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
92
+DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
93
+DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
94
+
95
+DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16)
96
+DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32)
97
+DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64)
98
+
99
#endif
100
#undef DO_3OP
101
167
--
102
--
168
2.25.1
103
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the FPCR.AH semantics for FMAXV and FMINV. These are the
2
"recursively reduce all lanes of a vector to a scalar result" insns;
3
we just need to use the _ah_ helper for the reduction step when
4
FPCR.AH == 1.
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-7-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
target/arm/ptw.h | 3 +++
9
target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++----------
9
target/arm/helper.c | 41 -----------------------------------------
10
1 file changed, 18 insertions(+), 10 deletions(-)
10
target/arm/ptw.c | 41 +++++++++++++++++++++++++++++++++++++++++
11
3 files changed, 44 insertions(+), 41 deletions(-)
12
11
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
12
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
14
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
15
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
16
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
18
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
19
}
17
}
20
18
21
+void get_phys_addr_pmsav7_default(CPUARMState *env,
19
static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
22
+ ARMMMUIdx mmu_idx,
20
- NeonGenTwoSingleOpFn *fn)
23
+ int32_t address, int *prot);
21
+ NeonGenTwoSingleOpFn *fnormal,
24
bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
22
+ NeonGenTwoSingleOpFn *fah)
25
MMUAccessType access_type, ARMMMUIdx mmu_idx,
23
{
26
hwaddr *phys_ptr, int *prot,
24
if (fp_access_check(s)) {
27
diff --git a/target/arm/helper.c b/target/arm/helper.c
25
MemOp esz = a->esz;
28
index XXXXXXX..XXXXXXX 100644
26
int elts = (a->q ? 16 : 8) >> esz;
29
--- a/target/arm/helper.c
27
TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
30
+++ b/target/arm/helper.c
28
- TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
31
@@ -XXX,XX +XXX,XX @@ do_fault:
29
+ TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
30
+ s->fpcr_ah ? fah : fnormal);
31
write_fp_sreg(s, a->rd, res);
32
}
32
return true;
33
return true;
33
}
34
}
34
35
35
-static inline void get_phys_addr_pmsav7_default(CPUARMState *env,
36
-TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh)
36
- ARMMMUIdx mmu_idx,
37
-TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh)
37
- int32_t address, int *prot)
38
-TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh)
38
-{
39
-TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh)
39
- if (!arm_feature(env, ARM_FEATURE_M)) {
40
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
40
- *prot = PAGE_READ | PAGE_WRITE;
41
+ gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
41
- switch (address) {
42
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
42
- case 0xF0000000 ... 0xFFFFFFFF:
43
+ gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
43
- if (regime_sctlr(env, mmu_idx) & SCTLR_V) {
44
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
44
- /* hivecs execing is ok */
45
+ gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
45
- *prot |= PAGE_EXEC;
46
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
46
- }
47
+ gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
47
- break;
48
48
- case 0x00000000 ... 0x7FFFFFFF:
49
-TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
49
- *prot |= PAGE_EXEC;
50
-TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
50
- break;
51
-TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
51
- }
52
-TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
52
- } else {
53
+TRANS(FMAXNMV_s, do_fp_reduction, a,
53
- /* Default system address map for M profile cores.
54
+ gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
54
- * The architecture specifies which regions are execute-never;
55
+TRANS(FMINNMV_s, do_fp_reduction, a,
55
- * at the MPU level no other checks are defined.
56
+ gen_helper_vfp_minnums, gen_helper_vfp_minnums)
56
- */
57
+TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
57
- switch (address) {
58
+TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
58
- case 0x00000000 ... 0x1fffffff: /* ROM */
59
59
- case 0x20000000 ... 0x3fffffff: /* SRAM */
60
/*
60
- case 0x60000000 ... 0x7fffffff: /* RAM */
61
* Floating-point Immediate
61
- case 0x80000000 ... 0x9fffffff: /* RAM */
62
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
63
- break;
64
- case 0x40000000 ... 0x5fffffff: /* Peripheral */
65
- case 0xa0000000 ... 0xbfffffff: /* Device */
66
- case 0xc0000000 ... 0xdfffffff: /* Device */
67
- case 0xe0000000 ... 0xffffffff: /* System */
68
- *prot = PAGE_READ | PAGE_WRITE;
69
- break;
70
- default:
71
- g_assert_not_reached();
72
- }
73
- }
74
-}
75
-
76
static bool pmsav7_use_background_region(ARMCPU *cpu,
77
ARMMMUIdx mmu_idx, bool is_user)
78
{
79
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/target/arm/ptw.c
82
+++ b/target/arm/ptw.c
83
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
84
return false;
85
}
86
87
+void get_phys_addr_pmsav7_default(CPUARMState *env,
88
+ ARMMMUIdx mmu_idx,
89
+ int32_t address, int *prot)
90
+{
91
+ if (!arm_feature(env, ARM_FEATURE_M)) {
92
+ *prot = PAGE_READ | PAGE_WRITE;
93
+ switch (address) {
94
+ case 0xF0000000 ... 0xFFFFFFFF:
95
+ if (regime_sctlr(env, mmu_idx) & SCTLR_V) {
96
+ /* hivecs execing is ok */
97
+ *prot |= PAGE_EXEC;
98
+ }
99
+ break;
100
+ case 0x00000000 ... 0x7FFFFFFF:
101
+ *prot |= PAGE_EXEC;
102
+ break;
103
+ }
104
+ } else {
105
+ /* Default system address map for M profile cores.
106
+ * The architecture specifies which regions are execute-never;
107
+ * at the MPU level no other checks are defined.
108
+ */
109
+ switch (address) {
110
+ case 0x00000000 ... 0x1fffffff: /* ROM */
111
+ case 0x20000000 ... 0x3fffffff: /* SRAM */
112
+ case 0x60000000 ... 0x7fffffff: /* RAM */
113
+ case 0x80000000 ... 0x9fffffff: /* RAM */
114
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
115
+ break;
116
+ case 0x40000000 ... 0x5fffffff: /* Peripheral */
117
+ case 0xa0000000 ... 0xbfffffff: /* Device */
118
+ case 0xc0000000 ... 0xdfffffff: /* Device */
119
+ case 0xe0000000 ... 0xffffffff: /* System */
120
+ *prot = PAGE_READ | PAGE_WRITE;
121
+ break;
122
+ default:
123
+ g_assert_not_reached();
124
+ }
125
+ }
126
+}
127
+
128
/**
129
* get_phys_addr - get the physical address for this virtual address
130
*
131
--
62
--
132
2.25.1
63
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the FPCR.AH semantics for the pairwise floating
2
point minimum/maximum insns FMINP and FMAXP.
2
3
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-13-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/arm/ptw.h | 3 ---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
9
target/arm/helper.c | 15 ---------------
8
target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++----
10
target/arm/ptw.c | 16 ++++++++++++++++
9
target/arm/tcg/vec_helper.c | 10 ++++++++++
11
3 files changed, 16 insertions(+), 18 deletions(-)
10
3 files changed, 45 insertions(+), 4 deletions(-)
12
11
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
14
--- a/target/arm/tcg/helper-sve.h
16
+++ b/target/arm/ptw.h
15
+++ b/target/arm/tcg/helper-sve.h
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
18
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
17
DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
19
}
18
void, ptr, ptr, ptr, fpst, i32)
20
19
21
-bool m_is_ppb_region(CPUARMState *env, uint32_t address);
20
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG,
22
-bool m_is_system_region(CPUARMState *env, uint32_t address);
21
+ void, ptr, ptr, ptr, fpst, i32)
23
-
22
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG,
24
bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
23
+ void, ptr, ptr, ptr, fpst, i32)
25
MMUAccessType access_type, ARMMMUIdx mmu_idx,
24
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG,
26
bool s1_is_el0,
25
+ void, ptr, ptr, ptr, fpst, i32)
27
diff --git a/target/arm/helper.c b/target/arm/helper.c
26
+
27
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
35
i64, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
28
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/helper.c
39
--- a/target/arm/tcg/translate-a64.c
30
+++ b/target/arm/helper.c
40
+++ b/target/arm/tcg/translate-a64.c
31
@@ -XXX,XX +XXX,XX @@ do_fault:
41
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
42
gen_helper_gvec_fmaxp_s,
43
gen_helper_gvec_fmaxp_d,
44
};
45
-TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
46
+static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
47
+ gen_helper_gvec_ah_fmaxp_h,
48
+ gen_helper_gvec_ah_fmaxp_s,
49
+ gen_helper_gvec_ah_fmaxp_d,
50
+};
51
+TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
52
53
static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
54
gen_helper_gvec_fminp_h,
55
gen_helper_gvec_fminp_s,
56
gen_helper_gvec_fminp_d,
57
};
58
-TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
59
+static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
60
+ gen_helper_gvec_ah_fminp_h,
61
+ gen_helper_gvec_ah_fminp_s,
62
+ gen_helper_gvec_ah_fminp_d,
63
+};
64
+TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
65
66
static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
67
gen_helper_gvec_fmaxnump_h,
68
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
32
return true;
69
return true;
33
}
70
}
34
71
35
-bool m_is_ppb_region(CPUARMState *env, uint32_t address)
72
+static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
36
-{
73
+ const FPScalar *fnormal,
37
- /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
74
+ const FPScalar *fah)
38
- return arm_feature(env, ARM_FEATURE_M) &&
39
- extract32(address, 20, 12) == 0xe00;
40
-}
41
-
42
-bool m_is_system_region(CPUARMState *env, uint32_t address)
43
-{
44
- /* True if address is in the M profile system region
45
- * 0xe0000000 - 0xffffffff
46
- */
47
- return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
48
-}
49
-
50
/* Combine either inner or outer cacheability attributes for normal
51
* memory, according to table D4-42 and pseudocode procedure
52
* CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
53
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/ptw.c
56
+++ b/target/arm/ptw.c
57
@@ -XXX,XX +XXX,XX @@ static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx,
58
}
59
}
60
61
+static bool m_is_ppb_region(CPUARMState *env, uint32_t address)
62
+{
75
+{
63
+ /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
76
+ return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
64
+ return arm_feature(env, ARM_FEATURE_M) &&
65
+ extract32(address, 20, 12) == 0xe00;
66
+}
77
+}
67
+
78
+
68
+static bool m_is_system_region(CPUARMState *env, uint32_t address)
79
TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
69
+{
80
-TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
70
+ /*
81
-TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
71
+ * True if address is in the M profile system region
82
+TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
72
+ * 0xe0000000 - 0xffffffff
83
+TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
73
+ */
84
TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
74
+ return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
85
TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
75
+}
86
87
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/tcg/vec_helper.c
90
+++ b/target/arm/tcg/vec_helper.c
91
@@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
92
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
93
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
94
95
+#ifdef TARGET_AARCH64
96
+DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2)
97
+DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4)
98
+DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, )
76
+
99
+
77
static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx,
100
+DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2)
78
bool is_user)
101
+DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4)
79
{
102
+DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, )
103
+#endif
104
+
105
#undef DO_3OP_PAIR
106
107
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
80
--
108
--
81
2.25.1
109
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the FPCR.AH semantics for the SVE FMAXV and FMINV
2
vector-reduction-to-scalar max/min operations.
2
3
3
Put the inline function near the array declaration.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 +++++++++++
8
target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++-------------
9
target/arm/tcg/translate-sve.c | 16 +++++++++++--
10
3 files changed, 55 insertions(+), 18 deletions(-)
4
11
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220607203306.657998-16-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/vec_internal.h | 8 +++++++-
11
target/arm/sve_helper.c | 9 ---------
12
2 files changed, 7 insertions(+), 10 deletions(-)
13
14
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/vec_internal.h
14
--- a/target/arm/tcg/helper-sve.h
17
+++ b/target/arm/vec_internal.h
15
+++ b/target/arm/tcg/helper-sve.h
18
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG,
19
#define H8(x) (x)
17
DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG,
20
#define H1_8(x) (x)
18
i64, ptr, ptr, fpst, i32)
21
19
22
-/* Data for expanding active predicate bits to bytes, for byte elements. */
20
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG,
23
+/*
21
+ i64, ptr, ptr, fpst, i32)
24
+ * Expand active predicate bits to bytes, for byte elements.
22
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG,
25
+ */
23
+ i64, ptr, ptr, fpst, i32)
26
extern const uint64_t expand_pred_b_data[256];
24
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG,
27
+static inline uint64_t expand_pred_b(uint8_t byte)
25
+ i64, ptr, ptr, fpst, i32)
28
+{
26
+
29
+ return expand_pred_b_data[byte];
27
+DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG,
30
+}
28
+ i64, ptr, ptr, fpst, i32)
31
29
+DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG,
32
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
30
+ i64, ptr, ptr, fpst, i32)
33
{
31
+DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG,
34
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
32
+ i64, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
35
i64, i64, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
35
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sve_helper.c
39
--- a/target/arm/tcg/sve_helper.c
37
+++ b/target/arm/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
38
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
41
@@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
39
return flags;
42
uintptr_t half = n / 2; \
43
TYPE lo = NAME##_reduce(data, status, half); \
44
TYPE hi = NAME##_reduce(data + half, status, half); \
45
- return TYPE##_##FUNC(lo, hi, status); \
46
+ return FUNC(lo, hi, status); \
47
} \
48
} \
49
uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
50
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
51
return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
40
}
52
}
41
53
42
-/*
54
-DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
43
- * Expand active predicate bits to bytes, for byte elements.
55
-DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
44
- * (The data table itself is in vec_helper.c as MVE also needs it.)
56
-DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
45
- */
57
+DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero)
46
-static inline uint64_t expand_pred_b(uint8_t byte)
58
+DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero)
47
-{
59
+DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero)
48
- return expand_pred_b_data[byte];
60
49
-}
61
/* Identity is floatN_default_nan, without the function call. */
50
-
62
-DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
51
/* Similarly for half-word elements.
63
-DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
52
* for (i = 0; i < 256; ++i) {
64
-DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
53
* unsigned long m = 0;
65
+DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00)
66
+DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000)
67
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
68
69
-DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
70
-DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
71
-DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
72
+DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00)
73
+DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000)
74
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
75
76
-DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
77
-DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
78
-DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
79
+DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity)
80
+DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity)
81
+DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity)
82
83
-DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
84
-DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
85
-DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
86
+DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity))
87
+DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity))
88
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity))
89
+
90
+DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity)
91
+DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity)
92
+DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity)
93
+
94
+DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh,
95
+ float16_chs(float16_infinity))
96
+DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs,
97
+ float32_chs(float32_infinity))
98
+DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd,
99
+ float64_chs(float64_infinity))
100
101
#undef DO_REDUCE
102
103
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/tcg/translate-sve.c
106
+++ b/target/arm/tcg/translate-sve.c
107
@@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
108
}; \
109
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
110
111
+#define DO_VPZ_AH(NAME, name) \
112
+ static gen_helper_fp_reduce * const name##_fns[4] = { \
113
+ NULL, gen_helper_sve_##name##_h, \
114
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
115
+ }; \
116
+ static gen_helper_fp_reduce * const name##_ah_fns[4] = { \
117
+ NULL, gen_helper_sve_ah_##name##_h, \
118
+ gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \
119
+ }; \
120
+ TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \
121
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
122
+
123
DO_VPZ(FADDV, faddv)
124
DO_VPZ(FMINNMV, fminnmv)
125
DO_VPZ(FMAXNMV, fmaxnmv)
126
-DO_VPZ(FMINV, fminv)
127
-DO_VPZ(FMAXV, fmaxv)
128
+DO_VPZ_AH(FMINV, fminv)
129
+DO_VPZ_AH(FMAXV, fmaxv)
130
131
#undef DO_VPZ
132
54
--
133
--
55
2.25.1
134
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations
2
that take an immediate as the second operand.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++--
10
3 files changed, 45 insertions(+), 2 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, i64, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, i64, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, i64, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, i64, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, i64, fpst, i32)
29
+DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, i64, fpst, i32)
31
+DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, i64, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG,
35
void, ptr, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
42
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
43
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
44
45
+DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh)
46
+DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs)
47
+DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd)
48
+
49
+DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh)
50
+DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins)
51
+DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind)
52
+
53
/* Fully general two-operand expander, controlled by a predicate,
54
* With the extra float_status parameter.
55
*/
56
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/tcg/translate-sve.c
59
+++ b/target/arm/tcg/translate-sve.c
60
@@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
61
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
62
name##_const[a->esz][a->imm], name##_fns[a->esz])
63
64
+#define DO_FP_AH_IMM(NAME, name, const0, const1) \
65
+ static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
66
+ NULL, gen_helper_sve_##name##_h, \
67
+ gen_helper_sve_##name##_s, \
68
+ gen_helper_sve_##name##_d \
69
+ }; \
70
+ static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \
71
+ NULL, gen_helper_sve_ah_##name##_h, \
72
+ gen_helper_sve_ah_##name##_s, \
73
+ gen_helper_sve_ah_##name##_d \
74
+ }; \
75
+ static uint64_t const name##_const[4][2] = { \
76
+ { -1, -1 }, \
77
+ { float16_##const0, float16_##const1 }, \
78
+ { float32_##const0, float32_##const1 }, \
79
+ { float64_##const0, float64_##const1 }, \
80
+ }; \
81
+ TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
82
+ name##_const[a->esz][a->imm], \
83
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
84
+
85
DO_FP_IMM(FADD, fadds, half, one)
86
DO_FP_IMM(FSUB, fsubs, half, one)
87
DO_FP_IMM(FMUL, fmuls, half, two)
88
DO_FP_IMM(FSUBR, fsubrs, half, one)
89
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
90
DO_FP_IMM(FMINNM, fminnms, zero, one)
91
-DO_FP_IMM(FMAX, fmaxs, zero, one)
92
-DO_FP_IMM(FMIN, fmins, zero, one)
93
+DO_FP_AH_IMM(FMAX, fmaxs, zero, one)
94
+DO_FP_AH_IMM(FMIN, fmins, zero, one)
95
96
#undef DO_FP_IMM
97
98
--
99
2.34.1
diff view generated by jsdifflib
New patch
1
Implement the FPCR.AH semantics for the SVE FMAX and FMIN
2
operations that take two vector operands.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 17 +++++++++++++++--
10
3 files changed, 37 insertions(+), 2 deletions(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
17
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, fpst, i32)
26
+
27
+DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, fpst, i32)
29
+DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, fpst, i32)
31
+DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, fpst, i32)
33
+
34
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
35
void, ptr, ptr, ptr, ptr, fpst, i32)
36
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
37
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/tcg/sve_helper.c
40
+++ b/target/arm/tcg/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
42
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
43
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
44
45
+DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
46
+DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
47
+DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
48
+
49
+DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
50
+DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
51
+DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
52
+
53
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
54
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
55
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
56
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/tcg/translate-sve.c
59
+++ b/target/arm/tcg/translate-sve.c
60
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
61
}; \
62
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
63
64
+#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \
65
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
66
+ NULL, gen_helper_##name##_h, \
67
+ gen_helper_##name##_s, gen_helper_##name##_d \
68
+ }; \
69
+ static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
70
+ NULL, gen_helper_##ah_name##_h, \
71
+ gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
72
+ }; \
73
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
74
+ s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
75
+ name##_zpzz_fns[a->esz], a)
76
+
77
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
78
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
79
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
80
-DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
81
-DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
82
+DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
83
+DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
84
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
85
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
86
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
87
--
88
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
FPCR.AH == 1 mandates that negation of a NaN value should not flip
2
2
its sign bit. This means we can no longer use gen_vfp_neg*()
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
everywhere but must instead generate slightly more complex code when
4
Message-id: 20220604040607.269301-6-richard.henderson@linaro.org
4
FPCR.AH is set.
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
6
Make this change for the scalar FNEG and for those places in
7
translate-a64.c which were previously directly calling
8
gen_vfp_neg*().
9
10
This change in semantics also affects any other instruction whose
11
pseudocode calls FPNeg(); in following commits we extend this
12
change to the other affected instructions.
13
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
16
---
8
target/arm/ptw.h | 4 ---
17
target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++---
9
target/arm/helper.c | 85 ---------------------------------------------
18
1 file changed, 114 insertions(+), 11 deletions(-)
10
target/arm/ptw.c | 85 +++++++++++++++++++++++++++++++++++++++++++++
19
11
3 files changed, 85 insertions(+), 89 deletions(-)
20
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
12
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
14
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
22
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
23
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
24
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
18
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
25
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
19
}
26
}
20
27
21
-bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
28
+/*
22
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
29
+ * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
23
- hwaddr *phys_ptr, int *prot,
30
+ * These functions implement
24
- ARMMMUFaultInfo *fi);
31
+ * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
25
bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
32
+ * which for float32 is
26
MMUAccessType access_type, ARMMMUIdx mmu_idx,
33
+ * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
27
hwaddr *phys_ptr, int *prot,
34
+ * and similarly for the other float sizes.
28
diff --git a/target/arm/helper.c b/target/arm/helper.c
35
+ */
29
index XXXXXXX..XXXXXXX 100644
36
+static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
30
--- a/target/arm/helper.c
37
+{
31
+++ b/target/arm/helper.c
38
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
32
@@ -XXX,XX +XXX,XX @@ bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
39
+
33
return ret;
40
+ gen_vfp_negh(chs_s, s);
41
+ gen_vfp_absh(abs_s, s);
42
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
43
+ abs_s, tcg_constant_i32(0x7c00),
44
+ s, chs_s);
45
+}
46
+
47
+static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
48
+{
49
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
50
+
51
+ gen_vfp_negs(chs_s, s);
52
+ gen_vfp_abss(abs_s, s);
53
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
54
+ abs_s, tcg_constant_i32(0x7f800000UL),
55
+ s, chs_s);
56
+}
57
+
58
+static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
59
+{
60
+ TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
61
+
62
+ gen_vfp_negd(chs_s, s);
63
+ gen_vfp_absd(abs_s, s);
64
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
65
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
66
+ s, chs_s);
67
+}
68
+
69
+static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
70
+{
71
+ if (dc->fpcr_ah) {
72
+ gen_vfp_ah_negh(d, s);
73
+ } else {
74
+ gen_vfp_negh(d, s);
75
+ }
76
+}
77
+
78
+static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
79
+{
80
+ if (dc->fpcr_ah) {
81
+ gen_vfp_ah_negs(d, s);
82
+ } else {
83
+ gen_vfp_negs(d, s);
84
+ }
85
+}
86
+
87
+static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
88
+{
89
+ if (dc->fpcr_ah) {
90
+ gen_vfp_ah_negd(d, s);
91
+ } else {
92
+ gen_vfp_negd(d, s);
93
+ }
94
+}
95
+
96
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
97
* than the 32 bit equivalent.
98
*/
99
@@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
100
gen_vfp_negd(d, d);
34
}
101
}
35
102
36
-bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
103
+static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
37
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
104
+{
38
- hwaddr *phys_ptr, int *prot,
105
+ gen_helper_vfp_mulh(d, n, m, s);
39
- ARMMMUFaultInfo *fi)
106
+ gen_vfp_ah_negh(d, d);
40
-{
107
+}
41
- int n;
108
+
42
- uint32_t mask;
109
+static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
43
- uint32_t base;
110
+{
44
- bool is_user = regime_is_user(env, mmu_idx);
111
+ gen_helper_vfp_muls(d, n, m, s);
45
-
112
+ gen_vfp_ah_negs(d, d);
46
- if (regime_translation_disabled(env, mmu_idx)) {
113
+}
47
- /* MPU disabled. */
114
+
48
- *phys_ptr = address;
115
+static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
49
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
116
+{
50
- return false;
117
+ gen_helper_vfp_muld(d, n, m, s);
51
- }
118
+ gen_vfp_ah_negd(d, d);
52
-
119
+}
53
- *phys_ptr = address;
120
+
54
- for (n = 7; n >= 0; n--) {
121
static const FPScalar f_scalar_fnmul = {
55
- base = env->cp15.c6_region[n];
122
gen_fnmul_h,
56
- if ((base & 1) == 0) {
123
gen_fnmul_s,
57
- continue;
124
gen_fnmul_d,
58
- }
125
};
59
- mask = 1 << ((base >> 1) & 0x1f);
126
-TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn)
60
- /* Keep this shift separate from the above to avoid an
127
+static const FPScalar f_scalar_ah_fnmul = {
61
- (undefined) << 32. */
128
+ gen_fnmul_ah_h,
62
- mask = (mask << 1) - 1;
129
+ gen_fnmul_ah_s,
63
- if (((base ^ address) & ~mask) == 0) {
130
+ gen_fnmul_ah_d,
64
- break;
131
+};
65
- }
132
+TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
66
- }
133
67
- if (n < 0) {
134
static const FPScalar f_scalar_fcmeq = {
68
- fi->type = ARMFault_Background;
135
gen_helper_advsimd_ceq_f16,
69
- return true;
136
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
70
- }
137
71
-
138
read_vec_element(s, t2, a->rm, a->idx, MO_64);
72
- if (access_type == MMU_INST_FETCH) {
139
if (neg) {
73
- mask = env->cp15.pmsav5_insn_ap;
140
- gen_vfp_negd(t1, t1);
74
- } else {
141
+ gen_vfp_maybe_ah_negd(s, t1, t1);
75
- mask = env->cp15.pmsav5_data_ap;
142
}
76
- }
143
gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
77
- mask = (mask >> (n * 4)) & 0xf;
144
write_fp_dreg_merging(s, a->rd, a->rd, t0);
78
- switch (mask) {
145
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
79
- case 0:
146
80
- fi->type = ARMFault_Permission;
147
read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
81
- fi->level = 1;
148
if (neg) {
82
- return true;
149
- gen_vfp_negs(t1, t1);
83
- case 1:
150
+ gen_vfp_maybe_ah_negs(s, t1, t1);
84
- if (is_user) {
151
}
85
- fi->type = ARMFault_Permission;
152
gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
86
- fi->level = 1;
153
write_fp_sreg_merging(s, a->rd, a->rd, t0);
87
- return true;
154
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
88
- }
155
89
- *prot = PAGE_READ | PAGE_WRITE;
156
read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
90
- break;
157
if (neg) {
91
- case 2:
158
- gen_vfp_negh(t1, t1);
92
- *prot = PAGE_READ;
159
+ gen_vfp_maybe_ah_negh(s, t1, t1);
93
- if (!is_user) {
160
}
94
- *prot |= PAGE_WRITE;
161
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
95
- }
162
fpstatus_ptr(FPST_A64_F16));
96
- break;
163
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
97
- case 3:
164
TCGv_i64 ta = read_fp_dreg(s, a->ra);
98
- *prot = PAGE_READ | PAGE_WRITE;
165
99
- break;
166
if (neg_a) {
100
- case 5:
167
- gen_vfp_negd(ta, ta);
101
- if (is_user) {
168
+ gen_vfp_maybe_ah_negd(s, ta, ta);
102
- fi->type = ARMFault_Permission;
169
}
103
- fi->level = 1;
170
if (neg_n) {
104
- return true;
171
- gen_vfp_negd(tn, tn);
105
- }
172
+ gen_vfp_maybe_ah_negd(s, tn, tn);
106
- *prot = PAGE_READ;
173
}
107
- break;
174
fpst = fpstatus_ptr(FPST_A64);
108
- case 6:
175
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
109
- *prot = PAGE_READ;
176
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
110
- break;
177
TCGv_i32 ta = read_fp_sreg(s, a->ra);
111
- default:
178
112
- /* Bad permission. */
179
if (neg_a) {
113
- fi->type = ARMFault_Permission;
180
- gen_vfp_negs(ta, ta);
114
- fi->level = 1;
181
+ gen_vfp_maybe_ah_negs(s, ta, ta);
115
- return true;
182
}
116
- }
183
if (neg_n) {
117
- *prot |= PAGE_EXEC;
184
- gen_vfp_negs(tn, tn);
118
- return false;
185
+ gen_vfp_maybe_ah_negs(s, tn, tn);
119
-}
186
}
120
-
187
fpst = fpstatus_ptr(FPST_A64);
121
/* Combine either inner or outer cacheability attributes for normal
188
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
122
* memory, according to table D4-42 and pseudocode procedure
189
@@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
123
* CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
190
TCGv_i32 ta = read_fp_hreg(s, a->ra);
124
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
191
125
index XXXXXXX..XXXXXXX 100644
192
if (neg_a) {
126
--- a/target/arm/ptw.c
193
- gen_vfp_negh(ta, ta);
127
+++ b/target/arm/ptw.c
194
+ gen_vfp_maybe_ah_negh(s, ta, ta);
128
@@ -XXX,XX +XXX,XX @@ do_fault:
195
}
196
if (neg_n) {
197
- gen_vfp_negh(tn, tn);
198
+ gen_vfp_maybe_ah_negh(s, tn, tn);
199
}
200
fpst = fpstatus_ptr(FPST_A64_F16);
201
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
202
@@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
129
return true;
203
return true;
130
}
204
}
131
205
132
+static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
206
+static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
133
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
207
+ const FPScalar1Int *fnormal,
134
+ hwaddr *phys_ptr, int *prot,
208
+ const FPScalar1Int *fah)
135
+ ARMMMUFaultInfo *fi)
209
+{
136
+{
210
+ return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
137
+ int n;
211
+}
138
+ uint32_t mask;
212
+
139
+ uint32_t base;
213
static const FPScalar1Int f_scalar_fmov = {
140
+ bool is_user = regime_is_user(env, mmu_idx);
214
tcg_gen_mov_i32,
141
+
215
tcg_gen_mov_i32,
142
+ if (regime_translation_disabled(env, mmu_idx)) {
216
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = {
143
+ /* MPU disabled. */
217
gen_vfp_negs,
144
+ *phys_ptr = address;
218
gen_vfp_negd,
145
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
219
};
146
+ return false;
220
-TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true)
147
+ }
221
+static const FPScalar1Int f_scalar_ah_fneg = {
148
+
222
+ gen_vfp_ah_negh,
149
+ *phys_ptr = address;
223
+ gen_vfp_ah_negs,
150
+ for (n = 7; n >= 0; n--) {
224
+ gen_vfp_ah_negd,
151
+ base = env->cp15.c6_region[n];
225
+};
152
+ if ((base & 1) == 0) {
226
+TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
153
+ continue;
227
154
+ }
228
typedef struct FPScalar1 {
155
+ mask = 1 << ((base >> 1) & 0x1f);
229
void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
156
+ /* Keep this shift separate from the above to avoid an
157
+ (undefined) << 32. */
158
+ mask = (mask << 1) - 1;
159
+ if (((base ^ address) & ~mask) == 0) {
160
+ break;
161
+ }
162
+ }
163
+ if (n < 0) {
164
+ fi->type = ARMFault_Background;
165
+ return true;
166
+ }
167
+
168
+ if (access_type == MMU_INST_FETCH) {
169
+ mask = env->cp15.pmsav5_insn_ap;
170
+ } else {
171
+ mask = env->cp15.pmsav5_data_ap;
172
+ }
173
+ mask = (mask >> (n * 4)) & 0xf;
174
+ switch (mask) {
175
+ case 0:
176
+ fi->type = ARMFault_Permission;
177
+ fi->level = 1;
178
+ return true;
179
+ case 1:
180
+ if (is_user) {
181
+ fi->type = ARMFault_Permission;
182
+ fi->level = 1;
183
+ return true;
184
+ }
185
+ *prot = PAGE_READ | PAGE_WRITE;
186
+ break;
187
+ case 2:
188
+ *prot = PAGE_READ;
189
+ if (!is_user) {
190
+ *prot |= PAGE_WRITE;
191
+ }
192
+ break;
193
+ case 3:
194
+ *prot = PAGE_READ | PAGE_WRITE;
195
+ break;
196
+ case 5:
197
+ if (is_user) {
198
+ fi->type = ARMFault_Permission;
199
+ fi->level = 1;
200
+ return true;
201
+ }
202
+ *prot = PAGE_READ;
203
+ break;
204
+ case 6:
205
+ *prot = PAGE_READ;
206
+ break;
207
+ default:
208
+ /* Bad permission. */
209
+ fi->type = ARMFault_Permission;
210
+ fi->level = 1;
211
+ return true;
212
+ }
213
+ *prot |= PAGE_EXEC;
214
+ return false;
215
+}
216
+
217
/**
218
* get_phys_addr - get the physical address for this virtual address
219
*
220
--
230
--
221
2.25.1
231
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
FPCR.AH == 1 mandates that taking the absolute value of a NaN should
2
not change its sign bit. This means we can no longer use
3
gen_vfp_abs*() everywhere but must instead generate slightly more
4
complex code when FPCR.AH is set.
2
5
3
There are a handful of helpers for combine_cacheattrs
6
Implement these semantics for scalar FABS and FABD. This change also
4
that we can move at the same time as the main entry point.
7
affects all other instructions whose pseudocode calls FPAbs(); we
8
will extend the change to those instructions in following commits.
5
9
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220604040607.269301-15-richard.henderson@linaro.org
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
12
---
11
target/arm/ptw.h | 3 -
13
target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++-
12
target/arm/helper.c | 218 -------------------------------------------
14
1 file changed, 67 insertions(+), 2 deletions(-)
13
target/arm/ptw.c | 221 ++++++++++++++++++++++++++++++++++++++++++++
14
3 files changed, 221 insertions(+), 221 deletions(-)
15
15
16
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
16
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/ptw.h
18
--- a/target/arm/tcg/translate-a64.c
19
+++ b/target/arm/ptw.h
19
+++ b/target/arm/tcg/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
20
@@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
21
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
21
s, chs_s);
22
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
23
24
-ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
25
- ARMCacheAttrs s1, ARMCacheAttrs s2);
26
-
27
int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
28
int ap, int domain_prot);
29
int simple_ap_to_rw_prot_is_user(int ap, bool is_user);
30
diff --git a/target/arm/helper.c b/target/arm/helper.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/helper.c
33
+++ b/target/arm/helper.c
34
@@ -XXX,XX +XXX,XX @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
35
}
36
return true;
37
}
22
}
38
-
39
-/* Translate from the 4-bit stage 2 representation of
40
- * memory attributes (without cache-allocation hints) to
41
- * the 8-bit representation of the stage 1 MAIR registers
42
- * (which includes allocation hints).
43
- *
44
- * ref: shared/translation/attrs/S2AttrDecode()
45
- * .../S2ConvertAttrsHints()
46
- */
47
-static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
48
-{
49
- uint8_t hiattr = extract32(s2attrs, 2, 2);
50
- uint8_t loattr = extract32(s2attrs, 0, 2);
51
- uint8_t hihint = 0, lohint = 0;
52
-
53
- if (hiattr != 0) { /* normal memory */
54
- if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */
55
- hiattr = loattr = 1; /* non-cacheable */
56
- } else {
57
- if (hiattr != 1) { /* Write-through or write-back */
58
- hihint = 3; /* RW allocate */
59
- }
60
- if (loattr != 1) { /* Write-through or write-back */
61
- lohint = 3; /* RW allocate */
62
- }
63
- }
64
- }
65
-
66
- return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
67
-}
68
#endif /* !CONFIG_USER_ONLY */
69
70
/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
71
@@ -XXX,XX +XXX,XX @@ do_fault:
72
return true;
73
}
74
75
-/* Combine either inner or outer cacheability attributes for normal
76
- * memory, according to table D4-42 and pseudocode procedure
77
- * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
78
- *
79
- * NB: only stage 1 includes allocation hints (RW bits), leading to
80
- * some asymmetry.
81
- */
82
-static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
83
-{
84
- if (s1 == 4 || s2 == 4) {
85
- /* non-cacheable has precedence */
86
- return 4;
87
- } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
88
- /* stage 1 write-through takes precedence */
89
- return s1;
90
- } else if (extract32(s2, 2, 2) == 2) {
91
- /* stage 2 write-through takes precedence, but the allocation hint
92
- * is still taken from stage 1
93
- */
94
- return (2 << 2) | extract32(s1, 0, 2);
95
- } else { /* write-back */
96
- return s1;
97
- }
98
-}
99
-
100
-/*
101
- * Combine the memory type and cacheability attributes of
102
- * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the
103
- * combined attributes in MAIR_EL1 format.
104
- */
105
-static uint8_t combined_attrs_nofwb(CPUARMState *env,
106
- ARMCacheAttrs s1, ARMCacheAttrs s2)
107
-{
108
- uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs;
109
-
110
- s2_mair_attrs = convert_stage2_attrs(env, s2.attrs);
111
-
112
- s1lo = extract32(s1.attrs, 0, 4);
113
- s2lo = extract32(s2_mair_attrs, 0, 4);
114
- s1hi = extract32(s1.attrs, 4, 4);
115
- s2hi = extract32(s2_mair_attrs, 4, 4);
116
-
117
- /* Combine memory type and cacheability attributes */
118
- if (s1hi == 0 || s2hi == 0) {
119
- /* Device has precedence over normal */
120
- if (s1lo == 0 || s2lo == 0) {
121
- /* nGnRnE has precedence over anything */
122
- ret_attrs = 0;
123
- } else if (s1lo == 4 || s2lo == 4) {
124
- /* non-Reordering has precedence over Reordering */
125
- ret_attrs = 4; /* nGnRE */
126
- } else if (s1lo == 8 || s2lo == 8) {
127
- /* non-Gathering has precedence over Gathering */
128
- ret_attrs = 8; /* nGRE */
129
- } else {
130
- ret_attrs = 0xc; /* GRE */
131
- }
132
- } else { /* Normal memory */
133
- /* Outer/inner cacheability combine independently */
134
- ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4
135
- | combine_cacheattr_nibble(s1lo, s2lo);
136
- }
137
- return ret_attrs;
138
-}
139
-
140
-static uint8_t force_cacheattr_nibble_wb(uint8_t attr)
141
-{
142
- /*
143
- * Given the 4 bits specifying the outer or inner cacheability
144
- * in MAIR format, return a value specifying Normal Write-Back,
145
- * with the allocation and transient hints taken from the input
146
- * if the input specified some kind of cacheable attribute.
147
- */
148
- if (attr == 0 || attr == 4) {
149
- /*
150
- * 0 == an UNPREDICTABLE encoding
151
- * 4 == Non-cacheable
152
- * Either way, force Write-Back RW allocate non-transient
153
- */
154
- return 0xf;
155
- }
156
- /* Change WriteThrough to WriteBack, keep allocation and transient hints */
157
- return attr | 4;
158
-}
159
-
160
-/*
161
- * Combine the memory type and cacheability attributes of
162
- * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the
163
- * combined attributes in MAIR_EL1 format.
164
- */
165
-static uint8_t combined_attrs_fwb(CPUARMState *env,
166
- ARMCacheAttrs s1, ARMCacheAttrs s2)
167
-{
168
- switch (s2.attrs) {
169
- case 7:
170
- /* Use stage 1 attributes */
171
- return s1.attrs;
172
- case 6:
173
- /*
174
- * Force Normal Write-Back. Note that if S1 is Normal cacheable
175
- * then we take the allocation hints from it; otherwise it is
176
- * RW allocate, non-transient.
177
- */
178
- if ((s1.attrs & 0xf0) == 0) {
179
- /* S1 is Device */
180
- return 0xff;
181
- }
182
- /* Need to check the Inner and Outer nibbles separately */
183
- return force_cacheattr_nibble_wb(s1.attrs & 0xf) |
184
- force_cacheattr_nibble_wb(s1.attrs >> 4) << 4;
185
- case 5:
186
- /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */
187
- if ((s1.attrs & 0xf0) == 0) {
188
- return s1.attrs;
189
- }
190
- return 0x44;
191
- case 0 ... 3:
192
- /* Force Device, of subtype specified by S2 */
193
- return s2.attrs << 2;
194
- default:
195
- /*
196
- * RESERVED values (including RES0 descriptor bit [5] being nonzero);
197
- * arbitrarily force Device.
198
- */
199
- return 0;
200
- }
201
-}
202
-
203
-/* Combine S1 and S2 cacheability/shareability attributes, per D4.5.4
204
- * and CombineS1S2Desc()
205
- *
206
- * @env: CPUARMState
207
- * @s1: Attributes from stage 1 walk
208
- * @s2: Attributes from stage 2 walk
209
- */
210
-ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
211
- ARMCacheAttrs s1, ARMCacheAttrs s2)
212
-{
213
- ARMCacheAttrs ret;
214
- bool tagged = false;
215
-
216
- assert(s2.is_s2_format && !s1.is_s2_format);
217
- ret.is_s2_format = false;
218
-
219
- if (s1.attrs == 0xf0) {
220
- tagged = true;
221
- s1.attrs = 0xff;
222
- }
223
-
224
- /* Combine shareability attributes (table D4-43) */
225
- if (s1.shareability == 2 || s2.shareability == 2) {
226
- /* if either are outer-shareable, the result is outer-shareable */
227
- ret.shareability = 2;
228
- } else if (s1.shareability == 3 || s2.shareability == 3) {
229
- /* if either are inner-shareable, the result is inner-shareable */
230
- ret.shareability = 3;
231
- } else {
232
- /* both non-shareable */
233
- ret.shareability = 0;
234
- }
235
-
236
- /* Combine memory type and cacheability attributes */
237
- if (arm_hcr_el2_eff(env) & HCR_FWB) {
238
- ret.attrs = combined_attrs_fwb(env, s1, s2);
239
- } else {
240
- ret.attrs = combined_attrs_nofwb(env, s1, s2);
241
- }
242
-
243
- /*
244
- * Any location for which the resultant memory type is any
245
- * type of Device memory is always treated as Outer Shareable.
246
- * Any location for which the resultant memory type is Normal
247
- * Inner Non-cacheable, Outer Non-cacheable is always treated
248
- * as Outer Shareable.
249
- * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC
250
- */
251
- if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) {
252
- ret.shareability = 2;
253
- }
254
-
255
- /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */
256
- if (tagged && ret.attrs == 0xff) {
257
- ret.attrs = 0xf0;
258
- }
259
-
260
- return ret;
261
-}
262
-
263
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
264
MemTxAttrs *attrs)
265
{
266
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/target/arm/ptw.c
269
+++ b/target/arm/ptw.c
270
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
271
return ret;
272
}
273
23
274
+/*
24
+/*
275
+ * Translate from the 4-bit stage 2 representation of
25
+ * These functions implement
276
+ * memory attributes (without cache-allocation hints) to
26
+ * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
277
+ * the 8-bit representation of the stage 1 MAIR registers
27
+ * which for float32 is
278
+ * (which includes allocation hints).
28
+ * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
279
+ *
29
+ * and similarly for the other float sizes.
280
+ * ref: shared/translation/attrs/S2AttrDecode()
281
+ * .../S2ConvertAttrsHints()
282
+ */
30
+ */
283
+static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
31
+static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
284
+{
32
+{
285
+ uint8_t hiattr = extract32(s2attrs, 2, 2);
33
+ TCGv_i32 abs_s = tcg_temp_new_i32();
286
+ uint8_t loattr = extract32(s2attrs, 0, 2);
287
+ uint8_t hihint = 0, lohint = 0;
288
+
34
+
289
+ if (hiattr != 0) { /* normal memory */
35
+ gen_vfp_absh(abs_s, s);
290
+ if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */
36
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
291
+ hiattr = loattr = 1; /* non-cacheable */
37
+ abs_s, tcg_constant_i32(0x7c00),
292
+ } else {
38
+ s, abs_s);
293
+ if (hiattr != 1) { /* Write-through or write-back */
294
+ hihint = 3; /* RW allocate */
295
+ }
296
+ if (loattr != 1) { /* Write-through or write-back */
297
+ lohint = 3; /* RW allocate */
298
+ }
299
+ }
300
+ }
301
+
302
+ return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
303
+}
39
+}
304
+
40
+
305
+/*
41
+static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
306
+ * Combine either inner or outer cacheability attributes for normal
307
+ * memory, according to table D4-42 and pseudocode procedure
308
+ * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
309
+ *
310
+ * NB: only stage 1 includes allocation hints (RW bits), leading to
311
+ * some asymmetry.
312
+ */
313
+static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
314
+{
42
+{
315
+ if (s1 == 4 || s2 == 4) {
43
+ TCGv_i32 abs_s = tcg_temp_new_i32();
316
+ /* non-cacheable has precedence */
44
+
317
+ return 4;
45
+ gen_vfp_abss(abs_s, s);
318
+ } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
46
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
319
+ /* stage 1 write-through takes precedence */
47
+ abs_s, tcg_constant_i32(0x7f800000UL),
320
+ return s1;
48
+ s, abs_s);
321
+ } else if (extract32(s2, 2, 2) == 2) {
322
+ /* stage 2 write-through takes precedence, but the allocation hint
323
+ * is still taken from stage 1
324
+ */
325
+ return (2 << 2) | extract32(s1, 0, 2);
326
+ } else { /* write-back */
327
+ return s1;
328
+ }
329
+}
49
+}
330
+
50
+
331
+/*
51
+static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
332
+ * Combine the memory type and cacheability attributes of
333
+ * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the
334
+ * combined attributes in MAIR_EL1 format.
335
+ */
336
+static uint8_t combined_attrs_nofwb(CPUARMState *env,
337
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
338
+{
52
+{
339
+ uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs;
53
+ TCGv_i64 abs_s = tcg_temp_new_i64();
340
+
54
+
341
+ s2_mair_attrs = convert_stage2_attrs(env, s2.attrs);
55
+ gen_vfp_absd(abs_s, s);
342
+
56
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
343
+ s1lo = extract32(s1.attrs, 0, 4);
57
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
344
+ s2lo = extract32(s2_mair_attrs, 0, 4);
58
+ s, abs_s);
345
+ s1hi = extract32(s1.attrs, 4, 4);
346
+ s2hi = extract32(s2_mair_attrs, 4, 4);
347
+
348
+ /* Combine memory type and cacheability attributes */
349
+ if (s1hi == 0 || s2hi == 0) {
350
+ /* Device has precedence over normal */
351
+ if (s1lo == 0 || s2lo == 0) {
352
+ /* nGnRnE has precedence over anything */
353
+ ret_attrs = 0;
354
+ } else if (s1lo == 4 || s2lo == 4) {
355
+ /* non-Reordering has precedence over Reordering */
356
+ ret_attrs = 4; /* nGnRE */
357
+ } else if (s1lo == 8 || s2lo == 8) {
358
+ /* non-Gathering has precedence over Gathering */
359
+ ret_attrs = 8; /* nGRE */
360
+ } else {
361
+ ret_attrs = 0xc; /* GRE */
362
+ }
363
+ } else { /* Normal memory */
364
+ /* Outer/inner cacheability combine independently */
365
+ ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4
366
+ | combine_cacheattr_nibble(s1lo, s2lo);
367
+ }
368
+ return ret_attrs;
369
+}
59
+}
370
+
60
+
371
+static uint8_t force_cacheattr_nibble_wb(uint8_t attr)
61
static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
62
{
63
if (dc->fpcr_ah) {
64
@@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
65
gen_vfp_absd(d, d);
66
}
67
68
+static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
372
+{
69
+{
373
+ /*
70
+ gen_helper_vfp_subh(d, n, m, s);
374
+ * Given the 4 bits specifying the outer or inner cacheability
71
+ gen_vfp_ah_absh(d, d);
375
+ * in MAIR format, return a value specifying Normal Write-Back,
376
+ * with the allocation and transient hints taken from the input
377
+ * if the input specified some kind of cacheable attribute.
378
+ */
379
+ if (attr == 0 || attr == 4) {
380
+ /*
381
+ * 0 == an UNPREDICTABLE encoding
382
+ * 4 == Non-cacheable
383
+ * Either way, force Write-Back RW allocate non-transient
384
+ */
385
+ return 0xf;
386
+ }
387
+ /* Change WriteThrough to WriteBack, keep allocation and transient hints */
388
+ return attr | 4;
389
+}
72
+}
390
+
73
+
391
+/*
74
+static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
392
+ * Combine the memory type and cacheability attributes of
393
+ * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the
394
+ * combined attributes in MAIR_EL1 format.
395
+ */
396
+static uint8_t combined_attrs_fwb(CPUARMState *env,
397
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
398
+{
75
+{
399
+ switch (s2.attrs) {
76
+ gen_helper_vfp_subs(d, n, m, s);
400
+ case 7:
77
+ gen_vfp_ah_abss(d, d);
401
+ /* Use stage 1 attributes */
402
+ return s1.attrs;
403
+ case 6:
404
+ /*
405
+ * Force Normal Write-Back. Note that if S1 is Normal cacheable
406
+ * then we take the allocation hints from it; otherwise it is
407
+ * RW allocate, non-transient.
408
+ */
409
+ if ((s1.attrs & 0xf0) == 0) {
410
+ /* S1 is Device */
411
+ return 0xff;
412
+ }
413
+ /* Need to check the Inner and Outer nibbles separately */
414
+ return force_cacheattr_nibble_wb(s1.attrs & 0xf) |
415
+ force_cacheattr_nibble_wb(s1.attrs >> 4) << 4;
416
+ case 5:
417
+ /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */
418
+ if ((s1.attrs & 0xf0) == 0) {
419
+ return s1.attrs;
420
+ }
421
+ return 0x44;
422
+ case 0 ... 3:
423
+ /* Force Device, of subtype specified by S2 */
424
+ return s2.attrs << 2;
425
+ default:
426
+ /*
427
+ * RESERVED values (including RES0 descriptor bit [5] being nonzero);
428
+ * arbitrarily force Device.
429
+ */
430
+ return 0;
431
+ }
432
+}
78
+}
433
+
79
+
434
+/*
80
+static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
435
+ * Combine S1 and S2 cacheability/shareability attributes, per D4.5.4
436
+ * and CombineS1S2Desc()
437
+ *
438
+ * @env: CPUARMState
439
+ * @s1: Attributes from stage 1 walk
440
+ * @s2: Attributes from stage 2 walk
441
+ */
442
+static ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
443
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
444
+{
81
+{
445
+ ARMCacheAttrs ret;
82
+ gen_helper_vfp_subd(d, n, m, s);
446
+ bool tagged = false;
83
+ gen_vfp_ah_absd(d, d);
447
+
448
+ assert(s2.is_s2_format && !s1.is_s2_format);
449
+ ret.is_s2_format = false;
450
+
451
+ if (s1.attrs == 0xf0) {
452
+ tagged = true;
453
+ s1.attrs = 0xff;
454
+ }
455
+
456
+ /* Combine shareability attributes (table D4-43) */
457
+ if (s1.shareability == 2 || s2.shareability == 2) {
458
+ /* if either are outer-shareable, the result is outer-shareable */
459
+ ret.shareability = 2;
460
+ } else if (s1.shareability == 3 || s2.shareability == 3) {
461
+ /* if either are inner-shareable, the result is inner-shareable */
462
+ ret.shareability = 3;
463
+ } else {
464
+ /* both non-shareable */
465
+ ret.shareability = 0;
466
+ }
467
+
468
+ /* Combine memory type and cacheability attributes */
469
+ if (arm_hcr_el2_eff(env) & HCR_FWB) {
470
+ ret.attrs = combined_attrs_fwb(env, s1, s2);
471
+ } else {
472
+ ret.attrs = combined_attrs_nofwb(env, s1, s2);
473
+ }
474
+
475
+ /*
476
+ * Any location for which the resultant memory type is any
477
+ * type of Device memory is always treated as Outer Shareable.
478
+ * Any location for which the resultant memory type is Normal
479
+ * Inner Non-cacheable, Outer Non-cacheable is always treated
480
+ * as Outer Shareable.
481
+ * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC
482
+ */
483
+ if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) {
484
+ ret.shareability = 2;
485
+ }
486
+
487
+ /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */
488
+ if (tagged && ret.attrs == 0xff) {
489
+ ret.attrs = 0xf0;
490
+ }
491
+
492
+ return ret;
493
+}
84
+}
494
+
85
+
495
/**
86
static const FPScalar f_scalar_fabd = {
496
* get_phys_addr - get the physical address for this virtual address
87
gen_fabd_h,
497
*
88
gen_fabd_s,
89
gen_fabd_d,
90
};
91
-TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn)
92
+static const FPScalar f_scalar_ah_fabd = {
93
+ gen_fabd_ah_h,
94
+ gen_fabd_ah_s,
95
+ gen_fabd_ah_d,
96
+};
97
+TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
98
99
static const FPScalar f_scalar_frecps = {
100
gen_helper_recpsf_f16,
101
@@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = {
102
gen_vfp_abss,
103
gen_vfp_absd,
104
};
105
-TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true)
106
+static const FPScalar1Int f_scalar_ah_fabs = {
107
+ gen_vfp_ah_absh,
108
+ gen_vfp_ah_abss,
109
+ gen_vfp_ah_absd,
110
+};
111
+TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
112
113
static const FPScalar1Int f_scalar_fneg = {
114
gen_vfp_negh,
498
--
115
--
499
2.25.1
116
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Split the handling of vector FABD so that it calls a different set
2
of helpers when FPCR.AH is 1, which implement the "no negation of
3
the sign of a NaN" semantics.
2
4
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-22-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
target/arm/ptw.h | 3 ---
8
target/arm/helper.h | 4 ++++
9
target/arm/helper.c | 64 ---------------------------------------------
9
target/arm/tcg/translate-a64.c | 7 ++++++-
10
target/arm/ptw.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
10
target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++
11
3 files changed, 64 insertions(+), 67 deletions(-)
11
3 files changed, 33 insertions(+), 1 deletion(-)
12
12
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
13
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
15
--- a/target/arm/helper.h
16
+++ b/target/arm/ptw.h
16
+++ b/target/arm/helper.h
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
18
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
18
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
19
DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
20
21
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
23
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
24
+
25
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
26
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
27
DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
28
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/tcg/translate-a64.c
31
+++ b/target/arm/tcg/translate-a64.c
32
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
33
gen_helper_gvec_fabd_s,
34
gen_helper_gvec_fabd_d,
35
};
36
-TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
37
+static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
38
+ gen_helper_gvec_ah_fabd_h,
39
+ gen_helper_gvec_ah_fabd_s,
40
+ gen_helper_gvec_ah_fabd_d,
41
+};
42
+TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
43
44
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
45
gen_helper_gvec_recps_h,
46
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/vec_helper.c
49
+++ b/target/arm/tcg/vec_helper.c
50
@@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
51
return float64_abs(float64_sub(op1, op2, stat));
19
}
52
}
20
53
21
-ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
54
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
22
- ARMMMUIdx mmu_idx);
55
+static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat)
23
-
24
#endif /* !CONFIG_USER_ONLY */
25
#endif /* TARGET_ARM_PTW_H */
26
diff --git a/target/arm/helper.c b/target/arm/helper.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/helper.c
29
+++ b/target/arm/helper.c
30
@@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
31
}
32
33
#ifndef CONFIG_USER_ONLY
34
-ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
35
- ARMMMUIdx mmu_idx)
36
-{
37
- uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
38
- uint32_t el = regime_el(env, mmu_idx);
39
- int select, tsz;
40
- bool epd, hpd;
41
-
42
- assert(mmu_idx != ARMMMUIdx_Stage2_S);
43
-
44
- if (mmu_idx == ARMMMUIdx_Stage2) {
45
- /* VTCR */
46
- bool sext = extract32(tcr, 4, 1);
47
- bool sign = extract32(tcr, 3, 1);
48
-
49
- /*
50
- * If the sign-extend bit is not the same as t0sz[3], the result
51
- * is unpredictable. Flag this as a guest error.
52
- */
53
- if (sign != sext) {
54
- qemu_log_mask(LOG_GUEST_ERROR,
55
- "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n");
56
- }
57
- tsz = sextract32(tcr, 0, 4) + 8;
58
- select = 0;
59
- hpd = false;
60
- epd = false;
61
- } else if (el == 2) {
62
- /* HTCR */
63
- tsz = extract32(tcr, 0, 3);
64
- select = 0;
65
- hpd = extract64(tcr, 24, 1);
66
- epd = false;
67
- } else {
68
- int t0sz = extract32(tcr, 0, 3);
69
- int t1sz = extract32(tcr, 16, 3);
70
-
71
- if (t1sz == 0) {
72
- select = va > (0xffffffffu >> t0sz);
73
- } else {
74
- /* Note that we will detect errors later. */
75
- select = va >= ~(0xffffffffu >> t1sz);
76
- }
77
- if (!select) {
78
- tsz = t0sz;
79
- epd = extract32(tcr, 7, 1);
80
- hpd = extract64(tcr, 41, 1);
81
- } else {
82
- tsz = t1sz;
83
- epd = extract32(tcr, 23, 1);
84
- hpd = extract64(tcr, 42, 1);
85
- }
86
- /* For aarch32, hpd0 is not enabled without t2e as well. */
87
- hpd &= extract32(tcr, 6, 1);
88
- }
89
-
90
- return (ARMVAParameters) {
91
- .tsz = tsz,
92
- .select = select,
93
- .epd = epd,
94
- .hpd = hpd,
95
- };
96
-}
97
-
98
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
99
MemTxAttrs *attrs)
100
{
101
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/target/arm/ptw.c
104
+++ b/target/arm/ptw.c
105
@@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
106
return prot_rw | PAGE_EXEC;
107
}
108
109
+static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
110
+ ARMMMUIdx mmu_idx)
111
+{
56
+{
112
+ uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
57
+ float16 r = float16_sub(op1, op2, stat);
113
+ uint32_t el = regime_el(env, mmu_idx);
58
+ return float16_is_any_nan(r) ? r : float16_abs(r);
114
+ int select, tsz;
59
+}
115
+ bool epd, hpd;
116
+
60
+
117
+ assert(mmu_idx != ARMMMUIdx_Stage2_S);
61
+static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat)
62
+{
63
+ float32 r = float32_sub(op1, op2, stat);
64
+ return float32_is_any_nan(r) ? r : float32_abs(r);
65
+}
118
+
66
+
119
+ if (mmu_idx == ARMMMUIdx_Stage2) {
67
+static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat)
120
+ /* VTCR */
68
+{
121
+ bool sext = extract32(tcr, 4, 1);
69
+ float64 r = float64_sub(op1, op2, stat);
122
+ bool sign = extract32(tcr, 3, 1);
70
+ return float64_is_any_nan(r) ? r : float64_abs(r);
123
+
124
+ /*
125
+ * If the sign-extend bit is not the same as t0sz[3], the result
126
+ * is unpredictable. Flag this as a guest error.
127
+ */
128
+ if (sign != sext) {
129
+ qemu_log_mask(LOG_GUEST_ERROR,
130
+ "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n");
131
+ }
132
+ tsz = sextract32(tcr, 0, 4) + 8;
133
+ select = 0;
134
+ hpd = false;
135
+ epd = false;
136
+ } else if (el == 2) {
137
+ /* HTCR */
138
+ tsz = extract32(tcr, 0, 3);
139
+ select = 0;
140
+ hpd = extract64(tcr, 24, 1);
141
+ epd = false;
142
+ } else {
143
+ int t0sz = extract32(tcr, 0, 3);
144
+ int t1sz = extract32(tcr, 16, 3);
145
+
146
+ if (t1sz == 0) {
147
+ select = va > (0xffffffffu >> t0sz);
148
+ } else {
149
+ /* Note that we will detect errors later. */
150
+ select = va >= ~(0xffffffffu >> t1sz);
151
+ }
152
+ if (!select) {
153
+ tsz = t0sz;
154
+ epd = extract32(tcr, 7, 1);
155
+ hpd = extract64(tcr, 41, 1);
156
+ } else {
157
+ tsz = t1sz;
158
+ epd = extract32(tcr, 23, 1);
159
+ hpd = extract64(tcr, 42, 1);
160
+ }
161
+ /* For aarch32, hpd0 is not enabled without t2e as well. */
162
+ hpd &= extract32(tcr, 6, 1);
163
+ }
164
+
165
+ return (ARMVAParameters) {
166
+ .tsz = tsz,
167
+ .select = select,
168
+ .epd = epd,
169
+ .hpd = hpd,
170
+ };
171
+}
71
+}
172
+
72
+
173
/*
73
/*
174
* check_s2_mmu_setup
74
* Reciprocal step. These are the AArch32 version which uses a
175
* @cpu: ARMCPU
75
* non-fused multiply-and-subtract.
76
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16)
77
DO_3OP(gvec_fabd_s, float32_abd, float32)
78
DO_3OP(gvec_fabd_d, float64_abd, float64)
79
80
+DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16)
81
+DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32)
82
+DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64)
83
+
84
DO_3OP(gvec_fceq_h, float16_ceq, float16)
85
DO_3OP(gvec_fceq_s, float32_ceq, float32)
86
DO_3OP(gvec_fceq_d, float64_ceq, float64)
176
--
87
--
177
2.25.1
88
2.34.1
diff view generated by jsdifflib
New patch
1
Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN"
2
semantics.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 4 ++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 7 ++++++-
10
3 files changed, 18 insertions(+), 1 deletion(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
20
+DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
+DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
+
24
DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/sve_helper.c
30
+++ b/target/arm/tcg/sve_helper.c
31
@@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
32
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
33
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
34
35
+#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N))
36
+#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N))
37
+#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N))
38
+
39
+DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H)
40
+DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S)
41
+DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D)
42
+
43
#define DO_NOT(N) (~N)
44
45
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
46
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/translate-sve.c
49
+++ b/target/arm/tcg/translate-sve.c
50
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = {
51
NULL, gen_helper_sve_fneg_h,
52
gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
53
};
54
-TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
55
+static gen_helper_gvec_3 * const fneg_ah_fns[4] = {
56
+ NULL, gen_helper_sve_ah_fneg_h,
57
+ gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d,
58
+};
59
+TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz,
60
+ s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0)
61
62
static gen_helper_gvec_3 * const sxtb_fns[4] = {
63
NULL, gen_helper_sve_sxtb_h,
64
--
65
2.34.1
diff view generated by jsdifflib
New patch
1
Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN"
2
semantics.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/tcg/helper-sve.h | 4 ++++
8
target/arm/tcg/sve_helper.c | 8 ++++++++
9
target/arm/tcg/translate-sve.c | 7 ++++++-
10
3 files changed, 18 insertions(+), 1 deletion(-)
11
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
20
+DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
+DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
+
24
DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/tcg/sve_helper.c
30
+++ b/target/arm/tcg/sve_helper.c
31
@@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
32
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
33
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
34
35
+#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N))
36
+#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N))
37
+#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N))
38
+
39
+DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H)
40
+DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S)
41
+DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D)
42
+
43
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
44
45
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
46
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/tcg/translate-sve.c
49
+++ b/target/arm/tcg/translate-sve.c
50
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = {
51
NULL, gen_helper_sve_fabs_h,
52
gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
53
};
54
-TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
55
+static gen_helper_gvec_3 * const fabs_ah_fns[4] = {
56
+ NULL, gen_helper_sve_ah_fabs_h,
57
+ gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d,
58
+};
59
+TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz,
60
+ s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0)
61
62
static gen_helper_gvec_3 * const fneg_fns[4] = {
63
NULL, gen_helper_sve_fneg_h,
64
--
65
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Make the SVE FABD insn honour the FPCR.AH "don't negate the sign
2
of a NaN" semantics.
2
3
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-28-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/arm/helper.c | 32 --------------------------------
7
target/arm/tcg/helper-sve.h | 7 +++++++
9
target/arm/ptw.c | 28 ++++++++++++++++++++++++++++
8
target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++
10
2 files changed, 28 insertions(+), 32 deletions(-)
9
target/arm/tcg/translate-sve.c | 2 +-
10
3 files changed, 30 insertions(+), 1 deletion(-)
11
11
12
diff --git a/target/arm/helper.c b/target/arm/helper.c
12
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/helper.c
14
--- a/target/arm/tcg/helper-sve.h
15
+++ b/target/arm/helper.c
15
+++ b/target/arm/tcg/helper-sve.h
16
@@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el)
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG,
17
return env->cp15.sctlr_el[el];
17
DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG,
18
void, ptr, ptr, ptr, ptr, fpst, i32)
19
20
+DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG,
21
+ void, ptr, ptr, ptr, ptr, fpst, i32)
22
+DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, fpst, i32)
26
+
27
DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG,
28
void, ptr, ptr, ptr, ptr, fpst, i32)
29
DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG,
30
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/tcg/sve_helper.c
33
+++ b/target/arm/tcg/sve_helper.c
34
@@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
35
return float64_abs(float64_sub(a, b, s));
18
}
36
}
19
37
20
-#ifndef CONFIG_USER_ONLY
38
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
21
-/* Convert a possible stage1+2 MMU index into the appropriate
39
+static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat)
22
- * stage 1 MMU index
23
- */
24
-ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
25
-{
26
- switch (mmu_idx) {
27
- case ARMMMUIdx_SE10_0:
28
- return ARMMMUIdx_Stage1_SE0;
29
- case ARMMMUIdx_SE10_1:
30
- return ARMMMUIdx_Stage1_SE1;
31
- case ARMMMUIdx_SE10_1_PAN:
32
- return ARMMMUIdx_Stage1_SE1_PAN;
33
- case ARMMMUIdx_E10_0:
34
- return ARMMMUIdx_Stage1_E0;
35
- case ARMMMUIdx_E10_1:
36
- return ARMMMUIdx_Stage1_E1;
37
- case ARMMMUIdx_E10_1_PAN:
38
- return ARMMMUIdx_Stage1_E1_PAN;
39
- default:
40
- return mmu_idx;
41
- }
42
-}
43
-#endif /* !CONFIG_USER_ONLY */
44
-
45
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
46
{
47
if (regime_has_2_ranges(mmu_idx)) {
48
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env)
49
return arm_mmu_idx_el(env, arm_current_el(env));
50
}
51
52
-#ifndef CONFIG_USER_ONLY
53
-ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
54
-{
55
- return stage_1_mmu_idx(arm_mmu_idx(env));
56
-}
57
-#endif
58
-
59
static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
60
ARMMMUIdx mmu_idx,
61
CPUARMTBFlags flags)
62
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/ptw.c
65
+++ b/target/arm/ptw.c
66
@@ -XXX,XX +XXX,XX @@ unsigned int arm_pamax(ARMCPU *cpu)
67
return pamax_map[parange];
68
}
69
70
+/*
71
+ * Convert a possible stage1+2 MMU index into the appropriate stage 1 MMU index
72
+ */
73
+ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
74
+{
40
+{
75
+ switch (mmu_idx) {
41
+ float16 r = float16_sub(op1, op2, stat);
76
+ case ARMMMUIdx_SE10_0:
42
+ return float16_is_any_nan(r) ? r : float16_abs(r);
77
+ return ARMMMUIdx_Stage1_SE0;
78
+ case ARMMMUIdx_SE10_1:
79
+ return ARMMMUIdx_Stage1_SE1;
80
+ case ARMMMUIdx_SE10_1_PAN:
81
+ return ARMMMUIdx_Stage1_SE1_PAN;
82
+ case ARMMMUIdx_E10_0:
83
+ return ARMMMUIdx_Stage1_E0;
84
+ case ARMMMUIdx_E10_1:
85
+ return ARMMMUIdx_Stage1_E1;
86
+ case ARMMMUIdx_E10_1_PAN:
87
+ return ARMMMUIdx_Stage1_E1_PAN;
88
+ default:
89
+ return mmu_idx;
90
+ }
91
+}
43
+}
92
+
44
+
93
+ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
45
+static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat)
94
+{
46
+{
95
+ return stage_1_mmu_idx(arm_mmu_idx(env));
47
+ float32 r = float32_sub(op1, op2, stat);
48
+ return float32_is_any_nan(r) ? r : float32_abs(r);
96
+}
49
+}
97
+
50
+
98
static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
51
+static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat)
52
+{
53
+ float64 r = float64_sub(op1, op2, stat);
54
+ return float64_is_any_nan(r) ? r : float64_abs(r);
55
+}
56
+
57
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
58
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
59
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
60
+DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h)
61
+DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s)
62
+DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d)
63
64
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
99
{
65
{
100
return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
66
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/tcg/translate-sve.c
69
+++ b/target/arm/tcg/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
71
DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
72
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
73
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
74
-DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
75
+DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
76
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
77
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
78
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
101
--
79
--
102
2.25.1
80
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The negation steps in FCADD must honour FPCR.AH's "don't change the
2
sign of a NaN" semantics. Implement this in the same way we did for
3
the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field
4
passed to the helper and using that to decide whether to negate the
5
values.
2
6
3
We will need this over in sme_helper.c.
7
The construction of neg_imag and neg_real was done to make it easy
8
to apply both in parallel with two simple logical operations. This
9
changed with FPCR.AH, which is more complex than that. Switch to
10
an approach that follows the pseudocode more closely, by extracting
11
the 'rot=1' parameter from the SIMD data field and changing the
12
sign of the appropriate input value.
4
13
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Note that there was a naming issue with neg_imag and neg_real.
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
They were named backward, with neg_imag being non-zero for rot=1,
7
Message-id: 20220607203306.657998-19-richard.henderson@linaro.org
16
and vice versa. This was combined with reversed usage within the
17
loop, so that the negation in the end turned out correct.
18
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
21
---
10
target/arm/vec_internal.h | 13 +++++++++++++
22
target/arm/tcg/vec_internal.h | 17 ++++++++++++++
11
target/arm/vec_helper.c | 2 +-
23
target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++----------
12
2 files changed, 14 insertions(+), 1 deletion(-)
24
target/arm/tcg/translate-sve.c | 2 +-
25
3 files changed, 48 insertions(+), 13 deletions(-)
13
26
14
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
27
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
15
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/vec_internal.h
29
--- a/target/arm/tcg/vec_internal.h
17
+++ b/target/arm/vec_internal.h
30
+++ b/target/arm/tcg/vec_internal.h
18
@@ -XXX,XX +XXX,XX @@ uint64_t pmull_h(uint64_t op1, uint64_t op2);
31
@@ -XXX,XX +XXX,XX @@
32
#ifndef TARGET_ARM_VEC_INTERNAL_H
33
#define TARGET_ARM_VEC_INTERNAL_H
34
35
+#include "fpu/softfloat.h"
36
+
37
/*
38
* Note that vector data is stored in host-endian 64-bit chunks,
39
* so addressing units smaller than that needs a host-endian fixup.
40
@@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
19
*/
41
*/
20
uint64_t pmull_w(uint64_t op1, uint64_t op2);
42
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
21
43
22
+/**
44
+static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
23
+ * bfdotadd:
45
+{
24
+ * @sum: addend
46
+ return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
25
+ * @e1, @e2: multiplicand vectors
47
+}
26
+ *
48
+
27
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
49
+static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
28
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
50
+{
29
+ * slots and contain two Bfloat16 values each.
51
+ return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
30
+ *
52
+}
31
+ * Corresponds to the ARM pseudocode function BFDotAdd.
53
+
32
+ */
54
+static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
33
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
55
+{
56
+ return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
57
+}
34
+
58
+
35
#endif /* TARGET_ARM_VEC_INTERNAL_H */
59
#endif /* TARGET_ARM_VEC_INTERNAL_H */
36
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
60
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
37
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/vec_helper.c
62
--- a/target/arm/tcg/sve_helper.c
39
+++ b/target/arm/vec_helper.c
63
+++ b/target/arm/tcg/sve_helper.c
40
@@ -XXX,XX +XXX,XX @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
64
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
41
* BFloat16 Dot Product
42
*/
43
44
-static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
45
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
46
{
65
{
47
/* FPCR is ignored for BFDOT and BFMMLA. */
66
intptr_t j, i = simd_oprsz(desc);
48
float_status bf_status = {
67
uint64_t *g = vg;
68
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
69
- float16 neg_real = float16_chs(neg_imag);
70
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
71
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
72
73
do {
74
uint64_t pg = g[(i - 1) >> 6];
75
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
76
i -= 2 * sizeof(float16);
77
78
e0 = *(float16 *)(vn + H1_2(i));
79
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
80
+ e1 = *(float16 *)(vm + H1_2(j));
81
e2 = *(float16 *)(vn + H1_2(j));
82
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
83
+ e3 = *(float16 *)(vm + H1_2(i));
84
+
85
+ if (rot) {
86
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
87
+ } else {
88
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
89
+ }
90
91
if (likely((pg >> (i & 63)) & 1)) {
92
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
93
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
94
{
95
intptr_t j, i = simd_oprsz(desc);
96
uint64_t *g = vg;
97
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
98
- float32 neg_real = float32_chs(neg_imag);
99
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
100
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
101
102
do {
103
uint64_t pg = g[(i - 1) >> 6];
104
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
105
i -= 2 * sizeof(float32);
106
107
e0 = *(float32 *)(vn + H1_2(i));
108
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
109
+ e1 = *(float32 *)(vm + H1_2(j));
110
e2 = *(float32 *)(vn + H1_2(j));
111
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
112
+ e3 = *(float32 *)(vm + H1_2(i));
113
+
114
+ if (rot) {
115
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
116
+ } else {
117
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
118
+ }
119
120
if (likely((pg >> (i & 63)) & 1)) {
121
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
122
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
123
{
124
intptr_t j, i = simd_oprsz(desc);
125
uint64_t *g = vg;
126
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
127
- float64 neg_real = float64_chs(neg_imag);
128
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
129
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
130
131
do {
132
uint64_t pg = g[(i - 1) >> 6];
133
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
134
i -= 2 * sizeof(float64);
135
136
e0 = *(float64 *)(vn + H1_2(i));
137
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
138
+ e1 = *(float64 *)(vm + H1_2(j));
139
e2 = *(float64 *)(vn + H1_2(j));
140
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
141
+ e3 = *(float64 *)(vm + H1_2(i));
142
+
143
+ if (rot) {
144
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
145
+ } else {
146
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
147
+ }
148
149
if (likely((pg >> (i & 63)) & 1)) {
150
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
151
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/arm/tcg/translate-sve.c
154
+++ b/target/arm/tcg/translate-sve.c
155
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
156
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
157
};
158
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
159
- a->rd, a->rn, a->rm, a->pg, a->rot,
160
+ a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
161
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
162
163
#define DO_FMLA(NAME, name) \
49
--
164
--
50
2.25.1
165
2.34.1
diff view generated by jsdifflib
New patch
1
The negation steps in FCADD must honour FPCR.AH's "don't change the
2
sign of a NaN" semantics. Implement this by encoding FPCR.AH into
3
the SIMD data field passed to the helper and using that to decide
4
whether to negate the values.
1
5
6
The construction of neg_imag and neg_real was done to make it easy
7
to apply both in parallel with two simple logical operations. This
8
changed with FPCR.AH, which is more complex than that. Switch to
9
an approach closer to the pseudocode, where we extract the rot
10
parameter from the SIMD data word and negate the appropriate
11
input value.
12
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
16
target/arm/tcg/translate-a64.c | 10 +++++--
17
target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++---------------
18
2 files changed, 38 insertions(+), 26 deletions(-)
19
20
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/tcg/translate-a64.c
23
+++ b/target/arm/tcg/translate-a64.c
24
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
25
gen_helper_gvec_fcadds,
26
gen_helper_gvec_fcaddd,
27
};
28
-TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
29
-TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
30
+/*
31
+ * Encode FPCR.AH into the data so the helper knows whether the
32
+ * negations it does should avoid flipping the sign bit on a NaN
33
+ */
34
+TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
35
+ f_vector_fcadd)
36
+TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
37
+ f_vector_fcadd)
38
39
static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
40
{
41
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/tcg/vec_helper.c
44
+++ b/target/arm/tcg/vec_helper.c
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
46
float16 *d = vd;
47
float16 *n = vn;
48
float16 *m = vm;
49
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
50
- uint32_t neg_imag = neg_real ^ 1;
51
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
52
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
53
uintptr_t i;
54
55
- /* Shift boolean to the sign bit so we can xor to negate. */
56
- neg_real <<= 15;
57
- neg_imag <<= 15;
58
-
59
for (i = 0; i < opr_sz / 2; i += 2) {
60
float16 e0 = n[H2(i)];
61
- float16 e1 = m[H2(i + 1)] ^ neg_imag;
62
+ float16 e1 = m[H2(i + 1)];
63
float16 e2 = n[H2(i + 1)];
64
- float16 e3 = m[H2(i)] ^ neg_real;
65
+ float16 e3 = m[H2(i)];
66
+
67
+ if (rot) {
68
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
69
+ } else {
70
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
71
+ }
72
73
d[H2(i)] = float16_add(e0, e1, fpst);
74
d[H2(i + 1)] = float16_add(e2, e3, fpst);
75
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
76
float32 *d = vd;
77
float32 *n = vn;
78
float32 *m = vm;
79
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
80
- uint32_t neg_imag = neg_real ^ 1;
81
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
82
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
83
uintptr_t i;
84
85
- /* Shift boolean to the sign bit so we can xor to negate. */
86
- neg_real <<= 31;
87
- neg_imag <<= 31;
88
-
89
for (i = 0; i < opr_sz / 4; i += 2) {
90
float32 e0 = n[H4(i)];
91
- float32 e1 = m[H4(i + 1)] ^ neg_imag;
92
+ float32 e1 = m[H4(i + 1)];
93
float32 e2 = n[H4(i + 1)];
94
- float32 e3 = m[H4(i)] ^ neg_real;
95
+ float32 e3 = m[H4(i)];
96
+
97
+ if (rot) {
98
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
99
+ } else {
100
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
101
+ }
102
103
d[H4(i)] = float32_add(e0, e1, fpst);
104
d[H4(i + 1)] = float32_add(e2, e3, fpst);
105
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
106
float64 *d = vd;
107
float64 *n = vn;
108
float64 *m = vm;
109
- uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
110
- uint64_t neg_imag = neg_real ^ 1;
111
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
112
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
113
uintptr_t i;
114
115
- /* Shift boolean to the sign bit so we can xor to negate. */
116
- neg_real <<= 63;
117
- neg_imag <<= 63;
118
-
119
for (i = 0; i < opr_sz / 8; i += 2) {
120
float64 e0 = n[i];
121
- float64 e1 = m[i + 1] ^ neg_imag;
122
+ float64 e1 = m[i + 1];
123
float64 e2 = n[i + 1];
124
- float64 e3 = m[i] ^ neg_real;
125
+ float64 e3 = m[i];
126
+
127
+ if (rot) {
128
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
129
+ } else {
130
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
131
+ }
132
133
d[i] = float64_add(e0, e1, fpst);
134
d[i + 1] = float64_add(e2, e3, fpst);
135
--
136
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Handle the FPCR.AH semantics that we do not change the sign of an
2
input NaN in the FRECPS and FRSQRTS scalar insns, by providing
3
new helper functions that do the CHS part of the operation
4
differently.
2
5
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Since the extra helper functions would be very repetitive if written
4
Message-id: 20220604040607.269301-23-richard.henderson@linaro.org
7
out longhand, we condense them and the existing non-AH helpers into
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
being emitted via macros.
9
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
12
---
8
target/arm/ptw.h | 10 ------
13
target/arm/tcg/helper-a64.h | 6 ++
9
target/arm/helper.c | 77 ------------------------------------------
14
target/arm/tcg/vec_internal.h | 18 ++++++
10
target/arm/ptw.c | 81 +++++++++++++++++++++++++++++++++++++++++++++
15
target/arm/tcg/helper-a64.c | 115 ++++++++++++---------------------
11
3 files changed, 81 insertions(+), 87 deletions(-)
16
target/arm/tcg/translate-a64.c | 25 +++++--
17
4 files changed, 83 insertions(+), 81 deletions(-)
12
18
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
19
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
21
--- a/target/arm/tcg/helper-a64.h
16
+++ b/target/arm/ptw.h
22
+++ b/target/arm/tcg/helper-a64.h
17
@@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
18
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
24
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
19
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
25
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
20
26
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
21
-int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
27
+DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
22
- int ap, int domain_prot);
28
+DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
23
-int simple_ap_to_rw_prot_is_user(int ap, bool is_user);
29
+DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
24
-
30
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
25
-static inline int
31
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
26
-simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
32
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
27
-{
33
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
28
- return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
34
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
29
-}
35
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
30
-
36
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
31
#endif /* !CONFIG_USER_ONLY */
37
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
32
#endif /* TARGET_ARM_PTW_H */
38
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
33
diff --git a/target/arm/helper.c b/target/arm/helper.c
39
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
34
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/helper.c
41
--- a/target/arm/tcg/vec_internal.h
36
+++ b/target/arm/helper.c
42
+++ b/target/arm/tcg/vec_internal.h
37
@@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
43
@@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
38
g_assert_not_reached();
44
*/
39
}
45
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
40
}
41
-
42
-/* Translate section/page access permissions to page
43
- * R/W protection flags
44
- *
45
- * @env: CPUARMState
46
- * @mmu_idx: MMU index indicating required translation regime
47
- * @ap: The 3-bit access permissions (AP[2:0])
48
- * @domain_prot: The 2-bit domain access permissions
49
- */
50
-int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot)
51
-{
52
- bool is_user = regime_is_user(env, mmu_idx);
53
-
54
- if (domain_prot == 3) {
55
- return PAGE_READ | PAGE_WRITE;
56
- }
57
-
58
- switch (ap) {
59
- case 0:
60
- if (arm_feature(env, ARM_FEATURE_V7)) {
61
- return 0;
62
- }
63
- switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) {
64
- case SCTLR_S:
65
- return is_user ? 0 : PAGE_READ;
66
- case SCTLR_R:
67
- return PAGE_READ;
68
- default:
69
- return 0;
70
- }
71
- case 1:
72
- return is_user ? 0 : PAGE_READ | PAGE_WRITE;
73
- case 2:
74
- if (is_user) {
75
- return PAGE_READ;
76
- } else {
77
- return PAGE_READ | PAGE_WRITE;
78
- }
79
- case 3:
80
- return PAGE_READ | PAGE_WRITE;
81
- case 4: /* Reserved. */
82
- return 0;
83
- case 5:
84
- return is_user ? 0 : PAGE_READ;
85
- case 6:
86
- return PAGE_READ;
87
- case 7:
88
- if (!arm_feature(env, ARM_FEATURE_V6K)) {
89
- return 0;
90
- }
91
- return PAGE_READ;
92
- default:
93
- g_assert_not_reached();
94
- }
95
-}
96
-
97
-/* Translate section/page access permissions to page
98
- * R/W protection flags.
99
- *
100
- * @ap: The 2-bit simple AP (AP[2:1])
101
- * @is_user: TRUE if accessing from PL0
102
- */
103
-int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
104
-{
105
- switch (ap) {
106
- case 0:
107
- return is_user ? 0 : PAGE_READ | PAGE_WRITE;
108
- case 1:
109
- return PAGE_READ | PAGE_WRITE;
110
- case 2:
111
- return is_user ? 0 : PAGE_READ;
112
- case 3:
113
- return PAGE_READ;
114
- default:
115
- g_assert_not_reached();
116
- }
117
-}
118
#endif /* !CONFIG_USER_ONLY */
119
120
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
121
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/arm/ptw.c
124
+++ b/target/arm/ptw.c
125
@@ -XXX,XX +XXX,XX @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
126
return true;
127
}
128
46
129
+/*
47
+/*
130
+ * Translate section/page access permissions to page R/W protection flags
48
+ * Negate as for FPCR.AH=1 -- do not negate NaNs.
131
+ * @env: CPUARMState
132
+ * @mmu_idx: MMU index indicating required translation regime
133
+ * @ap: The 3-bit access permissions (AP[2:0])
134
+ * @domain_prot: The 2-bit domain access permissions
135
+ */
49
+ */
136
+static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
50
+static inline float16 float16_ah_chs(float16 a)
137
+ int ap, int domain_prot)
138
+{
51
+{
139
+ bool is_user = regime_is_user(env, mmu_idx);
52
+ return float16_is_any_nan(a) ? a : float16_chs(a);
140
+
141
+ if (domain_prot == 3) {
142
+ return PAGE_READ | PAGE_WRITE;
143
+ }
144
+
145
+ switch (ap) {
146
+ case 0:
147
+ if (arm_feature(env, ARM_FEATURE_V7)) {
148
+ return 0;
149
+ }
150
+ switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) {
151
+ case SCTLR_S:
152
+ return is_user ? 0 : PAGE_READ;
153
+ case SCTLR_R:
154
+ return PAGE_READ;
155
+ default:
156
+ return 0;
157
+ }
158
+ case 1:
159
+ return is_user ? 0 : PAGE_READ | PAGE_WRITE;
160
+ case 2:
161
+ if (is_user) {
162
+ return PAGE_READ;
163
+ } else {
164
+ return PAGE_READ | PAGE_WRITE;
165
+ }
166
+ case 3:
167
+ return PAGE_READ | PAGE_WRITE;
168
+ case 4: /* Reserved. */
169
+ return 0;
170
+ case 5:
171
+ return is_user ? 0 : PAGE_READ;
172
+ case 6:
173
+ return PAGE_READ;
174
+ case 7:
175
+ if (!arm_feature(env, ARM_FEATURE_V6K)) {
176
+ return 0;
177
+ }
178
+ return PAGE_READ;
179
+ default:
180
+ g_assert_not_reached();
181
+ }
182
+}
53
+}
183
+
54
+
184
+/*
55
+static inline float32 float32_ah_chs(float32 a)
185
+ * Translate section/page access permissions to page R/W protection flags.
186
+ * @ap: The 2-bit simple AP (AP[2:1])
187
+ * @is_user: TRUE if accessing from PL0
188
+ */
189
+static int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
190
+{
56
+{
191
+ switch (ap) {
57
+ return float32_is_any_nan(a) ? a : float32_chs(a);
192
+ case 0:
193
+ return is_user ? 0 : PAGE_READ | PAGE_WRITE;
194
+ case 1:
195
+ return PAGE_READ | PAGE_WRITE;
196
+ case 2:
197
+ return is_user ? 0 : PAGE_READ;
198
+ case 3:
199
+ return PAGE_READ;
200
+ default:
201
+ g_assert_not_reached();
202
+ }
203
+}
58
+}
204
+
59
+
205
+static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
60
+static inline float64 float64_ah_chs(float64 a)
206
+{
61
+{
207
+ return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
62
+ return float64_is_any_nan(a) ? a : float64_chs(a);
208
+}
63
+}
209
+
64
+
210
static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
65
static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
211
MMUAccessType access_type, ARMMMUIdx mmu_idx,
66
{
212
hwaddr *phys_ptr, int *prot,
67
return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
68
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/tcg/helper-a64.c
71
+++ b/target/arm/tcg/helper-a64.c
72
@@ -XXX,XX +XXX,XX @@
73
#ifdef CONFIG_USER_ONLY
74
#include "user/page-protection.h"
75
#endif
76
+#include "vec_internal.h"
77
78
/* C2.4.7 Multiply and divide */
79
/* special cases for 0 and LLONG_MIN are mandated by the standard */
80
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
81
return -float64_lt(b, a, fpst);
82
}
83
84
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
85
+/*
86
+ * Reciprocal step and sqrt step. Note that unlike the A32/T32
87
* versions, these do a fully fused multiply-add or
88
* multiply-add-and-halve.
89
+ * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
90
*/
91
-
92
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
93
-{
94
- a = float16_squash_input_denormal(a, fpst);
95
- b = float16_squash_input_denormal(b, fpst);
96
-
97
- a = float16_chs(a);
98
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
99
- (float16_is_infinity(b) && float16_is_zero(a))) {
100
- return float16_two;
101
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
102
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
103
+ { \
104
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
105
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
106
+ a = FLOATTYPE ## _ ## CHSFN(a); \
107
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
108
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
109
+ return FLOATTYPE ## _two; \
110
+ } \
111
+ return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
112
}
113
- return float16_muladd(a, b, float16_two, 0, fpst);
114
-}
115
116
-float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
117
-{
118
- a = float32_squash_input_denormal(a, fpst);
119
- b = float32_squash_input_denormal(b, fpst);
120
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
121
+DO_RECPS(recpsf_f32, float32, float32, chs)
122
+DO_RECPS(recpsf_f64, float64, float64, chs)
123
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
124
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
125
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
126
127
- a = float32_chs(a);
128
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
129
- (float32_is_infinity(b) && float32_is_zero(a))) {
130
- return float32_two;
131
- }
132
- return float32_muladd(a, b, float32_two, 0, fpst);
133
-}
134
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
135
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
136
+ { \
137
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
138
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
139
+ a = FLOATTYPE ## _ ## CHSFN(a); \
140
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
141
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
142
+ return FLOATTYPE ## _one_point_five; \
143
+ } \
144
+ return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
145
+ -1, 0, fpst); \
146
+ } \
147
148
-float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
149
-{
150
- a = float64_squash_input_denormal(a, fpst);
151
- b = float64_squash_input_denormal(b, fpst);
152
-
153
- a = float64_chs(a);
154
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
155
- (float64_is_infinity(b) && float64_is_zero(a))) {
156
- return float64_two;
157
- }
158
- return float64_muladd(a, b, float64_two, 0, fpst);
159
-}
160
-
161
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
162
-{
163
- a = float16_squash_input_denormal(a, fpst);
164
- b = float16_squash_input_denormal(b, fpst);
165
-
166
- a = float16_chs(a);
167
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
168
- (float16_is_infinity(b) && float16_is_zero(a))) {
169
- return float16_one_point_five;
170
- }
171
- return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
172
-}
173
-
174
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
175
-{
176
- a = float32_squash_input_denormal(a, fpst);
177
- b = float32_squash_input_denormal(b, fpst);
178
-
179
- a = float32_chs(a);
180
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
181
- (float32_is_infinity(b) && float32_is_zero(a))) {
182
- return float32_one_point_five;
183
- }
184
- return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
185
-}
186
-
187
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
188
-{
189
- a = float64_squash_input_denormal(a, fpst);
190
- b = float64_squash_input_denormal(b, fpst);
191
-
192
- a = float64_chs(a);
193
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
194
- (float64_is_infinity(b) && float64_is_zero(a))) {
195
- return float64_one_point_five;
196
- }
197
- return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
198
-}
199
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
200
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
201
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
202
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
203
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
204
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
205
206
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
207
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
208
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
209
index XXXXXXX..XXXXXXX 100644
210
--- a/target/arm/tcg/translate-a64.c
211
+++ b/target/arm/tcg/translate-a64.c
212
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
213
FPST_A64_F16 : FPST_A64);
214
}
215
216
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
217
- int mergereg)
218
+static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
219
+ const FPScalar *fnormal, const FPScalar *fah,
220
+ int mergereg)
221
{
222
- return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
223
- select_ah_fpst(s, a->esz));
224
+ return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
225
+ mergereg, select_ah_fpst(s, a->esz));
226
}
227
228
/* Some insns need to call different helpers when FPCR.AH == 1 */
229
@@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = {
230
gen_helper_recpsf_f32,
231
gen_helper_recpsf_f64,
232
};
233
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
234
+static const FPScalar f_scalar_ah_frecps = {
235
+ gen_helper_recpsf_ah_f16,
236
+ gen_helper_recpsf_ah_f32,
237
+ gen_helper_recpsf_ah_f64,
238
+};
239
+TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
240
+ &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
241
242
static const FPScalar f_scalar_frsqrts = {
243
gen_helper_rsqrtsf_f16,
244
gen_helper_rsqrtsf_f32,
245
gen_helper_rsqrtsf_f64,
246
};
247
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
248
+static const FPScalar f_scalar_ah_frsqrts = {
249
+ gen_helper_rsqrtsf_ah_f16,
250
+ gen_helper_rsqrtsf_ah_f32,
251
+ gen_helper_rsqrtsf_ah_f64,
252
+};
253
+TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
254
+ &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
255
256
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
257
const FPScalar *f, bool swap)
213
--
258
--
214
2.25.1
259
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics
2
in the vector versions of FRECPS and FRSQRTS, by implementing
3
new vector wrappers that call the _ah_ scalar helpers.
2
4
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-14-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
target/arm/ptw.h | 4 ++--
8
target/arm/tcg/helper-sve.h | 14 ++++++++++++++
9
target/arm/helper.c | 26 +-------------------------
9
target/arm/tcg/translate-a64.c | 21 ++++++++++++++++-----
10
target/arm/ptw.c | 23 +++++++++++++++++++++++
10
target/arm/tcg/translate-sve.c | 7 ++++++-
11
3 files changed, 26 insertions(+), 27 deletions(-)
11
target/arm/tcg/vec_helper.c | 8 ++++++++
12
4 files changed, 44 insertions(+), 6 deletions(-)
12
13
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
14
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
16
--- a/target/arm/tcg/helper-sve.h
16
+++ b/target/arm/ptw.h
17
+++ b/target/arm/tcg/helper-sve.h
17
@@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
18
19
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
19
bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
20
void, ptr, ptr, ptr, fpst, i32)
20
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
21
21
+uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
22
+DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, fpst, i32)
24
+DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, fpst, i32)
26
+DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG,
27
+ void, ptr, ptr, ptr, fpst, i32)
22
+
28
+
23
ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
29
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG,
24
ARMCacheAttrs s1, ARMCacheAttrs s2);
30
+ void, ptr, ptr, ptr, fpst, i32)
25
31
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG,
26
-bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
32
+ void, ptr, ptr, ptr, fpst, i32)
27
- uint32_t *table, uint32_t address);
33
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG,
28
int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
34
+ void, ptr, ptr, ptr, fpst, i32)
29
int ap, int domain_prot);
35
+
30
int simple_ap_to_rw_prot_is_user(int ap, bool is_user);
36
DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
31
diff --git a/target/arm/helper.c b/target/arm/helper.c
37
void, ptr, ptr, ptr, fpst, i32)
38
DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
39
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
32
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/helper.c
41
--- a/target/arm/tcg/translate-a64.c
34
+++ b/target/arm/helper.c
42
+++ b/target/arm/tcg/translate-a64.c
35
@@ -XXX,XX +XXX,XX @@ static inline bool regime_translation_big_endian(CPUARMState *env,
43
@@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
44
return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
36
}
45
}
37
46
38
/* Return the TTBR associated with this translation regime */
47
-static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data,
39
-static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
48
- gen_helper_gvec_3_ptr * const f[3])
40
- int ttbrn)
49
+static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
41
+uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
50
+ gen_helper_gvec_3_ptr * const fnormal[3],
51
+ gen_helper_gvec_3_ptr * const fah[3])
42
{
52
{
43
if (mmu_idx == ARMMMUIdx_Stage2) {
53
- return do_fp3_vector_with_fpsttype(s, a, data, f,
44
return env->cp15.vttbr_el2;
54
+ return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
45
@@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
55
select_ah_fpst(s, a->esz));
46
return prot_rw | PAGE_EXEC;
47
}
56
}
48
57
49
-bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
58
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
50
- uint32_t *table, uint32_t address)
59
gen_helper_gvec_recps_s,
51
-{
60
gen_helper_gvec_recps_d,
52
- /* Note that we can only get here for an AArch32 PL0/PL1 lookup */
61
};
53
- TCR *tcr = regime_tcr(env, mmu_idx);
62
-TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps)
54
-
63
+static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
55
- if (address & tcr->mask) {
64
+ gen_helper_gvec_ah_recps_h,
56
- if (tcr->raw_tcr & TTBCR_PD1) {
65
+ gen_helper_gvec_ah_recps_s,
57
- /* Translation table walk disabled for TTBR1 */
66
+ gen_helper_gvec_ah_recps_d,
58
- return false;
67
+};
59
- }
68
+TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
60
- *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000;
69
61
- } else {
70
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
62
- if (tcr->raw_tcr & TTBCR_PD0) {
71
gen_helper_gvec_rsqrts_h,
63
- /* Translation table walk disabled for TTBR0 */
72
gen_helper_gvec_rsqrts_s,
64
- return false;
73
gen_helper_gvec_rsqrts_d,
65
- }
74
};
66
- *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask;
75
-TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts)
67
- }
76
+static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
68
- *table |= (address >> 18) & 0x3ffc;
77
+ gen_helper_gvec_ah_rsqrts_h,
69
- return true;
78
+ gen_helper_gvec_ah_rsqrts_s,
70
-}
79
+ gen_helper_gvec_ah_rsqrts_d,
71
-
80
+};
72
static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
81
+TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
73
{
82
74
/*
83
static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
75
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
84
gen_helper_gvec_faddp_h,
85
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
76
index XXXXXXX..XXXXXXX 100644
86
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/ptw.c
87
--- a/target/arm/tcg/translate-sve.c
78
+++ b/target/arm/ptw.c
88
+++ b/target/arm/tcg/translate-sve.c
79
@@ -XXX,XX +XXX,XX @@
89
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
80
#include "ptw.h"
90
NULL, gen_helper_gvec_##name##_h, \
81
91
gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
82
92
}; \
83
+static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
93
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0)
84
+ uint32_t *table, uint32_t address)
94
+ static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \
85
+{
95
+ NULL, gen_helper_gvec_ah_##name##_h, \
86
+ /* Note that we can only get here for an AArch32 PL0/PL1 lookup */
96
+ gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \
87
+ TCR *tcr = regime_tcr(env, mmu_idx);
97
+ }; \
98
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \
99
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0)
100
101
DO_FP3(FADD_zzz, fadd)
102
DO_FP3(FSUB_zzz, fsub)
103
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/arm/tcg/vec_helper.c
106
+++ b/target/arm/tcg/vec_helper.c
107
@@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
108
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
109
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
110
111
+DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16)
112
+DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32)
113
+DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64)
88
+
114
+
89
+ if (address & tcr->mask) {
115
+DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16)
90
+ if (tcr->raw_tcr & TTBCR_PD1) {
116
+DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32)
91
+ /* Translation table walk disabled for TTBR1 */
117
+DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64)
92
+ return false;
93
+ }
94
+ *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000;
95
+ } else {
96
+ if (tcr->raw_tcr & TTBCR_PD0) {
97
+ /* Translation table walk disabled for TTBR0 */
98
+ return false;
99
+ }
100
+ *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask;
101
+ }
102
+ *table |= (address >> 18) & 0x3ffc;
103
+ return true;
104
+}
105
+
118
+
106
static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
119
DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
107
MMUAccessType access_type, ARMMMUIdx mmu_idx,
120
DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
108
hwaddr *phys_ptr, int *prot,
121
DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
109
--
122
--
110
2.25.1
123
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS
2
(indexed). We do this by creating 6 new helpers, which allow us to
3
do the negation either by XOR (for AH=0) or by muladd flags
4
(for AH=1).
2
5
3
Export all of the support functions for performing bulk
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
fault analysis on a set of elements at contiguous addresses
7
[PMM: Mostly from RTH's patch; error in index order into fns[][]
5
controlled by a predicate.
8
fixed]
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
target/arm/helper.h | 14 ++++++++++++++
12
target/arm/tcg/translate-a64.c | 17 +++++++++++------
13
target/arm/tcg/translate-sve.c | 31 +++++++++++++++++--------------
14
target/arm/tcg/vec_helper.c | 24 +++++++++++++++---------
15
4 files changed, 57 insertions(+), 29 deletions(-)
6
16
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220607203306.657998-15-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/sve_ldst_internal.h | 94 ++++++++++++++++++++++++++++++++++
13
target/arm/sve_helper.c | 87 ++++++-------------------------
14
2 files changed, 111 insertions(+), 70 deletions(-)
15
16
diff --git a/target/arm/sve_ldst_internal.h b/target/arm/sve_ldst_internal.h
17
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/sve_ldst_internal.h
19
--- a/target/arm/helper.h
19
+++ b/target/arm/sve_ldst_internal.h
20
+++ b/target/arm/helper.h
20
@@ -XXX,XX +XXX,XX @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
21
#undef DO_LD_PRIM_2
22
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
22
#undef DO_ST_PRIM_2
23
void, ptr, ptr, ptr, ptr, fpst, i32)
23
24
24
+/*
25
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
25
+ * Resolve the guest virtual address to info->host and info->flags.
26
+ void, ptr, ptr, ptr, ptr, fpst, i32)
26
+ * If @nofault, return false if the page is invalid, otherwise
27
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
27
+ * exit via page fault exception.
28
+ void, ptr, ptr, ptr, ptr, fpst, i32)
28
+ */
29
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, fpst, i32)
29
+
31
+
30
+typedef struct {
32
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
31
+ void *host;
33
+ void, ptr, ptr, ptr, ptr, fpst, i32)
32
+ int flags;
34
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
33
+ MemTxAttrs attrs;
35
+ void, ptr, ptr, ptr, ptr, fpst, i32)
34
+} SVEHostPage;
36
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, fpst, i32)
35
+
38
+
36
+bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
39
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
37
+ target_ulong addr, int mem_off, MMUAccessType access_type,
40
void, ptr, ptr, ptr, ptr, i32)
38
+ int mmu_idx, uintptr_t retaddr);
41
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
39
+
42
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
40
+/*
41
+ * Analyse contiguous data, protected by a governing predicate.
42
+ */
43
+
44
+typedef enum {
45
+ FAULT_NO,
46
+ FAULT_FIRST,
47
+ FAULT_ALL,
48
+} SVEContFault;
49
+
50
+typedef struct {
51
+ /*
52
+ * First and last element wholly contained within the two pages.
53
+ * mem_off_first[0] and reg_off_first[0] are always set >= 0.
54
+ * reg_off_last[0] may be < 0 if the first element crosses pages.
55
+ * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1]
56
+ * are set >= 0 only if there are complete elements on a second page.
57
+ *
58
+ * The reg_off_* offsets are relative to the internal vector register.
59
+ * The mem_off_first offset is relative to the memory address; the
60
+ * two offsets are different when a load operation extends, a store
61
+ * operation truncates, or for multi-register operations.
62
+ */
63
+ int16_t mem_off_first[2];
64
+ int16_t reg_off_first[2];
65
+ int16_t reg_off_last[2];
66
+
67
+ /*
68
+ * One element that is misaligned and spans both pages,
69
+ * or -1 if there is no such active element.
70
+ */
71
+ int16_t mem_off_split;
72
+ int16_t reg_off_split;
73
+
74
+ /*
75
+ * The byte offset at which the entire operation crosses a page boundary.
76
+ * Set >= 0 if and only if the entire operation spans two pages.
77
+ */
78
+ int16_t page_split;
79
+
80
+ /* TLB data for the two pages. */
81
+ SVEHostPage page[2];
82
+} SVEContLdSt;
83
+
84
+/*
85
+ * Find first active element on each page, and a loose bound for the
86
+ * final element on each page. Identify any single element that spans
87
+ * the page boundary. Return true if there are any active elements.
88
+ */
89
+bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
90
+ intptr_t reg_max, int esz, int msize);
91
+
92
+/*
93
+ * Resolve the guest virtual addresses to info->page[].
94
+ * Control the generation of page faults with @fault. Return false if
95
+ * there is no work to do, which can only happen with @fault == FAULT_NO.
96
+ */
97
+bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
98
+ CPUARMState *env, target_ulong addr,
99
+ MMUAccessType access_type, uintptr_t retaddr);
100
+
101
+#ifdef CONFIG_USER_ONLY
102
+static inline void
103
+sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, uint64_t *vg,
104
+ target_ulong addr, int esize, int msize,
105
+ int wp_access, uintptr_t retaddr)
106
+{ }
107
+#else
108
+void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
109
+ uint64_t *vg, target_ulong addr,
110
+ int esize, int msize, int wp_access,
111
+ uintptr_t retaddr);
112
+#endif
113
+
114
+void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, uint64_t *vg,
115
+ target_ulong addr, int esize, int msize,
116
+ uint32_t mtedesc, uintptr_t ra);
117
+
118
#endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */
119
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
120
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/sve_helper.c
44
--- a/target/arm/tcg/translate-a64.c
122
+++ b/target/arm/sve_helper.c
45
+++ b/target/arm/tcg/translate-a64.c
123
@@ -XXX,XX +XXX,XX @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
46
@@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
124
* exit via page fault exception.
47
125
*/
48
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
126
127
-typedef struct {
128
- void *host;
129
- int flags;
130
- MemTxAttrs attrs;
131
-} SVEHostPage;
132
-
133
-static bool sve_probe_page(SVEHostPage *info, bool nofault,
134
- CPUARMState *env, target_ulong addr,
135
- int mem_off, MMUAccessType access_type,
136
- int mmu_idx, uintptr_t retaddr)
137
+bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
138
+ target_ulong addr, int mem_off, MMUAccessType access_type,
139
+ int mmu_idx, uintptr_t retaddr)
140
{
49
{
141
int flags;
50
- static gen_helper_gvec_4_ptr * const fns[3] = {
142
51
- gen_helper_gvec_fmla_idx_h,
143
@@ -XXX,XX +XXX,XX @@ static bool sve_probe_page(SVEHostPage *info, bool nofault,
52
- gen_helper_gvec_fmla_idx_s,
53
- gen_helper_gvec_fmla_idx_d,
54
+ static gen_helper_gvec_4_ptr * const fns[3][3] = {
55
+ { gen_helper_gvec_fmla_idx_h,
56
+ gen_helper_gvec_fmla_idx_s,
57
+ gen_helper_gvec_fmla_idx_d },
58
+ { gen_helper_gvec_fmls_idx_h,
59
+ gen_helper_gvec_fmls_idx_s,
60
+ gen_helper_gvec_fmls_idx_d },
61
+ { gen_helper_gvec_ah_fmls_idx_h,
62
+ gen_helper_gvec_ah_fmls_idx_s,
63
+ gen_helper_gvec_ah_fmls_idx_d },
64
};
65
MemOp esz = a->esz;
66
int check = fp_access_check_vector_hsd(s, a->q, esz);
67
@@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
68
69
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
70
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
71
- (a->idx << 1) | neg,
72
- fns[esz - 1]);
73
+ a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
144
return true;
74
return true;
145
}
75
}
146
76
147
-
77
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
148
-/*
78
index XXXXXXX..XXXXXXX 100644
149
- * Analyse contiguous data, protected by a governing predicate.
79
--- a/target/arm/tcg/translate-sve.c
150
- */
80
+++ b/target/arm/tcg/translate-sve.c
151
-
81
@@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
152
-typedef enum {
82
*** SVE Floating Point Multiply-Add Indexed Group
153
- FAULT_NO,
83
*/
154
- FAULT_FIRST,
84
155
- FAULT_ALL,
85
-static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
156
-} SVEContFault;
86
-{
157
-
87
- static gen_helper_gvec_4_ptr * const fns[4] = {
158
-typedef struct {
88
- NULL,
159
- /*
89
- gen_helper_gvec_fmla_idx_h,
160
- * First and last element wholly contained within the two pages.
90
- gen_helper_gvec_fmla_idx_s,
161
- * mem_off_first[0] and reg_off_first[0] are always set >= 0.
91
- gen_helper_gvec_fmla_idx_d,
162
- * reg_off_last[0] may be < 0 if the first element crosses pages.
92
- };
163
- * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1]
93
- return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
164
- * are set >= 0 only if there are complete elements on a second page.
94
- (a->index << 1) | sub,
165
- *
95
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
166
- * The reg_off_* offsets are relative to the internal vector register.
96
-}
167
- * The mem_off_first offset is relative to the memory address; the
97
+static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
168
- * two offsets are different when a load operation extends, a store
98
+ NULL, gen_helper_gvec_fmla_idx_h,
169
- * operation truncates, or for multi-register operations.
99
+ gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
170
- */
100
+};
171
- int16_t mem_off_first[2];
101
+TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
172
- int16_t reg_off_first[2];
102
+ fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
173
- int16_t reg_off_last[2];
103
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
174
-
104
175
- /*
105
-TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
176
- * One element that is misaligned and spans both pages,
106
-TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
177
- * or -1 if there is no such active element.
107
+static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
178
- */
108
+ { NULL, NULL },
179
- int16_t mem_off_split;
109
+ { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
180
- int16_t reg_off_split;
110
+ { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
181
-
111
+ { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
182
- /*
112
+};
183
- * The byte offset at which the entire operation crosses a page boundary.
113
+TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
184
- * Set >= 0 if and only if the entire operation spans two pages.
114
+ fmls_idx_fns[a->esz][s->fpcr_ah],
185
- */
115
+ a->rd, a->rn, a->rm, a->ra, a->index,
186
- int16_t page_split;
116
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
187
-
117
188
- /* TLB data for the two pages. */
189
- SVEHostPage page[2];
190
-} SVEContLdSt;
191
-
192
/*
118
/*
193
* Find first active element on each page, and a loose bound for the
119
*** SVE Floating Point Multiply Indexed Group
194
* final element on each page. Identify any single element that spans
120
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
195
* the page boundary. Return true if there are any active elements.
121
index XXXXXXX..XXXXXXX 100644
196
*/
122
--- a/target/arm/tcg/vec_helper.c
197
-static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr,
123
+++ b/target/arm/tcg/vec_helper.c
198
- uint64_t *vg, intptr_t reg_max,
124
@@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
199
- int esz, int msize)
125
200
+bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
126
#undef DO_FMUL_IDX
201
+ intptr_t reg_max, int esz, int msize)
127
202
{
128
-#define DO_FMLA_IDX(NAME, TYPE, H) \
203
const int esize = 1 << esz;
129
+#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
204
const uint64_t pg_mask = pred_esz_masks[esz];
130
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
205
@@ -XXX,XX +XXX,XX @@ static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr,
131
float_status *stat, uint32_t desc) \
206
* Control the generation of page faults with @fault. Return false if
132
{ \
207
* there is no work to do, which can only happen with @fault == FAULT_NO.
133
intptr_t i, j, oprsz = simd_oprsz(desc); \
208
*/
134
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
209
-static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
135
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
210
- CPUARMState *env, target_ulong addr,
136
- intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
211
- MMUAccessType access_type, uintptr_t retaddr)
137
+ intptr_t idx = simd_data(desc); \
212
+bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
138
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
213
+ CPUARMState *env, target_ulong addr,
139
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
214
+ MMUAccessType access_type, uintptr_t retaddr)
140
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
215
{
141
TYPE mm = m[H(i + idx)]; \
216
int mmu_idx = cpu_mmu_index(env, false);
142
for (j = 0; j < segment; j++) { \
217
int mem_off = info->mem_off_first[0];
143
- d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
218
@@ -XXX,XX +XXX,XX @@ static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
144
- mm, a[i + j], 0, stat); \
219
return have_work;
145
+ d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
146
+ a[i + j], NEGF, stat); \
147
} \
148
} \
149
clear_tail(d, oprsz, simd_maxsz(desc)); \
220
}
150
}
221
151
222
-static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
152
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
223
- uint64_t *vg, target_ulong addr,
153
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
224
- int esize, int msize, int wp_access,
154
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
225
- uintptr_t retaddr)
155
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
226
-{
156
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
227
#ifndef CONFIG_USER_ONLY
157
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
228
+void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
158
+
229
+ uint64_t *vg, target_ulong addr,
159
+DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
230
+ int esize, int msize, int wp_access,
160
+DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
231
+ uintptr_t retaddr)
161
+DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
232
+{
162
+
233
intptr_t mem_off, reg_off, reg_last;
163
+DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
234
int flags0 = info->page[0].flags;
164
+DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
235
int flags1 = info->page[1].flags;
165
+DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
236
@@ -XXX,XX +XXX,XX @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
166
237
} while (reg_off & 63);
167
#undef DO_FMLA_IDX
238
} while (reg_off <= reg_last);
239
}
240
-#endif
241
}
242
+#endif
243
244
-static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env,
245
- uint64_t *vg, target_ulong addr, int esize,
246
- int msize, uint32_t mtedesc, uintptr_t ra)
247
+void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env,
248
+ uint64_t *vg, target_ulong addr, int esize,
249
+ int msize, uint32_t mtedesc, uintptr_t ra)
250
{
251
intptr_t mem_off, reg_off, reg_last;
252
168
253
--
169
--
254
2.25.1
170
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics
2
in FMLS (vector), by implementing a new set of helpers for
3
the AH=1 case.
2
4
3
Move the ptw load functions, plus 3 common subroutines:
5
The float_muladd_negate_product flag produces the same result
4
S1_ptw_translate, ptw_attrs_are_device, and regime_translation_big_endian.
6
as negating either of the multiplication operands, assuming
5
This also allows get_phys_addr_lpae to become static again.
7
neither of the operands are NaNs. But since FEAT_AFP does not
8
negate NaNs, this behaviour is exactly what we need.
6
9
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220604040607.269301-17-richard.henderson@linaro.org
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
---
12
target/arm/ptw.h | 13 ----
13
target/arm/helper.h | 4 ++++
13
target/arm/helper.c | 141 --------------------------------------
14
target/arm/tcg/translate-a64.c | 7 ++++++-
14
target/arm/ptw.c | 160 ++++++++++++++++++++++++++++++++++++++++++--
15
target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++
15
3 files changed, 154 insertions(+), 160 deletions(-)
16
3 files changed, 32 insertions(+), 1 deletion(-)
16
17
17
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
18
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/ptw.h
20
--- a/target/arm/helper.h
20
+++ b/target/arm/ptw.h
21
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
22
23
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
23
extern const uint8_t pamax_map[7];
24
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
24
25
25
-uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
26
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
26
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi);
27
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
27
-uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
28
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
28
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi);
29
+
29
-
30
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
30
bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
31
void, ptr, ptr, ptr, fpst, i32)
31
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
32
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
32
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
33
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
33
@@ -XXX,XX +XXX,XX @@ int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0);
34
int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
35
int ap, int ns, int xn, int pxn);
36
37
-bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
38
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
39
- bool s1_is_el0,
40
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
41
- target_ulong *page_size_ptr,
42
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
43
- __attribute__((nonnull));
44
-
45
#endif /* !CONFIG_USER_ONLY */
46
#endif /* TARGET_ARM_PTW_H */
47
diff --git a/target/arm/helper.c b/target/arm/helper.c
48
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/helper.c
35
--- a/target/arm/tcg/translate-a64.c
50
+++ b/target/arm/helper.c
36
+++ b/target/arm/tcg/translate-a64.c
51
@@ -XXX,XX +XXX,XX @@ bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx)
37
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
52
return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
38
gen_helper_gvec_vfms_s,
39
gen_helper_gvec_vfms_d,
40
};
41
-TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
42
+static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
43
+ gen_helper_gvec_ah_vfms_h,
44
+ gen_helper_gvec_ah_vfms_s,
45
+ gen_helper_gvec_ah_vfms_d,
46
+};
47
+TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
48
49
static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
50
gen_helper_gvec_fceq_h,
51
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/tcg/vec_helper.c
54
+++ b/target/arm/tcg/vec_helper.c
55
@@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
56
return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
53
}
57
}
54
58
55
-static inline bool regime_translation_big_endian(CPUARMState *env,
59
+static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
56
- ARMMMUIdx mmu_idx)
60
+ float_status *stat)
57
-{
58
- return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
59
-}
60
-
61
/* Return the TTBR associated with this translation regime */
62
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
63
{
64
@@ -XXX,XX +XXX,XX @@ int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
65
return prot_rw | PAGE_EXEC;
66
}
67
68
-static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
69
-{
70
- /*
71
- * For an S1 page table walk, the stage 1 attributes are always
72
- * some form of "this is Normal memory". The combined S1+S2
73
- * attributes are therefore only Device if stage 2 specifies Device.
74
- * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00,
75
- * ie when cacheattrs.attrs bits [3:2] are 0b00.
76
- * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie
77
- * when cacheattrs.attrs bit [2] is 0.
78
- */
79
- assert(cacheattrs.is_s2_format);
80
- if (arm_hcr_el2_eff(env) & HCR_FWB) {
81
- return (cacheattrs.attrs & 0x4) == 0;
82
- } else {
83
- return (cacheattrs.attrs & 0xc) == 0;
84
- }
85
-}
86
-
87
-/* Translate a S1 pagetable walk through S2 if needed. */
88
-static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
89
- hwaddr addr, bool *is_secure,
90
- ARMMMUFaultInfo *fi)
91
-{
92
- if (arm_mmu_idx_is_stage1_of_2(mmu_idx) &&
93
- !regime_translation_disabled(env, ARMMMUIdx_Stage2)) {
94
- target_ulong s2size;
95
- hwaddr s2pa;
96
- int s2prot;
97
- int ret;
98
- ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S
99
- : ARMMMUIdx_Stage2;
100
- ARMCacheAttrs cacheattrs = {};
101
- MemTxAttrs txattrs = {};
102
-
103
- ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false,
104
- &s2pa, &txattrs, &s2prot, &s2size, fi,
105
- &cacheattrs);
106
- if (ret) {
107
- assert(fi->type != ARMFault_None);
108
- fi->s2addr = addr;
109
- fi->stage2 = true;
110
- fi->s1ptw = true;
111
- fi->s1ns = !*is_secure;
112
- return ~0;
113
- }
114
- if ((arm_hcr_el2_eff(env) & HCR_PTW) &&
115
- ptw_attrs_are_device(env, cacheattrs)) {
116
- /*
117
- * PTW set and S1 walk touched S2 Device memory:
118
- * generate Permission fault.
119
- */
120
- fi->type = ARMFault_Permission;
121
- fi->s2addr = addr;
122
- fi->stage2 = true;
123
- fi->s1ptw = true;
124
- fi->s1ns = !*is_secure;
125
- return ~0;
126
- }
127
-
128
- if (arm_is_secure_below_el3(env)) {
129
- /* Check if page table walk is to secure or non-secure PA space. */
130
- if (*is_secure) {
131
- *is_secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW);
132
- } else {
133
- *is_secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW);
134
- }
135
- } else {
136
- assert(!*is_secure);
137
- }
138
-
139
- addr = s2pa;
140
- }
141
- return addr;
142
-}
143
-
144
-/* All loads done in the course of a page table walk go through here. */
145
-uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
146
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
147
-{
148
- ARMCPU *cpu = ARM_CPU(cs);
149
- CPUARMState *env = &cpu->env;
150
- MemTxAttrs attrs = {};
151
- MemTxResult result = MEMTX_OK;
152
- AddressSpace *as;
153
- uint32_t data;
154
-
155
- addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi);
156
- attrs.secure = is_secure;
157
- as = arm_addressspace(cs, attrs);
158
- if (fi->s1ptw) {
159
- return 0;
160
- }
161
- if (regime_translation_big_endian(env, mmu_idx)) {
162
- data = address_space_ldl_be(as, addr, attrs, &result);
163
- } else {
164
- data = address_space_ldl_le(as, addr, attrs, &result);
165
- }
166
- if (result == MEMTX_OK) {
167
- return data;
168
- }
169
- fi->type = ARMFault_SyncExternalOnWalk;
170
- fi->ea = arm_extabort_type(result);
171
- return 0;
172
-}
173
-
174
-uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
175
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
176
-{
177
- ARMCPU *cpu = ARM_CPU(cs);
178
- CPUARMState *env = &cpu->env;
179
- MemTxAttrs attrs = {};
180
- MemTxResult result = MEMTX_OK;
181
- AddressSpace *as;
182
- uint64_t data;
183
-
184
- addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi);
185
- attrs.secure = is_secure;
186
- as = arm_addressspace(cs, attrs);
187
- if (fi->s1ptw) {
188
- return 0;
189
- }
190
- if (regime_translation_big_endian(env, mmu_idx)) {
191
- data = address_space_ldq_be(as, addr, attrs, &result);
192
- } else {
193
- data = address_space_ldq_le(as, addr, attrs, &result);
194
- }
195
- if (result == MEMTX_OK) {
196
- return data;
197
- }
198
- fi->type = ARMFault_SyncExternalOnWalk;
199
- fi->ea = arm_extabort_type(result);
200
- return 0;
201
-}
202
-
203
/*
204
* check_s2_mmu_setup
205
* @cpu: ARMCPU
206
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
207
index XXXXXXX..XXXXXXX 100644
208
--- a/target/arm/ptw.c
209
+++ b/target/arm/ptw.c
210
@@ -XXX,XX +XXX,XX @@
211
#include "ptw.h"
212
213
214
+static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
215
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
216
+ bool s1_is_el0, hwaddr *phys_ptr,
217
+ MemTxAttrs *txattrs, int *prot,
218
+ target_ulong *page_size_ptr,
219
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
220
+ __attribute__((nonnull));
221
+
222
+static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
223
+{
61
+{
224
+ return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
62
+ return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
225
+}
63
+}
226
+
64
+
227
+static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
65
+static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
66
+ float_status *stat)
228
+{
67
+{
229
+ /*
68
+ return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat);
230
+ * For an S1 page table walk, the stage 1 attributes are always
231
+ * some form of "this is Normal memory". The combined S1+S2
232
+ * attributes are therefore only Device if stage 2 specifies Device.
233
+ * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00,
234
+ * ie when cacheattrs.attrs bits [3:2] are 0b00.
235
+ * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie
236
+ * when cacheattrs.attrs bit [2] is 0.
237
+ */
238
+ assert(cacheattrs.is_s2_format);
239
+ if (arm_hcr_el2_eff(env) & HCR_FWB) {
240
+ return (cacheattrs.attrs & 0x4) == 0;
241
+ } else {
242
+ return (cacheattrs.attrs & 0xc) == 0;
243
+ }
244
+}
69
+}
245
+
70
+
246
+/* Translate a S1 pagetable walk through S2 if needed. */
71
+static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2,
247
+static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
72
+ float_status *stat)
248
+ hwaddr addr, bool *is_secure,
249
+ ARMMMUFaultInfo *fi)
250
+{
73
+{
251
+ if (arm_mmu_idx_is_stage1_of_2(mmu_idx) &&
74
+ return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat);
252
+ !regime_translation_disabled(env, ARMMMUIdx_Stage2)) {
253
+ target_ulong s2size;
254
+ hwaddr s2pa;
255
+ int s2prot;
256
+ int ret;
257
+ ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S
258
+ : ARMMMUIdx_Stage2;
259
+ ARMCacheAttrs cacheattrs = {};
260
+ MemTxAttrs txattrs = {};
261
+
262
+ ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false,
263
+ &s2pa, &txattrs, &s2prot, &s2size, fi,
264
+ &cacheattrs);
265
+ if (ret) {
266
+ assert(fi->type != ARMFault_None);
267
+ fi->s2addr = addr;
268
+ fi->stage2 = true;
269
+ fi->s1ptw = true;
270
+ fi->s1ns = !*is_secure;
271
+ return ~0;
272
+ }
273
+ if ((arm_hcr_el2_eff(env) & HCR_PTW) &&
274
+ ptw_attrs_are_device(env, cacheattrs)) {
275
+ /*
276
+ * PTW set and S1 walk touched S2 Device memory:
277
+ * generate Permission fault.
278
+ */
279
+ fi->type = ARMFault_Permission;
280
+ fi->s2addr = addr;
281
+ fi->stage2 = true;
282
+ fi->s1ptw = true;
283
+ fi->s1ns = !*is_secure;
284
+ return ~0;
285
+ }
286
+
287
+ if (arm_is_secure_below_el3(env)) {
288
+ /* Check if page table walk is to secure or non-secure PA space. */
289
+ if (*is_secure) {
290
+ *is_secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW);
291
+ } else {
292
+ *is_secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW);
293
+ }
294
+ } else {
295
+ assert(!*is_secure);
296
+ }
297
+
298
+ addr = s2pa;
299
+ }
300
+ return addr;
301
+}
75
+}
302
+
76
+
303
+/* All loads done in the course of a page table walk go through here. */
77
#define DO_MULADD(NAME, FUNC, TYPE) \
304
+static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
78
void HELPER(NAME)(void *vd, void *vn, void *vm, \
305
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
79
float_status *stat, uint32_t desc) \
306
+{
80
@@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
307
+ ARMCPU *cpu = ARM_CPU(cs);
81
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
308
+ CPUARMState *env = &cpu->env;
82
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
309
+ MemTxAttrs attrs = {};
83
310
+ MemTxResult result = MEMTX_OK;
84
+DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
311
+ AddressSpace *as;
85
+DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
312
+ uint32_t data;
86
+DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
313
+
87
+
314
+ addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi);
88
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
315
+ attrs.secure = is_secure;
89
* For AdvSIMD, there is of course only one such vector segment.
316
+ as = arm_addressspace(cs, attrs);
317
+ if (fi->s1ptw) {
318
+ return 0;
319
+ }
320
+ if (regime_translation_big_endian(env, mmu_idx)) {
321
+ data = address_space_ldl_be(as, addr, attrs, &result);
322
+ } else {
323
+ data = address_space_ldl_le(as, addr, attrs, &result);
324
+ }
325
+ if (result == MEMTX_OK) {
326
+ return data;
327
+ }
328
+ fi->type = ARMFault_SyncExternalOnWalk;
329
+ fi->ea = arm_extabort_type(result);
330
+ return 0;
331
+}
332
+
333
+static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
334
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
335
+{
336
+ ARMCPU *cpu = ARM_CPU(cs);
337
+ CPUARMState *env = &cpu->env;
338
+ MemTxAttrs attrs = {};
339
+ MemTxResult result = MEMTX_OK;
340
+ AddressSpace *as;
341
+ uint64_t data;
342
+
343
+ addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi);
344
+ attrs.secure = is_secure;
345
+ as = arm_addressspace(cs, attrs);
346
+ if (fi->s1ptw) {
347
+ return 0;
348
+ }
349
+ if (regime_translation_big_endian(env, mmu_idx)) {
350
+ data = address_space_ldq_be(as, addr, attrs, &result);
351
+ } else {
352
+ data = address_space_ldq_le(as, addr, attrs, &result);
353
+ }
354
+ if (result == MEMTX_OK) {
355
+ return data;
356
+ }
357
+ fi->type = ARMFault_SyncExternalOnWalk;
358
+ fi->ea = arm_extabort_type(result);
359
+ return 0;
360
+}
361
+
362
static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
363
uint32_t *table, uint32_t address)
364
{
365
@@ -XXX,XX +XXX,XX @@ do_fault:
366
* @fi: set to fault info if the translation fails
367
* @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
368
*/
90
*/
369
-bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
370
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
371
- bool s1_is_el0,
372
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
373
- target_ulong *page_size_ptr,
374
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
375
+static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
376
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
377
+ bool s1_is_el0, hwaddr *phys_ptr,
378
+ MemTxAttrs *txattrs, int *prot,
379
+ target_ulong *page_size_ptr,
380
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
381
{
382
ARMCPU *cpu = env_archcpu(env);
383
CPUState *cs = CPU(cpu);
384
--
91
--
385
2.25.1
92
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the
2
SVE FMLS (vector) insns, by providing new helpers for the AH=1 case
3
which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions
4
that do the work.
2
5
3
The ARM pseudocode function CheckNormalSVEEnabled uses this
6
The float*_muladd functions have a flags argument that can
4
predicate now, and I think it's a bit clearer.
7
perform optional negation of various operands. We don't use
8
that for "normal" arm fmla, because the muladd flags are not
9
applied when an input is a NaN. But since FEAT_AFP does not
10
negate NaNs, this behaviour is exactly what we need.
5
11
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
The non-AH helpers pass in a zero flags argument and control the
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
negation via the neg1 and neg3 arguments; the AH helpers always pass
8
Message-id: 20220607203306.657998-8-richard.henderson@linaro.org
14
in neg1 and neg3 as zero and control the negation via the flags
15
argument. This allows us to avoid conditional branches within the
16
inner loop.
17
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
20
---
11
target/arm/helper.c | 5 ++---
21
target/arm/tcg/helper-sve.h | 21 ++++++++
12
1 file changed, 2 insertions(+), 3 deletions(-)
22
target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++-------
23
target/arm/tcg/translate-sve.c | 18 ++++---
24
3 files changed, 114 insertions(+), 24 deletions(-)
13
25
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
26
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
15
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
28
--- a/target/arm/tcg/helper-sve.h
17
+++ b/target/arm/helper.c
29
+++ b/target/arm/tcg/helper-sve.h
18
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo minimal_ras_reginfo[] = {
30
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
19
int sve_exception_el(CPUARMState *env, int el)
31
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
20
{
32
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
21
#ifndef CONFIG_USER_ONLY
33
22
- uint64_t hcr_el2 = arm_hcr_el2_eff(env);
34
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
23
-
35
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
24
- if (el <= 1 && (hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
36
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
25
+ if (el <= 1 && !el_is_in_host(env, el)) {
37
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
26
switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, ZEN)) {
38
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
27
case 1:
39
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
28
if (el != 0) {
40
+
29
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
41
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
30
* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE).
42
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
31
*/
43
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
32
if (el <= 2) {
44
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
33
+ uint64_t hcr_el2 = arm_hcr_el2_eff(env);
45
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
34
if (hcr_el2 & HCR_E2H) {
46
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
35
switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) {
47
+
36
case 1:
48
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
49
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
50
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
52
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
53
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
54
+
55
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
56
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
57
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
58
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/tcg/sve_helper.c
61
+++ b/target/arm/tcg/sve_helper.c
62
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
63
64
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
65
float_status *status, uint32_t desc,
66
- uint16_t neg1, uint16_t neg3)
67
+ uint16_t neg1, uint16_t neg3, int flags)
68
{
69
intptr_t i = simd_oprsz(desc);
70
uint64_t *g = vg;
71
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
72
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
73
e2 = *(uint16_t *)(vm + H1_2(i));
74
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
75
- r = float16_muladd(e1, e2, e3, 0, status);
76
+ r = float16_muladd(e1, e2, e3, flags, status);
77
*(uint16_t *)(vd + H1_2(i)) = r;
78
}
79
} while (i & 63);
80
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
81
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
82
void *vg, float_status *status, uint32_t desc)
83
{
84
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
85
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
86
}
87
88
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
89
void *vg, float_status *status, uint32_t desc)
90
{
91
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
92
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
93
}
94
95
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
96
void *vg, float_status *status, uint32_t desc)
97
{
98
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
99
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
100
}
101
102
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
103
void *vg, float_status *status, uint32_t desc)
104
{
105
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
106
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
107
+}
108
+
109
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
110
+ void *vg, float_status *status, uint32_t desc)
111
+{
112
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
113
+ float_muladd_negate_product);
114
+}
115
+
116
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
117
+ void *vg, float_status *status, uint32_t desc)
118
+{
119
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
120
+ float_muladd_negate_product | float_muladd_negate_c);
121
+}
122
+
123
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
124
+ void *vg, float_status *status, uint32_t desc)
125
+{
126
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
127
+ float_muladd_negate_c);
128
}
129
130
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
131
float_status *status, uint32_t desc,
132
- uint32_t neg1, uint32_t neg3)
133
+ uint32_t neg1, uint32_t neg3, int flags)
134
{
135
intptr_t i = simd_oprsz(desc);
136
uint64_t *g = vg;
137
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
138
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
139
e2 = *(uint32_t *)(vm + H1_4(i));
140
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
141
- r = float32_muladd(e1, e2, e3, 0, status);
142
+ r = float32_muladd(e1, e2, e3, flags, status);
143
*(uint32_t *)(vd + H1_4(i)) = r;
144
}
145
} while (i & 63);
146
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
147
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
148
void *vg, float_status *status, uint32_t desc)
149
{
150
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
151
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
152
}
153
154
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
155
void *vg, float_status *status, uint32_t desc)
156
{
157
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
158
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
159
}
160
161
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
162
void *vg, float_status *status, uint32_t desc)
163
{
164
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
165
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
166
}
167
168
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
169
void *vg, float_status *status, uint32_t desc)
170
{
171
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
172
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
173
+}
174
+
175
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
176
+ void *vg, float_status *status, uint32_t desc)
177
+{
178
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
179
+ float_muladd_negate_product);
180
+}
181
+
182
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
183
+ void *vg, float_status *status, uint32_t desc)
184
+{
185
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
186
+ float_muladd_negate_product | float_muladd_negate_c);
187
+}
188
+
189
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
190
+ void *vg, float_status *status, uint32_t desc)
191
+{
192
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
193
+ float_muladd_negate_c);
194
}
195
196
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
197
float_status *status, uint32_t desc,
198
- uint64_t neg1, uint64_t neg3)
199
+ uint64_t neg1, uint64_t neg3, int flags)
200
{
201
intptr_t i = simd_oprsz(desc);
202
uint64_t *g = vg;
203
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
204
e1 = *(uint64_t *)(vn + i) ^ neg1;
205
e2 = *(uint64_t *)(vm + i);
206
e3 = *(uint64_t *)(va + i) ^ neg3;
207
- r = float64_muladd(e1, e2, e3, 0, status);
208
+ r = float64_muladd(e1, e2, e3, flags, status);
209
*(uint64_t *)(vd + i) = r;
210
}
211
} while (i & 63);
212
@@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
213
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
214
void *vg, float_status *status, uint32_t desc)
215
{
216
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
217
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
218
}
219
220
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
221
void *vg, float_status *status, uint32_t desc)
222
{
223
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
224
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
225
}
226
227
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
228
void *vg, float_status *status, uint32_t desc)
229
{
230
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
231
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
232
}
233
234
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
235
void *vg, float_status *status, uint32_t desc)
236
{
237
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
238
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
239
+}
240
+
241
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
242
+ void *vg, float_status *status, uint32_t desc)
243
+{
244
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
245
+ float_muladd_negate_product);
246
+}
247
+
248
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
249
+ void *vg, float_status *status, uint32_t desc)
250
+{
251
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
252
+ float_muladd_negate_product | float_muladd_negate_c);
253
+}
254
+
255
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
256
+ void *vg, float_status *status, uint32_t desc)
257
+{
258
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
259
+ float_muladd_negate_c);
260
}
261
262
/* Two operand floating-point comparison controlled by a predicate.
263
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
264
index XXXXXXX..XXXXXXX 100644
265
--- a/target/arm/tcg/translate-sve.c
266
+++ b/target/arm/tcg/translate-sve.c
267
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
268
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
269
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
270
271
-#define DO_FMLA(NAME, name) \
272
+#define DO_FMLA(NAME, name, ah_name) \
273
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
274
NULL, gen_helper_sve_##name##_h, \
275
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
276
}; \
277
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
278
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
279
+ NULL, gen_helper_sve_##ah_name##_h, \
280
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
281
+ }; \
282
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
283
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
284
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
285
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
286
287
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
288
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
289
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
290
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
291
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
292
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
293
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
294
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
295
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
296
297
#undef DO_FMLA
298
37
--
299
--
38
2.25.1
300
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The negation step in the SVE FTSSEL insn mustn't negate a NaN when
2
FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field
3
and use that to determine whether to do the negation.
2
4
3
Use the function instead of the array directly.
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/tcg/sve_helper.c | 18 +++++++++++++++---
9
target/arm/tcg/translate-sve.c | 4 ++--
10
2 files changed, 17 insertions(+), 5 deletions(-)
4
11
5
Because the function performs its own masking, via the uint8_t
12
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
6
parameter, we need to do nothing extra within the users: the bits
7
above the first 2 (_uh) or 4 (_uw) will be discarded by assignment
8
to the local bmask variables, and of course _uq uses the entire
9
uint64_t result.
10
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20220607203306.657998-17-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
16
target/arm/mve_helper.c | 6 +++---
17
1 file changed, 3 insertions(+), 3 deletions(-)
18
19
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/mve_helper.c
14
--- a/target/arm/tcg/sve_helper.c
22
+++ b/target/arm/mve_helper.c
15
+++ b/target/arm/tcg/sve_helper.c
23
@@ -XXX,XX +XXX,XX @@ static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask)
16
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
24
17
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
25
static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask)
26
{
18
{
27
- uint16_t bmask = expand_pred_b_data[mask & 3];
19
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
28
+ uint16_t bmask = expand_pred_b(mask);
20
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
29
*d = (*d & ~bmask) | (r & bmask);
21
uint16_t *d = vd, *n = vn, *m = vm;
22
for (i = 0; i < opr_sz; i += 1) {
23
uint16_t nn = n[i];
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
25
if (mm & 1) {
26
nn = float16_one;
27
}
28
- d[i] = nn ^ (mm & 2) << 14;
29
+ if (mm & 2) {
30
+ nn = float16_maybe_ah_chs(nn, fpcr_ah);
31
+ }
32
+ d[i] = nn;
33
}
30
}
34
}
31
35
32
@@ -XXX,XX +XXX,XX @@ static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask)
36
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
33
34
static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask)
35
{
37
{
36
- uint32_t bmask = expand_pred_b_data[mask & 0xf];
38
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
37
+ uint32_t bmask = expand_pred_b(mask);
39
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
38
*d = (*d & ~bmask) | (r & bmask);
40
uint32_t *d = vd, *n = vn, *m = vm;
41
for (i = 0; i < opr_sz; i += 1) {
42
uint32_t nn = n[i];
43
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
44
if (mm & 1) {
45
nn = float32_one;
46
}
47
- d[i] = nn ^ (mm & 2) << 30;
48
+ if (mm & 2) {
49
+ nn = float32_maybe_ah_chs(nn, fpcr_ah);
50
+ }
51
+ d[i] = nn;
52
}
39
}
53
}
40
54
41
@@ -XXX,XX +XXX,XX @@ static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask)
55
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
42
43
static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask)
44
{
56
{
45
- uint64_t bmask = expand_pred_b_data[mask & 0xff];
57
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
46
+ uint64_t bmask = expand_pred_b(mask);
58
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
47
*d = (*d & ~bmask) | (r & bmask);
59
uint64_t *d = vd, *n = vn, *m = vm;
60
for (i = 0; i < opr_sz; i += 1) {
61
uint64_t nn = n[i];
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
63
if (mm & 1) {
64
nn = float64_one;
65
}
66
- d[i] = nn ^ (mm & 2) << 62;
67
+ if (mm & 2) {
68
+ nn = float64_maybe_ah_chs(nn, fpcr_ah);
69
+ }
70
+ d[i] = nn;
71
}
48
}
72
}
49
73
74
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/tcg/translate-sve.c
77
+++ b/target/arm/tcg/translate-sve.c
78
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
79
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
80
};
81
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
82
- fexpa_fns[a->esz], a->rd, a->rn, 0)
83
+ fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah)
84
85
static gen_helper_gvec_3 * const ftssel_fns[4] = {
86
NULL, gen_helper_sve_ftssel_h,
87
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
88
};
89
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
90
- ftssel_fns[a->esz], a, 0)
91
+ ftssel_fns[a->esz], a, s->fpcr_ah)
92
93
/*
94
*** SVE Predicate Logical Operations Group
50
--
95
--
51
2.25.1
96
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The negation step in the SVE FTMAD insn mustn't negate a NaN when
2
FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field,
3
so we can select the correct behaviour.
2
4
3
Begin moving all of the page table walking functions
5
Because the operand is known to be negative, negating the operand
4
out of helper.c, starting with get_phys_addr().
6
is the same as taking the absolute value. Defer this to the muladd
7
operation via flags, so that it happens after NaN detection, which
8
is correct for FPCR.AH.
5
9
6
Create a temporary header file, "ptw.h", in which to
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
share declarations between the two C files while we
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
are moving functions.
12
---
13
target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++--------
14
target/arm/tcg/translate-sve.c | 3 ++-
15
2 files changed, 35 insertions(+), 10 deletions(-)
9
16
10
Move a few declarations to "internals.h", which will
17
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
11
remain used by multiple C files.
12
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20220604040607.269301-3-richard.henderson@linaro.org
15
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
target/arm/internals.h | 18 ++-
19
target/arm/ptw.h | 51 ++++++
20
target/arm/helper.c | 344 +++++------------------------------------
21
target/arm/ptw.c | 267 ++++++++++++++++++++++++++++++++
22
target/arm/meson.build | 1 +
23
5 files changed, 372 insertions(+), 309 deletions(-)
24
create mode 100644 target/arm/ptw.h
25
create mode 100644 target/arm/ptw.c
26
27
diff --git a/target/arm/internals.h b/target/arm/internals.h
28
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/internals.h
19
--- a/target/arm/tcg/sve_helper.c
30
+++ b/target/arm/internals.h
20
+++ b/target/arm/tcg/sve_helper.c
31
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
21
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
32
/* Return the MMU index for a v7M CPU in the specified security state */
22
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
33
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate);
23
};
34
24
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
35
-/* Return true if the stage 1 translation regime is using LPAE format page
25
- intptr_t x = simd_data(desc);
36
- * tables */
26
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
37
+/* Return true if the translation regime is using LPAE format page tables */
27
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
38
+bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx);
28
float16 *d = vd, *n = vn, *m = vm;
39
+
29
+
40
+/*
30
for (i = 0; i < opr_sz; i++) {
41
+ * Return true if the stage 1 translation regime is using LPAE
31
float16 mm = m[i];
42
+ * format page tables
32
intptr_t xx = x;
43
+ */
33
+ int flags = 0;
44
bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx);
34
+
45
35
if (float16_is_neg(mm)) {
46
/* Raise a data fault alignment exception for the specified virtual address */
36
- mm = float16_abs(mm);
47
@@ -XXX,XX +XXX,XX @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
37
+ if (fpcr_ah) {
38
+ flags = float_muladd_negate_product;
39
+ } else {
40
+ mm = float16_abs(mm);
41
+ }
42
xx += 8;
43
}
44
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s);
45
+ d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
48
}
46
}
49
}
47
}
50
48
51
+/* Return the SCTLR value which controls this address translation regime */
49
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
52
+static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
50
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
53
+{
51
};
54
+ return env->cp15.sctlr_el[regime_el(env, mmu_idx)];
52
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
55
+}
53
- intptr_t x = simd_data(desc);
54
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
55
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
56
float32 *d = vd, *n = vn, *m = vm;
56
+
57
+
57
/* Return the TCR controlling this translation regime */
58
for (i = 0; i < opr_sz; i++) {
58
static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
59
float32 mm = m[i];
59
{
60
intptr_t xx = x;
60
@@ -XXX,XX +XXX,XX @@ typedef struct ARMVAParameters {
61
+ int flags = 0;
61
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
62
ARMMMUIdx mmu_idx, bool data);
63
64
+int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx);
65
+int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx);
66
+
62
+
67
static inline int exception_target_el(CPUARMState *env)
63
if (float32_is_neg(mm)) {
68
{
64
- mm = float32_abs(mm);
69
int target_el = MAX(1, arm_current_el(env));
65
+ if (fpcr_ah) {
70
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
66
+ flags = float_muladd_negate_product;
71
new file mode 100644
67
+ } else {
72
index XXXXXXX..XXXXXXX
68
+ mm = float32_abs(mm);
73
--- /dev/null
69
+ }
74
+++ b/target/arm/ptw.h
70
xx += 8;
75
@@ -XXX,XX +XXX,XX @@
71
}
76
+/*
72
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s);
77
+ * ARM page table walking.
73
+ d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
78
+ *
79
+ * This code is licensed under the GNU GPL v2 or later.
80
+ *
81
+ * SPDX-License-Identifier: GPL-2.0-or-later
82
+ */
83
+
84
+#ifndef TARGET_ARM_PTW_H
85
+#define TARGET_ARM_PTW_H
86
+
87
+#ifndef CONFIG_USER_ONLY
88
+
89
+bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
90
+bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
91
+ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
92
+ ARMCacheAttrs s1, ARMCacheAttrs s2);
93
+
94
+bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
95
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
96
+ hwaddr *phys_ptr, int *prot,
97
+ target_ulong *page_size,
98
+ ARMMMUFaultInfo *fi);
99
+bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
100
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
101
+ hwaddr *phys_ptr, int *prot,
102
+ ARMMMUFaultInfo *fi);
103
+bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
104
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
105
+ hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
106
+ target_ulong *page_size, ARMMMUFaultInfo *fi);
107
+bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
108
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
109
+ hwaddr *phys_ptr, int *prot,
110
+ target_ulong *page_size,
111
+ ARMMMUFaultInfo *fi);
112
+bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
113
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
114
+ hwaddr *phys_ptr, MemTxAttrs *txattrs,
115
+ int *prot, target_ulong *page_size,
116
+ ARMMMUFaultInfo *fi);
117
+bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
118
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
119
+ bool s1_is_el0,
120
+ hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
121
+ target_ulong *page_size_ptr,
122
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
123
+ __attribute__((nonnull));
124
+
125
+#endif /* !CONFIG_USER_ONLY */
126
+#endif /* TARGET_ARM_PTW_H */
127
diff --git a/target/arm/helper.c b/target/arm/helper.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/target/arm/helper.c
130
+++ b/target/arm/helper.c
131
@@ -XXX,XX +XXX,XX @@
132
#include "semihosting/common-semi.h"
133
#endif
134
#include "cpregs.h"
135
+#include "ptw.h"
136
137
#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
138
139
-#ifndef CONFIG_USER_ONLY
140
-
141
-static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
142
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
143
- bool s1_is_el0,
144
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
145
- target_ulong *page_size_ptr,
146
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
147
- __attribute__((nonnull));
148
-#endif
149
-
150
static void switch_mode(CPUARMState *env, int mode);
151
-static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx);
152
153
static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
154
{
155
@@ -XXX,XX +XXX,XX @@ uint64_t arm_sctlr(CPUARMState *env, int el)
156
return env->cp15.sctlr_el[el];
157
}
158
159
-/* Return the SCTLR value which controls this address translation regime */
160
-static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
161
-{
162
- return env->cp15.sctlr_el[regime_el(env, mmu_idx)];
163
-}
164
-
165
#ifndef CONFIG_USER_ONLY
166
167
/* Return true if the specified stage of address translation is disabled */
168
-static inline bool regime_translation_disabled(CPUARMState *env,
169
- ARMMMUIdx mmu_idx)
170
+bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx)
171
{
172
uint64_t hcr_el2;
173
174
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
175
#endif /* !CONFIG_USER_ONLY */
176
177
/* Return true if the translation regime is using LPAE format page tables */
178
-static inline bool regime_using_lpae_format(CPUARMState *env,
179
- ARMMMUIdx mmu_idx)
180
+bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
181
{
182
int el = regime_el(env, mmu_idx);
183
if (el == 2 || arm_el_is_aa64(env, el)) {
184
@@ -XXX,XX +XXX,XX @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
185
}
186
187
#ifndef CONFIG_USER_ONLY
188
-static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
189
+bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
190
{
191
switch (mmu_idx) {
192
case ARMMMUIdx_SE10_0:
193
@@ -XXX,XX +XXX,XX @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
194
return 0;
195
}
196
197
-static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
198
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
199
- hwaddr *phys_ptr, int *prot,
200
- target_ulong *page_size,
201
- ARMMMUFaultInfo *fi)
202
+bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
203
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
204
+ hwaddr *phys_ptr, int *prot,
205
+ target_ulong *page_size,
206
+ ARMMMUFaultInfo *fi)
207
{
208
CPUState *cs = env_cpu(env);
209
int level = 1;
210
@@ -XXX,XX +XXX,XX @@ do_fault:
211
return true;
212
}
213
214
-static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
215
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
216
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
217
- target_ulong *page_size, ARMMMUFaultInfo *fi)
218
+bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
219
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
220
+ hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
221
+ target_ulong *page_size, ARMMMUFaultInfo *fi)
222
{
223
CPUState *cs = env_cpu(env);
224
ARMCPU *cpu = env_archcpu(env);
225
@@ -XXX,XX +XXX,XX @@ unsigned int arm_pamax(ARMCPU *cpu)
226
return pamax_map[parange];
227
}
228
229
-static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
230
+int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
231
{
232
if (regime_has_2_ranges(mmu_idx)) {
233
return extract64(tcr, 37, 2);
234
@@ -XXX,XX +XXX,XX @@ static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
235
}
74
}
236
}
75
}
237
76
238
-static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
77
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
239
+int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
78
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
240
{
79
};
241
if (regime_has_2_ranges(mmu_idx)) {
80
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
242
return extract64(tcr, 51, 2);
81
- intptr_t x = simd_data(desc);
243
@@ -XXX,XX +XXX,XX @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
82
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
244
* @fi: set to fault info if the translation fails
83
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
245
* @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
84
float64 *d = vd, *n = vn, *m = vm;
246
*/
85
+
247
-static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
86
for (i = 0; i < opr_sz; i++) {
248
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
87
float64 mm = m[i];
249
- bool s1_is_el0,
88
intptr_t xx = x;
250
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
89
+ int flags = 0;
251
- target_ulong *page_size_ptr,
90
+
252
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
91
if (float64_is_neg(mm)) {
253
+bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
92
- mm = float64_abs(mm);
254
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
93
+ if (fpcr_ah) {
255
+ bool s1_is_el0,
94
+ flags = float_muladd_negate_product;
256
+ hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
95
+ } else {
257
+ target_ulong *page_size_ptr,
96
+ mm = float64_abs(mm);
258
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
97
+ }
259
{
98
xx += 8;
260
ARMCPU *cpu = env_archcpu(env);
99
}
261
CPUState *cs = CPU(cpu);
100
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s);
262
@@ -XXX,XX +XXX,XX @@ static inline bool m_is_system_region(CPUARMState *env, uint32_t address)
101
+ d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
263
return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
102
}
264
}
103
}
265
104
266
-static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
105
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
267
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
268
- hwaddr *phys_ptr, int *prot,
269
- target_ulong *page_size,
270
- ARMMMUFaultInfo *fi)
271
+bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
272
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
273
+ hwaddr *phys_ptr, int *prot,
274
+ target_ulong *page_size,
275
+ ARMMMUFaultInfo *fi)
276
{
277
ARMCPU *cpu = env_archcpu(env);
278
int n;
279
@@ -XXX,XX +XXX,XX @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
280
}
281
282
283
-static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
284
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
285
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
286
- int *prot, target_ulong *page_size,
287
- ARMMMUFaultInfo *fi)
288
+bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
289
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
290
+ hwaddr *phys_ptr, MemTxAttrs *txattrs,
291
+ int *prot, target_ulong *page_size,
292
+ ARMMMUFaultInfo *fi)
293
{
294
uint32_t secure = regime_is_secure(env, mmu_idx);
295
V8M_SAttributes sattrs = {};
296
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
297
return ret;
298
}
299
300
-static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
301
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
302
- hwaddr *phys_ptr, int *prot,
303
- ARMMMUFaultInfo *fi)
304
+bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
305
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
306
+ hwaddr *phys_ptr, int *prot,
307
+ ARMMMUFaultInfo *fi)
308
{
309
int n;
310
uint32_t mask;
311
@@ -XXX,XX +XXX,XX @@ static uint8_t combined_attrs_fwb(CPUARMState *env,
312
* @s1: Attributes from stage 1 walk
313
* @s2: Attributes from stage 2 walk
314
*/
315
-static ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
316
- ARMCacheAttrs s1, ARMCacheAttrs s2)
317
+ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
318
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
319
{
320
ARMCacheAttrs ret;
321
bool tagged = false;
322
@@ -XXX,XX +XXX,XX @@ static ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
323
return ret;
324
}
325
326
-
327
-/* get_phys_addr - get the physical address for this virtual address
328
- *
329
- * Find the physical address corresponding to the given virtual address,
330
- * by doing a translation table walk on MMU based systems or using the
331
- * MPU state on MPU based systems.
332
- *
333
- * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
334
- * prot and page_size may not be filled in, and the populated fsr value provides
335
- * information on why the translation aborted, in the format of a
336
- * DFSR/IFSR fault register, with the following caveats:
337
- * * we honour the short vs long DFSR format differences.
338
- * * the WnR bit is never set (the caller must do this).
339
- * * for PSMAv5 based systems we don't bother to return a full FSR format
340
- * value.
341
- *
342
- * @env: CPUARMState
343
- * @address: virtual address to get physical address for
344
- * @access_type: 0 for read, 1 for write, 2 for execute
345
- * @mmu_idx: MMU index indicating required translation regime
346
- * @phys_ptr: set to the physical address corresponding to the virtual address
347
- * @attrs: set to the memory transaction attributes to use
348
- * @prot: set to the permissions for the page containing phys_ptr
349
- * @page_size: set to the size of the page containing phys_ptr
350
- * @fi: set to fault info if the translation fails
351
- * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
352
- */
353
-bool get_phys_addr(CPUARMState *env, target_ulong address,
354
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
355
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
356
- target_ulong *page_size,
357
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
358
-{
359
- ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx);
360
-
361
- if (mmu_idx != s1_mmu_idx) {
362
- /* Call ourselves recursively to do the stage 1 and then stage 2
363
- * translations if mmu_idx is a two-stage regime.
364
- */
365
- if (arm_feature(env, ARM_FEATURE_EL2)) {
366
- hwaddr ipa;
367
- int s2_prot;
368
- int ret;
369
- bool ipa_secure;
370
- ARMCacheAttrs cacheattrs2 = {};
371
- ARMMMUIdx s2_mmu_idx;
372
- bool is_el0;
373
-
374
- ret = get_phys_addr(env, address, access_type, s1_mmu_idx, &ipa,
375
- attrs, prot, page_size, fi, cacheattrs);
376
-
377
- /* If S1 fails or S2 is disabled, return early. */
378
- if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) {
379
- *phys_ptr = ipa;
380
- return ret;
381
- }
382
-
383
- ipa_secure = attrs->secure;
384
- if (arm_is_secure_below_el3(env)) {
385
- if (ipa_secure) {
386
- attrs->secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW);
387
- } else {
388
- attrs->secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW);
389
- }
390
- } else {
391
- assert(!ipa_secure);
392
- }
393
-
394
- s2_mmu_idx = attrs->secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
395
- is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0;
396
-
397
- /* S1 is done. Now do S2 translation. */
398
- ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, is_el0,
399
- phys_ptr, attrs, &s2_prot,
400
- page_size, fi, &cacheattrs2);
401
- fi->s2addr = ipa;
402
- /* Combine the S1 and S2 perms. */
403
- *prot &= s2_prot;
404
-
405
- /* If S2 fails, return early. */
406
- if (ret) {
407
- return ret;
408
- }
409
-
410
- /* Combine the S1 and S2 cache attributes. */
411
- if (arm_hcr_el2_eff(env) & HCR_DC) {
412
- /*
413
- * HCR.DC forces the first stage attributes to
414
- * Normal Non-Shareable,
415
- * Inner Write-Back Read-Allocate Write-Allocate,
416
- * Outer Write-Back Read-Allocate Write-Allocate.
417
- * Do not overwrite Tagged within attrs.
418
- */
419
- if (cacheattrs->attrs != 0xf0) {
420
- cacheattrs->attrs = 0xff;
421
- }
422
- cacheattrs->shareability = 0;
423
- }
424
- *cacheattrs = combine_cacheattrs(env, *cacheattrs, cacheattrs2);
425
-
426
- /* Check if IPA translates to secure or non-secure PA space. */
427
- if (arm_is_secure_below_el3(env)) {
428
- if (ipa_secure) {
429
- attrs->secure =
430
- !(env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW));
431
- } else {
432
- attrs->secure =
433
- !((env->cp15.vtcr_el2.raw_tcr & (VTCR_NSA | VTCR_NSW))
434
- || (env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW)));
435
- }
436
- }
437
- return 0;
438
- } else {
439
- /*
440
- * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
441
- */
442
- mmu_idx = stage_1_mmu_idx(mmu_idx);
443
- }
444
- }
445
-
446
- /* The page table entries may downgrade secure to non-secure, but
447
- * cannot upgrade an non-secure translation regime's attributes
448
- * to secure.
449
- */
450
- attrs->secure = regime_is_secure(env, mmu_idx);
451
- attrs->user = regime_is_user(env, mmu_idx);
452
-
453
- /* Fast Context Switch Extension. This doesn't exist at all in v8.
454
- * In v7 and earlier it affects all stage 1 translations.
455
- */
456
- if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2
457
- && !arm_feature(env, ARM_FEATURE_V8)) {
458
- if (regime_el(env, mmu_idx) == 3) {
459
- address += env->cp15.fcseidr_s;
460
- } else {
461
- address += env->cp15.fcseidr_ns;
462
- }
463
- }
464
-
465
- if (arm_feature(env, ARM_FEATURE_PMSA)) {
466
- bool ret;
467
- *page_size = TARGET_PAGE_SIZE;
468
-
469
- if (arm_feature(env, ARM_FEATURE_V8)) {
470
- /* PMSAv8 */
471
- ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx,
472
- phys_ptr, attrs, prot, page_size, fi);
473
- } else if (arm_feature(env, ARM_FEATURE_V7)) {
474
- /* PMSAv7 */
475
- ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
476
- phys_ptr, prot, page_size, fi);
477
- } else {
478
- /* Pre-v7 MPU */
479
- ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx,
480
- phys_ptr, prot, fi);
481
- }
482
- qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32
483
- " mmu_idx %u -> %s (prot %c%c%c)\n",
484
- access_type == MMU_DATA_LOAD ? "reading" :
485
- (access_type == MMU_DATA_STORE ? "writing" : "execute"),
486
- (uint32_t)address, mmu_idx,
487
- ret ? "Miss" : "Hit",
488
- *prot & PAGE_READ ? 'r' : '-',
489
- *prot & PAGE_WRITE ? 'w' : '-',
490
- *prot & PAGE_EXEC ? 'x' : '-');
491
-
492
- return ret;
493
- }
494
-
495
- /* Definitely a real MMU, not an MPU */
496
-
497
- if (regime_translation_disabled(env, mmu_idx)) {
498
- uint64_t hcr;
499
- uint8_t memattr;
500
-
501
- /*
502
- * MMU disabled. S1 addresses within aa64 translation regimes are
503
- * still checked for bounds -- see AArch64.TranslateAddressS1Off.
504
- */
505
- if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) {
506
- int r_el = regime_el(env, mmu_idx);
507
- if (arm_el_is_aa64(env, r_el)) {
508
- int pamax = arm_pamax(env_archcpu(env));
509
- uint64_t tcr = env->cp15.tcr_el[r_el].raw_tcr;
510
- int addrtop, tbi;
511
-
512
- tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
513
- if (access_type == MMU_INST_FETCH) {
514
- tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
515
- }
516
- tbi = (tbi >> extract64(address, 55, 1)) & 1;
517
- addrtop = (tbi ? 55 : 63);
518
-
519
- if (extract64(address, pamax, addrtop - pamax + 1) != 0) {
520
- fi->type = ARMFault_AddressSize;
521
- fi->level = 0;
522
- fi->stage2 = false;
523
- return 1;
524
- }
525
-
526
- /*
527
- * When TBI is disabled, we've just validated that all of the
528
- * bits above PAMax are zero, so logically we only need to
529
- * clear the top byte for TBI. But it's clearer to follow
530
- * the pseudocode set of addrdesc.paddress.
531
- */
532
- address = extract64(address, 0, 52);
533
- }
534
- }
535
- *phys_ptr = address;
536
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
537
- *page_size = TARGET_PAGE_SIZE;
538
-
539
- /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */
540
- hcr = arm_hcr_el2_eff(env);
541
- cacheattrs->shareability = 0;
542
- cacheattrs->is_s2_format = false;
543
- if (hcr & HCR_DC) {
544
- if (hcr & HCR_DCT) {
545
- memattr = 0xf0; /* Tagged, Normal, WB, RWA */
546
- } else {
547
- memattr = 0xff; /* Normal, WB, RWA */
548
- }
549
- } else if (access_type == MMU_INST_FETCH) {
550
- if (regime_sctlr(env, mmu_idx) & SCTLR_I) {
551
- memattr = 0xee; /* Normal, WT, RA, NT */
552
- } else {
553
- memattr = 0x44; /* Normal, NC, No */
554
- }
555
- cacheattrs->shareability = 2; /* outer sharable */
556
- } else {
557
- memattr = 0x00; /* Device, nGnRnE */
558
- }
559
- cacheattrs->attrs = memattr;
560
- return 0;
561
- }
562
-
563
- if (regime_using_lpae_format(env, mmu_idx)) {
564
- return get_phys_addr_lpae(env, address, access_type, mmu_idx, false,
565
- phys_ptr, attrs, prot, page_size,
566
- fi, cacheattrs);
567
- } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
568
- return get_phys_addr_v6(env, address, access_type, mmu_idx,
569
- phys_ptr, attrs, prot, page_size, fi);
570
- } else {
571
- return get_phys_addr_v5(env, address, access_type, mmu_idx,
572
- phys_ptr, prot, page_size, fi);
573
- }
574
-}
575
-
576
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
577
MemTxAttrs *attrs)
578
{
579
@@ -XXX,XX +XXX,XX @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
580
}
581
return phys_addr;
582
}
583
-
584
#endif
585
586
/* Note that signed overflow is undefined in C. The following routines are
587
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
588
new file mode 100644
589
index XXXXXXX..XXXXXXX
590
--- /dev/null
591
+++ b/target/arm/ptw.c
592
@@ -XXX,XX +XXX,XX @@
593
+/*
594
+ * ARM page table walking.
595
+ *
596
+ * This code is licensed under the GNU GPL v2 or later.
597
+ *
598
+ * SPDX-License-Identifier: GPL-2.0-or-later
599
+ */
600
+
601
+#include "qemu/osdep.h"
602
+#include "qemu/log.h"
603
+#include "cpu.h"
604
+#include "internals.h"
605
+#include "ptw.h"
606
+
607
+
608
+/**
609
+ * get_phys_addr - get the physical address for this virtual address
610
+ *
611
+ * Find the physical address corresponding to the given virtual address,
612
+ * by doing a translation table walk on MMU based systems or using the
613
+ * MPU state on MPU based systems.
614
+ *
615
+ * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
616
+ * prot and page_size may not be filled in, and the populated fsr value provides
617
+ * information on why the translation aborted, in the format of a
618
+ * DFSR/IFSR fault register, with the following caveats:
619
+ * * we honour the short vs long DFSR format differences.
620
+ * * the WnR bit is never set (the caller must do this).
621
+ * * for PMSAv5 based systems we don't bother to return a full FSR format
622
+ * value.
623
+ *
624
+ * @env: CPUARMState
625
+ * @address: virtual address to get physical address for
626
+ * @access_type: 0 for read, 1 for write, 2 for execute
627
+ * @mmu_idx: MMU index indicating required translation regime
628
+ * @phys_ptr: set to the physical address corresponding to the virtual address
629
+ * @attrs: set to the memory transaction attributes to use
630
+ * @prot: set to the permissions for the page containing phys_ptr
631
+ * @page_size: set to the size of the page containing phys_ptr
632
+ * @fi: set to fault info if the translation fails
633
+ * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
634
+ */
635
+bool get_phys_addr(CPUARMState *env, target_ulong address,
636
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
637
+ hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
638
+ target_ulong *page_size,
639
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
640
+{
641
+ ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx);
642
+
643
+ if (mmu_idx != s1_mmu_idx) {
644
+ /*
645
+ * Call ourselves recursively to do the stage 1 and then stage 2
646
+ * translations if mmu_idx is a two-stage regime.
647
+ */
648
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
649
+ hwaddr ipa;
650
+ int s2_prot;
651
+ int ret;
652
+ bool ipa_secure;
653
+ ARMCacheAttrs cacheattrs2 = {};
654
+ ARMMMUIdx s2_mmu_idx;
655
+ bool is_el0;
656
+
657
+ ret = get_phys_addr(env, address, access_type, s1_mmu_idx, &ipa,
658
+ attrs, prot, page_size, fi, cacheattrs);
659
+
660
+ /* If S1 fails or S2 is disabled, return early. */
661
+ if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) {
662
+ *phys_ptr = ipa;
663
+ return ret;
664
+ }
665
+
666
+ ipa_secure = attrs->secure;
667
+ if (arm_is_secure_below_el3(env)) {
668
+ if (ipa_secure) {
669
+ attrs->secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW);
670
+ } else {
671
+ attrs->secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW);
672
+ }
673
+ } else {
674
+ assert(!ipa_secure);
675
+ }
676
+
677
+ s2_mmu_idx = attrs->secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
678
+ is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0;
679
+
680
+ /* S1 is done. Now do S2 translation. */
681
+ ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, is_el0,
682
+ phys_ptr, attrs, &s2_prot,
683
+ page_size, fi, &cacheattrs2);
684
+ fi->s2addr = ipa;
685
+ /* Combine the S1 and S2 perms. */
686
+ *prot &= s2_prot;
687
+
688
+ /* If S2 fails, return early. */
689
+ if (ret) {
690
+ return ret;
691
+ }
692
+
693
+ /* Combine the S1 and S2 cache attributes. */
694
+ if (arm_hcr_el2_eff(env) & HCR_DC) {
695
+ /*
696
+ * HCR.DC forces the first stage attributes to
697
+ * Normal Non-Shareable,
698
+ * Inner Write-Back Read-Allocate Write-Allocate,
699
+ * Outer Write-Back Read-Allocate Write-Allocate.
700
+ * Do not overwrite Tagged within attrs.
701
+ */
702
+ if (cacheattrs->attrs != 0xf0) {
703
+ cacheattrs->attrs = 0xff;
704
+ }
705
+ cacheattrs->shareability = 0;
706
+ }
707
+ *cacheattrs = combine_cacheattrs(env, *cacheattrs, cacheattrs2);
708
+
709
+ /* Check if IPA translates to secure or non-secure PA space. */
710
+ if (arm_is_secure_below_el3(env)) {
711
+ if (ipa_secure) {
712
+ attrs->secure =
713
+ !(env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW));
714
+ } else {
715
+ attrs->secure =
716
+ !((env->cp15.vtcr_el2.raw_tcr & (VTCR_NSA | VTCR_NSW))
717
+ || (env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW)));
718
+ }
719
+ }
720
+ return 0;
721
+ } else {
722
+ /*
723
+ * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
724
+ */
725
+ mmu_idx = stage_1_mmu_idx(mmu_idx);
726
+ }
727
+ }
728
+
729
+ /*
730
+ * The page table entries may downgrade secure to non-secure, but
731
+ * cannot upgrade a non-secure translation regime's attributes
732
+ * to secure.
733
+ */
734
+ attrs->secure = regime_is_secure(env, mmu_idx);
735
+ attrs->user = regime_is_user(env, mmu_idx);
736
+
737
+ /*
738
+ * Fast Context Switch Extension. This doesn't exist at all in v8.
739
+ * In v7 and earlier it affects all stage 1 translations.
740
+ */
741
+ if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2
742
+ && !arm_feature(env, ARM_FEATURE_V8)) {
743
+ if (regime_el(env, mmu_idx) == 3) {
744
+ address += env->cp15.fcseidr_s;
745
+ } else {
746
+ address += env->cp15.fcseidr_ns;
747
+ }
748
+ }
749
+
750
+ if (arm_feature(env, ARM_FEATURE_PMSA)) {
751
+ bool ret;
752
+ *page_size = TARGET_PAGE_SIZE;
753
+
754
+ if (arm_feature(env, ARM_FEATURE_V8)) {
755
+ /* PMSAv8 */
756
+ ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx,
757
+ phys_ptr, attrs, prot, page_size, fi);
758
+ } else if (arm_feature(env, ARM_FEATURE_V7)) {
759
+ /* PMSAv7 */
760
+ ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
761
+ phys_ptr, prot, page_size, fi);
762
+ } else {
763
+ /* Pre-v7 MPU */
764
+ ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx,
765
+ phys_ptr, prot, fi);
766
+ }
767
+ qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32
768
+ " mmu_idx %u -> %s (prot %c%c%c)\n",
769
+ access_type == MMU_DATA_LOAD ? "reading" :
770
+ (access_type == MMU_DATA_STORE ? "writing" : "execute"),
771
+ (uint32_t)address, mmu_idx,
772
+ ret ? "Miss" : "Hit",
773
+ *prot & PAGE_READ ? 'r' : '-',
774
+ *prot & PAGE_WRITE ? 'w' : '-',
775
+ *prot & PAGE_EXEC ? 'x' : '-');
776
+
777
+ return ret;
778
+ }
779
+
780
+ /* Definitely a real MMU, not an MPU */
781
+
782
+ if (regime_translation_disabled(env, mmu_idx)) {
783
+ uint64_t hcr;
784
+ uint8_t memattr;
785
+
786
+ /*
787
+ * MMU disabled. S1 addresses within aa64 translation regimes are
788
+ * still checked for bounds -- see AArch64.TranslateAddressS1Off.
789
+ */
790
+ if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) {
791
+ int r_el = regime_el(env, mmu_idx);
792
+ if (arm_el_is_aa64(env, r_el)) {
793
+ int pamax = arm_pamax(env_archcpu(env));
794
+ uint64_t tcr = env->cp15.tcr_el[r_el].raw_tcr;
795
+ int addrtop, tbi;
796
+
797
+ tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
798
+ if (access_type == MMU_INST_FETCH) {
799
+ tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
800
+ }
801
+ tbi = (tbi >> extract64(address, 55, 1)) & 1;
802
+ addrtop = (tbi ? 55 : 63);
803
+
804
+ if (extract64(address, pamax, addrtop - pamax + 1) != 0) {
805
+ fi->type = ARMFault_AddressSize;
806
+ fi->level = 0;
807
+ fi->stage2 = false;
808
+ return 1;
809
+ }
810
+
811
+ /*
812
+ * When TBI is disabled, we've just validated that all of the
813
+ * bits above PAMax are zero, so logically we only need to
814
+ * clear the top byte for TBI. But it's clearer to follow
815
+ * the pseudocode set of addrdesc.paddress.
816
+ */
817
+ address = extract64(address, 0, 52);
818
+ }
819
+ }
820
+ *phys_ptr = address;
821
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
822
+ *page_size = TARGET_PAGE_SIZE;
823
+
824
+ /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */
825
+ hcr = arm_hcr_el2_eff(env);
826
+ cacheattrs->shareability = 0;
827
+ cacheattrs->is_s2_format = false;
828
+ if (hcr & HCR_DC) {
829
+ if (hcr & HCR_DCT) {
830
+ memattr = 0xf0; /* Tagged, Normal, WB, RWA */
831
+ } else {
832
+ memattr = 0xff; /* Normal, WB, RWA */
833
+ }
834
+ } else if (access_type == MMU_INST_FETCH) {
835
+ if (regime_sctlr(env, mmu_idx) & SCTLR_I) {
836
+ memattr = 0xee; /* Normal, WT, RA, NT */
837
+ } else {
838
+ memattr = 0x44; /* Normal, NC, No */
839
+ }
840
+ cacheattrs->shareability = 2; /* outer sharable */
841
+ } else {
842
+ memattr = 0x00; /* Device, nGnRnE */
843
+ }
844
+ cacheattrs->attrs = memattr;
845
+ return 0;
846
+ }
847
+
848
+ if (regime_using_lpae_format(env, mmu_idx)) {
849
+ return get_phys_addr_lpae(env, address, access_type, mmu_idx, false,
850
+ phys_ptr, attrs, prot, page_size,
851
+ fi, cacheattrs);
852
+ } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
853
+ return get_phys_addr_v6(env, address, access_type, mmu_idx,
854
+ phys_ptr, attrs, prot, page_size, fi);
855
+ } else {
856
+ return get_phys_addr_v5(env, address, access_type, mmu_idx,
857
+ phys_ptr, prot, page_size, fi);
858
+ }
859
+}
860
diff --git a/target/arm/meson.build b/target/arm/meson.build
861
index XXXXXXX..XXXXXXX 100644
106
index XXXXXXX..XXXXXXX 100644
862
--- a/target/arm/meson.build
107
--- a/target/arm/tcg/translate-sve.c
863
+++ b/target/arm/meson.build
108
+++ b/target/arm/tcg/translate-sve.c
864
@@ -XXX,XX +XXX,XX @@ arm_softmmu_ss.add(files(
109
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
865
'machine.c',
110
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
866
'monitor.c',
111
};
867
'psci.c',
112
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
868
+ 'ptw.c',
113
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
869
))
114
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm,
870
115
+ a->imm | (s->fpcr_ah << 3),
871
subdir('hvf')
116
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
117
118
/*
872
--
119
--
873
2.25.1
120
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The negation step in FCMLA mustn't negate a NaN when FPCR.AH
4
is set. Handle this by passing FPCR.AH to the helper via the
5
SIMD data field, and use this to select whether to do the
6
negation via XOR or via the muladd negate_product flag.
7
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-5-richard.henderson@linaro.org
9
Message-id: 20250129013857.135256-26-richard.henderson@linaro.org
10
[PMM: Expanded commit message]
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
13
---
8
target/arm/ptw.h | 11 +--
14
target/arm/tcg/translate-a64.c | 2 +-
9
target/arm/helper.c | 161 +-------------------------------------------
15
target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++--------------
10
target/arm/ptw.c | 153 +++++++++++++++++++++++++++++++++++++++++
16
2 files changed, 40 insertions(+), 28 deletions(-)
11
3 files changed, 161 insertions(+), 164 deletions(-)
12
17
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
18
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
14
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
20
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
21
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@ bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
22
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
18
uint32_t *table, uint32_t address);
23
19
int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
24
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
20
int ap, int domain_prot);
25
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
21
+int simple_ap_to_rw_prot_is_user(int ap, bool is_user);
26
- a->rot, fn[a->esz]);
22
+
27
+ a->rot | (s->fpcr_ah << 2), fn[a->esz]);
23
+static inline int
24
+simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
25
+{
26
+ return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
27
+}
28
29
bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
30
MMUAccessType access_type, ARMMMUIdx mmu_idx,
31
hwaddr *phys_ptr, int *prot,
32
ARMMMUFaultInfo *fi);
33
-bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
34
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
35
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
36
- target_ulong *page_size, ARMMMUFaultInfo *fi);
37
bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
38
MMUAccessType access_type, ARMMMUIdx mmu_idx,
39
hwaddr *phys_ptr, int *prot,
40
diff --git a/target/arm/helper.c b/target/arm/helper.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/helper.c
43
+++ b/target/arm/helper.c
44
@@ -XXX,XX +XXX,XX @@ int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot)
45
* @ap: The 2-bit simple AP (AP[2:1])
46
* @is_user: TRUE if accessing from PL0
47
*/
48
-static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
49
+int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
50
{
51
switch (ap) {
52
case 0:
53
@@ -XXX,XX +XXX,XX @@ static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
54
}
55
}
56
57
-static inline int
58
-simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
59
-{
60
- return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
61
-}
62
-
63
/* Translate S2 section/page access permissions to protection flags
64
*
65
* @env: CPUARMState
66
@@ -XXX,XX +XXX,XX @@ uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
67
return 0;
68
}
69
70
-bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
71
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
72
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
73
- target_ulong *page_size, ARMMMUFaultInfo *fi)
74
-{
75
- CPUState *cs = env_cpu(env);
76
- ARMCPU *cpu = env_archcpu(env);
77
- int level = 1;
78
- uint32_t table;
79
- uint32_t desc;
80
- uint32_t xn;
81
- uint32_t pxn = 0;
82
- int type;
83
- int ap;
84
- int domain = 0;
85
- int domain_prot;
86
- hwaddr phys_addr;
87
- uint32_t dacr;
88
- bool ns;
89
-
90
- /* Pagetable walk. */
91
- /* Lookup l1 descriptor. */
92
- if (!get_level1_table_address(env, mmu_idx, &table, address)) {
93
- /* Section translation fault if page walk is disabled by PD0 or PD1 */
94
- fi->type = ARMFault_Translation;
95
- goto do_fault;
96
- }
97
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
98
- mmu_idx, fi);
99
- if (fi->type != ARMFault_None) {
100
- goto do_fault;
101
- }
102
- type = (desc & 3);
103
- if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) {
104
- /* Section translation fault, or attempt to use the encoding
105
- * which is Reserved on implementations without PXN.
106
- */
107
- fi->type = ARMFault_Translation;
108
- goto do_fault;
109
- }
110
- if ((type == 1) || !(desc & (1 << 18))) {
111
- /* Page or Section. */
112
- domain = (desc >> 5) & 0x0f;
113
- }
114
- if (regime_el(env, mmu_idx) == 1) {
115
- dacr = env->cp15.dacr_ns;
116
- } else {
117
- dacr = env->cp15.dacr_s;
118
- }
119
- if (type == 1) {
120
- level = 2;
121
- }
122
- domain_prot = (dacr >> (domain * 2)) & 3;
123
- if (domain_prot == 0 || domain_prot == 2) {
124
- /* Section or Page domain fault */
125
- fi->type = ARMFault_Domain;
126
- goto do_fault;
127
- }
128
- if (type != 1) {
129
- if (desc & (1 << 18)) {
130
- /* Supersection. */
131
- phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
132
- phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32;
133
- phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36;
134
- *page_size = 0x1000000;
135
- } else {
136
- /* Section. */
137
- phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
138
- *page_size = 0x100000;
139
- }
140
- ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
141
- xn = desc & (1 << 4);
142
- pxn = desc & 1;
143
- ns = extract32(desc, 19, 1);
144
- } else {
145
- if (cpu_isar_feature(aa32_pxn, cpu)) {
146
- pxn = (desc >> 2) & 1;
147
- }
148
- ns = extract32(desc, 3, 1);
149
- /* Lookup l2 entry. */
150
- table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
151
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
152
- mmu_idx, fi);
153
- if (fi->type != ARMFault_None) {
154
- goto do_fault;
155
- }
156
- ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
157
- switch (desc & 3) {
158
- case 0: /* Page translation fault. */
159
- fi->type = ARMFault_Translation;
160
- goto do_fault;
161
- case 1: /* 64k page. */
162
- phys_addr = (desc & 0xffff0000) | (address & 0xffff);
163
- xn = desc & (1 << 15);
164
- *page_size = 0x10000;
165
- break;
166
- case 2: case 3: /* 4k page. */
167
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
168
- xn = desc & 1;
169
- *page_size = 0x1000;
170
- break;
171
- default:
172
- /* Never happens, but compiler isn't smart enough to tell. */
173
- g_assert_not_reached();
174
- }
175
- }
176
- if (domain_prot == 3) {
177
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
178
- } else {
179
- if (pxn && !regime_is_user(env, mmu_idx)) {
180
- xn = 1;
181
- }
182
- if (xn && access_type == MMU_INST_FETCH) {
183
- fi->type = ARMFault_Permission;
184
- goto do_fault;
185
- }
186
-
187
- if (arm_feature(env, ARM_FEATURE_V6K) &&
188
- (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
189
- /* The simplified model uses AP[0] as an access control bit. */
190
- if ((ap & 1) == 0) {
191
- /* Access flag fault. */
192
- fi->type = ARMFault_AccessFlag;
193
- goto do_fault;
194
- }
195
- *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
196
- } else {
197
- *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
198
- }
199
- if (*prot && !xn) {
200
- *prot |= PAGE_EXEC;
201
- }
202
- if (!(*prot & (1 << access_type))) {
203
- /* Access permission fault. */
204
- fi->type = ARMFault_Permission;
205
- goto do_fault;
206
- }
207
- }
208
- if (ns) {
209
- /* The NS bit will (as required by the architecture) have no effect if
210
- * the CPU doesn't support TZ or this is a non-secure translation
211
- * regime, because the attribute will already be non-secure.
212
- */
213
- attrs->secure = false;
214
- }
215
- *phys_ptr = phys_addr;
216
- return false;
217
-do_fault:
218
- fi->domain = domain;
219
- fi->level = level;
220
- return true;
221
-}
222
-
223
/*
224
* check_s2_mmu_setup
225
* @cpu: ARMCPU
226
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
227
index XXXXXXX..XXXXXXX 100644
228
--- a/target/arm/ptw.c
229
+++ b/target/arm/ptw.c
230
@@ -XXX,XX +XXX,XX @@ do_fault:
231
return true;
28
return true;
232
}
29
}
233
30
234
+static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
31
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
235
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
32
index XXXXXXX..XXXXXXX 100644
236
+ hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
33
--- a/target/arm/tcg/vec_helper.c
237
+ target_ulong *page_size, ARMMMUFaultInfo *fi)
34
+++ b/target/arm/tcg/vec_helper.c
238
+{
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
239
+ CPUState *cs = env_cpu(env);
36
uintptr_t opr_sz = simd_oprsz(desc);
240
+ ARMCPU *cpu = env_archcpu(env);
37
float16 *d = vd, *n = vn, *m = vm, *a = va;
241
+ int level = 1;
38
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
242
+ uint32_t table;
39
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
243
+ uint32_t desc;
40
- uint32_t neg_real = flip ^ neg_imag;
244
+ uint32_t xn;
41
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
245
+ uint32_t pxn = 0;
42
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
246
+ int type;
43
+ uint32_t negf_real = flip ^ negf_imag;
247
+ int ap;
44
+ float16 negx_imag, negx_real;
248
+ int domain = 0;
45
uintptr_t i;
249
+ int domain_prot;
46
250
+ hwaddr phys_addr;
47
- /* Shift boolean to the sign bit so we can xor to negate. */
251
+ uint32_t dacr;
48
- neg_real <<= 15;
252
+ bool ns;
49
- neg_imag <<= 15;
253
+
50
+ /* With AH=0, use negx; with AH=1 use negf. */
254
+ /* Pagetable walk. */
51
+ negx_real = (negf_real & ~fpcr_ah) << 15;
255
+ /* Lookup l1 descriptor. */
52
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
256
+ if (!get_level1_table_address(env, mmu_idx, &table, address)) {
53
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
257
+ /* Section translation fault if page walk is disabled by PD0 or PD1 */
54
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
258
+ fi->type = ARMFault_Translation;
55
259
+ goto do_fault;
56
for (i = 0; i < opr_sz / 2; i += 2) {
260
+ }
57
float16 e2 = n[H2(i + flip)];
261
+ desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
58
- float16 e1 = m[H2(i + flip)] ^ neg_real;
262
+ mmu_idx, fi);
59
+ float16 e1 = m[H2(i + flip)] ^ negx_real;
263
+ if (fi->type != ARMFault_None) {
60
float16 e4 = e2;
264
+ goto do_fault;
61
- float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
265
+ }
62
+ float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag;
266
+ type = (desc & 3);
63
267
+ if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) {
64
- d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
268
+ /* Section translation fault, or attempt to use the encoding
65
- d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
269
+ * which is Reserved on implementations without PXN.
66
+ d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst);
270
+ */
67
+ d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst);
271
+ fi->type = ARMFault_Translation;
68
}
272
+ goto do_fault;
69
clear_tail(d, opr_sz, simd_maxsz(desc));
273
+ }
70
}
274
+ if ((type == 1) || !(desc & (1 << 18))) {
71
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
275
+ /* Page or Section. */
72
uintptr_t opr_sz = simd_oprsz(desc);
276
+ domain = (desc >> 5) & 0x0f;
73
float32 *d = vd, *n = vn, *m = vm, *a = va;
277
+ }
74
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
278
+ if (regime_el(env, mmu_idx) == 1) {
75
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
279
+ dacr = env->cp15.dacr_ns;
76
- uint32_t neg_real = flip ^ neg_imag;
280
+ } else {
77
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
281
+ dacr = env->cp15.dacr_s;
78
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
282
+ }
79
+ uint32_t negf_real = flip ^ negf_imag;
283
+ if (type == 1) {
80
+ float32 negx_imag, negx_real;
284
+ level = 2;
81
uintptr_t i;
285
+ }
82
286
+ domain_prot = (dacr >> (domain * 2)) & 3;
83
- /* Shift boolean to the sign bit so we can xor to negate. */
287
+ if (domain_prot == 0 || domain_prot == 2) {
84
- neg_real <<= 31;
288
+ /* Section or Page domain fault */
85
- neg_imag <<= 31;
289
+ fi->type = ARMFault_Domain;
86
+ /* With AH=0, use negx; with AH=1 use negf. */
290
+ goto do_fault;
87
+ negx_real = (negf_real & ~fpcr_ah) << 31;
291
+ }
88
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
292
+ if (type != 1) {
89
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
293
+ if (desc & (1 << 18)) {
90
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
294
+ /* Supersection. */
91
295
+ phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
92
for (i = 0; i < opr_sz / 4; i += 2) {
296
+ phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32;
93
float32 e2 = n[H4(i + flip)];
297
+ phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36;
94
- float32 e1 = m[H4(i + flip)] ^ neg_real;
298
+ *page_size = 0x1000000;
95
+ float32 e1 = m[H4(i + flip)] ^ negx_real;
299
+ } else {
96
float32 e4 = e2;
300
+ /* Section. */
97
- float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
301
+ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
98
+ float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag;
302
+ *page_size = 0x100000;
99
303
+ }
100
- d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
304
+ ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
101
- d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
305
+ xn = desc & (1 << 4);
102
+ d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst);
306
+ pxn = desc & 1;
103
+ d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst);
307
+ ns = extract32(desc, 19, 1);
104
}
308
+ } else {
105
clear_tail(d, opr_sz, simd_maxsz(desc));
309
+ if (cpu_isar_feature(aa32_pxn, cpu)) {
106
}
310
+ pxn = (desc >> 2) & 1;
107
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
311
+ }
108
uintptr_t opr_sz = simd_oprsz(desc);
312
+ ns = extract32(desc, 3, 1);
109
float64 *d = vd, *n = vn, *m = vm, *a = va;
313
+ /* Lookup l2 entry. */
110
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
314
+ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
111
- uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
315
+ desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
112
- uint64_t neg_real = flip ^ neg_imag;
316
+ mmu_idx, fi);
113
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
317
+ if (fi->type != ARMFault_None) {
114
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
318
+ goto do_fault;
115
+ uint32_t negf_real = flip ^ negf_imag;
319
+ }
116
+ float64 negx_real, negx_imag;
320
+ ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
117
uintptr_t i;
321
+ switch (desc & 3) {
118
322
+ case 0: /* Page translation fault. */
119
- /* Shift boolean to the sign bit so we can xor to negate. */
323
+ fi->type = ARMFault_Translation;
120
- neg_real <<= 63;
324
+ goto do_fault;
121
- neg_imag <<= 63;
325
+ case 1: /* 64k page. */
122
+ /* With AH=0, use negx; with AH=1 use negf. */
326
+ phys_addr = (desc & 0xffff0000) | (address & 0xffff);
123
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
327
+ xn = desc & (1 << 15);
124
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
328
+ *page_size = 0x10000;
125
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
329
+ break;
126
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
330
+ case 2: case 3: /* 4k page. */
127
331
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
128
for (i = 0; i < opr_sz / 8; i += 2) {
332
+ xn = desc & 1;
129
float64 e2 = n[i + flip];
333
+ *page_size = 0x1000;
130
- float64 e1 = m[i + flip] ^ neg_real;
334
+ break;
131
+ float64 e1 = m[i + flip] ^ negx_real;
335
+ default:
132
float64 e4 = e2;
336
+ /* Never happens, but compiler isn't smart enough to tell. */
133
- float64 e3 = m[i + 1 - flip] ^ neg_imag;
337
+ g_assert_not_reached();
134
+ float64 e3 = m[i + 1 - flip] ^ negx_imag;
338
+ }
135
339
+ }
136
- d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
340
+ if (domain_prot == 3) {
137
- d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
341
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
138
+ d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst);
342
+ } else {
139
+ d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst);
343
+ if (pxn && !regime_is_user(env, mmu_idx)) {
140
}
344
+ xn = 1;
141
clear_tail(d, opr_sz, simd_maxsz(desc));
345
+ }
142
}
346
+ if (xn && access_type == MMU_INST_FETCH) {
347
+ fi->type = ARMFault_Permission;
348
+ goto do_fault;
349
+ }
350
+
351
+ if (arm_feature(env, ARM_FEATURE_V6K) &&
352
+ (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
353
+ /* The simplified model uses AP[0] as an access control bit. */
354
+ if ((ap & 1) == 0) {
355
+ /* Access flag fault. */
356
+ fi->type = ARMFault_AccessFlag;
357
+ goto do_fault;
358
+ }
359
+ *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
360
+ } else {
361
+ *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
362
+ }
363
+ if (*prot && !xn) {
364
+ *prot |= PAGE_EXEC;
365
+ }
366
+ if (!(*prot & (1 << access_type))) {
367
+ /* Access permission fault. */
368
+ fi->type = ARMFault_Permission;
369
+ goto do_fault;
370
+ }
371
+ }
372
+ if (ns) {
373
+ /* The NS bit will (as required by the architecture) have no effect if
374
+ * the CPU doesn't support TZ or this is a non-secure translation
375
+ * regime, because the attribute will already be non-secure.
376
+ */
377
+ attrs->secure = false;
378
+ }
379
+ *phys_ptr = phys_addr;
380
+ return false;
381
+do_fault:
382
+ fi->domain = domain;
383
+ fi->level = level;
384
+ return true;
385
+}
386
+
387
/**
388
* get_phys_addr - get the physical address for this virtual address
389
*
390
--
143
--
391
2.25.1
144
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The negation step in FCMLA by index mustn't negate a NaN when
4
FPCR.AH is set. Use the same approach as vector FCMLA of
5
passing in FPCR.AH and using it to select whether to negate
6
by XOR or by the muladd negate_product flag.
7
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-19-richard.henderson@linaro.org
9
Message-id: 20250129013857.135256-27-richard.henderson@linaro.org
10
[PMM: Expanded commit message]
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
13
---
8
target/arm/ptw.h | 2 --
14
target/arm/tcg/translate-a64.c | 2 +-
9
target/arm/helper.c | 25 -------------------------
15
target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++--------------
10
target/arm/ptw.c | 25 +++++++++++++++++++++++++
16
2 files changed, 27 insertions(+), 19 deletions(-)
11
3 files changed, 25 insertions(+), 27 deletions(-)
12
17
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
18
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
14
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
20
--- a/target/arm/tcg/translate-a64.c
16
+++ b/target/arm/ptw.h
21
+++ b/target/arm/tcg/translate-a64.c
17
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
18
23
if (fp_access_check(s)) {
19
#ifndef CONFIG_USER_ONLY
24
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
20
25
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
21
-extern const uint8_t pamax_map[7];
26
- (a->idx << 2) | a->rot, fn);
22
-
27
+ (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
23
bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
28
}
24
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
29
return true;
25
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
30
}
26
diff --git a/target/arm/helper.c b/target/arm/helper.c
31
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
27
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/helper.c
33
--- a/target/arm/tcg/vec_helper.c
29
+++ b/target/arm/helper.c
34
+++ b/target/arm/tcg/vec_helper.c
30
@@ -XXX,XX +XXX,XX @@ bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
31
}
36
uintptr_t opr_sz = simd_oprsz(desc);
32
#endif /* !CONFIG_USER_ONLY */
37
float16 *d = vd, *n = vn, *m = vm, *a = va;
33
38
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
34
-/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
39
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
35
-const uint8_t pamax_map[] = {
40
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
36
- [0] = 32,
41
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
37
- [1] = 36,
42
- uint32_t neg_real = flip ^ neg_imag;
38
- [2] = 40,
43
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
39
- [3] = 42,
44
+ uint32_t negf_real = flip ^ negf_imag;
40
- [4] = 44,
45
intptr_t elements = opr_sz / sizeof(float16);
41
- [5] = 48,
46
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
42
- [6] = 52,
47
+ float16 negx_imag, negx_real;
43
-};
48
intptr_t i, j;
44
-
49
45
-/* The cpu-specific constant value of PAMax; also used by hw/arm/virt. */
50
- /* Shift boolean to the sign bit so we can xor to negate. */
46
-unsigned int arm_pamax(ARMCPU *cpu)
51
- neg_real <<= 15;
47
-{
52
- neg_imag <<= 15;
48
- unsigned int parange =
53
+ /* With AH=0, use negx; with AH=1 use negf. */
49
- FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
54
+ negx_real = (negf_real & ~fpcr_ah) << 15;
50
-
55
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
51
- /*
56
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
52
- * id_aa64mmfr0 is a read-only register so values outside of the
57
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
53
- * supported mappings can be considered an implementation error.
58
54
- */
59
for (i = 0; i < elements; i += eltspersegment) {
55
- assert(parange < ARRAY_SIZE(pamax_map));
60
float16 mr = m[H2(i + 2 * index + 0)];
56
- return pamax_map[parange];
61
float16 mi = m[H2(i + 2 * index + 1)];
57
-}
62
- float16 e1 = neg_real ^ (flip ? mi : mr);
58
-
63
- float16 e3 = neg_imag ^ (flip ? mr : mi);
59
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
64
+ float16 e1 = negx_real ^ (flip ? mi : mr);
60
{
65
+ float16 e3 = negx_imag ^ (flip ? mr : mi);
61
if (regime_has_2_ranges(mmu_idx)) {
66
62
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
67
for (j = i; j < i + eltspersegment; j += 2) {
63
index XXXXXXX..XXXXXXX 100644
68
float16 e2 = n[H2(j + flip)];
64
--- a/target/arm/ptw.c
69
float16 e4 = e2;
65
+++ b/target/arm/ptw.c
70
66
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
71
- d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
67
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
72
- d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
68
__attribute__((nonnull));
73
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
69
74
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
70
+/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
75
}
71
+static const uint8_t pamax_map[] = {
76
}
72
+ [0] = 32,
77
clear_tail(d, opr_sz, simd_maxsz(desc));
73
+ [1] = 36,
78
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
74
+ [2] = 40,
79
uintptr_t opr_sz = simd_oprsz(desc);
75
+ [3] = 42,
80
float32 *d = vd, *n = vn, *m = vm, *a = va;
76
+ [4] = 44,
81
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
77
+ [5] = 48,
82
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
78
+ [6] = 52,
83
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
79
+};
84
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
80
+
85
- uint32_t neg_real = flip ^ neg_imag;
81
+/* The cpu-specific constant value of PAMax; also used by hw/arm/virt. */
86
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
82
+unsigned int arm_pamax(ARMCPU *cpu)
87
+ uint32_t negf_real = flip ^ negf_imag;
83
+{
88
intptr_t elements = opr_sz / sizeof(float32);
84
+ unsigned int parange =
89
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
85
+ FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
90
+ float32 negx_imag, negx_real;
86
+
91
intptr_t i, j;
87
+ /*
92
88
+ * id_aa64mmfr0 is a read-only register so values outside of the
93
- /* Shift boolean to the sign bit so we can xor to negate. */
89
+ * supported mappings can be considered an implementation error.
94
- neg_real <<= 31;
90
+ */
95
- neg_imag <<= 31;
91
+ assert(parange < ARRAY_SIZE(pamax_map));
96
+ /* With AH=0, use negx; with AH=1 use negf. */
92
+ return pamax_map[parange];
97
+ negx_real = (negf_real & ~fpcr_ah) << 31;
93
+}
98
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
94
+
99
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
95
static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
100
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
96
{
101
97
return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
102
for (i = 0; i < elements; i += eltspersegment) {
103
float32 mr = m[H4(i + 2 * index + 0)];
104
float32 mi = m[H4(i + 2 * index + 1)];
105
- float32 e1 = neg_real ^ (flip ? mi : mr);
106
- float32 e3 = neg_imag ^ (flip ? mr : mi);
107
+ float32 e1 = negx_real ^ (flip ? mi : mr);
108
+ float32 e3 = negx_imag ^ (flip ? mr : mi);
109
110
for (j = i; j < i + eltspersegment; j += 2) {
111
float32 e2 = n[H4(j + flip)];
112
float32 e4 = e2;
113
114
- d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
115
- d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
116
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
117
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
118
}
119
}
120
clear_tail(d, opr_sz, simd_maxsz(desc));
98
--
121
--
99
2.25.1
122
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
These functions are used for both page table walking and for
3
The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is
4
deciding the format in which to deliver exception results.
4
set. Use the same approach as we did for A64 FCMLA of passing in
5
Since ptw.c is only present for system mode, put the functions
5
FPCR.AH and using it to select whether to negate by XOR or by the
6
into tlb_helper.c.
6
muladd negate_product flag.
7
7
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220604040607.269301-18-richard.henderson@linaro.org
9
Message-id: 20250129013857.135256-28-richard.henderson@linaro.org
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
12
---
13
target/arm/helper.c | 24 ------------------------
13
target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++-------------
14
target/arm/tlb_helper.c | 26 ++++++++++++++++++++++++++
14
target/arm/tcg/translate-sve.c | 2 +-
15
2 files changed, 26 insertions(+), 24 deletions(-)
15
2 files changed, 43 insertions(+), 28 deletions(-)
16
16
17
diff --git a/target/arm/helper.c b/target/arm/helper.c
17
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.c
19
--- a/target/arm/tcg/sve_helper.c
20
+++ b/target/arm/helper.c
20
+++ b/target/arm/tcg/sve_helper.c
21
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
21
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
22
}
22
void *vg, float_status *status, uint32_t desc)
23
#endif /* !CONFIG_USER_ONLY */
24
25
-/* Return true if the translation regime is using LPAE format page tables */
26
-bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
27
-{
28
- int el = regime_el(env, mmu_idx);
29
- if (el == 2 || arm_el_is_aa64(env, el)) {
30
- return true;
31
- }
32
- if (arm_feature(env, ARM_FEATURE_LPAE)
33
- && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) {
34
- return true;
35
- }
36
- return false;
37
-}
38
-
39
-/* Returns true if the stage 1 translation regime is using LPAE format page
40
- * tables. Used when raising alignment exceptions, whose FSR changes depending
41
- * on whether the long or short descriptor format is in use. */
42
-bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
43
-{
44
- mmu_idx = stage_1_mmu_idx(mmu_idx);
45
-
46
- return regime_using_lpae_format(env, mmu_idx);
47
-}
48
-
49
#ifndef CONFIG_USER_ONLY
50
bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
51
{
23
{
52
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
24
intptr_t j, i = simd_oprsz(desc);
25
- unsigned rot = simd_data(desc);
26
- bool flip = rot & 1;
27
- float16 neg_imag, neg_real;
28
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
29
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
30
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
31
+ uint32_t negf_real = flip ^ negf_imag;
32
+ float16 negx_imag, negx_real;
33
uint64_t *g = vg;
34
35
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
36
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
37
+ /* With AH=0, use negx; with AH=1 use negf. */
38
+ negx_real = (negf_real & ~fpcr_ah) << 15;
39
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
40
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
41
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
42
43
do {
44
uint64_t pg = g[(i - 1) >> 6];
45
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
46
mi = *(float16 *)(vm + H1_2(j));
47
48
e2 = (flip ? ni : nr);
49
- e1 = (flip ? mi : mr) ^ neg_real;
50
+ e1 = (flip ? mi : mr) ^ negx_real;
51
e4 = e2;
52
- e3 = (flip ? mr : mi) ^ neg_imag;
53
+ e3 = (flip ? mr : mi) ^ negx_imag;
54
55
if (likely((pg >> (i & 63)) & 1)) {
56
d = *(float16 *)(va + H1_2(i));
57
- d = float16_muladd(e2, e1, d, 0, status);
58
+ d = float16_muladd(e2, e1, d, negf_real, status);
59
*(float16 *)(vd + H1_2(i)) = d;
60
}
61
if (likely((pg >> (j & 63)) & 1)) {
62
d = *(float16 *)(va + H1_2(j));
63
- d = float16_muladd(e4, e3, d, 0, status);
64
+ d = float16_muladd(e4, e3, d, negf_imag, status);
65
*(float16 *)(vd + H1_2(j)) = d;
66
}
67
} while (i & 63);
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
69
void *vg, float_status *status, uint32_t desc)
70
{
71
intptr_t j, i = simd_oprsz(desc);
72
- unsigned rot = simd_data(desc);
73
- bool flip = rot & 1;
74
- float32 neg_imag, neg_real;
75
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
76
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
77
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
78
+ uint32_t negf_real = flip ^ negf_imag;
79
+ float32 negx_imag, negx_real;
80
uint64_t *g = vg;
81
82
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
83
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
84
+ /* With AH=0, use negx; with AH=1 use negf. */
85
+ negx_real = (negf_real & ~fpcr_ah) << 31;
86
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
87
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
88
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
89
90
do {
91
uint64_t pg = g[(i - 1) >> 6];
92
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
93
mi = *(float32 *)(vm + H1_2(j));
94
95
e2 = (flip ? ni : nr);
96
- e1 = (flip ? mi : mr) ^ neg_real;
97
+ e1 = (flip ? mi : mr) ^ negx_real;
98
e4 = e2;
99
- e3 = (flip ? mr : mi) ^ neg_imag;
100
+ e3 = (flip ? mr : mi) ^ negx_imag;
101
102
if (likely((pg >> (i & 63)) & 1)) {
103
d = *(float32 *)(va + H1_2(i));
104
- d = float32_muladd(e2, e1, d, 0, status);
105
+ d = float32_muladd(e2, e1, d, negf_real, status);
106
*(float32 *)(vd + H1_2(i)) = d;
107
}
108
if (likely((pg >> (j & 63)) & 1)) {
109
d = *(float32 *)(va + H1_2(j));
110
- d = float32_muladd(e4, e3, d, 0, status);
111
+ d = float32_muladd(e4, e3, d, negf_imag, status);
112
*(float32 *)(vd + H1_2(j)) = d;
113
}
114
} while (i & 63);
115
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
116
void *vg, float_status *status, uint32_t desc)
117
{
118
intptr_t j, i = simd_oprsz(desc);
119
- unsigned rot = simd_data(desc);
120
- bool flip = rot & 1;
121
- float64 neg_imag, neg_real;
122
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
123
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
124
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
125
+ uint32_t negf_real = flip ^ negf_imag;
126
+ float64 negx_imag, negx_real;
127
uint64_t *g = vg;
128
129
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
130
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
131
+ /* With AH=0, use negx; with AH=1 use negf. */
132
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
133
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
134
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
135
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
136
137
do {
138
uint64_t pg = g[(i - 1) >> 6];
139
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
140
mi = *(float64 *)(vm + H1_2(j));
141
142
e2 = (flip ? ni : nr);
143
- e1 = (flip ? mi : mr) ^ neg_real;
144
+ e1 = (flip ? mi : mr) ^ negx_real;
145
e4 = e2;
146
- e3 = (flip ? mr : mi) ^ neg_imag;
147
+ e3 = (flip ? mr : mi) ^ negx_imag;
148
149
if (likely((pg >> (i & 63)) & 1)) {
150
d = *(float64 *)(va + H1_2(i));
151
- d = float64_muladd(e2, e1, d, 0, status);
152
+ d = float64_muladd(e2, e1, d, negf_real, status);
153
*(float64 *)(vd + H1_2(i)) = d;
154
}
155
if (likely((pg >> (j & 63)) & 1)) {
156
d = *(float64 *)(va + H1_2(j));
157
- d = float64_muladd(e4, e3, d, 0, status);
158
+ d = float64_muladd(e4, e3, d, negf_imag, status);
159
*(float64 *)(vd + H1_2(j)) = d;
160
}
161
} while (i & 63);
162
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
53
index XXXXXXX..XXXXXXX 100644
163
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/tlb_helper.c
164
--- a/target/arm/tcg/translate-sve.c
55
+++ b/target/arm/tlb_helper.c
165
+++ b/target/arm/tcg/translate-sve.c
56
@@ -XXX,XX +XXX,XX @@
166
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
57
#include "exec/exec-all.h"
167
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
58
#include "exec/helper-proto.h"
168
};
59
169
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
60
+
170
- a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
61
+/* Return true if the translation regime is using LPAE format page tables */
171
+ a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
62
+bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
172
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
63
+{
173
64
+ int el = regime_el(env, mmu_idx);
174
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
65
+ if (el == 2 || arm_el_is_aa64(env, el)) {
66
+ return true;
67
+ }
68
+ if (arm_feature(env, ARM_FEATURE_LPAE)
69
+ && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) {
70
+ return true;
71
+ }
72
+ return false;
73
+}
74
+
75
+/*
76
+ * Returns true if the stage 1 translation regime is using LPAE format page
77
+ * tables. Used when raising alignment exceptions, whose FSR changes depending
78
+ * on whether the long or short descriptor format is in use.
79
+ */
80
+bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
81
+{
82
+ mmu_idx = stage_1_mmu_idx(mmu_idx);
83
+ return regime_using_lpae_format(env, mmu_idx);
84
+}
85
+
86
static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
87
unsigned int target_el,
88
bool same_el, bool ea,
89
--
175
--
90
2.25.1
176
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is the final user of get_phys_addr_pmsav7_default
3
Handle FPCR.AH's requirement to not negate the sign of a NaN
4
within helper.c, so make it static within ptw.c.
4
in FMLSL by element and vector, using the usual trick of
5
negating by XOR when AH=0 and by muladd flags when AH=1.
6
7
Since we have the CPUARMState* in the helper anyway, we can
8
look directly at env->vfp.fpcr and don't need toa pass in the
9
FPCR.AH value via the SIMD data word.
5
10
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220604040607.269301-10-richard.henderson@linaro.org
12
Message-id: 20250129013857.135256-31-richard.henderson@linaro.org
13
[PMM: commit message tweaked]
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
16
---
11
target/arm/ptw.h | 3 -
17
target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++-------------
12
target/arm/helper.c | 136 -----------------------------------------
18
1 file changed, 46 insertions(+), 25 deletions(-)
13
target/arm/ptw.c | 146 +++++++++++++++++++++++++++++++++++++++++++-
14
3 files changed, 143 insertions(+), 142 deletions(-)
15
19
16
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
17
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/ptw.h
22
--- a/target/arm/tcg/vec_helper.c
19
+++ b/target/arm/ptw.h
23
+++ b/target/arm/tcg/vec_helper.c
20
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
24
@@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
21
bool m_is_ppb_region(CPUARMState *env, uint32_t address);
25
*/
22
bool m_is_system_region(CPUARMState *env, uint32_t address);
26
23
27
static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
24
-void get_phys_addr_pmsav7_default(CPUARMState *env,
28
- uint32_t desc, bool fz16)
25
- ARMMMUIdx mmu_idx,
29
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
26
- int32_t address, int *prot);
30
{
27
bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user);
31
intptr_t i, oprsz = simd_oprsz(desc);
28
32
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
29
bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
33
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
30
diff --git a/target/arm/helper.c b/target/arm/helper.c
34
int is_q = oprsz == 16;
31
index XXXXXXX..XXXXXXX 100644
35
uint64_t n_4, m_4;
32
--- a/target/arm/helper.c
36
33
+++ b/target/arm/helper.c
37
- /* Pre-load all of the f16 data, avoiding overlap issues. */
34
@@ -XXX,XX +XXX,XX @@ void v8m_security_lookup(CPUARMState *env, uint32_t address,
38
- n_4 = load4_f16(vn, is_q, is_2);
35
}
39
+ /*
36
}
40
+ * Pre-load all of the f16 data, avoiding overlap issues.
37
41
+ * Negate all inputs for AH=0 FMLSL at once.
38
-bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
42
+ */
39
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
43
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
40
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
44
m_4 = load4_f16(vm, is_q, is_2);
41
- int *prot, bool *is_subpage,
45
42
- ARMMMUFaultInfo *fi, uint32_t *mregion)
46
- /* Negate all inputs for FMLSL at once. */
43
-{
47
- if (is_s) {
44
- /* Perform a PMSAv8 MPU lookup (without also doing the SAU check
48
- n_4 ^= 0x8000800080008000ull;
45
- * that a full phys-to-virt translation does).
46
- * mregion is (if not NULL) set to the region number which matched,
47
- * or -1 if no region number is returned (MPU off, address did not
48
- * hit a region, address hit in multiple regions).
49
- * We set is_subpage to true if the region hit doesn't cover the
50
- * entire TARGET_PAGE the address is within.
51
- */
52
- ARMCPU *cpu = env_archcpu(env);
53
- bool is_user = regime_is_user(env, mmu_idx);
54
- uint32_t secure = regime_is_secure(env, mmu_idx);
55
- int n;
56
- int matchregion = -1;
57
- bool hit = false;
58
- uint32_t addr_page_base = address & TARGET_PAGE_MASK;
59
- uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
60
-
61
- *is_subpage = false;
62
- *phys_ptr = address;
63
- *prot = 0;
64
- if (mregion) {
65
- *mregion = -1;
66
- }
49
- }
67
-
50
-
68
- /* Unlike the ARM ARM pseudocode, we don't need to check whether this
51
for (i = 0; i < oprsz / 4; i++) {
69
- * was an exception vector read from the vector table (which is always
52
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
70
- * done using the default system address map), because those accesses
53
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
71
- * are done in arm_v7m_load_vector(), which always does a direct
54
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
72
- * read using address_space_ldl(), rather than going via this function.
55
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
73
- */
56
}
74
- if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */
57
clear_tail(d, oprsz, simd_maxsz(desc));
75
- hit = true;
58
}
76
- } else if (m_is_ppb_region(env, address)) {
59
@@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
77
- hit = true;
60
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
78
- } else {
61
CPUARMState *env, uint32_t desc)
79
- if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
62
{
80
- hit = true;
63
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
81
- }
64
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
65
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
66
+
67
+ do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
68
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
69
}
70
71
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
72
CPUARMState *env, uint32_t desc)
73
{
74
- do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
75
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
76
+ uint64_t negx = 0;
77
+ int negf = 0;
78
+
79
+ if (is_s) {
80
+ if (env->vfp.fpcr & FPCR_AH) {
81
+ negf = float_muladd_negate_product;
82
+ } else {
83
+ negx = 0x8000800080008000ull;
84
+ }
85
+ }
86
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
87
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
88
}
89
90
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
91
}
92
93
static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
94
- uint32_t desc, bool fz16)
95
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
96
{
97
intptr_t i, oprsz = simd_oprsz(desc);
98
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
99
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
100
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
101
int is_q = oprsz == 16;
102
uint64_t n_4;
103
float32 m_1;
104
105
- /* Pre-load all of the f16 data, avoiding overlap issues. */
106
- n_4 = load4_f16(vn, is_q, is_2);
82
-
107
-
83
- for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
108
- /* Negate all inputs for FMLSL at once. */
84
- /* region search */
109
- if (is_s) {
85
- /* Note that the base address is bits [31:5] from the register
110
- n_4 ^= 0x8000800080008000ull;
86
- * with bits [4:0] all zeroes, but the limit address is bits
87
- * [31:5] from the register with bits [4:0] all ones.
88
- */
89
- uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f;
90
- uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f;
91
-
92
- if (!(env->pmsav8.rlar[secure][n] & 0x1)) {
93
- /* Region disabled */
94
- continue;
95
- }
96
-
97
- if (address < base || address > limit) {
98
- /*
99
- * Address not in this region. We must check whether the
100
- * region covers addresses in the same page as our address.
101
- * In that case we must not report a size that covers the
102
- * whole page for a subsequent hit against a different MPU
103
- * region or the background region, because it would result in
104
- * incorrect TLB hits for subsequent accesses to addresses that
105
- * are in this MPU region.
106
- */
107
- if (limit >= base &&
108
- ranges_overlap(base, limit - base + 1,
109
- addr_page_base,
110
- TARGET_PAGE_SIZE)) {
111
- *is_subpage = true;
112
- }
113
- continue;
114
- }
115
-
116
- if (base > addr_page_base || limit < addr_page_limit) {
117
- *is_subpage = true;
118
- }
119
-
120
- if (matchregion != -1) {
121
- /* Multiple regions match -- always a failure (unlike
122
- * PMSAv7 where highest-numbered-region wins)
123
- */
124
- fi->type = ARMFault_Permission;
125
- fi->level = 1;
126
- return true;
127
- }
128
-
129
- matchregion = n;
130
- hit = true;
131
- }
132
- }
111
- }
133
-
112
-
134
- if (!hit) {
113
+ /*
135
- /* background fault */
114
+ * Pre-load all of the f16 data, avoiding overlap issues.
136
- fi->type = ARMFault_Background;
115
+ * Negate all inputs for AH=0 FMLSL at once.
137
- return true;
116
+ */
138
- }
117
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
139
-
118
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
140
- if (matchregion == -1) {
119
141
- /* hit using the background region */
120
for (i = 0; i < oprsz / 4; i++) {
142
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
121
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
143
- } else {
122
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
144
- uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2);
123
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
145
- uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1);
124
}
146
- bool pxn = false;
125
clear_tail(d, oprsz, simd_maxsz(desc));
147
-
148
- if (arm_feature(env, ARM_FEATURE_V8_1M)) {
149
- pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1);
150
- }
151
-
152
- if (m_is_system_region(env, address)) {
153
- /* System space is always execute never */
154
- xn = 1;
155
- }
156
-
157
- *prot = simple_ap_to_rw_prot(env, mmu_idx, ap);
158
- if (*prot && !xn && !(pxn && !is_user)) {
159
- *prot |= PAGE_EXEC;
160
- }
161
- /* We don't need to look the attribute up in the MAIR0/MAIR1
162
- * registers because that only tells us about cacheability.
163
- */
164
- if (mregion) {
165
- *mregion = matchregion;
166
- }
167
- }
168
-
169
- fi->type = ARMFault_Permission;
170
- fi->level = 1;
171
- return !(*prot & (1 << access_type));
172
-}
173
-
174
/* Combine either inner or outer cacheability attributes for normal
175
* memory, according to table D4-42 and pseudocode procedure
176
* CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
177
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
178
index XXXXXXX..XXXXXXX 100644
179
--- a/target/arm/ptw.c
180
+++ b/target/arm/ptw.c
181
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
182
return false;
183
}
126
}
184
127
@@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
185
-void get_phys_addr_pmsav7_default(CPUARMState *env,
128
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
186
- ARMMMUIdx mmu_idx,
129
CPUARMState *env, uint32_t desc)
187
- int32_t address, int *prot)
188
+static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx,
189
+ int32_t address, int *prot)
190
{
130
{
191
if (!arm_feature(env, ARM_FEATURE_M)) {
131
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
192
*prot = PAGE_READ | PAGE_WRITE;
132
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
193
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
133
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
194
return !(*prot & (1 << access_type));
134
+
135
+ do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
136
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
195
}
137
}
196
138
197
+bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
139
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
198
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
140
CPUARMState *env, uint32_t desc)
199
+ hwaddr *phys_ptr, MemTxAttrs *txattrs,
141
{
200
+ int *prot, bool *is_subpage,
142
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
201
+ ARMMMUFaultInfo *fi, uint32_t *mregion)
143
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
202
+{
144
+ uint64_t negx = 0;
203
+ /*
145
+ int negf = 0;
204
+ * Perform a PMSAv8 MPU lookup (without also doing the SAU check
205
+ * that a full phys-to-virt translation does).
206
+ * mregion is (if not NULL) set to the region number which matched,
207
+ * or -1 if no region number is returned (MPU off, address did not
208
+ * hit a region, address hit in multiple regions).
209
+ * We set is_subpage to true if the region hit doesn't cover the
210
+ * entire TARGET_PAGE the address is within.
211
+ */
212
+ ARMCPU *cpu = env_archcpu(env);
213
+ bool is_user = regime_is_user(env, mmu_idx);
214
+ uint32_t secure = regime_is_secure(env, mmu_idx);
215
+ int n;
216
+ int matchregion = -1;
217
+ bool hit = false;
218
+ uint32_t addr_page_base = address & TARGET_PAGE_MASK;
219
+ uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
220
+
146
+
221
+ *is_subpage = false;
147
+ if (is_s) {
222
+ *phys_ptr = address;
148
+ if (env->vfp.fpcr & FPCR_AH) {
223
+ *prot = 0;
149
+ negf = float_muladd_negate_product;
224
+ if (mregion) {
150
+ } else {
225
+ *mregion = -1;
151
+ negx = 0x8000800080008000ull;
226
+ }
227
+
228
+ /*
229
+ * Unlike the ARM ARM pseudocode, we don't need to check whether this
230
+ * was an exception vector read from the vector table (which is always
231
+ * done using the default system address map), because those accesses
232
+ * are done in arm_v7m_load_vector(), which always does a direct
233
+ * read using address_space_ldl(), rather than going via this function.
234
+ */
235
+ if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */
236
+ hit = true;
237
+ } else if (m_is_ppb_region(env, address)) {
238
+ hit = true;
239
+ } else {
240
+ if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
241
+ hit = true;
242
+ }
243
+
244
+ for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
245
+ /* region search */
246
+ /*
247
+ * Note that the base address is bits [31:5] from the register
248
+ * with bits [4:0] all zeroes, but the limit address is bits
249
+ * [31:5] from the register with bits [4:0] all ones.
250
+ */
251
+ uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f;
252
+ uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f;
253
+
254
+ if (!(env->pmsav8.rlar[secure][n] & 0x1)) {
255
+ /* Region disabled */
256
+ continue;
257
+ }
258
+
259
+ if (address < base || address > limit) {
260
+ /*
261
+ * Address not in this region. We must check whether the
262
+ * region covers addresses in the same page as our address.
263
+ * In that case we must not report a size that covers the
264
+ * whole page for a subsequent hit against a different MPU
265
+ * region or the background region, because it would result in
266
+ * incorrect TLB hits for subsequent accesses to addresses that
267
+ * are in this MPU region.
268
+ */
269
+ if (limit >= base &&
270
+ ranges_overlap(base, limit - base + 1,
271
+ addr_page_base,
272
+ TARGET_PAGE_SIZE)) {
273
+ *is_subpage = true;
274
+ }
275
+ continue;
276
+ }
277
+
278
+ if (base > addr_page_base || limit < addr_page_limit) {
279
+ *is_subpage = true;
280
+ }
281
+
282
+ if (matchregion != -1) {
283
+ /*
284
+ * Multiple regions match -- always a failure (unlike
285
+ * PMSAv7 where highest-numbered-region wins)
286
+ */
287
+ fi->type = ARMFault_Permission;
288
+ fi->level = 1;
289
+ return true;
290
+ }
291
+
292
+ matchregion = n;
293
+ hit = true;
294
+ }
152
+ }
295
+ }
153
+ }
296
+
154
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
297
+ if (!hit) {
155
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
298
+ /* background fault */
156
}
299
+ fi->type = ARMFault_Background;
157
300
+ return true;
301
+ }
302
+
303
+ if (matchregion == -1) {
304
+ /* hit using the background region */
305
+ get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
306
+ } else {
307
+ uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2);
308
+ uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1);
309
+ bool pxn = false;
310
+
311
+ if (arm_feature(env, ARM_FEATURE_V8_1M)) {
312
+ pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1);
313
+ }
314
+
315
+ if (m_is_system_region(env, address)) {
316
+ /* System space is always execute never */
317
+ xn = 1;
318
+ }
319
+
320
+ *prot = simple_ap_to_rw_prot(env, mmu_idx, ap);
321
+ if (*prot && !xn && !(pxn && !is_user)) {
322
+ *prot |= PAGE_EXEC;
323
+ }
324
+ /*
325
+ * We don't need to look the attribute up in the MAIR0/MAIR1
326
+ * registers because that only tells us about cacheability.
327
+ */
328
+ if (mregion) {
329
+ *mregion = matchregion;
330
+ }
331
+ }
332
+
333
+ fi->type = ARMFault_Permission;
334
+ fi->level = 1;
335
+ return !(*prot & (1 << access_type));
336
+}
337
+
338
static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
339
MMUAccessType access_type, ARMMMUIdx mmu_idx,
340
hwaddr *phys_ptr, MemTxAttrs *txattrs,
341
--
158
--
342
2.25.1
159
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE
4
FMLSL (indexed), using the usual trick of negating by XOR when AH=0
5
and by muladd flags when AH=1.
6
7
Since we have the CPUARMState* in the helper anyway, we can
8
look directly at env->vfp.fpcr and don't need to pass in the
9
FPCR.AH value via the SIMD data word.
10
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-27-richard.henderson@linaro.org
12
Message-id: 20250129013857.135256-32-richard.henderson@linaro.org
13
[PMM: commit message tweaked]
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
16
---
8
target/arm/helper.c | 26 --------------------------
17
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
9
target/arm/ptw.c | 24 ++++++++++++++++++++++++
18
1 file changed, 12 insertions(+), 3 deletions(-)
10
2 files changed, 24 insertions(+), 26 deletions(-)
11
19
12
diff --git a/target/arm/helper.c b/target/arm/helper.c
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
13
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/helper.c
22
--- a/target/arm/tcg/vec_helper.c
15
+++ b/target/arm/helper.c
23
+++ b/target/arm/tcg/vec_helper.c
16
@@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
17
};
25
CPUARMState *env, uint32_t desc)
18
}
26
{
19
27
intptr_t i, j, oprsz = simd_oprsz(desc);
20
-#ifndef CONFIG_USER_ONLY
28
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
21
-hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
29
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
22
- MemTxAttrs *attrs)
30
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
23
-{
31
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
24
- ARMCPU *cpu = ARM_CPU(cs);
32
float_status *status = &env->vfp.fp_status_a64;
25
- CPUARMState *env = &cpu->env;
33
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
26
- hwaddr phys_addr;
34
+ int negx = 0, negf = 0;
27
- target_ulong page_size;
35
+
28
- int prot;
36
+ if (is_s) {
29
- bool ret;
37
+ if (env->vfp.fpcr & FPCR_AH) {
30
- ARMMMUFaultInfo fi = {};
38
+ negf = float_muladd_negate_product;
31
- ARMMMUIdx mmu_idx = arm_mmu_idx(env);
39
+ } else {
32
- ARMCacheAttrs cacheattrs = {};
40
+ negx = 0x8000;
33
-
41
+ }
34
- *attrs = (MemTxAttrs) {};
42
+ }
35
-
43
36
- ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &phys_addr,
44
for (i = 0; i < oprsz; i += 16) {
37
- attrs, &prot, &page_size, &fi, &cacheattrs);
45
float16 mm_16 = *(float16 *)(vm + i + idx);
38
-
46
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
39
- if (ret) {
47
40
- return -1;
48
for (j = 0; j < 16; j += sizeof(float32)) {
41
- }
49
- float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
42
- return phys_addr;
50
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx;
43
-}
51
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
44
-#endif
52
float32 aa = *(float32 *)(va + H1_4(i + j));
45
-
53
46
/* Note that signed overflow is undefined in C. The following routines are
54
*(float32 *)(vd + H1_4(i + j)) =
47
careful to use unsigned types where modulo arithmetic is required.
55
- float32_muladd(nn, mm, aa, 0, status);
48
Failure to do so _will_ break on newer gcc. */
56
+ float32_muladd(nn, mm, aa, negf, status);
49
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
57
}
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/ptw.c
52
+++ b/target/arm/ptw.c
53
@@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
54
phys_ptr, prot, page_size, fi);
55
}
58
}
56
}
59
}
57
+
58
+hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
59
+ MemTxAttrs *attrs)
60
+{
61
+ ARMCPU *cpu = ARM_CPU(cs);
62
+ CPUARMState *env = &cpu->env;
63
+ hwaddr phys_addr;
64
+ target_ulong page_size;
65
+ int prot;
66
+ bool ret;
67
+ ARMMMUFaultInfo fi = {};
68
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
69
+ ARMCacheAttrs cacheattrs = {};
70
+
71
+ *attrs = (MemTxAttrs) {};
72
+
73
+ ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &phys_addr,
74
+ attrs, &prot, &page_size, &fi, &cacheattrs);
75
+
76
+ if (ret) {
77
+ return -1;
78
+ }
79
+ return phys_addr;
80
+}
81
--
60
--
82
2.25.1
61
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Move the decl from ptw.h to internals.h. Provide an inline
3
Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE
4
version for user-only, just as we do for arm_stage1_mmu_idx.
4
FMLSL (indexed), using the usual trick of negating by XOR when AH=0
5
Move an endif down to make the definition in helper.c be
5
and by muladd flags when AH=1.
6
system only.
6
7
Since we have the CPUARMState* in the helper anyway, we can
8
look directly at env->vfp.fpcr and don't need to pass in the
9
FPCR.AH value via the SIMD data word.
7
10
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220604040607.269301-2-richard.henderson@linaro.org
12
Message-id: 20250129013857.135256-33-richard.henderson@linaro.org
13
[PMM: tweaked commit message]
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
16
---
13
target/arm/internals.h | 5 +++++
17
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
14
target/arm/helper.c | 5 ++---
18
1 file changed, 12 insertions(+), 3 deletions(-)
15
2 files changed, 7 insertions(+), 3 deletions(-)
16
19
17
diff --git a/target/arm/internals.h b/target/arm/internals.h
20
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
18
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/internals.h
22
--- a/target/arm/tcg/vec_helper.c
20
+++ b/target/arm/internals.h
23
+++ b/target/arm/tcg/vec_helper.c
21
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env);
24
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
22
* Return the ARMMMUIdx for the stage1 traversal for the current regime.
25
CPUARMState *env, uint32_t desc)
23
*/
24
#ifdef CONFIG_USER_ONLY
25
+static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
26
+{
27
+ return ARMMMUIdx_Stage1_E0;
28
+}
29
static inline ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
30
{
26
{
31
return ARMMMUIdx_Stage1_E0;
27
intptr_t i, oprsz = simd_oprsz(desc);
32
}
28
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
33
#else
29
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
34
+ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx);
30
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
35
ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env);
31
float_status *status = &env->vfp.fp_status_a64;
36
#endif
32
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
37
33
+ int negx = 0, negf = 0;
38
diff --git a/target/arm/helper.c b/target/arm/helper.c
34
+
39
index XXXXXXX..XXXXXXX 100644
35
+ if (is_s) {
40
--- a/target/arm/helper.c
36
+ if (env->vfp.fpcr & FPCR_AH) {
41
+++ b/target/arm/helper.c
37
+ negf = float_muladd_negate_product;
42
@@ -XXX,XX +XXX,XX @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
38
+ } else {
39
+ negx = 0x8000;
40
+ }
41
+ }
42
43
for (i = 0; i < oprsz; i += sizeof(float32)) {
44
- float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
45
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx;
46
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
47
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
48
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
49
float32 aa = *(float32 *)(va + H1_4(i));
50
51
- *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
52
+ *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status);
43
}
53
}
44
}
54
}
45
55
46
-#endif /* !CONFIG_USER_ONLY */
47
-
48
/* Convert a possible stage1+2 MMU index into the appropriate
49
* stage 1 MMU index
50
*/
51
-static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
52
+ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
53
{
54
switch (mmu_idx) {
55
case ARMMMUIdx_SE10_0:
56
@@ -XXX,XX +XXX,XX @@ static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
57
return mmu_idx;
58
}
59
}
60
+#endif /* !CONFIG_USER_ONLY */
61
62
/* Return true if the translation regime is using LPAE format page tables */
63
static inline bool regime_using_lpae_format(CPUARMState *env,
64
--
56
--
65
2.25.1
57
2.34.1
diff view generated by jsdifflib
1
The architectural feature RASv1p1 introduces the following new
1
Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we
2
features:
2
can enable FEAT_AFP for '-cpu max', and document that we support it.
3
* new registers ERXPFGCDN_EL1, ERXPFGCTL_EL1 and ERXPFGF_EL1
4
* new bits in the fine-grained trap registers that control traps
5
for these new registers
6
* new trap bits HCR_EL2.FIEN and SCR_EL3.FIEN that control traps
7
for ERXPFGCDN_EL1, ERXPFGCTL_EL1, ERXPFGF_EL1
8
* a larger number of the ERXMISC<n>_EL1 registers
9
* the format of ERR<n>STATUS registers changes
10
11
The architecture permits that if ERRIDR_EL1.NUM is 0 (as it is for
12
QEMU) then all these new registers may UNDEF, and the HCR_EL2.FIEN
13
and SCR_EL3.FIEN bits may be RES0. We don't have any ERR<n>STATUS
14
registers (again, because ERRIDR_EL1.NUM is 0). QEMU does not yet
15
implement the fine-grained-trap extension. So there is nothing we
16
need to implement to be compliant with the feature spec. Make the
17
'max' CPU report the feature in its ID registers, and document it.
18
3
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
Message-id: 20220531114258.855804-1-peter.maydell@linaro.org
22
---
6
---
23
docs/system/arm/emulation.rst | 1 +
7
docs/system/arm/emulation.rst | 1 +
24
target/arm/cpu64.c | 1 +
8
target/arm/tcg/cpu64.c | 1 +
25
2 files changed, 2 insertions(+)
9
2 files changed, 2 insertions(+)
26
10
27
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
11
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
28
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/system/arm/emulation.rst
13
--- a/docs/system/arm/emulation.rst
30
+++ b/docs/system/arm/emulation.rst
14
+++ b/docs/system/arm/emulation.rst
31
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
15
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
32
- FEAT_PMUv3p1 (PMU Extensions v3.1)
16
- FEAT_AA64EL3 (Support for AArch64 at EL3)
33
- FEAT_PMUv3p4 (PMU Extensions v3.4)
17
- FEAT_AdvSIMD (Advanced SIMD Extension)
34
- FEAT_RAS (Reliability, availability, and serviceability)
18
- FEAT_AES (AESD and AESE instructions)
35
+- FEAT_RASv1p1 (RAS Extension v1.1)
19
+- FEAT_AFP (Alternate floating-point behavior)
36
- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
20
- FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
37
- FEAT_RNG (Random number generator)
21
- FEAT_ASID16 (16 bit ASID)
38
- FEAT_S2FWB (Stage 2 forced Write-Back)
22
- FEAT_BBM at level 2 (Translation table break-before-make levels)
39
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
23
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
40
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/cpu64.c
25
--- a/target/arm/tcg/cpu64.c
42
+++ b/target/arm/cpu64.c
26
+++ b/target/arm/tcg/cpu64.c
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
27
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
44
* we do for EL2 with the virtualization=on property.
28
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
45
*/
29
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
46
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
30
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
47
+ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 1); /* FEAT_RASv1p1 */
31
+ t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */
48
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
32
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
49
cpu->isar.id_aa64pfr1 = t;
33
t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
50
34
cpu->isar.id_aa64mmfr1 = t;
51
--
35
--
52
2.25.1
36
2.34.1
diff view generated by jsdifflib
1
From: Frederic Konrad <fkonrad@amd.com>
1
FEAT_RPRES implements an "increased precision" variant of the single
2
precision FRECPE and FRSQRTE instructions from an 8 bit to a 12
3
bit mantissa. This applies only when FPCR.AH == 1. Note that the
4
halfprec and double versions of these insns retain the 8 bit
5
precision regardless.
2
6
3
When the display port was initially implemented, the device
7
In this commit we add all the plumbing to make these instructions
4
driver wasn't using interrupts. Now that the display port driver
8
call a new helper function when the increased-precision is in
5
waits for vblank interrupt it has been noticed that the irq mapping
9
effect. In the following commit we will provide the actual change
6
is wrong. So use the value from the linux device tree and the
10
in behaviour in the helpers.
7
ultrascale+ reference manual.
8
11
9
Signed-off-by: Frederic Konrad <fkonrad@amd.com>
10
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
11
Acked-by: Alistair Francis <alistair.francis@wdc.com>
12
Message-id: 20220601172353.3220232-5-fkonrad@xilinx.com
13
[PMM: refold lines in commit message]
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
14
---
16
hw/arm/xlnx-zynqmp.c | 4 ++--
15
target/arm/cpu-features.h | 5 +++++
17
1 file changed, 2 insertions(+), 2 deletions(-)
16
target/arm/helper.h | 4 ++++
17
target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++----
18
target/arm/tcg/translate-sve.c | 16 ++++++++++++++--
19
target/arm/tcg/vec_helper.c | 2 ++
20
target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++--
21
6 files changed, 85 insertions(+), 8 deletions(-)
18
22
19
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
23
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
20
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/arm/xlnx-zynqmp.c
25
--- a/target/arm/cpu-features.h
22
+++ b/hw/arm/xlnx-zynqmp.c
26
+++ b/target/arm/cpu-features.h
23
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
24
#define SERDES_SIZE 0x20000
28
return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
25
29
}
26
#define DP_ADDR 0xfd4a0000
30
27
-#define DP_IRQ 113
31
+static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
28
+#define DP_IRQ 0x77
32
+{
29
33
+ return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES);
30
#define DPDMA_ADDR 0xfd4c0000
34
+}
31
-#define DPDMA_IRQ 116
35
+
32
+#define DPDMA_IRQ 0x7a
36
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
33
37
{
34
#define APU_ADDR 0xfd5c0000
38
/* We always set the AdvSIMD and FP fields identically. */
35
#define APU_IRQ 153
39
diff --git a/target/arm/helper.h b/target/arm/helper.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/helper.h
42
+++ b/target/arm/helper.h
43
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst)
44
45
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
46
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
47
+DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
48
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
49
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
50
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
51
+DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
52
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
53
DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
54
DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
55
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
56
57
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
58
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
59
+DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
60
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
61
62
DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
63
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
64
+DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
65
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
66
67
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
68
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/tcg/translate-a64.c
71
+++ b/target/arm/tcg/translate-a64.c
72
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = {
73
gen_helper_recpe_f32,
74
gen_helper_recpe_f64,
75
};
76
-TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1)
77
+static const FPScalar1 f_scalar_frecpe_rpres = {
78
+ gen_helper_recpe_f16,
79
+ gen_helper_recpe_rpres_f32,
80
+ gen_helper_recpe_f64,
81
+};
82
+TRANS(FRECPE_s, do_fp1_scalar_ah, a,
83
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
84
+ &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
85
86
static const FPScalar1 f_scalar_frecpx = {
87
gen_helper_frecpx_f16,
88
@@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = {
89
gen_helper_rsqrte_f32,
90
gen_helper_rsqrte_f64,
91
};
92
-TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1)
93
+static const FPScalar1 f_scalar_frsqrte_rpres = {
94
+ gen_helper_rsqrte_f16,
95
+ gen_helper_rsqrte_rpres_f32,
96
+ gen_helper_rsqrte_f64,
97
+};
98
+TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
99
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
100
+ &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
101
102
static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
103
{
104
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = {
105
gen_helper_gvec_frecpe_s,
106
gen_helper_gvec_frecpe_d,
107
};
108
-TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
109
+static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
110
+ gen_helper_gvec_frecpe_h,
111
+ gen_helper_gvec_frecpe_rpres_s,
112
+ gen_helper_gvec_frecpe_d,
113
+};
114
+TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
115
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
116
117
static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
118
gen_helper_gvec_frsqrte_h,
119
gen_helper_gvec_frsqrte_s,
120
gen_helper_gvec_frsqrte_d,
121
};
122
-TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
123
+static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
124
+ gen_helper_gvec_frsqrte_h,
125
+ gen_helper_gvec_frsqrte_rpres_s,
126
+ gen_helper_gvec_frsqrte_d,
127
+};
128
+TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
129
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
130
131
static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
132
{
133
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/target/arm/tcg/translate-sve.c
136
+++ b/target/arm/tcg/translate-sve.c
137
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
138
NULL, gen_helper_gvec_frecpe_h,
139
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
140
};
141
-TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0)
142
+static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = {
143
+ NULL, gen_helper_gvec_frecpe_h,
144
+ gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d,
145
+};
146
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
147
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
148
+ frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0)
149
150
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
151
NULL, gen_helper_gvec_frsqrte_h,
152
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
153
};
154
-TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0)
155
+static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = {
156
+ NULL, gen_helper_gvec_frsqrte_h,
157
+ gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d,
158
+};
159
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
160
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
161
+ frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0)
162
163
/*
164
*** SVE Floating Point Compare with Zero Group
165
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/target/arm/tcg/vec_helper.c
168
+++ b/target/arm/tcg/vec_helper.c
169
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \
170
171
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
172
DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
173
+DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32)
174
DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
175
176
DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
177
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
178
+DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32)
179
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
180
181
DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
182
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/target/arm/vfp_helper.c
185
+++ b/target/arm/vfp_helper.c
186
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
187
return make_float16(f16_val);
188
}
189
190
-float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
191
+/*
192
+ * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
193
+ * which is used when FPCR.AH == 1.
194
+ */
195
+static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
196
{
197
float32 f32 = float32_squash_input_denormal(input, fpst);
198
uint32_t f32_val = float32_val(f32);
199
@@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
200
return make_float32(f32_val);
201
}
202
203
+float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
204
+{
205
+ return do_recpe_f32(input, fpst, false);
206
+}
207
+
208
+float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
209
+{
210
+ return do_recpe_f32(input, fpst, true);
211
+}
212
+
213
float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
214
{
215
float64 f64 = float64_squash_input_denormal(input, fpst);
216
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
217
return make_float16(val);
218
}
219
220
-float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
221
+/*
222
+ * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
223
+ * which is used when FPCR.AH == 1.
224
+ */
225
+static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
226
{
227
float32 f32 = float32_squash_input_denormal(input, s);
228
uint32_t val = float32_val(f32);
229
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
230
return make_float32(val);
231
}
232
233
+float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
234
+{
235
+ return do_rsqrte_f32(input, s, false);
236
+}
237
+
238
+float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
239
+{
240
+ return do_rsqrte_f32(input, s, true);
241
+}
242
+
243
float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
244
{
245
float64 f64 = float64_squash_input_denormal(input, s);
36
--
246
--
37
2.25.1
247
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the increased precision variation of FRECPE. In the
2
pseudocode this corresponds to the handling of the
3
"increasedprecision" boolean in the FPRecipEstimate() and
4
RecipEstimate() functions.
2
5
3
This check is buried within arm_hcr_el2_eff(), but since we
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
have to have the explicit check for CPTR_EL2.TZ, we might as
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
well just check it once at the beginning of the block.
8
---
9
target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------
10
1 file changed, 46 insertions(+), 8 deletions(-)
6
11
7
Once this is done, we can test HCR_EL2.{E2H,TGE} directly,
12
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
8
rather than going through arm_hcr_el2_eff().
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20220607203306.657998-9-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
target/arm/helper.c | 13 +++++--------
16
1 file changed, 5 insertions(+), 8 deletions(-)
17
18
diff --git a/target/arm/helper.c b/target/arm/helper.c
19
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper.c
14
--- a/target/arm/vfp_helper.c
21
+++ b/target/arm/helper.c
15
+++ b/target/arm/vfp_helper.c
22
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
16
@@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input)
17
return r;
18
}
19
20
+/*
21
+ * Increased precision version:
22
+ * input is a 13 bit fixed point number
23
+ * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
24
+ * result range 4096 .. 8191 for a number from 1.0 to 2.0
25
+ */
26
+static int recip_estimate_incprec(int input)
27
+{
28
+ int a, b, r;
29
+ assert(2048 <= input && input < 4096);
30
+ a = (input * 2) + 1;
31
+ /*
32
+ * The pseudocode expresses this as an operation on infinite
33
+ * precision reals where it calculates 2^25 / a and then looks
34
+ * at the error between that and the rounded-down-to-integer
35
+ * value to see if it should instead round up. We instead
36
+ * follow the same approach as the pseudocode for the 8-bit
37
+ * precision version, and calculate (2 * (2^25 / a)) as an
38
+ * integer so we can do the "add one and halve" to round it.
39
+ * So the 1 << 26 here is correct.
40
+ */
41
+ b = (1 << 26) / a;
42
+ r = (b + 1) >> 1;
43
+ assert(4096 <= r && r < 8192);
44
+ return r;
45
+}
46
+
47
/*
48
* Common wrapper to call recip_estimate
49
*
50
@@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input)
51
* callee.
52
*/
53
54
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
55
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
56
+ bool increasedprecision)
57
{
58
uint32_t scaled, estimate;
59
uint64_t result_frac;
60
@@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
23
}
61
}
24
}
62
}
25
63
26
- /*
64
- /* scaled = UInt('1':fraction<51:44>) */
27
- * CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE).
65
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
28
- */
66
- estimate = recip_estimate(scaled);
29
- if (el <= 2) {
67
+ if (increasedprecision) {
30
- uint64_t hcr_el2 = arm_hcr_el2_eff(env);
68
+ /* scaled = UInt('1':fraction<51:41>) */
31
- if (hcr_el2 & HCR_E2H) {
69
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
32
+ if (el <= 2 && arm_is_el2_enabled(env)) {
70
+ estimate = recip_estimate_incprec(scaled);
33
+ /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */
71
+ } else {
34
+ if (env->cp15.hcr_el2 & HCR_E2H) {
72
+ /* scaled = UInt('1':fraction<51:44>) */
35
switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) {
73
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
36
case 1:
74
+ estimate = recip_estimate(scaled);
37
- if (el != 0 || !(hcr_el2 & HCR_TGE)) {
75
+ }
38
+ if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) {
76
39
break;
77
result_exp = exp_off - *exp;
40
}
78
- result_frac = deposit64(0, 44, 8, estimate);
41
/* fall through */
79
+ if (increasedprecision) {
42
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
80
+ result_frac = deposit64(0, 40, 12, estimate);
43
case 2:
81
+ } else {
44
return 2;
82
+ result_frac = deposit64(0, 44, 8, estimate);
45
}
83
+ }
46
- } else if (arm_is_el2_enabled(env)) {
84
if (result_exp == 0) {
47
+ } else {
85
result_frac = deposit64(result_frac >> 1, 51, 1, 1);
48
if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) {
86
} else if (result_exp == -1) {
49
return 2;
87
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
50
}
88
}
89
90
f64_frac = call_recip_estimate(&f16_exp, 29,
91
- ((uint64_t) f16_frac) << (52 - 10));
92
+ ((uint64_t) f16_frac) << (52 - 10), false);
93
94
/* result = sign : result_exp<4:0> : fraction<51:42> */
95
f16_val = deposit32(0, 15, 1, f16_sign);
96
@@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
97
}
98
99
f64_frac = call_recip_estimate(&f32_exp, 253,
100
- ((uint64_t) f32_frac) << (52 - 23));
101
+ ((uint64_t) f32_frac) << (52 - 23), rpres);
102
103
/* result = sign : result_exp<7:0> : fraction<51:29> */
104
f32_val = deposit32(0, 31, 1, f32_sign);
105
@@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
106
return float64_set_sign(float64_zero, float64_is_neg(f64));
107
}
108
109
- f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
110
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
111
112
/* result = sign : result_exp<10:0> : fraction<51:0>; */
113
f64_val = deposit64(0, 63, 1, f64_sign);
51
--
114
--
52
2.25.1
115
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Implement the increased precision variation of FRSQRTE. In the
2
pseudocode this corresponds to the handling of the
3
"increasedprecision" boolean in the FPRSqrtEstimate() and
4
RecipSqrtEstimate() functions.
2
5
3
Move the data to vec_helper.c and the inline to vec_internal.h.
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++-------
10
1 file changed, 64 insertions(+), 13 deletions(-)
4
11
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220607203306.657998-18-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/vec_internal.h | 7 +++++++
11
target/arm/sve_helper.c | 29 -----------------------------
12
target/arm/vec_helper.c | 26 ++++++++++++++++++++++++++
13
3 files changed, 33 insertions(+), 29 deletions(-)
14
15
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/vec_internal.h
14
--- a/target/arm/vfp_helper.c
18
+++ b/target/arm/vec_internal.h
15
+++ b/target/arm/vfp_helper.c
19
@@ -XXX,XX +XXX,XX @@ static inline uint64_t expand_pred_b(uint8_t byte)
16
@@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a)
20
return expand_pred_b_data[byte];
17
return estimate;
21
}
18
}
22
19
23
+/* Similarly for half-word elements. */
20
+static int do_recip_sqrt_estimate_incprec(int a)
24
+extern const uint64_t expand_pred_h_data[0x55 + 1];
25
+static inline uint64_t expand_pred_h(uint8_t byte)
26
+{
21
+{
27
+ return expand_pred_h_data[byte & 0x55];
22
+ /*
23
+ * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
24
+ * in terms of an infinite-precision floating point calculation of a
25
+ * square root. We implement this using the same kind of pure integer
26
+ * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
27
+ */
28
+ int64_t b, estimate;
29
30
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
31
+ assert(1024 <= a && a < 4096);
32
+ if (a < 2048) {
33
+ a = a * 2 + 1;
34
+ } else {
35
+ a = (a >> 1) << 1;
36
+ a = (a + 1) * 2;
37
+ }
38
+ b = 8192;
39
+ while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
40
+ b += 1;
41
+ }
42
+ estimate = (b + 1) / 2;
43
+
44
+ assert(4096 <= estimate && estimate < 8192);
45
+
46
+ return estimate;
28
+}
47
+}
29
+
48
+
30
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
49
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
50
+ bool increasedprecision)
31
{
51
{
32
uint64_t *d = vd + opr_sz;
52
int estimate;
33
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
53
uint32_t scaled;
34
index XXXXXXX..XXXXXXX 100644
54
@@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
35
--- a/target/arm/sve_helper.c
55
frac = extract64(frac, 0, 51) << 1;
36
+++ b/target/arm/sve_helper.c
56
}
37
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
57
38
return flags;
58
- if (*exp & 1) {
59
- /* scaled = UInt('01':fraction<51:45>) */
60
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
61
+ if (increasedprecision) {
62
+ if (*exp & 1) {
63
+ /* scaled = UInt('01':fraction<51:42>) */
64
+ scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
65
+ } else {
66
+ /* scaled = UInt('1':fraction<51:41>) */
67
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
68
+ }
69
+ estimate = do_recip_sqrt_estimate_incprec(scaled);
70
} else {
71
- /* scaled = UInt('1':fraction<51:44>) */
72
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
73
+ if (*exp & 1) {
74
+ /* scaled = UInt('01':fraction<51:45>) */
75
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
76
+ } else {
77
+ /* scaled = UInt('1':fraction<51:44>) */
78
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
79
+ }
80
+ estimate = do_recip_sqrt_estimate(scaled);
81
}
82
- estimate = do_recip_sqrt_estimate(scaled);
83
84
*exp = (exp_off - *exp) / 2;
85
- return extract64(estimate, 0, 8) << 44;
86
+ if (increasedprecision) {
87
+ return extract64(estimate, 0, 12) << 40;
88
+ } else {
89
+ return extract64(estimate, 0, 8) << 44;
90
+ }
39
}
91
}
40
92
41
-/* Similarly for half-word elements.
93
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
42
- * for (i = 0; i < 256; ++i) {
94
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
43
- * unsigned long m = 0;
95
44
- * if (i & 0xaa) {
96
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
45
- * continue;
97
46
- * }
98
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
47
- * for (j = 0; j < 8; j += 2) {
99
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
48
- * if ((i >> j) & 1) {
100
49
- * m |= 0xfffful << (j << 3);
101
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
50
- * }
102
val = deposit32(0, 15, 1, f16_sign);
51
- * }
103
@@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
52
- * printf("[0x%x] = 0x%016lx,\n", i, m);
104
53
- * }
105
f64_frac = ((uint64_t) f32_frac) << 29;
54
- */
106
55
-static inline uint64_t expand_pred_h(uint8_t byte)
107
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
56
-{
108
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
57
- static const uint64_t word[] = {
109
58
- [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
110
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
59
- [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
111
+ /*
60
- [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
112
+ * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
61
- [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
113
+ * or for increased precision
62
- [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
114
+ * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
63
- [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
115
+ */
64
- [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
116
val = deposit32(0, 31, 1, f32_sign);
65
- [0x55] = 0xffffffffffffffff,
117
val = deposit32(val, 23, 8, f32_exp);
66
- };
118
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
67
- return word[byte & 0x55];
119
+ if (rpres) {
68
-}
120
+ val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
69
-
121
+ } else {
70
/* Similarly for single word elements. */
122
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
71
static inline uint64_t expand_pred_s(uint8_t byte)
123
+ }
72
{
124
return make_float32(val);
73
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
125
}
74
index XXXXXXX..XXXXXXX 100644
126
75
--- a/target/arm/vec_helper.c
127
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
76
+++ b/target/arm/vec_helper.c
128
return float64_zero;
77
@@ -XXX,XX +XXX,XX @@ const uint64_t expand_pred_b_data[256] = {
129
}
78
0xffffffffffffffff,
130
79
};
131
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
80
132
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
81
+/*
133
82
+ * Similarly for half-word elements.
134
/* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
83
+ * for (i = 0; i < 256; ++i) {
135
val = deposit64(0, 61, 1, f64_sign);
84
+ * unsigned long m = 0;
85
+ * if (i & 0xaa) {
86
+ * continue;
87
+ * }
88
+ * for (j = 0; j < 8; j += 2) {
89
+ * if ((i >> j) & 1) {
90
+ * m |= 0xfffful << (j << 3);
91
+ * }
92
+ * }
93
+ * printf("[0x%x] = 0x%016lx,\n", i, m);
94
+ * }
95
+ */
96
+const uint64_t expand_pred_h_data[0x55 + 1] = {
97
+ [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
98
+ [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
99
+ [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
100
+ [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
101
+ [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
102
+ [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
103
+ [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
104
+ [0x55] = 0xffffffffffffffff,
105
+};
106
+
107
/* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */
108
int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3,
109
bool neg, bool round)
110
--
136
--
111
2.25.1
137
2.34.1
diff view generated by jsdifflib
1
The FEAT_DoubleFault extension adds the following:
1
Now the emulation is complete, we can enable FEAT_RPRES for the 'max'
2
2
CPU type.
3
* All external aborts on instruction fetches and translation table
4
walks for instruction fetches must be synchronous. For QEMU this
5
is already true.
6
7
* SCR_EL3 has a new bit NMEA which disables the masking of SError
8
interrupts by PSTATE.A when the SError interrupt is taken to EL3.
9
For QEMU we only need to make the bit writable, because we have no
10
sources of SError interrupts.
11
12
* SCR_EL3 has a new bit EASE which causes synchronous external
13
aborts taken to EL3 to be taken at the same entry point as SError.
14
(Note that this does not mean that they are SErrors for purposes
15
of PSTATE.A masking or that the syndrome register reports them as
16
SErrors: it just means that the vector offset is different.)
17
18
* The existing SCTLR_EL3.IESB has an effective value of 1 when
19
SCR_EL3.NMEA is 1. For QEMU this is a no-op because we don't need
20
different behaviour based on IESB (we don't need to do anything to
21
ensure that error exceptions are synchronized).
22
23
So for QEMU the things we need to change are:
24
* Make SCR_EL3.{NMEA,EASE} writable
25
* When taking a synchronous external abort at EL3, adjust the
26
vector entry point if SCR_EL3.EASE is set
27
* Advertise the feature in the ID registers
28
3
29
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
30
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
31
Message-id: 20220531151431.949322-1-peter.maydell@linaro.org
32
---
6
---
33
docs/system/arm/emulation.rst | 1 +
7
docs/system/arm/emulation.rst | 1 +
34
target/arm/cpu.h | 5 +++++
8
target/arm/tcg/cpu64.c | 1 +
35
target/arm/cpu64.c | 4 ++--
9
2 files changed, 2 insertions(+)
36
target/arm/helper.c | 36 +++++++++++++++++++++++++++++++++++
37
4 files changed, 44 insertions(+), 2 deletions(-)
38
10
39
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
11
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
40
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
41
--- a/docs/system/arm/emulation.rst
13
--- a/docs/system/arm/emulation.rst
42
+++ b/docs/system/arm/emulation.rst
14
+++ b/docs/system/arm/emulation.rst
43
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
15
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
44
- FEAT_Debugv8p2 (Debug changes for v8.2)
16
- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
45
- FEAT_Debugv8p4 (Debug changes for v8.4)
17
- FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental)
46
- FEAT_DotProd (Advanced SIMD dot product instructions)
18
- FEAT_RNG (Random number generator)
47
+- FEAT_DoubleFault (Double Fault Extension)
19
+- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE)
48
- FEAT_FCMA (Floating-point complex number instructions)
20
- FEAT_S2FWB (Stage 2 forced Write-Back)
49
- FEAT_FHM (Floating-point half-precision multiplication instructions)
21
- FEAT_SB (Speculation Barrier)
50
- FEAT_FP16 (Half-precision floating-point data processing)
22
- FEAT_SEL2 (Secure EL2)
51
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
23
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
52
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/cpu.h
25
--- a/target/arm/tcg/cpu64.c
54
+++ b/target/arm/cpu.h
26
+++ b/target/arm/tcg/cpu64.c
55
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_ras(const ARMISARegisters *id)
27
@@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj)
56
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0;
28
cpu->isar.id_aa64isar1 = t;
57
}
29
58
30
t = cpu->isar.id_aa64isar2;
59
+static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id)
31
+ t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */
60
+{
32
t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
61
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2;
33
t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
62
+}
34
t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */
63
+
64
static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
65
{
66
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
67
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/arm/cpu64.c
70
+++ b/target/arm/cpu64.c
71
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
72
t = cpu->isar.id_aa64pfr0;
73
t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */
74
t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */
75
- t = FIELD_DP64(t, ID_AA64PFR0, RAS, 1); /* FEAT_RAS */
76
+ t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */
77
t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
78
t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); /* FEAT_SEL2 */
79
t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */
80
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
81
* we do for EL2 with the virtualization=on property.
82
*/
83
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
84
- t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 1); /* FEAT_RASv1p1 */
85
+ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
86
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
87
cpu->isar.id_aa64pfr1 = t;
88
89
diff --git a/target/arm/helper.c b/target/arm/helper.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/arm/helper.c
92
+++ b/target/arm/helper.c
93
@@ -XXX,XX +XXX,XX @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
94
if (cpu_isar_feature(aa64_scxtnum, cpu)) {
95
valid_mask |= SCR_ENSCXT;
96
}
97
+ if (cpu_isar_feature(aa64_doublefault, cpu)) {
98
+ valid_mask |= SCR_EASE | SCR_NMEA;
99
+ }
100
} else {
101
valid_mask &= ~(SCR_RW | SCR_ST);
102
if (cpu_isar_feature(aa32_ras, cpu)) {
103
@@ -XXX,XX +XXX,XX @@ static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env)
104
return ret;
105
}
106
107
+static bool syndrome_is_sync_extabt(uint32_t syndrome)
108
+{
109
+ /* Return true if this syndrome value is a synchronous external abort */
110
+ switch (syn_get_ec(syndrome)) {
111
+ case EC_INSNABORT:
112
+ case EC_INSNABORT_SAME_EL:
113
+ case EC_DATAABORT:
114
+ case EC_DATAABORT_SAME_EL:
115
+ /* Look at fault status code for all the synchronous ext abort cases */
116
+ switch (syndrome & 0x3f) {
117
+ case 0x10:
118
+ case 0x13:
119
+ case 0x14:
120
+ case 0x15:
121
+ case 0x16:
122
+ case 0x17:
123
+ return true;
124
+ default:
125
+ return false;
126
+ }
127
+ default:
128
+ return false;
129
+ }
130
+}
131
+
132
/* Handle exception entry to a target EL which is using AArch64 */
133
static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
134
{
135
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
136
switch (cs->exception_index) {
137
case EXCP_PREFETCH_ABORT:
138
case EXCP_DATA_ABORT:
139
+ /*
140
+ * FEAT_DoubleFault allows synchronous external aborts taken to EL3
141
+ * to be taken to the SError vector entrypoint.
142
+ */
143
+ if (new_el == 3 && (env->cp15.scr_el3 & SCR_EASE) &&
144
+ syndrome_is_sync_extabt(env->exception.syndrome)) {
145
+ addr += 0x180;
146
+ }
147
env->cp15.far_el[new_el] = env->exception.vaddress;
148
qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n",
149
env->cp15.far_el[new_el]);
150
--
35
--
151
2.25.1
36
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Move ARMFPStatusFlavour to cpu.h with which to index
4
this array. For now, place the array in an anonymous
5
union with the existing structures. Adjust the order
6
of the existing structures to match the enum.
7
8
Simplify fpstatus_ptr() using the new array.
9
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-24-richard.henderson@linaro.org
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Message-id: 20250129013857.135256-7-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
14
---
8
target/arm/ptw.h | 1 -
15
target/arm/cpu.h | 119 +++++++++++++++++++++----------------
9
target/arm/helper.c | 24 ------------------------
16
target/arm/tcg/translate.h | 64 +-------------------
10
target/arm/ptw.c | 22 ++++++++++++++++++++++
17
2 files changed, 70 insertions(+), 113 deletions(-)
11
3 files changed, 22 insertions(+), 25 deletions(-)
18
12
19
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
21
--- a/target/arm/cpu.h
16
+++ b/target/arm/ptw.h
22
+++ b/target/arm/cpu.h
17
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
18
24
19
#ifndef CONFIG_USER_ONLY
25
typedef struct NVICState NVICState;
20
26
21
-bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
27
+/*
22
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
28
+ * Enum for indexing vfp.fp_status[].
23
uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
29
+ *
24
30
+ * FPST_A32: is the "normal" fp status for AArch32 insns
25
diff --git a/target/arm/helper.c b/target/arm/helper.c
31
+ * FPST_A64: is the "normal" fp status for AArch64 insns
32
+ * FPST_A32_F16: used for AArch32 half-precision calculations
33
+ * FPST_A64_F16: used for AArch64 half-precision calculations
34
+ * FPST_STD: the ARM "Standard FPSCR Value"
35
+ * FPST_STD_F16: used for half-precision
36
+ * calculations with the ARM "Standard FPSCR Value"
37
+ * FPST_AH: used for the A64 insns which change behaviour
38
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
39
+ * and the reciprocal and square root estimate/step insns)
40
+ * FPST_AH_F16: used for the A64 insns which change behaviour
41
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
42
+ * and the reciprocal and square root estimate/step insns);
43
+ * for half-precision
44
+ *
45
+ * Half-precision operations are governed by a separate
46
+ * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
47
+ * status structure to control this.
48
+ *
49
+ * The "Standard FPSCR", ie default-NaN, flush-to-zero,
50
+ * round-to-nearest, is used by any operations (generally
51
+ * Neon) which the architecture defines as controlled by the
52
+ * standard FPSCR value rather than the FPSCR.
53
+ *
54
+ * The "standard FPSCR but for fp16 ops" is needed because
55
+ * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
56
+ * using a fixed value for it.
57
+ *
58
+ * The ah_fp_status is needed because some insns have different
59
+ * behaviour when FPCR.AH == 1: they don't update cumulative
60
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
61
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
62
+ * which means we need an ah_fp_status_f16 as well.
63
+ *
64
+ * To avoid having to transfer exception bits around, we simply
65
+ * say that the FPSCR cumulative exception flags are the logical
66
+ * OR of the flags in the four fp statuses. This relies on the
67
+ * only thing which needs to read the exception flags being
68
+ * an explicit FPSCR read.
69
+ */
70
+typedef enum ARMFPStatusFlavour {
71
+ FPST_A32,
72
+ FPST_A64,
73
+ FPST_A32_F16,
74
+ FPST_A64_F16,
75
+ FPST_AH,
76
+ FPST_AH_F16,
77
+ FPST_STD,
78
+ FPST_STD_F16,
79
+} ARMFPStatusFlavour;
80
+#define FPST_COUNT 8
81
+
82
typedef struct CPUArchState {
83
/* Regs for current mode. */
84
uint32_t regs[16];
85
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
86
/* Scratch space for aa32 neon expansion. */
87
uint32_t scratch[8];
88
89
- /* There are a number of distinct float control structures:
90
- *
91
- * fp_status_a32: is the "normal" fp status for AArch32 insns
92
- * fp_status_a64: is the "normal" fp status for AArch64 insns
93
- * fp_status_fp16_a32: used for AArch32 half-precision calculations
94
- * fp_status_fp16_a64: used for AArch64 half-precision calculations
95
- * standard_fp_status : the ARM "Standard FPSCR Value"
96
- * standard_fp_status_fp16 : used for half-precision
97
- * calculations with the ARM "Standard FPSCR Value"
98
- * ah_fp_status: used for the A64 insns which change behaviour
99
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
100
- * and the reciprocal and square root estimate/step insns)
101
- * ah_fp_status_f16: used for the A64 insns which change behaviour
102
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
103
- * and the reciprocal and square root estimate/step insns);
104
- * for half-precision
105
- *
106
- * Half-precision operations are governed by a separate
107
- * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
108
- * status structure to control this.
109
- *
110
- * The "Standard FPSCR", ie default-NaN, flush-to-zero,
111
- * round-to-nearest and is used by any operations (generally
112
- * Neon) which the architecture defines as controlled by the
113
- * standard FPSCR value rather than the FPSCR.
114
- *
115
- * The "standard FPSCR but for fp16 ops" is needed because
116
- * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
117
- * using a fixed value for it.
118
- *
119
- * The ah_fp_status is needed because some insns have different
120
- * behaviour when FPCR.AH == 1: they don't update cumulative
121
- * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
122
- * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
123
- * which means we need an ah_fp_status_f16 as well.
124
- *
125
- * To avoid having to transfer exception bits around, we simply
126
- * say that the FPSCR cumulative exception flags are the logical
127
- * OR of the flags in the four fp statuses. This relies on the
128
- * only thing which needs to read the exception flags being
129
- * an explicit FPSCR read.
130
- */
131
- float_status fp_status_a32;
132
- float_status fp_status_a64;
133
- float_status fp_status_f16_a32;
134
- float_status fp_status_f16_a64;
135
- float_status standard_fp_status;
136
- float_status standard_fp_status_f16;
137
- float_status ah_fp_status;
138
- float_status ah_fp_status_f16;
139
+ /* There are a number of distinct float control structures. */
140
+ union {
141
+ float_status fp_status[FPST_COUNT];
142
+ struct {
143
+ float_status fp_status_a32;
144
+ float_status fp_status_a64;
145
+ float_status fp_status_f16_a32;
146
+ float_status fp_status_f16_a64;
147
+ float_status ah_fp_status;
148
+ float_status ah_fp_status_f16;
149
+ float_status standard_fp_status;
150
+ float_status standard_fp_status_f16;
151
+ };
152
+ };
153
154
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
155
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
156
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
26
index XXXXXXX..XXXXXXX 100644
157
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/helper.c
158
--- a/target/arm/tcg/translate.h
28
+++ b/target/arm/helper.c
159
+++ b/target/arm/tcg/translate.h
29
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
160
@@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
161
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
30
}
162
}
31
#endif /* !CONFIG_USER_ONLY */
163
32
164
-/*
33
-#ifndef CONFIG_USER_ONLY
165
- * Enum for argument to fpstatus_ptr().
34
-bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
166
- */
35
-{
167
-typedef enum ARMFPStatusFlavour {
36
- switch (mmu_idx) {
168
- FPST_A32,
37
- case ARMMMUIdx_SE10_0:
169
- FPST_A64,
38
- case ARMMMUIdx_E20_0:
170
- FPST_A32_F16,
39
- case ARMMMUIdx_SE20_0:
171
- FPST_A64_F16,
40
- case ARMMMUIdx_Stage1_E0:
172
- FPST_AH,
41
- case ARMMMUIdx_Stage1_SE0:
173
- FPST_AH_F16,
42
- case ARMMMUIdx_MUser:
174
- FPST_STD,
43
- case ARMMMUIdx_MSUser:
175
- FPST_STD_F16,
44
- case ARMMMUIdx_MUserNegPri:
176
-} ARMFPStatusFlavour;
45
- case ARMMMUIdx_MSUserNegPri:
177
-
46
- return true;
178
/**
179
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
180
*
181
* We have multiple softfloat float_status fields in the Arm CPU state struct
182
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
183
* been set up to point to the requested field in the CPU state struct.
184
- * The options are:
185
- *
186
- * FPST_A32
187
- * for AArch32 non-FP16 operations controlled by the FPCR
188
- * FPST_A64
189
- * for AArch64 non-FP16 operations controlled by the FPCR
190
- * FPST_A32_F16
191
- * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
192
- * FPST_A64_F16
193
- * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
194
- * FPST_AH:
195
- * for AArch64 operations which change behaviour when AH=1 (specifically,
196
- * bfloat16 conversions and multiplies, and the reciprocal and square root
197
- * estimate/step insns)
198
- * FPST_AH_F16:
199
- * ditto, but for half-precision operations
200
- * FPST_STD
201
- * for A32/T32 Neon operations using the "standard FPSCR value"
202
- * FPST_STD_F16
203
- * as FPST_STD, but where FPCR.FZ16 is to be used
204
*/
205
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
206
{
207
TCGv_ptr statusptr = tcg_temp_new_ptr();
208
- int offset;
209
+ int offset = offsetof(CPUARMState, vfp.fp_status[flavour]);
210
211
- switch (flavour) {
212
- case FPST_A32:
213
- offset = offsetof(CPUARMState, vfp.fp_status_a32);
214
- break;
215
- case FPST_A64:
216
- offset = offsetof(CPUARMState, vfp.fp_status_a64);
217
- break;
218
- case FPST_A32_F16:
219
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
220
- break;
221
- case FPST_A64_F16:
222
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
223
- break;
224
- case FPST_AH:
225
- offset = offsetof(CPUARMState, vfp.ah_fp_status);
226
- break;
227
- case FPST_AH_F16:
228
- offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
229
- break;
230
- case FPST_STD:
231
- offset = offsetof(CPUARMState, vfp.standard_fp_status);
232
- break;
233
- case FPST_STD_F16:
234
- offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
235
- break;
47
- default:
236
- default:
48
- return false;
49
- case ARMMMUIdx_E10_0:
50
- case ARMMMUIdx_E10_1:
51
- case ARMMMUIdx_E10_1_PAN:
52
- g_assert_not_reached();
237
- g_assert_not_reached();
53
- }
238
- }
54
-}
239
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
55
-#endif /* !CONFIG_USER_ONLY */
240
return statusptr;
56
-
57
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
58
{
59
if (regime_has_2_ranges(mmu_idx)) {
60
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/ptw.c
63
+++ b/target/arm/ptw.c
64
@@ -XXX,XX +XXX,XX @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
65
return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
66
}
241
}
67
68
+static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
69
+{
70
+ switch (mmu_idx) {
71
+ case ARMMMUIdx_SE10_0:
72
+ case ARMMMUIdx_E20_0:
73
+ case ARMMMUIdx_SE20_0:
74
+ case ARMMMUIdx_Stage1_E0:
75
+ case ARMMMUIdx_Stage1_SE0:
76
+ case ARMMMUIdx_MUser:
77
+ case ARMMMUIdx_MSUser:
78
+ case ARMMMUIdx_MUserNegPri:
79
+ case ARMMMUIdx_MSUserNegPri:
80
+ return true;
81
+ default:
82
+ return false;
83
+ case ARMMMUIdx_E10_0:
84
+ case ARMMMUIdx_E10_1:
85
+ case ARMMMUIdx_E10_1_PAN:
86
+ g_assert_not_reached();
87
+ }
88
+}
89
+
90
static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
91
{
92
/*
93
--
242
--
94
2.25.1
243
2.34.1
244
245
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This will be used for both Normal and Streaming SVE, and the value
3
Replace with fp_status[FPST_STD_F16].
4
does not necessarily come from ZCR_ELx. While we're at it, emphasize
5
the units in which the value is returned.
6
4
7
Patch produced by
8
git grep -l sve_zcr_len_for_el | \
9
xargs -n1 sed -i 's/sve_zcr_len_for_el/sve_vqm1_for_el/g'
10
11
and then adding a function comment.
12
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20220607203306.657998-13-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-8-richard.henderson@linaro.org
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
9
---
18
target/arm/cpu.h | 11 ++++++++++-
10
target/arm/cpu.h | 1 -
19
target/arm/arch_dump.c | 2 +-
11
target/arm/cpu.c | 4 ++--
20
target/arm/cpu.c | 2 +-
12
target/arm/tcg/mve_helper.c | 24 ++++++++++++------------
21
target/arm/gdbstub64.c | 2 +-
13
target/arm/vfp_helper.c | 8 ++++----
22
target/arm/helper.c | 12 ++++++------
14
4 files changed, 18 insertions(+), 19 deletions(-)
23
5 files changed, 19 insertions(+), 10 deletions(-)
24
15
25
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
26
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/cpu.h
18
--- a/target/arm/cpu.h
28
+++ b/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
29
@@ -XXX,XX +XXX,XX @@ void aarch64_sync_64_to_32(CPUARMState *env);
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
30
21
float_status ah_fp_status;
31
int fp_exception_el(CPUARMState *env, int cur_el);
22
float_status ah_fp_status_f16;
32
int sve_exception_el(CPUARMState *env, int cur_el);
23
float_status standard_fp_status;
33
-uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
24
- float_status standard_fp_status_f16;
34
+
25
};
35
+/**
26
};
36
+ * sve_vqm1_for_el:
27
37
+ * @env: CPUARMState
38
+ * @el: exception level
39
+ *
40
+ * Compute the current SVE vector length for @el, in units of
41
+ * Quadwords Minus 1 -- the same scale used for ZCR_ELx.LEN.
42
+ */
43
+uint32_t sve_vqm1_for_el(CPUARMState *env, int el);
44
45
static inline bool is_a64(CPUARMState *env)
46
{
47
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/arch_dump.c
50
+++ b/target/arm/arch_dump.c
51
@@ -XXX,XX +XXX,XX @@ static off_t sve_fpcr_offset(uint32_t vq)
52
53
static uint32_t sve_current_vq(CPUARMState *env)
54
{
55
- return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
56
+ return sve_vqm1_for_el(env, arm_current_el(env)) + 1;
57
}
58
59
static size_t sve_size_vq(uint32_t vq)
60
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
28
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
61
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/cpu.c
30
--- a/target/arm/cpu.c
63
+++ b/target/arm/cpu.c
31
+++ b/target/arm/cpu.c
64
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
32
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
65
vfp_get_fpcr(env), vfp_get_fpsr(env));
33
set_flush_to_zero(1, &env->vfp.standard_fp_status);
66
34
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
67
if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
35
set_default_nan_mode(1, &env->vfp.standard_fp_status);
68
- int j, zcr_len = sve_zcr_len_for_el(env, el);
36
- set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
69
+ int j, zcr_len = sve_vqm1_for_el(env, el);
37
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
70
38
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
71
for (i = 0; i <= FFR_PRED_NUM; i++) {
39
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
72
bool eol;
40
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
73
diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
41
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
42
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
43
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
44
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
45
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
46
set_flush_to_zero(1, &env->vfp.ah_fp_status);
47
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
48
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
74
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
75
--- a/target/arm/gdbstub64.c
50
--- a/target/arm/tcg/mve_helper.c
76
+++ b/target/arm/gdbstub64.c
51
+++ b/target/arm/tcg/mve_helper.c
77
@@ -XXX,XX +XXX,XX @@ int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg)
52
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
78
* We report in Vector Granules (VG) which is 64bit in a Z reg
53
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
79
* while the ZCR works in Vector Quads (VQ) which is 128bit chunks.
54
continue; \
80
*/
55
} \
81
- int vq = sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
56
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
82
+ int vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1;
57
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
83
return gdb_get_reg64(buf, vq * 2);
58
&env->vfp.standard_fp_status; \
59
if (!(mask & 1)) { \
60
/* We need the result but without updating flags */ \
61
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
62
r[e] = 0; \
63
continue; \
64
} \
65
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
66
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
67
&env->vfp.standard_fp_status; \
68
if (!(tm & 1)) { \
69
/* We need the result but without updating flags */ \
70
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
71
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
72
continue; \
73
} \
74
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
75
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
76
&env->vfp.standard_fp_status; \
77
if (!(mask & 1)) { \
78
/* We need the result but without updating flags */ \
79
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
80
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
81
continue; \
82
} \
83
- fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
84
+ fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
85
&env->vfp.standard_fp_status; \
86
fpst1 = fpst0; \
87
if (!(mask & 1)) { \
88
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
89
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
90
continue; \
91
} \
92
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
93
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
94
&env->vfp.standard_fp_status; \
95
if (!(mask & 1)) { \
96
/* We need the result but without updating flags */ \
97
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
98
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
99
continue; \
100
} \
101
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
102
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
103
&env->vfp.standard_fp_status; \
104
if (!(mask & 1)) { \
105
/* We need the result but without updating flags */ \
106
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
107
TYPE *m = vm; \
108
TYPE ra = (TYPE)ra_in; \
109
float_status *fpst = (ESIZE == 2) ? \
110
- &env->vfp.standard_fp_status_f16 : \
111
+ &env->vfp.fp_status[FPST_STD_F16] : \
112
&env->vfp.standard_fp_status; \
113
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
114
if (mask & 1) { \
115
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
116
if ((mask & emask) == 0) { \
117
continue; \
118
} \
119
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
120
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
121
&env->vfp.standard_fp_status; \
122
if (!(mask & (1 << (e * ESIZE)))) { \
123
/* We need the result but without updating flags */ \
124
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
125
if ((mask & emask) == 0) { \
126
continue; \
127
} \
128
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
129
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
130
&env->vfp.standard_fp_status; \
131
if (!(mask & (1 << (e * ESIZE)))) { \
132
/* We need the result but without updating flags */ \
133
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
134
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
135
continue; \
136
} \
137
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
138
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
139
&env->vfp.standard_fp_status; \
140
if (!(mask & 1)) { \
141
/* We need the result but without updating flags */ \
142
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
143
float_status *fpst; \
144
float_status scratch_fpst; \
145
float_status *base_fpst = (ESIZE == 2) ? \
146
- &env->vfp.standard_fp_status_f16 : \
147
+ &env->vfp.fp_status[FPST_STD_F16] : \
148
&env->vfp.standard_fp_status; \
149
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
150
set_float_rounding_mode(rmode, base_fpst); \
151
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
152
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
153
continue; \
154
} \
155
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
156
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
157
&env->vfp.standard_fp_status; \
158
if (!(mask & 1)) { \
159
/* We need the result but without updating flags */ \
160
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/arm/vfp_helper.c
163
+++ b/target/arm/vfp_helper.c
164
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
165
/* FZ16 does not generate an input denormal exception. */
166
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
167
& ~float_flag_input_denormal_flushed);
168
- a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
169
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
170
& ~float_flag_input_denormal_flushed);
171
172
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
173
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
174
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
175
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
176
set_float_exception_flags(0, &env->vfp.standard_fp_status);
177
- set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
178
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
179
set_float_exception_flags(0, &env->vfp.ah_fp_status);
180
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
181
}
182
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
183
bool ftz_enabled = val & FPCR_FZ16;
184
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
185
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
186
- set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
187
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
188
set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
189
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
190
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
191
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
192
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
193
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
84
}
194
}
85
default:
195
if (changed & FPCR_FZ) {
86
diff --git a/target/arm/helper.c b/target/arm/helper.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/target/arm/helper.c
89
+++ b/target/arm/helper.c
90
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
91
/*
92
* Given that SVE is enabled, return the vector length for EL.
93
*/
94
-uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
95
+uint32_t sve_vqm1_for_el(CPUARMState *env, int el)
96
{
97
ARMCPU *cpu = env_archcpu(env);
98
uint32_t len = cpu->sve_max_vq - 1;
99
@@ -XXX,XX +XXX,XX @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
100
uint64_t value)
101
{
102
int cur_el = arm_current_el(env);
103
- int old_len = sve_zcr_len_for_el(env, cur_el);
104
+ int old_len = sve_vqm1_for_el(env, cur_el);
105
int new_len;
106
107
/* Bits other than [3:0] are RAZ/WI. */
108
@@ -XXX,XX +XXX,XX @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
109
* Because we arrived here, we know both FP and SVE are enabled;
110
* otherwise we would have trapped access to the ZCR_ELn register.
111
*/
112
- new_len = sve_zcr_len_for_el(env, cur_el);
113
+ new_len = sve_vqm1_for_el(env, cur_el);
114
if (new_len < old_len) {
115
aarch64_sve_narrow_vq(env, new_len + 1);
116
}
117
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
118
sve_el = 0;
119
}
120
} else if (sve_el == 0) {
121
- DP_TBFLAG_A64(flags, VL, sve_zcr_len_for_el(env, el));
122
+ DP_TBFLAG_A64(flags, VL, sve_vqm1_for_el(env, el));
123
}
124
DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
125
}
126
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
127
*/
128
old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
129
old_len = (old_a64 && !sve_exception_el(env, old_el)
130
- ? sve_zcr_len_for_el(env, old_el) : 0);
131
+ ? sve_vqm1_for_el(env, old_el) : 0);
132
new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
133
new_len = (new_a64 && !sve_exception_el(env, new_el)
134
- ? sve_zcr_len_for_el(env, new_el) : 0);
135
+ ? sve_vqm1_for_el(env, new_el) : 0);
136
137
/* When changing vector length, clear inaccessible state. */
138
if (new_len < old_len) {
139
--
196
--
140
2.25.1
197
2.34.1
198
199
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
With SME, the vector length does not only come from ZCR_ELx.
3
Replace with fp_status[FPST_STD].
4
Comment that this is either NVL or SVL, like the pseudocode.
4
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220607203306.657998-2-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/cpu.h | 3 ++-
10
target/arm/cpu.h | 1 -
12
target/arm/translate-a64.h | 2 +-
11
target/arm/cpu.c | 8 ++++----
13
target/arm/translate.h | 2 +-
12
target/arm/tcg/mve_helper.c | 28 ++++++++++++++--------------
14
target/arm/helper.c | 2 +-
13
target/arm/tcg/vec_helper.c | 4 ++--
15
target/arm/translate-a64.c | 2 +-
14
target/arm/vfp_helper.c | 4 ++--
16
target/arm/translate-sve.c | 2 +-
15
5 files changed, 22 insertions(+), 23 deletions(-)
17
6 files changed, 7 insertions(+), 6 deletions(-)
18
16
19
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
20
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/cpu.h
19
--- a/target/arm/cpu.h
22
+++ b/target/arm/cpu.h
20
+++ b/target/arm/cpu.h
23
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
24
*/
22
float_status fp_status_f16_a64;
25
FIELD(TBFLAG_A64, TBII, 0, 2)
23
float_status ah_fp_status;
26
FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
24
float_status ah_fp_status_f16;
27
-FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
25
- float_status standard_fp_status;
28
+/* The current vector length, either NVL or SVL. */
26
};
29
+FIELD(TBFLAG_A64, VL, 4, 4)
27
};
30
FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
28
31
FIELD(TBFLAG_A64, BT, 9, 1)
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
32
FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
30
index XXXXXXX..XXXXXXX 100644
33
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
31
--- a/target/arm/cpu.c
34
index XXXXXXX..XXXXXXX 100644
32
+++ b/target/arm/cpu.c
35
--- a/target/arm/translate-a64.h
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
36
+++ b/target/arm/translate-a64.h
34
env->sau.ctrl = 0;
37
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
35
}
38
/* Return the byte size of the "whole" vector register, VL / 8. */
36
39
static inline int vec_full_reg_size(DisasContext *s)
37
- set_flush_to_zero(1, &env->vfp.standard_fp_status);
40
{
38
- set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
41
- return s->sve_len;
39
- set_default_nan_mode(1, &env->vfp.standard_fp_status);
42
+ return s->vl;
40
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]);
41
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
42
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
43
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
44
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
45
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
46
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
47
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
48
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
49
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
50
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
51
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/tcg/mve_helper.c
54
+++ b/target/arm/tcg/mve_helper.c
55
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
56
continue; \
57
} \
58
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
59
- &env->vfp.standard_fp_status; \
60
+ &env->vfp.fp_status[FPST_STD]; \
61
if (!(mask & 1)) { \
62
/* We need the result but without updating flags */ \
63
scratch_fpst = *fpst; \
64
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
65
continue; \
66
} \
67
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
68
- &env->vfp.standard_fp_status; \
69
+ &env->vfp.fp_status[FPST_STD]; \
70
if (!(tm & 1)) { \
71
/* We need the result but without updating flags */ \
72
scratch_fpst = *fpst; \
73
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
74
continue; \
75
} \
76
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
77
- &env->vfp.standard_fp_status; \
78
+ &env->vfp.fp_status[FPST_STD]; \
79
if (!(mask & 1)) { \
80
/* We need the result but without updating flags */ \
81
scratch_fpst = *fpst; \
82
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
83
continue; \
84
} \
85
fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
86
- &env->vfp.standard_fp_status; \
87
+ &env->vfp.fp_status[FPST_STD]; \
88
fpst1 = fpst0; \
89
if (!(mask & 1)) { \
90
scratch_fpst = *fpst0; \
91
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
92
continue; \
93
} \
94
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
95
- &env->vfp.standard_fp_status; \
96
+ &env->vfp.fp_status[FPST_STD]; \
97
if (!(mask & 1)) { \
98
/* We need the result but without updating flags */ \
99
scratch_fpst = *fpst; \
100
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
101
continue; \
102
} \
103
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
104
- &env->vfp.standard_fp_status; \
105
+ &env->vfp.fp_status[FPST_STD]; \
106
if (!(mask & 1)) { \
107
/* We need the result but without updating flags */ \
108
scratch_fpst = *fpst; \
109
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
110
TYPE ra = (TYPE)ra_in; \
111
float_status *fpst = (ESIZE == 2) ? \
112
&env->vfp.fp_status[FPST_STD_F16] : \
113
- &env->vfp.standard_fp_status; \
114
+ &env->vfp.fp_status[FPST_STD]; \
115
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
116
if (mask & 1) { \
117
TYPE v = m[H##ESIZE(e)]; \
118
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
119
continue; \
120
} \
121
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
122
- &env->vfp.standard_fp_status; \
123
+ &env->vfp.fp_status[FPST_STD]; \
124
if (!(mask & (1 << (e * ESIZE)))) { \
125
/* We need the result but without updating flags */ \
126
scratch_fpst = *fpst; \
127
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
128
continue; \
129
} \
130
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
131
- &env->vfp.standard_fp_status; \
132
+ &env->vfp.fp_status[FPST_STD]; \
133
if (!(mask & (1 << (e * ESIZE)))) { \
134
/* We need the result but without updating flags */ \
135
scratch_fpst = *fpst; \
136
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
137
continue; \
138
} \
139
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
140
- &env->vfp.standard_fp_status; \
141
+ &env->vfp.fp_status[FPST_STD]; \
142
if (!(mask & 1)) { \
143
/* We need the result but without updating flags */ \
144
scratch_fpst = *fpst; \
145
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
146
float_status scratch_fpst; \
147
float_status *base_fpst = (ESIZE == 2) ? \
148
&env->vfp.fp_status[FPST_STD_F16] : \
149
- &env->vfp.standard_fp_status; \
150
+ &env->vfp.fp_status[FPST_STD]; \
151
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
152
set_float_rounding_mode(rmode, base_fpst); \
153
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
154
@@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
155
unsigned e;
156
float_status *fpst;
157
float_status scratch_fpst;
158
- float_status *base_fpst = &env->vfp.standard_fp_status;
159
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
160
bool old_fz = get_flush_to_zero(base_fpst);
161
set_flush_to_zero(false, base_fpst);
162
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
163
@@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
164
unsigned e;
165
float_status *fpst;
166
float_status scratch_fpst;
167
- float_status *base_fpst = &env->vfp.standard_fp_status;
168
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
169
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
170
set_flush_inputs_to_zero(false, base_fpst);
171
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
172
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
173
continue; \
174
} \
175
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
176
- &env->vfp.standard_fp_status; \
177
+ &env->vfp.fp_status[FPST_STD]; \
178
if (!(mask & 1)) { \
179
/* We need the result but without updating flags */ \
180
scratch_fpst = *fpst; \
181
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/target/arm/tcg/vec_helper.c
184
+++ b/target/arm/tcg/vec_helper.c
185
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
186
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
187
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
188
189
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
190
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
191
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
43
}
192
}
44
193
45
bool disas_sve(DisasContext *, uint32_t);
194
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
46
diff --git a/target/arm/translate.h b/target/arm/translate.h
195
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
47
index XXXXXXX..XXXXXXX 100644
196
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
48
--- a/target/arm/translate.h
197
49
+++ b/target/arm/translate.h
198
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc,
50
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
199
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
51
bool ns; /* Use non-secure CPREG bank on access */
200
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
52
int fp_excp_el; /* FP exception EL or 0 if enabled */
53
int sve_excp_el; /* SVE exception EL or 0 if enabled */
54
- int sve_len; /* SVE vector length in bytes */
55
+ int vl; /* current vector length in bytes */
56
/* Flag indicating that exceptions from secure mode are routed to EL3. */
57
bool secure_routed_to_el3;
58
bool vfp_enabled; /* FP enabled via FPSCR.EN */
59
diff --git a/target/arm/helper.c b/target/arm/helper.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/helper.c
62
+++ b/target/arm/helper.c
63
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
64
zcr_len = sve_zcr_len_for_el(env, el);
65
}
66
DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
67
- DP_TBFLAG_A64(flags, ZCR_LEN, zcr_len);
68
+ DP_TBFLAG_A64(flags, VL, zcr_len);
69
}
70
71
sctlr = regime_sctlr(env, stage1);
72
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/target/arm/translate-a64.c
75
+++ b/target/arm/translate-a64.c
76
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
77
dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
78
dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
79
dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
80
- dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16;
81
+ dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
82
dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
83
dc->bt = EX_TBFLAG_A64(tb_flags, BT);
84
dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
85
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/arm/translate-sve.c
88
+++ b/target/arm/translate-sve.c
89
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_offset(DisasContext *s, int regno)
90
/* Return the byte size of the whole predicate register, VL / 64. */
91
static inline int pred_full_reg_size(DisasContext *s)
92
{
93
- return s->sve_len >> 3;
94
+ return s->vl >> 3;
95
}
201
}
96
202
97
/* Round up the size of a register to a size allowed by
203
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/target/arm/vfp_helper.c
206
+++ b/target/arm/vfp_helper.c
207
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
208
uint32_t a32_flags = 0, a64_flags = 0;
209
210
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
211
- a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
212
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
213
/* FZ16 does not generate an input denormal exception. */
214
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
215
& ~float_flag_input_denormal_flushed);
216
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
217
set_float_exception_flags(0, &env->vfp.fp_status_a64);
218
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
219
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
220
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
221
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
222
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
223
set_float_exception_flags(0, &env->vfp.ah_fp_status);
224
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
98
--
225
--
99
2.25.1
226
2.34.1
227
228
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This register is allocated from the existing block of id registers,
3
Replace with fp_status[FPST_AH_F16].
4
so it is already RES0 for cpus that do not implement SME.
5
4
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220607203306.657998-21-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/cpu.h | 25 +++++++++++++++++++++++++
10
target/arm/cpu.h | 3 +--
12
target/arm/helper.c | 4 ++--
11
target/arm/cpu.c | 2 +-
13
target/arm/kvm64.c | 11 +++++++----
12
target/arm/vfp_helper.c | 10 +++++-----
14
3 files changed, 34 insertions(+), 6 deletions(-)
13
3 files changed, 7 insertions(+), 8 deletions(-)
15
14
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu.h
17
--- a/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
18
+++ b/target/arm/cpu.h
20
@@ -XXX,XX +XXX,XX @@ struct ArchCPU {
19
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
21
uint64_t id_aa64dfr0;
20
* behaviour when FPCR.AH == 1: they don't update cumulative
22
uint64_t id_aa64dfr1;
21
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
23
uint64_t id_aa64zfr0;
22
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
24
+ uint64_t id_aa64smfr0;
23
- * which means we need an ah_fp_status_f16 as well.
25
uint64_t reset_pmcr_el0;
24
+ * which means we need an FPST_AH_F16 as well.
26
} isar;
25
*
27
uint64_t midr;
26
* To avoid having to transfer exception bits around, we simply
28
@@ -XXX,XX +XXX,XX @@ FIELD(ID_AA64ZFR0, I8MM, 44, 4)
27
* say that the FPSCR cumulative exception flags are the logical
29
FIELD(ID_AA64ZFR0, F32MM, 52, 4)
28
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
30
FIELD(ID_AA64ZFR0, F64MM, 56, 4)
29
float_status fp_status_f16_a32;
31
30
float_status fp_status_f16_a64;
32
+FIELD(ID_AA64SMFR0, F32F32, 32, 1)
31
float_status ah_fp_status;
33
+FIELD(ID_AA64SMFR0, B16F32, 34, 1)
32
- float_status ah_fp_status_f16;
34
+FIELD(ID_AA64SMFR0, F16F32, 35, 1)
33
};
35
+FIELD(ID_AA64SMFR0, I8I32, 36, 4)
34
};
36
+FIELD(ID_AA64SMFR0, F64F64, 48, 1)
35
37
+FIELD(ID_AA64SMFR0, I16I64, 52, 4)
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
38
+FIELD(ID_AA64SMFR0, SMEVER, 56, 4)
37
index XXXXXXX..XXXXXXX 100644
39
+FIELD(ID_AA64SMFR0, FA64, 63, 1)
38
--- a/target/arm/cpu.c
40
+
39
+++ b/target/arm/cpu.c
41
FIELD(ID_DFR0, COPDBG, 0, 4)
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
42
FIELD(ID_DFR0, COPSDBG, 4, 4)
41
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
43
FIELD(ID_DFR0, MMAPDBG, 8, 4)
42
set_flush_to_zero(1, &env->vfp.ah_fp_status);
44
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id)
43
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
45
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0;
44
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
45
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
46
47
#ifndef CONFIG_USER_ONLY
48
if (kvm_enabled()) {
49
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/vfp_helper.c
52
+++ b/target/arm/vfp_helper.c
53
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
54
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
55
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
56
/*
57
- * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
58
+ * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
59
* they are used for insns that must not set the cumulative exception bits.
60
*/
61
62
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
63
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
64
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
65
set_float_exception_flags(0, &env->vfp.ah_fp_status);
66
- set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
67
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
46
}
68
}
47
69
48
+static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id)
70
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
49
+{
71
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
50
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64);
72
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
51
+}
73
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
52
+
74
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
53
+static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id)
75
- set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
54
+{
76
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
55
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf;
77
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
56
+}
78
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
57
+
79
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
58
+static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id)
80
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
59
+{
81
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
60
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64);
82
}
61
+}
83
if (changed & FPCR_FZ) {
62
+
84
bool ftz_enabled = val & FPCR_FZ;
63
/*
85
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
64
* Feature tests for "does this exist in either 32-bit or 64-bit?"
86
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
65
*/
87
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
66
diff --git a/target/arm/helper.c b/target/arm/helper.c
88
set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
67
index XXXXXXX..XXXXXXX 100644
89
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
68
--- a/target/arm/helper.c
90
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
69
+++ b/target/arm/helper.c
91
}
70
@@ -XXX,XX +XXX,XX @@ void register_cp_regs_for_features(ARMCPU *cpu)
92
if (changed & FPCR_AH) {
71
.access = PL1_R, .type = ARM_CP_CONST,
93
bool ah_enabled = val & FPCR_AH;
72
.accessfn = access_aa64_tid3,
73
.resetvalue = cpu->isar.id_aa64zfr0 },
74
- { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
75
+ { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64,
76
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
77
.access = PL1_R, .type = ARM_CP_CONST,
78
.accessfn = access_aa64_tid3,
79
- .resetvalue = 0 },
80
+ .resetvalue = cpu->isar.id_aa64smfr0 },
81
{ .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
82
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6,
83
.access = PL1_R, .type = ARM_CP_CONST,
84
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/target/arm/kvm64.c
87
+++ b/target/arm/kvm64.c
88
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
89
} else {
90
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
91
ARM64_SYS_REG(3, 0, 0, 4, 1));
92
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
93
+ ARM64_SYS_REG(3, 0, 0, 4, 5));
94
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
95
ARM64_SYS_REG(3, 0, 0, 5, 0));
96
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
97
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
98
ahcf->isar.id_aa64pfr0 = t;
99
100
/*
101
- * Before v5.1, KVM did not support SVE and did not expose
102
- * ID_AA64ZFR0_EL1 even as RAZ. After v5.1, KVM still does
103
- * not expose the register to "user" requests like this
104
- * unless the host supports SVE.
105
+ * There is a range of kernels between kernel commit 73433762fcae
106
+ * and f81cb2c3ad41 which have a bug where the kernel doesn't expose
107
+ * SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has enabled
108
+ * SVE support, so we only read it here, rather than together with all
109
+ * the other ID registers earlier.
110
*/
111
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
112
ARM64_SYS_REG(3, 0, 0, 4, 4));
113
--
94
--
114
2.25.1
95
2.34.1
96
97
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This will be used for implementing FEAT_SME.
3
Replace with fp_status[FPST_AH].
4
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220607203306.657998-20-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-11-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
9
---
10
target/arm/cpu.h | 5 +++++
10
target/arm/cpu.h | 3 +--
11
1 file changed, 5 insertions(+)
11
target/arm/cpu.c | 6 +++---
12
target/arm/vfp_helper.c | 6 +++---
13
3 files changed, 7 insertions(+), 8 deletions(-)
12
14
13
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.h
17
--- a/target/arm/cpu.h
16
+++ b/target/arm/cpu.h
18
+++ b/target/arm/cpu.h
17
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
19
@@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState;
18
return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
20
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
21
* using a fixed value for it.
22
*
23
- * The ah_fp_status is needed because some insns have different
24
+ * FPST_AH is needed because some insns have different
25
* behaviour when FPCR.AH == 1: they don't update cumulative
26
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
27
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
28
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
29
float_status fp_status_a64;
30
float_status fp_status_f16_a32;
31
float_status fp_status_f16_a64;
32
- float_status ah_fp_status;
33
};
34
};
35
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.c
39
+++ b/target/arm/cpu.c
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
41
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
42
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
43
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
44
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
45
- set_flush_to_zero(1, &env->vfp.ah_fp_status);
46
- set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
47
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
48
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
49
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
50
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
51
52
#ifndef CONFIG_USER_ONLY
53
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/vfp_helper.c
56
+++ b/target/arm/vfp_helper.c
57
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
58
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
59
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
60
/*
61
- * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
62
+ * We do not merge in flags from FPST_AH or FPST_AH_F16, because
63
* they are used for insns that must not set the cumulative exception bits.
64
*/
65
66
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
67
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
68
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
69
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
70
- set_float_exception_flags(0, &env->vfp.ah_fp_status);
71
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
72
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
19
}
73
}
20
74
21
+static inline bool isar_feature_aa64_sme(const ARMISARegisters *id)
75
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
22
+{
76
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
23
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0;
77
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
24
+}
78
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
25
+
79
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
26
static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id)
80
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
27
{
81
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
28
return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
82
}
83
if (changed & FPCR_AH) {
29
--
84
--
30
2.25.1
85
2.34.1
86
87
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Replace with fp_status[FPST_A64_F16].
4
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-25-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Message-id: 20250129013857.135256-12-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
9
---
8
target/arm/ptw.h | 1 -
10
target/arm/cpu.h | 1 -
9
target/arm/helper.c | 16 ----------------
11
target/arm/cpu.c | 2 +-
10
target/arm/ptw.c | 16 ++++++++++++++++
12
target/arm/tcg/sme_helper.c | 2 +-
11
3 files changed, 16 insertions(+), 17 deletions(-)
13
target/arm/tcg/vec_helper.c | 9 ++++-----
14
target/arm/vfp_helper.c | 16 ++++++++--------
15
5 files changed, 14 insertions(+), 16 deletions(-)
12
16
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
19
--- a/target/arm/cpu.h
16
+++ b/target/arm/ptw.h
20
+++ b/target/arm/cpu.h
17
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
18
#ifndef CONFIG_USER_ONLY
22
float_status fp_status_a32;
19
23
float_status fp_status_a64;
20
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
24
float_status fp_status_f16_a32;
21
-uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn);
25
- float_status fp_status_f16_a64;
22
26
};
23
#endif /* !CONFIG_USER_ONLY */
27
};
24
#endif /* TARGET_ARM_PTW_H */
28
25
diff --git a/target/arm/helper.c b/target/arm/helper.c
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
26
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/helper.c
31
--- a/target/arm/cpu.c
28
+++ b/target/arm/helper.c
32
+++ b/target/arm/cpu.c
29
@@ -XXX,XX +XXX,XX @@ bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx)
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
30
return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
34
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
35
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
36
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
37
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
38
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
40
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
41
set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
42
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/sme_helper.c
45
+++ b/target/arm/tcg/sme_helper.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
47
* produces default NaNs. We also need a second copy of fp_status with
48
* round-to-odd -- see above.
49
*/
50
- fpst_f16 = env->vfp.fp_status_f16_a64;
51
+ fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
52
fpst_std = env->vfp.fp_status_a64;
53
set_default_nan_mode(true, &fpst_std);
54
set_default_nan_mode(true, &fpst_f16);
55
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/tcg/vec_helper.c
58
+++ b/target/arm/tcg/vec_helper.c
59
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
60
}
61
}
62
do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
63
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
64
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
31
}
65
}
32
66
33
-/* Return the TTBR associated with this translation regime */
67
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
34
-uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
35
-{
69
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
36
- if (mmu_idx == ARMMMUIdx_Stage2) {
70
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
37
- return env->cp15.vttbr_el2;
71
float_status *status = &env->vfp.fp_status_a64;
38
- }
72
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
39
- if (mmu_idx == ARMMMUIdx_Stage2_S) {
73
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
40
- return env->cp15.vsttbr_el2;
74
int negx = 0, negf = 0;
41
- }
75
42
- if (ttbrn == 0) {
76
if (is_s) {
43
- return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
77
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
44
- } else {
78
}
45
- return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
79
}
46
- }
80
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
47
-}
81
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
82
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
83
}
84
85
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
86
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
87
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
88
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
89
float_status *status = &env->vfp.fp_status_a64;
90
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
91
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
92
int negx = 0, negf = 0;
93
94
if (is_s) {
95
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
96
negx = 0x8000;
97
}
98
}
48
-
99
-
49
/* Convert a possible stage1+2 MMU index into the appropriate
100
for (i = 0; i < oprsz; i += 16) {
50
* stage 1 MMU index
101
float16 mm_16 = *(float16 *)(vm + i + idx);
51
*/
102
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
52
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
103
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
53
index XXXXXXX..XXXXXXX 100644
104
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/ptw.c
105
--- a/target/arm/vfp_helper.c
55
+++ b/target/arm/ptw.c
106
+++ b/target/arm/vfp_helper.c
56
@@ -XXX,XX +XXX,XX @@ static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
107
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
108
& ~float_flag_input_denormal_flushed);
109
110
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
111
- a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
112
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
113
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
114
/*
115
* We do not merge in flags from FPST_AH or FPST_AH_F16, because
116
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
117
set_float_exception_flags(0, &env->vfp.fp_status_a32);
118
set_float_exception_flags(0, &env->vfp.fp_status_a64);
119
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
120
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
121
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
122
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
123
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
124
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
125
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
126
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
127
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
128
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
129
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
130
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
57
}
131
}
58
}
132
if (changed & FPCR_FZ16) {
59
133
bool ftz_enabled = val & FPCR_FZ16;
60
+/* Return the TTBR associated with this translation regime */
134
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
61
+static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
135
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
62
+{
136
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
63
+ if (mmu_idx == ARMMMUIdx_Stage2) {
137
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
64
+ return env->cp15.vttbr_el2;
138
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
65
+ }
139
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
66
+ if (mmu_idx == ARMMMUIdx_Stage2_S) {
140
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
67
+ return env->cp15.vsttbr_el2;
141
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
68
+ }
142
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
69
+ if (ttbrn == 0) {
143
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
70
+ return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
144
}
71
+ } else {
145
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
72
+ return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
146
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
73
+ }
147
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
74
+}
148
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
75
+
149
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
76
static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs)
150
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
77
{
151
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
152
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
153
}
154
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
155
if (ah_enabled) {
156
/* Change behaviours for A64 FP operations */
157
arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
158
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
159
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
160
} else {
161
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
162
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
163
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
164
}
165
}
78
/*
166
/*
79
--
167
--
80
2.25.1
168
2.34.1
169
170
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The use of ARM_CPU to recover env from cs calls
3
Replace with fp_status[FPST_A32_F16].
4
object_class_dynamic_cast, which shows up on the profile.
5
This is pointless, because all callers already have env, and
6
the reverse operation, env_cpu, is only pointer arithmetic.
7
4
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220604040607.269301-29-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Message-id: 20250129013857.135256-13-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
9
---
13
target/arm/ptw.c | 23 +++++++++--------------
10
target/arm/cpu.h | 1 -
14
1 file changed, 9 insertions(+), 14 deletions(-)
11
target/arm/cpu.c | 2 +-
12
target/arm/tcg/vec_helper.c | 4 ++--
13
target/arm/vfp_helper.c | 14 +++++++-------
14
4 files changed, 10 insertions(+), 11 deletions(-)
15
15
16
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/ptw.c
18
--- a/target/arm/cpu.h
19
+++ b/target/arm/ptw.c
19
+++ b/target/arm/cpu.h
20
@@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
21
struct {
22
float_status fp_status_a32;
23
float_status fp_status_a64;
24
- float_status fp_status_f16_a32;
25
};
26
};
27
28
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/cpu.c
31
+++ b/target/arm/cpu.c
32
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
33
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
34
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
35
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
36
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
37
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
38
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
40
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
41
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/tcg/vec_helper.c
44
+++ b/target/arm/tcg/vec_helper.c
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
46
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
47
48
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
49
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
50
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
21
}
51
}
22
52
23
/* All loads done in the course of a page table walk go through here. */
53
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
24
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
54
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
25
+static uint32_t arm_ldl_ptw(CPUARMState *env, hwaddr addr, bool is_secure,
55
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
26
ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
56
27
{
57
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
28
- ARMCPU *cpu = ARM_CPU(cs);
58
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
29
- CPUARMState *env = &cpu->env;
59
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
30
+ CPUState *cs = env_cpu(env);
31
MemTxAttrs attrs = {};
32
MemTxResult result = MEMTX_OK;
33
AddressSpace *as;
34
@@ -XXX,XX +XXX,XX @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
35
return 0;
36
}
60
}
37
61
38
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
62
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
39
+static uint64_t arm_ldq_ptw(CPUARMState *env, hwaddr addr, bool is_secure,
63
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
40
ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
64
index XXXXXXX..XXXXXXX 100644
41
{
65
--- a/target/arm/vfp_helper.c
42
- ARMCPU *cpu = ARM_CPU(cs);
66
+++ b/target/arm/vfp_helper.c
43
- CPUARMState *env = &cpu->env;
67
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
44
+ CPUState *cs = env_cpu(env);
68
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
45
MemTxAttrs attrs = {};
69
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
46
MemTxResult result = MEMTX_OK;
70
/* FZ16 does not generate an input denormal exception. */
47
AddressSpace *as;
71
- a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
48
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
72
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
49
target_ulong *page_size,
73
& ~float_flag_input_denormal_flushed);
50
ARMMMUFaultInfo *fi)
74
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
51
{
75
& ~float_flag_input_denormal_flushed);
52
- CPUState *cs = env_cpu(env);
76
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
53
int level = 1;
77
*/
54
uint32_t table;
78
set_float_exception_flags(0, &env->vfp.fp_status_a32);
55
uint32_t desc;
79
set_float_exception_flags(0, &env->vfp.fp_status_a64);
56
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
80
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
57
fi->type = ARMFault_Translation;
81
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
58
goto do_fault;
82
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
83
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
84
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
85
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
86
}
87
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
88
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
89
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
90
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
91
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
59
}
92
}
60
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
93
if (changed & FPCR_FZ16) {
61
+ desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx),
94
bool ftz_enabled = val & FPCR_FZ16;
62
mmu_idx, fi);
95
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
63
if (fi->type != ARMFault_None) {
96
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
64
goto do_fault;
97
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
65
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
98
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
66
/* Fine pagetable. */
99
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
67
table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
100
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
68
}
101
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
69
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
102
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
70
+ desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx),
103
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
71
mmu_idx, fi);
104
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
72
if (fi->type != ARMFault_None) {
105
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
73
goto do_fault;
106
bool dnan_enabled = val & FPCR_DN;
74
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
107
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
75
hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
108
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
76
target_ulong *page_size, ARMMMUFaultInfo *fi)
109
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
77
{
110
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
78
- CPUState *cs = env_cpu(env);
111
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
79
ARMCPU *cpu = env_archcpu(env);
112
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
80
int level = 1;
113
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
81
uint32_t table;
114
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
82
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
115
softfloat_to_vfp_compare(env, \
83
fi->type = ARMFault_Translation;
116
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
84
goto do_fault;
117
}
85
}
118
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
86
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
119
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
87
+ desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx),
120
DO_VFP_cmp(s, float32, float32, fp_status_a32)
88
mmu_idx, fi);
121
DO_VFP_cmp(d, float64, float64, fp_status_a32)
89
if (fi->type != ARMFault_None) {
122
#undef DO_VFP_cmp
90
goto do_fault;
91
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
92
ns = extract32(desc, 3, 1);
93
/* Lookup l2 entry. */
94
table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
95
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
96
+ desc = arm_ldl_ptw(env, table, regime_is_secure(env, mmu_idx),
97
mmu_idx, fi);
98
if (fi->type != ARMFault_None) {
99
goto do_fault;
100
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
101
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
102
{
103
ARMCPU *cpu = env_archcpu(env);
104
- CPUState *cs = CPU(cpu);
105
/* Read an LPAE long-descriptor translation table. */
106
ARMFaultType fault_type = ARMFault_Translation;
107
uint32_t level;
108
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
109
descaddr |= (address >> (stride * (4 - level))) & indexmask;
110
descaddr &= ~7ULL;
111
nstable = extract32(tableattrs, 4, 1);
112
- descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi);
113
+ descriptor = arm_ldq_ptw(env, descaddr, !nstable, mmu_idx, fi);
114
if (fi->type != ARMFault_None) {
115
goto do_fault;
116
}
117
--
123
--
118
2.25.1
124
2.34.1
125
126
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add an interface function to extract the digested vector length
3
Replace with fp_status[FPST_A64].
4
rather than the raw zcr_el[1] value. This fixes an incorrect
5
return from do_prctl_set_vl where we didn't take into account
6
the set of vector lengths supported by the cpu.
7
4
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20220607203306.657998-3-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-14-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
9
---
13
linux-user/aarch64/target_prctl.h | 20 +++++++++++++-------
10
target/arm/cpu.h | 1 -
14
target/arm/cpu.h | 11 +++++++++++
11
target/arm/cpu.c | 2 +-
15
linux-user/aarch64/signal.c | 4 ++--
12
target/arm/tcg/sme_helper.c | 2 +-
16
3 files changed, 26 insertions(+), 9 deletions(-)
13
target/arm/tcg/vec_helper.c | 10 +++++-----
14
target/arm/vfp_helper.c | 16 ++++++++--------
15
5 files changed, 15 insertions(+), 16 deletions(-)
17
16
18
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/linux-user/aarch64/target_prctl.h
21
+++ b/linux-user/aarch64/target_prctl.h
22
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
23
{
24
ARMCPU *cpu = env_archcpu(env);
25
if (cpu_isar_feature(aa64_sve, cpu)) {
26
- return ((cpu->env.vfp.zcr_el[1] & 0xf) + 1) * 16;
27
+ return sve_vq(env) * 16;
28
}
29
return -TARGET_EINVAL;
30
}
31
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
32
*/
33
if (cpu_isar_feature(aa64_sve, env_archcpu(env))
34
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
35
- ARMCPU *cpu = env_archcpu(env);
36
uint32_t vq, old_vq;
37
38
- old_vq = (env->vfp.zcr_el[1] & 0xf) + 1;
39
- vq = MAX(arg2 / 16, 1);
40
- vq = MIN(vq, cpu->sve_max_vq);
41
+ old_vq = sve_vq(env);
42
43
+ /*
44
+ * Bound the value of arg2, so that we know that it fits into
45
+ * the 4-bit field in ZCR_EL1. Rely on the hflags rebuild to
46
+ * sort out the length supported by the cpu.
47
+ */
48
+ vq = MAX(arg2 / 16, 1);
49
+ vq = MIN(vq, ARM_MAX_VQ);
50
+ env->vfp.zcr_el[1] = vq - 1;
51
+ arm_rebuild_hflags(env);
52
+
53
+ vq = sve_vq(env);
54
if (vq < old_vq) {
55
aarch64_sve_narrow_vq(env, vq);
56
}
57
- env->vfp.zcr_el[1] = vq - 1;
58
- arm_rebuild_hflags(env);
59
return vq * 16;
60
}
61
return -TARGET_EINVAL;
62
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
63
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/cpu.h
19
--- a/target/arm/cpu.h
65
+++ b/target/arm/cpu.h
20
+++ b/target/arm/cpu.h
66
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
67
return EX_TBFLAG_ANY(env->hflags, MMUIDX);
22
float_status fp_status[FPST_COUNT];
23
struct {
24
float_status fp_status_a32;
25
- float_status fp_status_a64;
26
};
27
};
28
29
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/cpu.c
32
+++ b/target/arm/cpu.c
33
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
34
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
35
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
36
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
37
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
38
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
39
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
40
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
41
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
42
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/tcg/sme_helper.c
45
+++ b/target/arm/tcg/sme_helper.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
47
* round-to-odd -- see above.
48
*/
49
fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
50
- fpst_std = env->vfp.fp_status_a64;
51
+ fpst_std = env->vfp.fp_status[FPST_A64];
52
set_default_nan_mode(true, &fpst_std);
53
set_default_nan_mode(true, &fpst_f16);
54
fpst_odd = fpst_std;
55
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/tcg/vec_helper.c
58
+++ b/target/arm/tcg/vec_helper.c
59
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
60
negx = 0x8000800080008000ull;
61
}
62
}
63
- do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
64
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
65
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
68
}
66
}
69
67
70
+/**
68
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
71
+ * sve_vq
69
intptr_t i, oprsz = simd_oprsz(desc);
72
+ * @env: the cpu context
70
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
73
+ *
71
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
74
+ * Return the VL cached within env->hflags, in units of quadwords.
72
- float_status *status = &env->vfp.fp_status_a64;
75
+ */
73
+ float_status *status = &env->vfp.fp_status[FPST_A64];
76
+static inline int sve_vq(CPUARMState *env)
74
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
77
+{
75
int negx = 0, negf = 0;
78
+ return EX_TBFLAG_A64(env->hflags, VL) + 1;
76
79
+}
77
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
80
+
78
negx = 0x8000800080008000ull;
81
static inline bool bswap_code(bool sctlr_b)
79
}
82
{
80
}
83
#ifdef CONFIG_USER_ONLY
81
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc,
84
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
82
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
83
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
84
}
85
86
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
87
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
88
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
89
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
90
- float_status *status = &env->vfp.fp_status_a64;
91
+ float_status *status = &env->vfp.fp_status[FPST_A64];
92
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
93
int negx = 0, negf = 0;
94
95
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
96
*/
97
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
98
99
- *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32;
100
+ *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32;
101
set_default_nan_mode(true, statusp);
102
103
if (ebf) {
104
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
85
index XXXXXXX..XXXXXXX 100644
105
index XXXXXXX..XXXXXXX 100644
86
--- a/linux-user/aarch64/signal.c
106
--- a/target/arm/vfp_helper.c
87
+++ b/linux-user/aarch64/signal.c
107
+++ b/target/arm/vfp_helper.c
88
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
108
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
89
109
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
90
case TARGET_SVE_MAGIC:
110
& ~float_flag_input_denormal_flushed);
91
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
111
92
- vq = (env->vfp.zcr_el[1] & 0xf) + 1;
112
- a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
93
+ vq = sve_vq(env);
113
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
94
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
114
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
95
if (!sve && size == sve_size) {
115
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
96
sve = (struct target_sve_context *)ctx;
116
/*
97
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
117
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
98
118
* be the architecturally up-to-date exception flag information first.
99
/* SVE state needs saving only if it exists. */
119
*/
100
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
120
set_float_exception_flags(0, &env->vfp.fp_status_a32);
101
- vq = (env->vfp.zcr_el[1] & 0xf) + 1;
121
- set_float_exception_flags(0, &env->vfp.fp_status_a64);
102
+ vq = sve_vq(env);
122
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
103
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
123
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
104
sve_ofs = alloc_sigframe_space(sve_size, &layout);
124
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
125
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
126
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
127
break;
128
}
129
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
130
- set_float_rounding_mode(i, &env->vfp.fp_status_a64);
131
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
132
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
133
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
134
}
135
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
136
if (changed & FPCR_FZ) {
137
bool ftz_enabled = val & FPCR_FZ;
138
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
139
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
140
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
141
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
142
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
143
}
144
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
145
*/
146
bool fitz_enabled = (val & FPCR_FIZ) ||
147
(val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
148
- set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
149
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
150
}
151
if (changed & FPCR_DN) {
152
bool dnan_enabled = val & FPCR_DN;
153
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
154
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
155
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
156
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
157
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
158
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
159
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
160
161
if (ah_enabled) {
162
/* Change behaviours for A64 FP operations */
163
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
164
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
165
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
166
} else {
167
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
168
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
169
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
170
}
105
}
171
}
106
--
172
--
107
2.25.1
173
2.34.1
174
175
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We don't need to constrain the value set in zcr_el[1],
3
Replace with fp_status[FPST_A32]. As this was the last of the
4
because it will be done by sve_zcr_len_for_el.
4
old structures, we can remove the anonymous union and struct.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220607203306.657998-10-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20250129013857.135256-15-richard.henderson@linaro.org
9
[PMM: tweak to account for change to is_ebf()]
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
---
11
target/arm/cpu.c | 3 +--
12
target/arm/cpu.h | 7 +------
12
1 file changed, 1 insertion(+), 2 deletions(-)
13
target/arm/cpu.c | 2 +-
14
target/arm/tcg/vec_helper.c | 2 +-
15
target/arm/vfp_helper.c | 18 +++++++++---------
16
4 files changed, 12 insertions(+), 17 deletions(-)
13
17
18
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/cpu.h
21
+++ b/target/arm/cpu.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
23
uint32_t scratch[8];
24
25
/* There are a number of distinct float control structures. */
26
- union {
27
- float_status fp_status[FPST_COUNT];
28
- struct {
29
- float_status fp_status_a32;
30
- };
31
- };
32
+ float_status fp_status[FPST_COUNT];
33
34
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
35
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
14
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
36
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
15
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/cpu.c
38
--- a/target/arm/cpu.c
17
+++ b/target/arm/cpu.c
39
+++ b/target/arm/cpu.c
18
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
40
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
19
CPACR_EL1, ZEN, 3);
41
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
20
/* with reasonable vector length */
42
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
21
if (cpu_isar_feature(aa64_sve, cpu)) {
43
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
22
- env->vfp.zcr_el[1] =
44
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
23
- aarch64_sve_zcr_get_valid_len(cpu, cpu->sve_default_vq - 1);
45
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
24
+ env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
46
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
47
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
48
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
49
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/tcg/vec_helper.c
52
+++ b/target/arm/tcg/vec_helper.c
53
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
54
*/
55
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
56
57
- *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32;
58
+ *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32];
59
set_default_nan_mode(true, statusp);
60
61
if (ebf) {
62
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/vfp_helper.c
65
+++ b/target/arm/vfp_helper.c
66
@@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
67
{
68
uint32_t a32_flags = 0, a64_flags = 0;
69
70
- a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
71
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
72
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
73
/* FZ16 does not generate an input denormal exception. */
74
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
75
@@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
76
* values. The caller should have arranged for env->vfp.fpsr to
77
* be the architecturally up-to-date exception flag information first.
78
*/
79
- set_float_exception_flags(0, &env->vfp.fp_status_a32);
80
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
81
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
82
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
83
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
84
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
85
i = float_round_to_zero;
86
break;
25
}
87
}
88
- set_float_rounding_mode(i, &env->vfp.fp_status_a32);
89
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
90
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
91
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
92
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
93
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
94
}
95
if (changed & FPCR_FZ) {
96
bool ftz_enabled = val & FPCR_FZ;
97
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
98
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
99
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
100
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
101
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
102
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
103
}
104
if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
26
/*
105
/*
27
* Enable 48-bit address space (TODO: take reserved_va into account).
106
@@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
107
}
108
if (changed & FPCR_DN) {
109
bool dnan_enabled = val & FPCR_DN;
110
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
111
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
112
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
113
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
114
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
115
@@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
116
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
117
}
118
DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
119
-DO_VFP_cmp(s, float32, float32, fp_status_a32)
120
-DO_VFP_cmp(d, float64, float64, fp_status_a32)
121
+DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
122
+DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
123
#undef DO_VFP_cmp
124
125
/* Integer to float and float to integer conversions */
126
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
127
128
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
129
{
130
- uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32);
131
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
132
uint32_t result = pair;
133
uint32_t z = (pair >> 32) == 0;
134
28
--
135
--
29
2.25.1
136
2.34.1
137
138
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The ARM pseudocode function NVL uses this predicate now,
3
Select on index instead of pointer.
4
and I think it's a bit clearer. Simplify the pseudocode
4
No functional change.
5
condition by noting that IsInHost is always false for EL1.
6
5
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220607203306.657998-7-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20250129013857.135256-16-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/helper.c | 3 +--
11
target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------
13
1 file changed, 1 insertion(+), 2 deletions(-)
12
1 file changed, 14 insertions(+), 26 deletions(-)
14
13
15
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper.c
16
--- a/target/arm/tcg/mve_helper.c
18
+++ b/target/arm/helper.c
17
+++ b/target/arm/tcg/mve_helper.c
19
@@ -XXX,XX +XXX,XX @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
18
@@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
20
ARMCPU *cpu = env_archcpu(env);
19
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
21
uint32_t zcr_len = cpu->sve_max_vq - 1;
20
continue; \
22
21
} \
23
- if (el <= 1 &&
22
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
24
- (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
23
- &env->vfp.fp_status[FPST_STD]; \
25
+ if (el <= 1 && !el_is_in_host(env, el)) {
24
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
26
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
25
if (!(mask & 1)) { \
27
}
26
/* We need the result but without updating flags */ \
28
if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
27
scratch_fpst = *fpst; \
28
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma)
29
r[e] = 0; \
30
continue; \
31
} \
32
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
33
- &env->vfp.fp_status[FPST_STD]; \
34
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
35
if (!(tm & 1)) { \
36
/* We need the result but without updating flags */ \
37
scratch_fpst = *fpst; \
38
@@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
39
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
40
continue; \
41
} \
42
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
43
- &env->vfp.fp_status[FPST_STD]; \
44
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
45
if (!(mask & 1)) { \
46
/* We need the result but without updating flags */ \
47
scratch_fpst = *fpst; \
48
@@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true)
49
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
50
continue; \
51
} \
52
- fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
53
- &env->vfp.fp_status[FPST_STD]; \
54
+ fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
55
fpst1 = fpst0; \
56
if (!(mask & 1)) { \
57
scratch_fpst = *fpst0; \
58
@@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
59
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
60
continue; \
61
} \
62
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
63
- &env->vfp.fp_status[FPST_STD]; \
64
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
65
if (!(mask & 1)) { \
66
/* We need the result but without updating flags */ \
67
scratch_fpst = *fpst; \
68
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
69
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
70
continue; \
71
} \
72
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
73
- &env->vfp.fp_status[FPST_STD]; \
74
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
75
if (!(mask & 1)) { \
76
/* We need the result but without updating flags */ \
77
scratch_fpst = *fpst; \
78
@@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
79
unsigned e; \
80
TYPE *m = vm; \
81
TYPE ra = (TYPE)ra_in; \
82
- float_status *fpst = (ESIZE == 2) ? \
83
- &env->vfp.fp_status[FPST_STD_F16] : \
84
- &env->vfp.fp_status[FPST_STD]; \
85
+ float_status *fpst = \
86
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
87
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
88
if (mask & 1) { \
89
TYPE v = m[H##ESIZE(e)]; \
90
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
91
if ((mask & emask) == 0) { \
92
continue; \
93
} \
94
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
95
- &env->vfp.fp_status[FPST_STD]; \
96
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
97
if (!(mask & (1 << (e * ESIZE)))) { \
98
/* We need the result but without updating flags */ \
99
scratch_fpst = *fpst; \
100
@@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
101
if ((mask & emask) == 0) { \
102
continue; \
103
} \
104
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
105
- &env->vfp.fp_status[FPST_STD]; \
106
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
107
if (!(mask & (1 << (e * ESIZE)))) { \
108
/* We need the result but without updating flags */ \
109
scratch_fpst = *fpst; \
110
@@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
111
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
112
continue; \
113
} \
114
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
115
- &env->vfp.fp_status[FPST_STD]; \
116
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
117
if (!(mask & 1)) { \
118
/* We need the result but without updating flags */ \
119
scratch_fpst = *fpst; \
120
@@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
121
unsigned e; \
122
float_status *fpst; \
123
float_status scratch_fpst; \
124
- float_status *base_fpst = (ESIZE == 2) ? \
125
- &env->vfp.fp_status[FPST_STD_F16] : \
126
- &env->vfp.fp_status[FPST_STD]; \
127
+ float_status *base_fpst = \
128
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
129
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
130
set_float_rounding_mode(rmode, base_fpst); \
131
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
132
@@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
133
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
134
continue; \
135
} \
136
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
137
- &env->vfp.fp_status[FPST_STD]; \
138
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
139
if (!(mask & 1)) { \
140
/* We need the result but without updating flags */ \
141
scratch_fpst = *fpst; \
29
--
142
--
30
2.25.1
143
2.34.1
144
145
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We handle this routing in raise_exception. Promoting the value early
3
Pass ARMFPStatusFlavour index instead of fp_status[FOO].
4
means that we can't directly compare FPEXC_EL and SVEEXC_EL.
5
4
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220607203306.657998-4-richard.henderson@linaro.org
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-id: 20250129013857.135256-17-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/helper.c | 3 +--
10
target/arm/vfp_helper.c | 10 +++++-----
12
1 file changed, 1 insertion(+), 2 deletions(-)
11
1 file changed, 5 insertions(+), 5 deletions(-)
13
12
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
13
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
15
--- a/target/arm/vfp_helper.c
17
+++ b/target/arm/helper.c
16
+++ b/target/arm/vfp_helper.c
18
@@ -XXX,XX +XXX,XX @@ int sve_exception_el(CPUARMState *env, int el)
17
@@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
19
/* fall through */
18
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
20
case 0:
19
{ \
21
case 2:
20
softfloat_to_vfp_compare(env, \
22
- /* route_to_el2 */
21
- FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
23
- return hcr_el2 & HCR_TGE ? 2 : 1;
22
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
24
+ return 1;
23
} \
25
}
24
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
26
25
{ \
27
/* Check CPACR.FPEN. */
26
softfloat_to_vfp_compare(env, \
27
- FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
28
+ FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
29
}
30
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
31
-DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
32
-DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
33
+DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
34
+DO_VFP_cmp(s, float32, float32, FPST_A32)
35
+DO_VFP_cmp(d, float64, float64, FPST_A32)
36
#undef DO_VFP_cmp
37
38
/* Integer to float and float to integer conversions */
28
--
39
--
29
2.25.1
40
2.34.1
41
42
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Read the bit from the source, rather than from the proxy via
4
get_flush_inputs_to_zero. This makes it clear that it does
5
not matter which of the float_status structures is used.
6
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-4-richard.henderson@linaro.org
8
Message-id: 20250129013857.135256-34-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
11
---
8
target/arm/ptw.h | 15 +++--
12
target/arm/tcg/vec_helper.c | 12 ++++++------
9
target/arm/helper.c | 137 +++-----------------------------------------
13
1 file changed, 6 insertions(+), 6 deletions(-)
10
target/arm/ptw.c | 123 +++++++++++++++++++++++++++++++++++++++
11
3 files changed, 140 insertions(+), 135 deletions(-)
12
14
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
15
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
17
--- a/target/arm/tcg/vec_helper.c
16
+++ b/target/arm/ptw.h
18
+++ b/target/arm/tcg/vec_helper.c
17
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
18
20
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
19
#ifndef CONFIG_USER_ONLY
21
20
22
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
21
+uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
23
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
22
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi);
24
+ env->vfp.fpcr & FPCR_FZ16);
23
+uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
24
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi);
25
+
26
bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx);
27
bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx);
28
ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
29
ARMCacheAttrs s1, ARMCacheAttrs s2);
30
31
-bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
32
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
33
- hwaddr *phys_ptr, int *prot,
34
- target_ulong *page_size,
35
- ARMMMUFaultInfo *fi);
36
+bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
37
+ uint32_t *table, uint32_t address);
38
+int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
39
+ int ap, int domain_prot);
40
+
41
bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
42
MMUAccessType access_type, ARMMMUIdx mmu_idx,
43
hwaddr *phys_ptr, int *prot,
44
diff --git a/target/arm/helper.c b/target/arm/helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/helper.c
47
+++ b/target/arm/helper.c
48
@@ -XXX,XX +XXX,XX @@ bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
49
* @ap: The 3-bit access permissions (AP[2:0])
50
* @domain_prot: The 2-bit domain access permissions
51
*/
52
-static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
53
- int ap, int domain_prot)
54
+int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot)
55
{
56
bool is_user = regime_is_user(env, mmu_idx);
57
58
@@ -XXX,XX +XXX,XX @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
59
return prot_rw | PAGE_EXEC;
60
}
25
}
61
26
62
-static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
27
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
63
- uint32_t *table, uint32_t address)
28
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
64
+bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
29
}
65
+ uint32_t *table, uint32_t address)
30
}
66
{
31
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
67
/* Note that we can only get here for an AArch32 PL0/PL1 lookup */
32
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
68
TCR *tcr = regime_tcr(env, mmu_idx);
33
+ env->vfp.fpcr & FPCR_FZ16);
69
@@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
70
}
34
}
71
35
72
/* All loads done in the course of a page table walk go through here. */
36
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
73
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
37
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
74
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
38
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
75
+uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
39
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
76
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
40
float_status *status = &env->vfp.fp_status[FPST_A64];
77
{
41
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
78
ARMCPU *cpu = ARM_CPU(cs);
42
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
79
CPUARMState *env = &cpu->env;
43
int negx = 0, negf = 0;
80
@@ -XXX,XX +XXX,XX @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
44
81
return 0;
45
if (is_s) {
46
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
47
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
48
49
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
50
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
51
+ env->vfp.fpcr & FPCR_FZ16);
82
}
52
}
83
53
84
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
54
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
85
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
55
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
86
+uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
56
}
87
+ ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
57
}
88
{
58
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
89
ARMCPU *cpu = ARM_CPU(cs);
59
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
90
CPUARMState *env = &cpu->env;
60
+ env->vfp.fpcr & FPCR_FZ16);
91
@@ -XXX,XX +XXX,XX @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
92
return 0;
93
}
61
}
94
62
95
-bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
63
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
96
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
64
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
97
- hwaddr *phys_ptr, int *prot,
65
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
98
- target_ulong *page_size,
66
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
99
- ARMMMUFaultInfo *fi)
67
float_status *status = &env->vfp.fp_status[FPST_A64];
100
-{
68
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
101
- CPUState *cs = env_cpu(env);
69
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
102
- int level = 1;
70
int negx = 0, negf = 0;
103
- uint32_t table;
71
104
- uint32_t desc;
72
if (is_s) {
105
- int type;
106
- int ap;
107
- int domain = 0;
108
- int domain_prot;
109
- hwaddr phys_addr;
110
- uint32_t dacr;
111
-
112
- /* Pagetable walk. */
113
- /* Lookup l1 descriptor. */
114
- if (!get_level1_table_address(env, mmu_idx, &table, address)) {
115
- /* Section translation fault if page walk is disabled by PD0 or PD1 */
116
- fi->type = ARMFault_Translation;
117
- goto do_fault;
118
- }
119
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
120
- mmu_idx, fi);
121
- if (fi->type != ARMFault_None) {
122
- goto do_fault;
123
- }
124
- type = (desc & 3);
125
- domain = (desc >> 5) & 0x0f;
126
- if (regime_el(env, mmu_idx) == 1) {
127
- dacr = env->cp15.dacr_ns;
128
- } else {
129
- dacr = env->cp15.dacr_s;
130
- }
131
- domain_prot = (dacr >> (domain * 2)) & 3;
132
- if (type == 0) {
133
- /* Section translation fault. */
134
- fi->type = ARMFault_Translation;
135
- goto do_fault;
136
- }
137
- if (type != 2) {
138
- level = 2;
139
- }
140
- if (domain_prot == 0 || domain_prot == 2) {
141
- fi->type = ARMFault_Domain;
142
- goto do_fault;
143
- }
144
- if (type == 2) {
145
- /* 1Mb section. */
146
- phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
147
- ap = (desc >> 10) & 3;
148
- *page_size = 1024 * 1024;
149
- } else {
150
- /* Lookup l2 entry. */
151
- if (type == 1) {
152
- /* Coarse pagetable. */
153
- table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
154
- } else {
155
- /* Fine pagetable. */
156
- table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
157
- }
158
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
159
- mmu_idx, fi);
160
- if (fi->type != ARMFault_None) {
161
- goto do_fault;
162
- }
163
- switch (desc & 3) {
164
- case 0: /* Page translation fault. */
165
- fi->type = ARMFault_Translation;
166
- goto do_fault;
167
- case 1: /* 64k page. */
168
- phys_addr = (desc & 0xffff0000) | (address & 0xffff);
169
- ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
170
- *page_size = 0x10000;
171
- break;
172
- case 2: /* 4k page. */
173
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
174
- ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
175
- *page_size = 0x1000;
176
- break;
177
- case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */
178
- if (type == 1) {
179
- /* ARMv6/XScale extended small page format */
180
- if (arm_feature(env, ARM_FEATURE_XSCALE)
181
- || arm_feature(env, ARM_FEATURE_V6)) {
182
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
183
- *page_size = 0x1000;
184
- } else {
185
- /* UNPREDICTABLE in ARMv5; we choose to take a
186
- * page translation fault.
187
- */
188
- fi->type = ARMFault_Translation;
189
- goto do_fault;
190
- }
191
- } else {
192
- phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
193
- *page_size = 0x400;
194
- }
195
- ap = (desc >> 4) & 3;
196
- break;
197
- default:
198
- /* Never happens, but compiler isn't smart enough to tell. */
199
- g_assert_not_reached();
200
- }
201
- }
202
- *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
203
- *prot |= *prot ? PAGE_EXEC : 0;
204
- if (!(*prot & (1 << access_type))) {
205
- /* Access permission fault. */
206
- fi->type = ARMFault_Permission;
207
- goto do_fault;
208
- }
209
- *phys_ptr = phys_addr;
210
- return false;
211
-do_fault:
212
- fi->domain = domain;
213
- fi->level = level;
214
- return true;
215
-}
216
-
217
bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
218
MMUAccessType access_type, ARMMMUIdx mmu_idx,
219
hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
220
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
221
index XXXXXXX..XXXXXXX 100644
222
--- a/target/arm/ptw.c
223
+++ b/target/arm/ptw.c
224
@@ -XXX,XX +XXX,XX @@
225
#include "ptw.h"
226
227
228
+static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
229
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
230
+ hwaddr *phys_ptr, int *prot,
231
+ target_ulong *page_size,
232
+ ARMMMUFaultInfo *fi)
233
+{
234
+ CPUState *cs = env_cpu(env);
235
+ int level = 1;
236
+ uint32_t table;
237
+ uint32_t desc;
238
+ int type;
239
+ int ap;
240
+ int domain = 0;
241
+ int domain_prot;
242
+ hwaddr phys_addr;
243
+ uint32_t dacr;
244
+
245
+ /* Pagetable walk. */
246
+ /* Lookup l1 descriptor. */
247
+ if (!get_level1_table_address(env, mmu_idx, &table, address)) {
248
+ /* Section translation fault if page walk is disabled by PD0 or PD1 */
249
+ fi->type = ARMFault_Translation;
250
+ goto do_fault;
251
+ }
252
+ desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
253
+ mmu_idx, fi);
254
+ if (fi->type != ARMFault_None) {
255
+ goto do_fault;
256
+ }
257
+ type = (desc & 3);
258
+ domain = (desc >> 5) & 0x0f;
259
+ if (regime_el(env, mmu_idx) == 1) {
260
+ dacr = env->cp15.dacr_ns;
261
+ } else {
262
+ dacr = env->cp15.dacr_s;
263
+ }
264
+ domain_prot = (dacr >> (domain * 2)) & 3;
265
+ if (type == 0) {
266
+ /* Section translation fault. */
267
+ fi->type = ARMFault_Translation;
268
+ goto do_fault;
269
+ }
270
+ if (type != 2) {
271
+ level = 2;
272
+ }
273
+ if (domain_prot == 0 || domain_prot == 2) {
274
+ fi->type = ARMFault_Domain;
275
+ goto do_fault;
276
+ }
277
+ if (type == 2) {
278
+ /* 1Mb section. */
279
+ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
280
+ ap = (desc >> 10) & 3;
281
+ *page_size = 1024 * 1024;
282
+ } else {
283
+ /* Lookup l2 entry. */
284
+ if (type == 1) {
285
+ /* Coarse pagetable. */
286
+ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
287
+ } else {
288
+ /* Fine pagetable. */
289
+ table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
290
+ }
291
+ desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
292
+ mmu_idx, fi);
293
+ if (fi->type != ARMFault_None) {
294
+ goto do_fault;
295
+ }
296
+ switch (desc & 3) {
297
+ case 0: /* Page translation fault. */
298
+ fi->type = ARMFault_Translation;
299
+ goto do_fault;
300
+ case 1: /* 64k page. */
301
+ phys_addr = (desc & 0xffff0000) | (address & 0xffff);
302
+ ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
303
+ *page_size = 0x10000;
304
+ break;
305
+ case 2: /* 4k page. */
306
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
307
+ ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
308
+ *page_size = 0x1000;
309
+ break;
310
+ case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */
311
+ if (type == 1) {
312
+ /* ARMv6/XScale extended small page format */
313
+ if (arm_feature(env, ARM_FEATURE_XSCALE)
314
+ || arm_feature(env, ARM_FEATURE_V6)) {
315
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
316
+ *page_size = 0x1000;
317
+ } else {
318
+ /*
319
+ * UNPREDICTABLE in ARMv5; we choose to take a
320
+ * page translation fault.
321
+ */
322
+ fi->type = ARMFault_Translation;
323
+ goto do_fault;
324
+ }
325
+ } else {
326
+ phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
327
+ *page_size = 0x400;
328
+ }
329
+ ap = (desc >> 4) & 3;
330
+ break;
331
+ default:
332
+ /* Never happens, but compiler isn't smart enough to tell. */
333
+ g_assert_not_reached();
334
+ }
335
+ }
336
+ *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
337
+ *prot |= *prot ? PAGE_EXEC : 0;
338
+ if (!(*prot & (1 << access_type))) {
339
+ /* Access permission fault. */
340
+ fi->type = ARMFault_Permission;
341
+ goto do_fault;
342
+ }
343
+ *phys_ptr = phys_addr;
344
+ return false;
345
+do_fault:
346
+ fi->domain = domain;
347
+ fi->level = level;
348
+ return true;
349
+}
350
+
351
/**
352
* get_phys_addr - get the physical address for this virtual address
353
*
354
--
73
--
355
2.25.1
74
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Sink common code from the callers into do_fmlal
4
and do_fmlal_idx. Reorder the arguments to minimize
5
the re-sorting from the caller's arguments.
6
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220604040607.269301-8-richard.henderson@linaro.org
8
Message-id: 20250129013857.135256-35-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
11
---
8
target/arm/ptw.h | 10 +--
12
target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------
9
target/arm/helper.c | 194 +-------------------------------------------
13
1 file changed, 16 insertions(+), 12 deletions(-)
10
target/arm/ptw.c | 190 +++++++++++++++++++++++++++++++++++++++++++
11
3 files changed, 198 insertions(+), 196 deletions(-)
12
14
13
diff --git a/target/arm/ptw.h b/target/arm/ptw.h
15
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/ptw.h
17
--- a/target/arm/tcg/vec_helper.c
16
+++ b/target/arm/ptw.h
18
+++ b/target/arm/tcg/vec_helper.c
17
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
19
@@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
18
return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
20
* as there is not yet SVE versions that might use blocking.
21
*/
22
23
-static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
24
- uint64_t negx, int negf, uint32_t desc, bool fz16)
25
+static void do_fmlal(float32 *d, void *vn, void *vm,
26
+ CPUARMState *env, uint32_t desc,
27
+ ARMFPStatusFlavour fpst_idx,
28
+ uint64_t negx, int negf)
29
{
30
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
31
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
32
intptr_t i, oprsz = simd_oprsz(desc);
33
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
34
int is_q = oprsz == 16;
35
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
36
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
37
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
38
39
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
40
- env->vfp.fpcr & FPCR_FZ16);
41
+ do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0);
19
}
42
}
20
43
21
+bool m_is_ppb_region(CPUARMState *env, uint32_t address);
44
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
22
+bool m_is_system_region(CPUARMState *env, uint32_t address);
45
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
23
+
46
negx = 0x8000800080008000ull;
24
void get_phys_addr_pmsav7_default(CPUARMState *env,
47
}
25
ARMMMUIdx mmu_idx,
48
}
26
int32_t address, int *prot);
49
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
27
-bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
50
- env->vfp.fpcr & FPCR_FZ16);
28
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
51
+ do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf);
29
- hwaddr *phys_ptr, int *prot,
30
- target_ulong *page_size,
31
- ARMMMUFaultInfo *fi);
32
+bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user);
33
+
34
bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
35
MMUAccessType access_type, ARMMMUIdx mmu_idx,
36
hwaddr *phys_ptr, MemTxAttrs *txattrs,
37
diff --git a/target/arm/helper.c b/target/arm/helper.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/helper.c
40
+++ b/target/arm/helper.c
41
@@ -XXX,XX +XXX,XX @@ do_fault:
42
return true;
43
}
52
}
44
53
45
-static bool pmsav7_use_background_region(ARMCPU *cpu,
54
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
46
- ARMMMUIdx mmu_idx, bool is_user)
55
@@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
47
+bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool is_user)
48
{
49
/* Return true if we should use the default memory map as a
50
* "background" region if there are no hits against any MPU regions.
51
@@ -XXX,XX +XXX,XX @@ static bool pmsav7_use_background_region(ARMCPU *cpu,
52
}
56
}
53
}
57
}
54
58
55
-static inline bool m_is_ppb_region(CPUARMState *env, uint32_t address)
59
-static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
56
+bool m_is_ppb_region(CPUARMState *env, uint32_t address)
60
- uint64_t negx, int negf, uint32_t desc, bool fz16)
61
+static void do_fmlal_idx(float32 *d, void *vn, void *vm,
62
+ CPUARMState *env, uint32_t desc,
63
+ ARMFPStatusFlavour fpst_idx,
64
+ uint64_t negx, int negf)
57
{
65
{
58
/* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
66
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
59
return arm_feature(env, ARM_FEATURE_M) &&
67
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
60
extract32(address, 20, 12) == 0xe00;
68
intptr_t i, oprsz = simd_oprsz(desc);
69
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
70
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
71
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
72
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
73
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
74
75
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
76
- env->vfp.fpcr & FPCR_FZ16);
77
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0);
61
}
78
}
62
79
63
-static inline bool m_is_system_region(CPUARMState *env, uint32_t address)
80
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
64
+bool m_is_system_region(CPUARMState *env, uint32_t address)
81
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
65
{
82
negx = 0x8000800080008000ull;
66
/* True if address is in the M profile system region
83
}
67
* 0xe0000000 - 0xffffffff
84
}
68
@@ -XXX,XX +XXX,XX @@ static inline bool m_is_system_region(CPUARMState *env, uint32_t address)
85
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
69
return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
86
- env->vfp.fpcr & FPCR_FZ16);
87
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf);
70
}
88
}
71
89
72
-bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
90
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
73
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
74
- hwaddr *phys_ptr, int *prot,
75
- target_ulong *page_size,
76
- ARMMMUFaultInfo *fi)
77
-{
78
- ARMCPU *cpu = env_archcpu(env);
79
- int n;
80
- bool is_user = regime_is_user(env, mmu_idx);
81
-
82
- *phys_ptr = address;
83
- *page_size = TARGET_PAGE_SIZE;
84
- *prot = 0;
85
-
86
- if (regime_translation_disabled(env, mmu_idx) ||
87
- m_is_ppb_region(env, address)) {
88
- /* MPU disabled or M profile PPB access: use default memory map.
89
- * The other case which uses the default memory map in the
90
- * v7M ARM ARM pseudocode is exception vector reads from the vector
91
- * table. In QEMU those accesses are done in arm_v7m_load_vector(),
92
- * which always does a direct read using address_space_ldl(), rather
93
- * than going via this function, so we don't need to check that here.
94
- */
95
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
96
- } else { /* MPU enabled */
97
- for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
98
- /* region search */
99
- uint32_t base = env->pmsav7.drbar[n];
100
- uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5);
101
- uint32_t rmask;
102
- bool srdis = false;
103
-
104
- if (!(env->pmsav7.drsr[n] & 0x1)) {
105
- continue;
106
- }
107
-
108
- if (!rsize) {
109
- qemu_log_mask(LOG_GUEST_ERROR,
110
- "DRSR[%d]: Rsize field cannot be 0\n", n);
111
- continue;
112
- }
113
- rsize++;
114
- rmask = (1ull << rsize) - 1;
115
-
116
- if (base & rmask) {
117
- qemu_log_mask(LOG_GUEST_ERROR,
118
- "DRBAR[%d]: 0x%" PRIx32 " misaligned "
119
- "to DRSR region size, mask = 0x%" PRIx32 "\n",
120
- n, base, rmask);
121
- continue;
122
- }
123
-
124
- if (address < base || address > base + rmask) {
125
- /*
126
- * Address not in this region. We must check whether the
127
- * region covers addresses in the same page as our address.
128
- * In that case we must not report a size that covers the
129
- * whole page for a subsequent hit against a different MPU
130
- * region or the background region, because it would result in
131
- * incorrect TLB hits for subsequent accesses to addresses that
132
- * are in this MPU region.
133
- */
134
- if (ranges_overlap(base, rmask,
135
- address & TARGET_PAGE_MASK,
136
- TARGET_PAGE_SIZE)) {
137
- *page_size = 1;
138
- }
139
- continue;
140
- }
141
-
142
- /* Region matched */
143
-
144
- if (rsize >= 8) { /* no subregions for regions < 256 bytes */
145
- int i, snd;
146
- uint32_t srdis_mask;
147
-
148
- rsize -= 3; /* sub region size (power of 2) */
149
- snd = ((address - base) >> rsize) & 0x7;
150
- srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1);
151
-
152
- srdis_mask = srdis ? 0x3 : 0x0;
153
- for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) {
154
- /* This will check in groups of 2, 4 and then 8, whether
155
- * the subregion bits are consistent. rsize is incremented
156
- * back up to give the region size, considering consistent
157
- * adjacent subregions as one region. Stop testing if rsize
158
- * is already big enough for an entire QEMU page.
159
- */
160
- int snd_rounded = snd & ~(i - 1);
161
- uint32_t srdis_multi = extract32(env->pmsav7.drsr[n],
162
- snd_rounded + 8, i);
163
- if (srdis_mask ^ srdis_multi) {
164
- break;
165
- }
166
- srdis_mask = (srdis_mask << i) | srdis_mask;
167
- rsize++;
168
- }
169
- }
170
- if (srdis) {
171
- continue;
172
- }
173
- if (rsize < TARGET_PAGE_BITS) {
174
- *page_size = 1 << rsize;
175
- }
176
- break;
177
- }
178
-
179
- if (n == -1) { /* no hits */
180
- if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
181
- /* background fault */
182
- fi->type = ARMFault_Background;
183
- return true;
184
- }
185
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
186
- } else { /* a MPU hit! */
187
- uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3);
188
- uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1);
189
-
190
- if (m_is_system_region(env, address)) {
191
- /* System space is always execute never */
192
- xn = 1;
193
- }
194
-
195
- if (is_user) { /* User mode AP bit decoding */
196
- switch (ap) {
197
- case 0:
198
- case 1:
199
- case 5:
200
- break; /* no access */
201
- case 3:
202
- *prot |= PAGE_WRITE;
203
- /* fall through */
204
- case 2:
205
- case 6:
206
- *prot |= PAGE_READ | PAGE_EXEC;
207
- break;
208
- case 7:
209
- /* for v7M, same as 6; for R profile a reserved value */
210
- if (arm_feature(env, ARM_FEATURE_M)) {
211
- *prot |= PAGE_READ | PAGE_EXEC;
212
- break;
213
- }
214
- /* fall through */
215
- default:
216
- qemu_log_mask(LOG_GUEST_ERROR,
217
- "DRACR[%d]: Bad value for AP bits: 0x%"
218
- PRIx32 "\n", n, ap);
219
- }
220
- } else { /* Priv. mode AP bits decoding */
221
- switch (ap) {
222
- case 0:
223
- break; /* no access */
224
- case 1:
225
- case 2:
226
- case 3:
227
- *prot |= PAGE_WRITE;
228
- /* fall through */
229
- case 5:
230
- case 6:
231
- *prot |= PAGE_READ | PAGE_EXEC;
232
- break;
233
- case 7:
234
- /* for v7M, same as 6; for R profile a reserved value */
235
- if (arm_feature(env, ARM_FEATURE_M)) {
236
- *prot |= PAGE_READ | PAGE_EXEC;
237
- break;
238
- }
239
- /* fall through */
240
- default:
241
- qemu_log_mask(LOG_GUEST_ERROR,
242
- "DRACR[%d]: Bad value for AP bits: 0x%"
243
- PRIx32 "\n", n, ap);
244
- }
245
- }
246
-
247
- /* execute never */
248
- if (xn) {
249
- *prot &= ~PAGE_EXEC;
250
- }
251
- }
252
- }
253
-
254
- fi->type = ARMFault_Permission;
255
- fi->level = 1;
256
- return !(*prot & (1 << access_type));
257
-}
258
-
259
static bool v8m_is_sau_exempt(CPUARMState *env,
260
uint32_t address, MMUAccessType access_type)
261
{
262
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
263
index XXXXXXX..XXXXXXX 100644
264
--- a/target/arm/ptw.c
265
+++ b/target/arm/ptw.c
266
@@ -XXX,XX +XXX,XX @@
267
268
#include "qemu/osdep.h"
269
#include "qemu/log.h"
270
+#include "qemu/range.h"
271
#include "cpu.h"
272
#include "internals.h"
273
#include "ptw.h"
274
@@ -XXX,XX +XXX,XX @@ void get_phys_addr_pmsav7_default(CPUARMState *env,
275
}
276
}
277
278
+static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
279
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
280
+ hwaddr *phys_ptr, int *prot,
281
+ target_ulong *page_size,
282
+ ARMMMUFaultInfo *fi)
283
+{
284
+ ARMCPU *cpu = env_archcpu(env);
285
+ int n;
286
+ bool is_user = regime_is_user(env, mmu_idx);
287
+
288
+ *phys_ptr = address;
289
+ *page_size = TARGET_PAGE_SIZE;
290
+ *prot = 0;
291
+
292
+ if (regime_translation_disabled(env, mmu_idx) ||
293
+ m_is_ppb_region(env, address)) {
294
+ /*
295
+ * MPU disabled or M profile PPB access: use default memory map.
296
+ * The other case which uses the default memory map in the
297
+ * v7M ARM ARM pseudocode is exception vector reads from the vector
298
+ * table. In QEMU those accesses are done in arm_v7m_load_vector(),
299
+ * which always does a direct read using address_space_ldl(), rather
300
+ * than going via this function, so we don't need to check that here.
301
+ */
302
+ get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
303
+ } else { /* MPU enabled */
304
+ for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
305
+ /* region search */
306
+ uint32_t base = env->pmsav7.drbar[n];
307
+ uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5);
308
+ uint32_t rmask;
309
+ bool srdis = false;
310
+
311
+ if (!(env->pmsav7.drsr[n] & 0x1)) {
312
+ continue;
313
+ }
314
+
315
+ if (!rsize) {
316
+ qemu_log_mask(LOG_GUEST_ERROR,
317
+ "DRSR[%d]: Rsize field cannot be 0\n", n);
318
+ continue;
319
+ }
320
+ rsize++;
321
+ rmask = (1ull << rsize) - 1;
322
+
323
+ if (base & rmask) {
324
+ qemu_log_mask(LOG_GUEST_ERROR,
325
+ "DRBAR[%d]: 0x%" PRIx32 " misaligned "
326
+ "to DRSR region size, mask = 0x%" PRIx32 "\n",
327
+ n, base, rmask);
328
+ continue;
329
+ }
330
+
331
+ if (address < base || address > base + rmask) {
332
+ /*
333
+ * Address not in this region. We must check whether the
334
+ * region covers addresses in the same page as our address.
335
+ * In that case we must not report a size that covers the
336
+ * whole page for a subsequent hit against a different MPU
337
+ * region or the background region, because it would result in
338
+ * incorrect TLB hits for subsequent accesses to addresses that
339
+ * are in this MPU region.
340
+ */
341
+ if (ranges_overlap(base, rmask,
342
+ address & TARGET_PAGE_MASK,
343
+ TARGET_PAGE_SIZE)) {
344
+ *page_size = 1;
345
+ }
346
+ continue;
347
+ }
348
+
349
+ /* Region matched */
350
+
351
+ if (rsize >= 8) { /* no subregions for regions < 256 bytes */
352
+ int i, snd;
353
+ uint32_t srdis_mask;
354
+
355
+ rsize -= 3; /* sub region size (power of 2) */
356
+ snd = ((address - base) >> rsize) & 0x7;
357
+ srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1);
358
+
359
+ srdis_mask = srdis ? 0x3 : 0x0;
360
+ for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) {
361
+ /*
362
+ * This will check in groups of 2, 4 and then 8, whether
363
+ * the subregion bits are consistent. rsize is incremented
364
+ * back up to give the region size, considering consistent
365
+ * adjacent subregions as one region. Stop testing if rsize
366
+ * is already big enough for an entire QEMU page.
367
+ */
368
+ int snd_rounded = snd & ~(i - 1);
369
+ uint32_t srdis_multi = extract32(env->pmsav7.drsr[n],
370
+ snd_rounded + 8, i);
371
+ if (srdis_mask ^ srdis_multi) {
372
+ break;
373
+ }
374
+ srdis_mask = (srdis_mask << i) | srdis_mask;
375
+ rsize++;
376
+ }
377
+ }
378
+ if (srdis) {
379
+ continue;
380
+ }
381
+ if (rsize < TARGET_PAGE_BITS) {
382
+ *page_size = 1 << rsize;
383
+ }
384
+ break;
385
+ }
386
+
387
+ if (n == -1) { /* no hits */
388
+ if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
389
+ /* background fault */
390
+ fi->type = ARMFault_Background;
391
+ return true;
392
+ }
393
+ get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
394
+ } else { /* a MPU hit! */
395
+ uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3);
396
+ uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1);
397
+
398
+ if (m_is_system_region(env, address)) {
399
+ /* System space is always execute never */
400
+ xn = 1;
401
+ }
402
+
403
+ if (is_user) { /* User mode AP bit decoding */
404
+ switch (ap) {
405
+ case 0:
406
+ case 1:
407
+ case 5:
408
+ break; /* no access */
409
+ case 3:
410
+ *prot |= PAGE_WRITE;
411
+ /* fall through */
412
+ case 2:
413
+ case 6:
414
+ *prot |= PAGE_READ | PAGE_EXEC;
415
+ break;
416
+ case 7:
417
+ /* for v7M, same as 6; for R profile a reserved value */
418
+ if (arm_feature(env, ARM_FEATURE_M)) {
419
+ *prot |= PAGE_READ | PAGE_EXEC;
420
+ break;
421
+ }
422
+ /* fall through */
423
+ default:
424
+ qemu_log_mask(LOG_GUEST_ERROR,
425
+ "DRACR[%d]: Bad value for AP bits: 0x%"
426
+ PRIx32 "\n", n, ap);
427
+ }
428
+ } else { /* Priv. mode AP bits decoding */
429
+ switch (ap) {
430
+ case 0:
431
+ break; /* no access */
432
+ case 1:
433
+ case 2:
434
+ case 3:
435
+ *prot |= PAGE_WRITE;
436
+ /* fall through */
437
+ case 5:
438
+ case 6:
439
+ *prot |= PAGE_READ | PAGE_EXEC;
440
+ break;
441
+ case 7:
442
+ /* for v7M, same as 6; for R profile a reserved value */
443
+ if (arm_feature(env, ARM_FEATURE_M)) {
444
+ *prot |= PAGE_READ | PAGE_EXEC;
445
+ break;
446
+ }
447
+ /* fall through */
448
+ default:
449
+ qemu_log_mask(LOG_GUEST_ERROR,
450
+ "DRACR[%d]: Bad value for AP bits: 0x%"
451
+ PRIx32 "\n", n, ap);
452
+ }
453
+ }
454
+
455
+ /* execute never */
456
+ if (xn) {
457
+ *prot &= ~PAGE_EXEC;
458
+ }
459
+ }
460
+ }
461
+
462
+ fi->type = ARMFault_Permission;
463
+ fi->level = 1;
464
+ return !(*prot & (1 << access_type));
465
+}
466
+
467
/**
468
* get_phys_addr - get the physical address for this virtual address
469
*
470
--
91
--
471
2.25.1
92
2.34.1
diff view generated by jsdifflib