1 | Arm queue. I still have a lot of stuff in my to-review queue, so | 1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches |
---|---|---|---|
2 | won't be long until the next one. | 2 | (plus a fix for a target/alpha latent bug that would otherwise |
3 | 3 | be revealed by the fpu changes), because 68 patches is already | |
4 | I've thrown in a couple of minor non-arm patches (a xen code | 4 | longer than I prefer to send in at one time... |
5 | cleanup and a vl.c codestyle issue). | ||
6 | 5 | ||
7 | thanks | 6 | thanks |
8 | -- PMM | 7 | -- PMM |
9 | 8 | ||
10 | The following changes since commit de44c044420d1139480fa50c2d5be19223391218: | 9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: |
11 | 10 | ||
12 | Merge remote-tracking branch 'remotes/stsquad/tags/pull-tcg-testing-revivial-210618-2' into staging (2018-06-22 10:57:47 +0100) | 11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) |
13 | 12 | ||
14 | are available in the Git repository at: | 13 | are available in the Git repository at: |
15 | 14 | ||
16 | git://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20180622 | 15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 |
17 | 16 | ||
18 | for you to fetch changes up to 6dad8260e82b69bd278685ee25209f5824360455: | 17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: |
19 | 18 | ||
20 | xen: Don't use memory_region_init_ram_nomigrate() in pci_assign_dev_load_option_rom() (2018-06-22 13:28:42 +0100) | 19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) |
21 | 20 | ||
22 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
23 | target-arm queue: | 22 | target-arm queue: |
24 | * hw/intc/arm_gicv3: fix wrong values when reading IPRIORITYR | 23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags |
25 | * target/arm: fix read of freed memory in kvm_arm_machine_init_done() | 24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES |
26 | * virt: support up to 512 CPUs | ||
27 | * virt: support 256MB ECAM PCI region (for more PCI devices) | ||
28 | * xlnx-zynqmp: Use Cortex-R5F, not Cortex-R5 | ||
29 | * mps2-tz: Implement and use the TrustZone Memory Protection Controller | ||
30 | * target/arm: enforce alignment checking for v6M cores | ||
31 | * xen: Don't use memory_region_init_ram_nomigrate() in pci_assign_dev_load_option_rom() | ||
32 | * vl.c: Don't zero-initialize statics for serial_hds | ||
33 | 25 | ||
34 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
35 | Amol Surati (1): | 27 | Peter Maydell (49): |
36 | hw/intc/arm_gicv3: fix an extra left-shift when reading IPRIORITYR | 28 | target/alpha: Don't corrupt error_code with unknown softfloat flags |
29 | fpu: Add float_class_denormal | ||
30 | fpu: Implement float_flag_input_denormal_used | ||
31 | fpu: allow flushing of output denormals to be after rounding | ||
32 | target/arm: Define FPCR AH, FIZ, NEP bits | ||
33 | target/arm: Implement FPCR.FIZ handling | ||
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
37 | 77 | ||
38 | Edgar E. Iglesias (2): | 78 | Richard Henderson (19): |
39 | target-arm: Add the Cortex-R5F | 79 | target/arm: Handle FPCR.AH in vector FCMLA |
40 | xlnx-zynqmp: Swap Cortex-R5 for Cortex-R5F | 80 | target/arm: Handle FPCR.AH in FCMLA by index |
81 | target/arm: Handle FPCR.AH in SVE FCMLA | ||
82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) | ||
83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) | ||
84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) | ||
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
41 | 98 | ||
42 | Eric Auger (11): | 99 | docs/system/arm/emulation.rst | 2 + |
43 | linux-headers: Update to kernel mainline commit b357bf602 | 100 | include/fpu/softfloat-helpers.h | 11 + |
44 | target/arm: Allow KVM device address overwriting | 101 | include/fpu/softfloat-types.h | 25 ++ |
45 | hw/intc/arm_gicv3: Introduce redist-region-count array property | 102 | target/arm/cpu-features.h | 10 + |
46 | hw/intc/arm_gicv3_kvm: Get prepared to handle multiple redist regions | 103 | target/arm/cpu.h | 97 +++-- |
47 | hw/arm/virt: GICv3 DT node with one or two redistributor regions | 104 | target/arm/helper.h | 26 ++ |
48 | hw/arm/virt-acpi-build: Advertise one or two GICR structures | 105 | target/arm/internals.h | 6 + |
49 | hw/arm/virt: Register two redistributor regions when necessary | 106 | target/arm/tcg/helper-a64.h | 13 + |
50 | hw/arm/virt: Add a new 256MB ECAM region | 107 | target/arm/tcg/helper-sve.h | 120 ++++++ |
51 | hw/arm/virt: Add virt-3.0 machine type | 108 | target/arm/tcg/translate-a64.h | 13 + |
52 | hw/arm/virt: Use 256MB ECAM region by default | 109 | target/arm/tcg/translate.h | 54 +-- |
53 | hw/arm/virt: Increase max_cpus to 512 | 110 | target/arm/tcg/vec_internal.h | 35 ++ |
54 | 111 | target/mips/fpu_helper.h | 6 + | |
55 | Julia Suvorova (3): | 112 | fpu/softfloat.c | 66 +++- |
56 | target/arm: Minor cleanup for ARMv6-M 32-bit instructions | 113 | target/alpha/cpu.c | 7 + |
57 | target/arm: Introduce ARM_FEATURE_M_MAIN | 114 | target/alpha/fpu_helper.c | 2 + |
58 | target/arm: Strict alignment for ARMv6-M and ARMv8-M Baseline | 115 | target/arm/cpu.c | 46 +-- |
59 | 116 | target/arm/helper.c | 2 +- | |
60 | Peter Maydell (10): | 117 | target/arm/tcg/cpu64.c | 2 + |
61 | hw/misc/tz-mpc.c: Implement the Arm TrustZone Memory Protection Controller | 118 | target/arm/tcg/helper-a64.c | 151 ++++---- |
62 | hw/misc/tz-mpc.c: Implement registers | 119 | target/arm/tcg/hflags.c | 13 + |
63 | hw/misc/tz-mpc.c: Implement correct blocked-access behaviour | 120 | target/arm/tcg/mve_helper.c | 44 +-- |
64 | hw/misc/tz_mpc.c: Honour the BLK_LUT settings in translate | 121 | target/arm/tcg/sme_helper.c | 4 +- |
65 | hw/misc/iotkit-secctl.c: Implement SECMPCINTSTATUS | 122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- |
66 | hw/arm/iotkit: Instantiate MPC | 123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- |
67 | hw/arm/iotkit: Wire up MPC interrupt lines | 124 | target/arm/tcg/translate-sve.c | 193 +++++++--- |
68 | hw/arm/mps2-tz.c: Instantiate MPCs | 125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ |
69 | vl.c: Don't zero-initialize statics for serial_hds | 126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- |
70 | xen: Don't use memory_region_init_ram_nomigrate() in pci_assign_dev_load_option_rom() | 127 | target/hppa/fpu_helper.c | 11 + |
71 | 128 | target/i386/tcg/fpu_helper.c | 8 + | |
72 | Zheng Xiang (1): | 129 | target/mips/msa.c | 9 + |
73 | target-arm: fix a segmentation fault due to illegal memory access | 130 | target/ppc/cpu_init.c | 3 + |
74 | 131 | target/rx/cpu.c | 8 + | |
75 | hw/misc/Makefile.objs | 1 + | 132 | target/sh4/cpu.c | 8 + |
76 | hw/xen/xen_pt.h | 2 +- | 133 | target/tricore/helper.c | 1 + |
77 | include/hw/arm/iotkit.h | 8 + | 134 | tests/fp/fp-bench.c | 1 + |
78 | include/hw/arm/virt.h | 19 + | 135 | fpu/softfloat-parts.c.inc | 127 +++++-- |
79 | include/hw/intc/arm_gicv3_common.h | 8 +- | 136 | 37 files changed, 2325 insertions(+), 709 deletions(-) |
80 | include/hw/misc/iotkit-secctl.h | 8 + | ||
81 | include/hw/misc/tz-mpc.h | 80 +++ | ||
82 | include/standard-headers/linux/pci_regs.h | 8 + | ||
83 | include/standard-headers/linux/virtio_gpu.h | 1 + | ||
84 | include/standard-headers/linux/virtio_net.h | 3 + | ||
85 | linux-headers/asm-arm/kvm.h | 1 + | ||
86 | linux-headers/asm-arm/unistd-common.h | 1 + | ||
87 | linux-headers/asm-arm64/kvm.h | 1 + | ||
88 | linux-headers/asm-generic/unistd.h | 4 +- | ||
89 | linux-headers/asm-powerpc/unistd.h | 1 + | ||
90 | linux-headers/asm-x86/unistd_32.h | 2 + | ||
91 | linux-headers/asm-x86/unistd_64.h | 2 + | ||
92 | linux-headers/asm-x86/unistd_x32.h | 2 + | ||
93 | linux-headers/linux/kvm.h | 5 +- | ||
94 | linux-headers/linux/psp-sev.h | 12 + | ||
95 | target/arm/cpu.h | 1 + | ||
96 | target/arm/kvm_arm.h | 3 +- | ||
97 | hw/arm/iotkit.c | 112 +++- | ||
98 | hw/arm/mps2-tz.c | 71 ++- | ||
99 | hw/arm/virt-acpi-build.c | 30 +- | ||
100 | hw/arm/virt.c | 100 +++- | ||
101 | hw/arm/xlnx-zcu102.c | 2 +- | ||
102 | hw/arm/xlnx-zynqmp.c | 2 +- | ||
103 | hw/intc/arm_gic_kvm.c | 4 +- | ||
104 | hw/intc/arm_gicv3.c | 12 +- | ||
105 | hw/intc/arm_gicv3_common.c | 38 +- | ||
106 | hw/intc/arm_gicv3_dist.c | 3 +- | ||
107 | hw/intc/arm_gicv3_its_kvm.c | 2 +- | ||
108 | hw/intc/arm_gicv3_kvm.c | 44 +- | ||
109 | hw/intc/arm_gicv3_redist.c | 3 +- | ||
110 | hw/misc/iotkit-secctl.c | 38 +- | ||
111 | hw/misc/tz-mpc.c | 628 +++++++++++++++++++++ | ||
112 | hw/xen/xen_pt_graphics.c | 2 +- | ||
113 | hw/xen/xen_pt_load_rom.c | 6 +- | ||
114 | target/arm/cpu.c | 12 + | ||
115 | target/arm/kvm.c | 11 +- | ||
116 | target/arm/translate.c | 45 +- | ||
117 | vl.c | 4 +- | ||
118 | MAINTAINERS | 2 + | ||
119 | default-configs/arm-softmmu.mak | 1 + | ||
120 | hw/misc/trace-events | 8 + | ||
121 | .../LICENSES/exceptions/Linux-syscall-note | 2 +- | ||
122 | linux-headers/LICENSES/preferred/GPL-2.0 | 6 + | ||
123 | 48 files changed, 1250 insertions(+), 111 deletions(-) | ||
124 | create mode 100644 include/hw/misc/tz-mpc.h | ||
125 | create mode 100644 hw/misc/tz-mpc.c | ||
126 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | In do_cvttq() we set env->error_code with what is supposed to be a | ||
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
1 | 9 | ||
10 | This is harmless now, but when we add new floating point exception | ||
11 | flags to softfloat it will cause problems. Add an else clause to the | ||
12 | if-ladder to make it ignore any float exception flags it doesn't care | ||
13 | about. | ||
14 | |||
15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha | ||
16 | when the commit adding float_flag_input_denormal_used lands. | ||
17 | |||
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
23 | --- | ||
24 | target/alpha/fpu_helper.c | 2 ++ | ||
25 | 1 file changed, 2 insertions(+) | ||
26 | |||
27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/alpha/fpu_helper.c | ||
30 | +++ b/target/alpha/fpu_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) | ||
32 | exc = FPCR_INV; | ||
33 | } else if (exc & float_flag_inexact) { | ||
34 | exc = FPCR_INE; | ||
35 | + } else { | ||
36 | + exc = 0; | ||
37 | } | ||
38 | } | ||
39 | env->error_code = exc; | ||
40 | -- | ||
41 | 2.34.1 | ||
42 | |||
43 | diff view generated by jsdifflib |
1 | Implement the SECMPCINTSTATUS register. This is the only register | 1 | Currently in softfloat we canonicalize input denormals and so the |
---|---|---|---|
2 | in the security controller that deals with Memory Protection | 2 | code that implements floating point operations does not need to care |
3 | Controllers, and it simply provides a read-only view of the | 3 | whether the input value was originally normal or denormal. However, |
4 | interrupt lines from the various MPCs in the system. | 4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: |
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
9 | |||
10 | So we need to track that the input was a non-squashed denormal. To | ||
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
17 | |||
18 | There should be no behavioural change in this commit. | ||
5 | 19 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Message-id: 20180620132032.28865-6-peter.maydell@linaro.org | 21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 22 | --- |
9 | include/hw/misc/iotkit-secctl.h | 8 +++++++ | 23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- |
10 | hw/misc/iotkit-secctl.c | 38 +++++++++++++++++++++++++++++++-- | 24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- |
11 | 2 files changed, 44 insertions(+), 2 deletions(-) | 25 | 2 files changed, 54 insertions(+), 18 deletions(-) |
12 | 26 | ||
13 | diff --git a/include/hw/misc/iotkit-secctl.h b/include/hw/misc/iotkit-secctl.h | 27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c |
14 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/hw/misc/iotkit-secctl.h | 29 | --- a/fpu/softfloat.c |
16 | +++ b/include/hw/misc/iotkit-secctl.h | 30 | +++ b/fpu/softfloat.c |
17 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, |
18 | * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable | 32 | /* |
19 | * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear | 33 | * Classify a floating point number. Everything above float_class_qnan |
20 | * + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status | 34 | * is a NaN so cls >= float_class_qnan is any NaN. |
21 | + * Controlling the MPC in the IoTKit: | 35 | + * |
22 | + * + named GPIO input mpc_status | 36 | + * Note that we canonicalize denormals, so most code should treat |
23 | + * Controlling each of the 16 expansion MPCs which a system using the IoTKit | 37 | + * class_normal and class_denormal identically. |
24 | + * might provide: | ||
25 | + * + named GPIO inputs mpcexp_status[0..15] | ||
26 | */ | 38 | */ |
27 | 39 | ||
28 | #ifndef IOTKIT_SECCTL_H | 40 | typedef enum __attribute__ ((__packed__)) { |
29 | @@ -XXX,XX +XXX,XX @@ | 41 | float_class_unclassified, |
30 | #define IOTS_NUM_APB_PPC 2 | 42 | float_class_zero, |
31 | #define IOTS_NUM_APB_EXP_PPC 4 | 43 | float_class_normal, |
32 | #define IOTS_NUM_AHB_EXP_PPC 4 | 44 | + float_class_denormal, /* input was a non-squashed denormal */ |
33 | +#define IOTS_NUM_EXP_MPC 16 | 45 | float_class_inf, |
34 | +#define IOTS_NUM_MPC 1 | 46 | float_class_qnan, /* all NaNs from here */ |
35 | 47 | float_class_snan, | |
36 | typedef struct IoTKitSecCtl IoTKitSecCtl; | 48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { |
37 | 49 | enum { | |
38 | @@ -XXX,XX +XXX,XX @@ struct IoTKitSecCtl { | 50 | float_cmask_zero = float_cmask(float_class_zero), |
39 | uint32_t secrespcfg; | 51 | float_cmask_normal = float_cmask(float_class_normal), |
40 | uint32_t nsccfg; | 52 | + float_cmask_denormal = float_cmask(float_class_denormal), |
41 | uint32_t brginten; | 53 | float_cmask_inf = float_cmask(float_class_inf), |
42 | + uint32_t mpcintstatus; | 54 | float_cmask_qnan = float_cmask(float_class_qnan), |
43 | 55 | float_cmask_snan = float_cmask(float_class_snan), | |
44 | IoTKitSecCtlPPC apb[IOTS_NUM_APB_PPC]; | 56 | |
45 | IoTKitSecCtlPPC apbexp[IOTS_NUM_APB_EXP_PPC]; | 57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, |
46 | diff --git a/hw/misc/iotkit-secctl.c b/hw/misc/iotkit-secctl.c | 58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, |
47 | index XXXXXXX..XXXXXXX 100644 | 59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, |
48 | --- a/hw/misc/iotkit-secctl.c | 60 | }; |
49 | +++ b/hw/misc/iotkit-secctl.c | 61 | |
50 | @@ -XXX,XX +XXX,XX @@ static MemTxResult iotkit_secctl_s_read(void *opaque, hwaddr addr, | 62 | /* Flags for parts_minmax. */ |
51 | case A_NSCCFG: | 63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) |
52 | r = s->nsccfg; | 64 | return c == float_class_qnan; |
53 | break; | ||
54 | + case A_SECMPCINTSTATUS: | ||
55 | + r = s->mpcintstatus; | ||
56 | + break; | ||
57 | case A_SECPPCINTSTAT: | ||
58 | r = s->secppcintstat; | ||
59 | break; | ||
60 | @@ -XXX,XX +XXX,XX @@ static MemTxResult iotkit_secctl_s_read(void *opaque, hwaddr addr, | ||
61 | case A_APBSPPPCEXP3: | ||
62 | r = s->apbexp[offset_to_ppc_idx(offset)].sp; | ||
63 | break; | ||
64 | - case A_SECMPCINTSTATUS: | ||
65 | case A_SECMSCINTSTAT: | ||
66 | case A_SECMSCINTEN: | ||
67 | case A_NSMSCEXP: | ||
68 | @@ -XXX,XX +XXX,XX @@ static void iotkit_secctl_reset(DeviceState *dev) | ||
69 | foreach_ppc(s, iotkit_secctl_reset_ppc); | ||
70 | } | 65 | } |
71 | 66 | ||
72 | +static void iotkit_secctl_mpc_status(void *opaque, int n, int level) | 67 | +/* |
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
73 | +{ | 72 | +{ |
74 | + IoTKitSecCtl *s = IOTKIT_SECCTL(opaque); | 73 | + return !(cmask & ~float_cmask_anynorm); |
75 | + | ||
76 | + s->mpcintstatus = deposit32(s->mpcintstatus, 0, 1, !!level); | ||
77 | +} | 74 | +} |
78 | + | 75 | + |
79 | +static void iotkit_secctl_mpcexp_status(void *opaque, int n, int level) | 76 | +static inline bool is_anynorm(FloatClass c) |
80 | +{ | 77 | +{ |
81 | + IoTKitSecCtl *s = IOTKIT_SECCTL(opaque); | 78 | + return float_cmask(c) & float_cmask_anynorm; |
82 | + | ||
83 | + s->mpcintstatus = deposit32(s->mpcintstatus, n + 16, 1, !!level); | ||
84 | +} | 79 | +} |
85 | + | 80 | + |
86 | static void iotkit_secctl_ppc_irqstatus(void *opaque, int n, int level) | 81 | /* |
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
87 | { | 169 | { |
88 | IoTKitSecCtlPPC *ppc = opaque; | 170 | - if (likely(p->cls == float_class_normal)) { |
89 | @@ -XXX,XX +XXX,XX @@ static void iotkit_secctl_init(Object *obj) | 171 | + if (likely(is_anynorm(p->cls))) { |
90 | qdev_init_gpio_out_named(dev, &s->sec_resp_cfg, "sec_resp_cfg", 1); | 172 | parts_uncanon_normal(p, s, fmt); |
91 | qdev_init_gpio_out_named(dev, &s->nsc_cfg_irq, "nsc_cfg", 1); | 173 | } else { |
92 | 174 | switch (p->cls) { | |
93 | + qdev_init_gpio_in_named(dev, iotkit_secctl_mpc_status, "mpc_status", 1); | 175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
94 | + qdev_init_gpio_in_named(dev, iotkit_secctl_mpcexp_status, | 176 | |
95 | + "mpcexp_status", IOTS_NUM_EXP_MPC); | 177 | if (a->sign != b_sign) { |
96 | + | 178 | /* Subtraction */ |
97 | memory_region_init_io(&s->s_regs, obj, &iotkit_secctl_s_ops, | 179 | - if (likely(ab_mask == float_cmask_normal)) { |
98 | s, "iotkit-secctl-s-regs", 0x1000); | 180 | + if (likely(cmask_is_only_normals(ab_mask))) { |
99 | memory_region_init_io(&s->ns_regs, obj, &iotkit_secctl_ns_ops, | 181 | if (parts_sub_normal(a, b)) { |
100 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription iotkit_secctl_ppc_vmstate = { | 182 | return a; |
101 | } | 183 | } |
102 | }; | 184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
103 | 185 | } | |
104 | +static const VMStateDescription iotkit_secctl_mpcintstatus_vmstate = { | 186 | } else { |
105 | + .name = "iotkit-secctl-mpcintstatus", | 187 | /* Addition */ |
106 | + .version_id = 1, | 188 | - if (likely(ab_mask == float_cmask_normal)) { |
107 | + .minimum_version_id = 1, | 189 | + if (likely(cmask_is_only_normals(ab_mask))) { |
108 | + .fields = (VMStateField[]) { | 190 | parts_add_normal(a, b); |
109 | + VMSTATE_UINT32(mpcintstatus, IoTKitSecCtl), | 191 | return a; |
110 | + VMSTATE_END_OF_LIST() | 192 | } |
111 | + } | 193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
112 | +}; | 194 | } |
113 | + | 195 | |
114 | static const VMStateDescription iotkit_secctl_vmstate = { | 196 | if (b->cls == float_class_zero) { |
115 | .name = "iotkit-secctl", | 197 | - g_assert(a->cls == float_class_normal); |
116 | .version_id = 1, | 198 | + g_assert(is_anynorm(a->cls)); |
117 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription iotkit_secctl_vmstate = { | 199 | return a; |
118 | VMSTATE_STRUCT_ARRAY(ahbexp, IoTKitSecCtl, IOTS_NUM_AHB_EXP_PPC, 1, | 200 | } |
119 | iotkit_secctl_ppc_vmstate, IoTKitSecCtlPPC), | 201 | |
120 | VMSTATE_END_OF_LIST() | 202 | g_assert(a->cls == float_class_zero); |
121 | - } | 203 | - g_assert(b->cls == float_class_normal); |
122 | + }, | 204 | + g_assert(is_anynorm(b->cls)); |
123 | + .subsections = (const VMStateDescription*[]) { | 205 | return_b: |
124 | + &iotkit_secctl_mpcintstatus_vmstate, | 206 | b->sign = b_sign; |
125 | + NULL | 207 | return b; |
126 | + }, | 208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, |
127 | }; | 209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
128 | 210 | bool sign = a->sign ^ b->sign; | |
129 | static void iotkit_secctl_class_init(ObjectClass *klass, void *data) | 211 | |
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
130 | -- | 351 | -- |
131 | 2.17.1 | 352 | 2.34.1 |
132 | |||
133 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to | ||
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
1 | 5 | ||
6 | Note that we only set this when an input denormal is actually used by | ||
7 | the operation: if the operation results in Invalid Operation or | ||
8 | Divide By Zero or the result is a NaN because some other input was a | ||
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | float_flag_input_denormal_used. | ||
11 | |||
12 | We mostly do not need to adjust the hardfloat codepaths to deal with | ||
13 | this flag, because almost all hardfloat operations are already gated | ||
14 | on the input not being a denormal, and will fall back to softfloat | ||
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | include/fpu/softfloat-types.h | 7 ++++ | ||
23 | fpu/softfloat.c | 38 +++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- | ||
25 | 3 files changed, 107 insertions(+), 6 deletions(-) | ||
26 | |||
27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/fpu/softfloat-types.h | ||
30 | +++ b/include/fpu/softfloat-types.h | ||
31 | @@ -XXX,XX +XXX,XX @@ enum { | ||
32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ | ||
33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ | ||
34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ | ||
35 | + /* | ||
36 | + * An input was denormal and we used it (without flushing it to zero). | ||
37 | + * Not set if we do not actually use the denormal input (e.g. | ||
38 | + * because some other input was a NaN, or because the operation | ||
39 | + * wasn't actually carried out (divide-by-zero; invalid)) | ||
40 | + */ | ||
41 | + float_flag_input_denormal_used = 0x4000, | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/fpu/softfloat.c | ||
48 | +++ b/fpu/softfloat.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
50 | float16_params_ahp.frac_size + 1); | ||
51 | break; | ||
52 | |||
53 | - case float_class_normal: | ||
54 | case float_class_denormal: | ||
55 | + float_raise(float_flag_input_denormal_used, s); | ||
56 | + break; | ||
57 | + case float_class_normal: | ||
58 | case float_class_zero: | ||
59 | break; | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) | ||
62 | if (is_nan(a->cls)) { | ||
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
68 | } | ||
69 | |||
70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
72 | if (is_nan(a->cls)) { | ||
73 | parts_return_nan(a, s); | ||
74 | } | ||
75 | + if (a->cls == float_class_denormal) { | ||
76 | + float_raise(float_flag_input_denormal_used, s); | ||
77 | + } | ||
78 | } | ||
79 | |||
80 | #define parts_float_to_float(P, S) \ | ||
81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/fpu/softfloat-parts.c.inc | ||
144 | +++ b/fpu/softfloat-parts.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
146 | bool b_sign = b->sign ^ subtract; | ||
147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
148 | |||
149 | + /* | ||
150 | + * For addition and subtraction, we will consume an | ||
151 | + * input denormal unless the other input is a NaN. | ||
152 | + */ | ||
153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == | ||
154 | + float_cmask_denormal) { | ||
155 | + float_raise(float_flag_input_denormal_used, s); | ||
156 | + } | ||
157 | + | ||
158 | if (a->sign != b_sign) { | ||
159 | /* Subtraction */ | ||
160 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
162 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
163 | FloatPartsW tmp; | ||
164 | |||
165 | + if (ab_mask & float_cmask_denormal) { | ||
166 | + float_raise(float_flag_input_denormal_used, s); | ||
167 | + } | ||
168 | + | ||
169 | frac_mulw(&tmp, a, b); | ||
170 | frac_truncjam(a, &tmp); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
173 | } | ||
174 | |||
175 | /* Multiply by 0 or Inf */ | ||
176 | + if (ab_mask & float_cmask_denormal) { | ||
177 | + float_raise(float_flag_input_denormal_used, s); | ||
178 | + } | ||
179 | + | ||
180 | if (ab_mask & float_cmask_inf) { | ||
181 | a->cls = float_class_inf; | ||
182 | a->sign = sign; | ||
183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
184 | if (flags & float_muladd_negate_result) { | ||
185 | a->sign ^= 1; | ||
186 | } | ||
187 | + | ||
188 | + /* | ||
189 | + * All result types except for "return the default NaN | ||
190 | + * because this is an Invalid Operation" go through here; | ||
191 | + * this matches the set of cases where we consumed a | ||
192 | + * denormal input. | ||
193 | + */ | ||
194 | + if (abc_mask & float_cmask_denormal) { | ||
195 | + float_raise(float_flag_input_denormal_used, s); | ||
196 | + } | ||
197 | return a; | ||
198 | |||
199 | return_sub_zero: | ||
200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
201 | bool sign = a->sign ^ b->sign; | ||
202 | |||
203 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
204 | + if (ab_mask & float_cmask_denormal) { | ||
205 | + float_raise(float_flag_input_denormal_used, s); | ||
206 | + } | ||
207 | a->sign = sign; | ||
208 | a->exp -= b->exp + frac_div(a, b); | ||
209 | return a; | ||
210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
211 | return parts_pick_nan(a, b, s); | ||
212 | } | ||
213 | |||
214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { | ||
215 | + float_raise(float_flag_input_denormal_used, s); | ||
216 | + } | ||
217 | + | ||
218 | a->sign = sign; | ||
219 | |||
220 | /* Inf / X */ | ||
221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
223 | |||
224 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
225 | + if (ab_mask & float_cmask_denormal) { | ||
226 | + float_raise(float_flag_input_denormal_used, s); | ||
227 | + } | ||
228 | frac_modrem(a, b, mod_quot); | ||
229 | return a; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
232 | return a; | ||
233 | } | ||
234 | |||
235 | + if (ab_mask & float_cmask_denormal) { | ||
236 | + float_raise(float_flag_input_denormal_used, s); | ||
237 | + } | ||
238 | + | ||
239 | /* N % Inf; 0 % N */ | ||
240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | ||
241 | return a; | ||
242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
243 | if (unlikely(a->cls != float_class_normal)) { | ||
244 | switch (a->cls) { | ||
245 | case float_class_denormal: | ||
246 | + if (!a->sign) { | ||
247 | + /* -ve denormal will be InvalidOperation */ | ||
248 | + float_raise(float_flag_input_denormal_used, status); | ||
249 | + } | ||
250 | break; | ||
251 | case float_class_snan: | ||
252 | case float_class_qnan: | ||
253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
254 | if ((flags & (minmax_isnum | minmax_isnumber)) | ||
255 | && !(ab_mask & float_cmask_snan) | ||
256 | && (ab_mask & ~float_cmask_qnan)) { | ||
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
259 | + } | ||
260 | return is_nan(a->cls) ? b : a; | ||
261 | } | ||
262 | |||
263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
264 | return parts_pick_nan(a, b, s); | ||
265 | } | ||
266 | |||
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
280 | + } | ||
281 | + | ||
282 | if (a->sign != b->sign) { | ||
283 | goto a_sign; | ||
284 | } | ||
285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
286 | return float_relation_unordered; | ||
287 | } | ||
288 | |||
289 | + if (ab_mask & float_cmask_denormal) { | ||
290 | + float_raise(float_flag_input_denormal_used, s); | ||
291 | + } | ||
292 | + | ||
293 | if (ab_mask & float_cmask_zero) { | ||
294 | if (ab_mask == float_cmask_zero) { | ||
295 | return float_relation_equal; | ||
296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
297 | case float_class_zero: | ||
298 | case float_class_inf: | ||
299 | break; | ||
300 | - case float_class_normal: | ||
301 | case float_class_denormal: | ||
302 | + float_raise(float_flag_input_denormal_used, s); | ||
303 | + /* fall through */ | ||
304 | + case float_class_normal: | ||
305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
306 | break; | ||
307 | default: | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
309 | if (unlikely(a->cls != float_class_normal)) { | ||
310 | switch (a->cls) { | ||
311 | case float_class_denormal: | ||
312 | + if (!a->sign) { | ||
313 | + /* -ve denormal will be InvalidOperation */ | ||
314 | + float_raise(float_flag_input_denormal_used, s); | ||
315 | + } | ||
316 | break; | ||
317 | case float_class_snan: | ||
318 | case float_class_qnan: | ||
319 | -- | ||
320 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Currently we handle flushing of output denormals in uncanon_normal | |
2 | always before we deal with rounding. This works for architectures | ||
3 | that detect tininess before rounding, but is usually not the right | ||
4 | place when the architecture detects tininess after rounding. For | ||
5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes | ||
6 | outputs to be flushed to zero "when it detects a floating-point | ||
7 | underflow condition". This means that we mustn't flush to zero if | ||
8 | the input is such that after rounding it is no longer tiny. | ||
9 | |||
10 | At least one of our guest architectures does underflow detection | ||
11 | after rounding but flushing of denormals before rounding (MIPS MSA); | ||
12 | this means we need to have a config knob for this that is separate | ||
13 | from our existing tininess_before_rounding setting. | ||
14 | |||
15 | Add an ftz_detection flag. For consistency with | ||
16 | tininess_before_rounding, we make it default to "detect ftz after | ||
17 | rounding"; this means that we need to explicitly set the flag to | ||
18 | "detect ftz before rounding" on every existing architecture that sets | ||
19 | flush_to_zero, so that this commit has no behaviour change. | ||
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
37 | |||
38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
40 | --- | ||
41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ | ||
42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ | ||
43 | target/mips/fpu_helper.h | 6 ++++++ | ||
44 | target/alpha/cpu.c | 7 +++++++ | ||
45 | target/arm/cpu.c | 1 + | ||
46 | target/hppa/fpu_helper.c | 11 +++++++++++ | ||
47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ | ||
48 | target/mips/msa.c | 9 +++++++++ | ||
49 | target/ppc/cpu_init.c | 3 +++ | ||
50 | target/rx/cpu.c | 8 ++++++++ | ||
51 | target/sh4/cpu.c | 8 ++++++++ | ||
52 | target/tricore/helper.c | 1 + | ||
53 | tests/fp/fp-bench.c | 1 + | ||
54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ | ||
55 | 14 files changed, 107 insertions(+), 6 deletions(-) | ||
56 | |||
57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/fpu/softfloat-helpers.h | ||
60 | +++ b/include/fpu/softfloat-helpers.h | ||
61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) | ||
62 | status->flush_inputs_to_zero = val; | ||
63 | } | ||
64 | |||
65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, | ||
66 | + float_status *status) | ||
67 | +{ | ||
68 | + status->ftz_detection = d; | ||
69 | +} | ||
70 | + | ||
71 | static inline void set_default_nan_mode(bool val, float_status *status) | ||
72 | { | ||
73 | status->default_nan_mode = val; | ||
74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | ||
75 | return status->default_nan_mode; | ||
76 | } | ||
77 | |||
78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) | ||
79 | +{ | ||
80 | + return status->ftz_detection; | ||
81 | +} | ||
82 | + | ||
83 | #endif /* SOFTFLOAT_HELPERS_H */ | ||
84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/fpu/softfloat-types.h | ||
87 | +++ b/include/fpu/softfloat-types.h | ||
88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
89 | float_infzeronan_suppress_invalid = (1 << 7), | ||
90 | } FloatInfZeroNaNRule; | ||
91 | |||
92 | +/* | ||
93 | + * When flush_to_zero is set, should we detect denormal results to | ||
94 | + * be flushed before or after rounding? For most architectures this | ||
95 | + * should be set to match the tininess_before_rounding setting, | ||
96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before | ||
97 | + * rounding but tininess after rounding. | ||
98 | + * | ||
99 | + * This enum is arranged so that the default if the target doesn't | ||
100 | + * configure it matches the default for tininess_before_rounding | ||
101 | + * (i.e. "after rounding"). | ||
102 | + */ | ||
103 | +typedef enum __attribute__((__packed__)) { | ||
104 | + float_ftz_after_rounding = 0, | ||
105 | + float_ftz_before_rounding = 1, | ||
106 | +} FloatFTZDetection; | ||
107 | + | ||
108 | /* | ||
109 | * Floating Point Status. Individual architectures may maintain | ||
110 | * several versions of float_status for different functions. The | ||
111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | ||
112 | bool tininess_before_rounding; | ||
113 | /* should denormalised results go to zero and set output_denormal_flushed? */ | ||
114 | bool flush_to_zero; | ||
115 | + /* do we detect and flush denormal results before or after rounding? */ | ||
116 | + FloatFTZDetection ftz_detection; | ||
117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ | ||
118 | bool flush_inputs_to_zero; | ||
119 | bool default_nan_mode; | ||
120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/mips/fpu_helper.h | ||
123 | +++ b/target/mips/fpu_helper.h | ||
124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) | ||
125 | */ | ||
126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
127 | &env->active_fpu.fp_status); | ||
128 | + /* | ||
129 | + * TODO: the spec doesn't say clearly whether FTZ happens before | ||
130 | + * or after rounding for normal FPU operations. | ||
131 | + */ | ||
132 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
133 | + &env->active_fpu.fp_status); | ||
134 | } | ||
135 | |||
136 | /* MSA */ | ||
137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/alpha/cpu.c | ||
140 | +++ b/target/alpha/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
143 | /* Default NaN: sign bit clear, msb frac bit set */ | ||
144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
145 | + /* | ||
146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so | ||
148 | + * this should be float_ftz_after_rounding to match the | ||
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
161 | { | ||
162 | set_float_detect_tininess(float_tininess_before_rounding, s); | ||
163 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/hppa/fpu_helper.c | ||
170 | +++ b/target/hppa/fpu_helper.c | ||
171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); | ||
173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ | ||
174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); | ||
175 | + /* | ||
176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing | ||
177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" | ||
178 | + * for consistency with tininess detection. | ||
179 | + */ | ||
180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
181 | + /* | ||
182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should | ||
183 | + * detect tininess before rounding, but we don't set that here so we | ||
184 | + * get the default tininess after rounding. | ||
185 | + */ | ||
186 | } | ||
187 | |||
188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/i386/tcg/fpu_helper.c | ||
192 | +++ b/target/i386/tcg/fpu_helper.c | ||
193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) | ||
194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); | ||
195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); | ||
196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); | ||
197 | + /* | ||
198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM | ||
199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush | ||
200 | + * when we detect underflow, which x86 does after rounding). | ||
201 | + */ | ||
202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); | ||
204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); | ||
205 | } | ||
206 | |||
207 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
208 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/mips/msa.c | ||
211 | +++ b/target/mips/msa.c | ||
212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
213 | /* tininess detected after rounding.*/ | ||
214 | set_float_detect_tininess(float_tininess_after_rounding, | ||
215 | &env->active_tc.msa_fp_status); | ||
216 | + /* | ||
217 | + * MSACSR.FS detects tiny results to flush to zero before rounding | ||
218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD | ||
219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it | ||
220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 | ||
221 | + * table 3.3). | ||
222 | + */ | ||
223 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
224 | + &env->active_tc.msa_fp_status); | ||
225 | |||
226 | /* | ||
227 | * According to MIPS specifications, if one of the two operands is | ||
228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
229 | index XXXXXXX..XXXXXXX 100644 | ||
230 | --- a/target/ppc/cpu_init.c | ||
231 | +++ b/target/ppc/cpu_init.c | ||
232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
233 | /* tininess for underflow is detected before rounding */ | ||
234 | set_float_detect_tininess(float_tininess_before_rounding, | ||
235 | &env->fp_status); | ||
236 | + /* Similarly for flush-to-zero */ | ||
237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
238 | + | ||
239 | /* | ||
240 | * PowerPC propagation rules: | ||
241 | * 1. A if it sNaN or qNaN | ||
242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/rx/cpu.c | ||
245 | +++ b/target/rx/cpu.c | ||
246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
248 | /* Default NaN value: sign bit clear, set frac msb */ | ||
249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
250 | + /* | ||
251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear | ||
252 | + * on whether flush-to-zero should happen before or after rounding, but | ||
253 | + * section 1.3.2 says that it happens when underflow is detected, and | ||
254 | + * implies that underflow is detected after rounding. So this may not | ||
255 | + * be the correct setting. | ||
256 | + */ | ||
257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
258 | } | ||
259 | |||
260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/target/sh4/cpu.c | ||
264 | +++ b/target/sh4/cpu.c | ||
265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) | ||
266 | set_default_nan_mode(1, &env->fp_status); | ||
267 | /* sign bit clear, set all frac bits other than msb */ | ||
268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); | ||
269 | + /* | ||
270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether | ||
271 | + * it detects tininess before or after rounding. Section 6.4 is clear | ||
272 | + * that flush-to-zero happens when the result underflows, though, so | ||
273 | + * either this should be "detect ftz after rounding" or else we should | ||
274 | + * be setting "detect tininess before rounding". | ||
275 | + */ | ||
276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
277 | } | ||
278 | |||
279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) | ||
280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/tricore/helper.c | ||
283 | +++ b/target/tricore/helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) | ||
285 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
286 | set_flush_to_zero(1, &env->fp_status); | ||
287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | ||
288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
289 | set_default_nan_mode(1, &env->fp_status); | ||
290 | /* Default NaN pattern: sign bit clear, frac msb set */ | ||
291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/fp/fp-bench.c | ||
295 | +++ b/tests/fp/fp-bench.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); | ||
298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); | ||
299 | set_float_default_nan_pattern(0b01000000, &soft_status); | ||
300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); | ||
301 | |||
302 | f = bench_funcs[operation][precision]; | ||
303 | g_assert(f); | ||
304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
305 | index XXXXXXX..XXXXXXX 100644 | ||
306 | --- a/fpu/softfloat-parts.c.inc | ||
307 | +++ b/fpu/softfloat-parts.c.inc | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
309 | p->frac_lo &= ~round_mask; | ||
310 | } | ||
311 | frac_shr(p, frac_shift); | ||
312 | - } else if (s->flush_to_zero) { | ||
313 | + } else if (s->flush_to_zero && | ||
314 | + s->ftz_detection == float_ftz_before_rounding) { | ||
315 | flags |= float_flag_output_denormal_flushed; | ||
316 | p->cls = float_class_zero; | ||
317 | exp = 0; | ||
318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; | ||
320 | frac_shr(p, frac_shift); | ||
321 | |||
322 | - if (is_tiny && (flags & float_flag_inexact)) { | ||
323 | - flags |= float_flag_underflow; | ||
324 | - } | ||
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
340 | } | ||
341 | } | ||
342 | p->exp = exp; | ||
343 | -- | ||
344 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The Armv8.7 FEAT_AFP feature defines three new control bits in | ||
2 | the FPCR: | ||
3 | * FPCR.AH: "alternate floating point mode"; this changes floating | ||
4 | point behaviour in a variety of ways, including: | ||
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, denormals detected after rounding | ||
7 | with an unbounded exponent are flushed to zero | ||
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
1 | 15 | ||
16 | This commit defines the new bits in the FPCR, and allows them to be | ||
17 | read or written when FEAT_AFP is implemented. Actual behaviour | ||
18 | changes will be implemented in subsequent commits. | ||
19 | |||
20 | Note that these are the first FPCR bits which don't appear in the | ||
21 | AArch32 FPSCR view of the register, and which share bit positions | ||
22 | with FPSR bits. | ||
23 | |||
24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | --- | ||
27 | target/arm/cpu-features.h | 5 +++++ | ||
28 | target/arm/cpu.h | 3 +++ | ||
29 | target/arm/vfp_helper.c | 11 ++++++++--- | ||
30 | 3 files changed, 16 insertions(+), 3 deletions(-) | ||
31 | |||
32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/cpu-features.h | ||
35 | +++ b/target/arm/cpu-features.h | ||
36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) | ||
37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; | ||
38 | } | ||
39 | |||
40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) | ||
41 | +{ | ||
42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; | ||
43 | +} | ||
44 | + | ||
45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | ||
46 | { | ||
47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | ||
48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/cpu.h | ||
51 | +++ b/target/arm/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
53 | */ | ||
54 | |||
55 | /* FPCR bits */ | ||
56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ | ||
57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ | ||
58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ | ||
59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ | ||
60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ | ||
61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
67 | if (!cpu_isar_feature(any_fp16, cpu)) { | ||
68 | val &= ~FPCR_FZ16; | ||
69 | } | ||
70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { | ||
71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); | ||
72 | + } | ||
73 | |||
74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { | ||
75 | val &= ~FPCR_EBF; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
77 | * We don't implement trapped exception handling, so the | ||
78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) | ||
79 | * | ||
80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF | ||
81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits | ||
82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, | ||
83 | + * FIZ, AH, and NEP. | ||
84 | + * Len, Stride and LTPSIZE we just handled. Store those bits | ||
85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI | ||
86 | * bits. | ||
87 | */ | ||
88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; | ||
89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | | ||
90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; | ||
91 | env->vfp.fpcr &= ~mask; | ||
92 | env->vfp.fpcr |= val; | ||
93 | } | ||
94 | -- | ||
95 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects | ||
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
1 | 5 | ||
6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal | ||
7 | only because of FPCR.FIZ then we should *not* set the cumulative | ||
8 | exception bit FPSR.IDC. | ||
9 | |||
10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only | ||
11 | applies when FPCR.AH is 0. | ||
12 | |||
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal_flushed status flag into FPSR reads when it is the | ||
16 | result only of FPCR.FIZ. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- | ||
22 | 1 file changed, 50 insertions(+), 10 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/vfp_helper.c | ||
27 | +++ b/target/arm/vfp_helper.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
29 | |||
30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
31 | { | ||
32 | - uint32_t i = 0; | ||
33 | + uint32_t a32_flags = 0, a64_flags = 0; | ||
34 | |||
35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
40 | /* FZ16 does not generate an input denormal exception. */ | ||
41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
65 | } | ||
66 | |||
67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
70 | } | ||
71 | |||
72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
73 | +{ | ||
74 | + /* | ||
75 | + * Synchronize any pending exception-flag information in the | ||
76 | + * float_status values into env->vfp.fpsr, and then clear out | ||
77 | + * the float_status data. | ||
78 | + */ | ||
79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); | ||
80 | + vfp_clear_float_status_exc_flags(env); | ||
81 | +} | ||
82 | + | ||
83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
84 | { | ||
85 | uint64_t changed = env->vfp.fpcr; | ||
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
87 | if (changed & FPCR_FZ) { | ||
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
104 | } | ||
105 | if (changed & FPCR_DN) { | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
119 | } | ||
120 | |||
121 | #else | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is set, various behaviours of AArch64 floating point | ||
2 | operations which are controlled by softfloat config settings change: | ||
3 | * tininess and ftz detection before/after rounding | ||
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
1 | 7 | ||
8 | When the guest changes the value of the AH bit, switch these config | ||
9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status | ||
10 | fields. | ||
11 | |||
12 | This requires us to make the arm_set_default_fp_behaviours() function | ||
13 | global, since we now need to call it from cpu.c and vfp_helper.c; we | ||
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
16 | |||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | --- | ||
20 | target/arm/internals.h | 4 +++ | ||
21 | target/arm/cpu.c | 23 ---------------- | ||
22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- | ||
23 | 3 files changed, 61 insertions(+), 24 deletions(-) | ||
24 | |||
25 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/internals.h | ||
28 | +++ b/target/arm/internals.h | ||
29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); | ||
30 | * all EL1" scope; this covers stage 1 and stage 2. | ||
31 | */ | ||
32 | int alle1_tlbmask(CPUARMState *env); | ||
33 | + | ||
34 | +/* Set the float_status behaviour to match the Arm defaults */ | ||
35 | +void arm_set_default_fp_behaviours(float_status *s); | ||
36 | + | ||
37 | #endif | ||
38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/cpu.c | ||
41 | +++ b/target/arm/cpu.c | ||
42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); | ||
44 | } | ||
45 | |||
46 | -/* | ||
47 | - * Set the float_status behaviour to match the Arm defaults: | ||
48 | - * * tininess-before-rounding | ||
49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
58 | - */ | ||
59 | -static void arm_set_default_fp_behaviours(float_status *s) | ||
60 | -{ | ||
61 | - set_float_detect_tininess(float_tininess_before_rounding, s); | ||
62 | - set_float_ftz_detection(float_ftz_before_rounding, s); | ||
63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
66 | - set_float_default_nan_pattern(0b01000000, s); | ||
67 | -} | ||
68 | - | ||
69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
70 | { | ||
71 | /* Reset a single ARMCPRegInfo register */ | ||
72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/arm/vfp_helper.c | ||
75 | +++ b/target/arm/vfp_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #include "exec/helper-proto.h" | ||
78 | #include "internals.h" | ||
79 | #include "cpu-features.h" | ||
80 | +#include "fpu/softfloat.h" | ||
81 | #ifdef CONFIG_TCG | ||
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
89 | |||
90 | +/* | ||
91 | + * Set the float_status behaviour to match the Arm defaults: | ||
92 | + * * tininess-before-rounding | ||
93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b. | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
102 | + */ | ||
103 | +void arm_set_default_fp_behaviours(float_status *s) | ||
104 | +{ | ||
105 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
106 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
110 | + set_float_default_nan_pattern(0b01000000, s); | ||
111 | +} | ||
112 | + | ||
113 | +/* | ||
114 | + * Set the float_status behaviour to match the FEAT_AFP | ||
115 | + * FPCR.AH=1 requirements: | ||
116 | + * * tininess-after-rounding | ||
117 | + * * 2-input NaN propagation prefers the first NaN | ||
118 | + * * 3-input NaN propagation prefers a over b over c | ||
119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't | ||
120 | + * set Invalid for a QNaN | ||
121 | + * * default NaN has sign bit set, msb frac bit set | ||
122 | + */ | ||
123 | +static void arm_set_ah_fp_behaviours(float_status *s) | ||
124 | +{ | ||
125 | + set_float_detect_tininess(float_tininess_after_rounding, s); | ||
126 | + set_float_ftz_detection(float_ftz_after_rounding, s); | ||
127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); | ||
128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); | ||
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
133 | + | ||
134 | #ifdef CONFIG_TCG | ||
135 | |||
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
140 | } | ||
141 | + if (changed & FPCR_AH) { | ||
142 | + bool ah_enabled = val & FPCR_AH; | ||
143 | + | ||
144 | + if (ah_enabled) { | ||
145 | + /* Change behaviours for A64 FP operations */ | ||
146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
148 | + } else { | ||
149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
151 | + } | ||
152 | + } | ||
153 | /* | ||
154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
155 | * we must sync the float_status flags into vfp.fpsr now (under the | ||
156 | -- | ||
157 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
1 | 5 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | We are going to need to generate different code in some cases when | ||
2 | FPCR.AH is 1. For example: | ||
3 | * Floating point neg and abs must not flip the sign bit of NaNs | ||
4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various | ||
5 | BFCVT and BFM bfloat16 ops) need to use a different float_status | ||
6 | to the usual one | ||
1 | 7 | ||
8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at | ||
9 | translate time. | ||
10 | |||
11 | Because we now have a bit in FPCR that affects codegen, we can't mark | ||
12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes | ||
13 | to it will now end the TB and trigger a regeneration of hflags. | ||
14 | |||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | --- | ||
18 | target/arm/cpu.h | 1 + | ||
19 | target/arm/tcg/translate.h | 2 ++ | ||
20 | target/arm/helper.c | 2 +- | ||
21 | target/arm/tcg/hflags.c | 4 ++++ | ||
22 | target/arm/tcg/translate-a64.c | 1 + | ||
23 | 5 files changed, 9 insertions(+), 1 deletion(-) | ||
24 | |||
25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/cpu.h | ||
28 | +++ b/target/arm/cpu.h | ||
29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) | ||
30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
34 | |||
35 | /* | ||
36 | * Helpers for using the above. Note that only the A64 accessors use | ||
37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate.h | ||
40 | +++ b/target/arm/tcg/translate.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
42 | bool nv2_mem_e20; | ||
43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ | ||
44 | bool nv2_mem_be; | ||
45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
46 | + bool fpcr_ah; | ||
47 | /* | ||
48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
49 | * < 0, set by the current instruction. | ||
50 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/arm/helper.c | ||
53 | +++ b/target/arm/helper.c | ||
54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { | ||
55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, | ||
56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, | ||
57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, | ||
58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, | ||
59 | + .access = PL0_RW, .type = ARM_CP_FPU, | ||
60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, | ||
61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, | ||
62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, | ||
63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/tcg/hflags.c | ||
66 | +++ b/target/arm/tcg/hflags.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); | ||
69 | } | ||
70 | |||
71 | + if (env->vfp.fpcr & FPCR_AH) { | ||
72 | + DP_TBFLAG_A64(flags, AH, 1); | ||
73 | + } | ||
74 | + | ||
75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
76 | } | ||
77 | |||
78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/target/arm/tcg/translate-a64.c | ||
81 | +++ b/target/arm/tcg/translate-a64.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); | ||
84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
87 | dc->vec_len = 0; | ||
88 | dc->vec_stride = 0; | ||
89 | dc->cp_regs = arm_cpu->cp_regs; | ||
90 | -- | ||
91 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | When FPCR.AH is 1, the behaviour of some instructions changes: | |
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | ||
3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which | ||
5 | QEMU does not yet implement) | ||
6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
7 | |||
8 | The behaviour change is: | ||
9 | * the instructions do not update the FPSR cumulative exception flags | ||
10 | * trapped floating point exceptions are disabled (a no-op for QEMU, | ||
11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) | ||
12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode | ||
13 | * denormalized inputs and outputs are always flushed to zero, as if | ||
14 | FPCR.{FZ,FIZ} is {1,1} | ||
15 | * FPCR.FZ16 is still honoured for half-precision inputs | ||
16 | |||
17 | (See the Arm ARM DDI0487L.a section A1.5.9.) | ||
18 | |||
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPCR is updated. In subsequent commits | ||
28 | we will arrange to use this new fp_status field for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
33 | --- | ||
34 | target/arm/cpu.h | 15 +++++++++++++++ | ||
35 | target/arm/internals.h | 2 ++ | ||
36 | target/arm/tcg/translate.h | 14 ++++++++++++++ | ||
37 | target/arm/cpu.c | 4 ++++ | ||
38 | target/arm/vfp_helper.c | 13 ++++++++++++- | ||
39 | 5 files changed, 47 insertions(+), 1 deletion(-) | ||
40 | |||
41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/cpu.h | ||
44 | +++ b/target/arm/cpu.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
46 | * standard_fp_status : the ARM "Standard FPSCR Value" | ||
47 | * standard_fp_status_fp16 : used for half-precision | ||
48 | * calculations with the ARM "Standard FPSCR Value" | ||
49 | + * ah_fp_status: used for the A64 insns which change behaviour | ||
50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
51 | + * and the reciprocal and square root estimate/step insns) | ||
52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
54 | + * and the reciprocal and square root estimate/step insns); | ||
55 | + * for half-precision | ||
56 | * | ||
57 | * Half-precision operations are governed by a separate | ||
58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
81 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/arm/internals.h | ||
84 | +++ b/target/arm/internals.h | ||
85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); | ||
86 | |||
87 | /* Set the float_status behaviour to match the Arm defaults */ | ||
88 | void arm_set_default_fp_behaviours(float_status *s); | ||
89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ | ||
90 | +void arm_set_ah_fp_behaviours(float_status *s); | ||
91 | |||
92 | #endif | ||
93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/tcg/translate.h | ||
96 | +++ b/target/arm/tcg/translate.h | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
98 | FPST_A64, | ||
99 | FPST_A32_F16, | ||
100 | FPST_A64_F16, | ||
101 | + FPST_AH, | ||
102 | + FPST_AH_F16, | ||
103 | FPST_STD, | ||
104 | FPST_STD_F16, | ||
105 | } ARMFPStatusFlavour; | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
108 | * FPST_A64_F16 | ||
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
154 | */ | ||
155 | -static void arm_set_ah_fp_behaviours(float_status *s) | ||
156 | +void arm_set_ah_fp_behaviours(float_status *s) | ||
157 | { | ||
158 | set_float_detect_tininess(float_tininess_after_rounding, s); | ||
159 | set_float_ftz_detection(float_ftz_after_rounding, s); | ||
160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
164 | + /* | ||
165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
166 | + * they are used for insns that must not set the cumulative exception bits. | ||
167 | + */ | ||
168 | + | ||
169 | /* | ||
170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
178 | } | ||
179 | |||
180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
190 | } | ||
191 | if (changed & FPCR_FZ) { | ||
192 | bool ftz_enabled = val & FPCR_FZ; | ||
193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
199 | } | ||
200 | if (changed & FPCR_AH) { | ||
201 | bool ah_enabled = val & FPCR_AH; | ||
202 | -- | ||
203 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use | ||
2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get | ||
3 | the required behaviour changes. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.h | 13 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- | ||
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/translate-a64.h | ||
16 | +++ b/target/arm/tcg/translate-a64.h | ||
17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) | ||
18 | return ret; | ||
19 | } | ||
20 | |||
21 | +/* | ||
22 | + * Return the ARMFPStatusFlavour to use based on element size and | ||
23 | + * whether FPCR.AH is set. | ||
24 | + */ | ||
25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) | ||
26 | +{ | ||
27 | + if (s->fpcr_ah) { | ||
28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; | ||
29 | + } else { | ||
30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
43 | */ | ||
44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
45 | - int rm, bool is_fp16, int data, | ||
46 | + int rm, ARMFPStatusFlavour fpsttype, int data, | ||
47 | gen_helper_gvec_3_ptr *fn) | ||
48 | { | ||
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
71 | } | ||
72 | break; | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
74 | if (fp_access_check(s)) { | ||
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
79 | write_fp_sreg(s, a->rd, t0); | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
83 | if (fp_access_check(s)) { | ||
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
142 | } | ||
143 | |||
144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
145 | - esz == MO_16, data, fns[esz - 1]); | ||
146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, | ||
147 | + data, fns[esz - 1]); | ||
148 | return true; | ||
149 | } | ||
150 | |||
151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
152 | + gen_helper_gvec_3_ptr * const fns[3]) | ||
153 | +{ | ||
154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, | ||
155 | + a->esz == MO_16 ? | ||
156 | + FPST_A64_F16 : FPST_A64); | ||
157 | +} | ||
158 | + | ||
159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
160 | + gen_helper_gvec_3_ptr * const f[3]) | ||
161 | +{ | ||
162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
163 | + select_ah_fpst(s, a->esz)); | ||
164 | +} | ||
165 | + | ||
166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { | ||
167 | gen_helper_gvec_fadd_h, | ||
168 | gen_helper_gvec_fadd_s, | ||
169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
170 | gen_helper_gvec_recps_s, | ||
171 | gen_helper_gvec_recps_d, | ||
172 | }; | ||
173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) | ||
174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
175 | |||
176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
177 | gen_helper_gvec_rsqrts_h, | ||
178 | gen_helper_gvec_rsqrts_s, | ||
179 | gen_helper_gvec_rsqrts_d, | ||
180 | }; | ||
181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) | ||
182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
183 | |||
184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
185 | gen_helper_gvec_faddp_h, | ||
186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | ||
187 | } | ||
188 | |||
189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
190 | - esz == MO_16, a->idx, fns[esz - 1]); | ||
191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
192 | + a->idx, fns[esz - 1]); | ||
193 | return true; | ||
194 | } | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { | ||
197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); | ||
198 | } FPScalar1; | ||
199 | |||
200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
201 | - const FPScalar1 *f, int rmode) | ||
202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
203 | + const FPScalar1 *f, int rmode, | ||
204 | + ARMFPStatusFlavour fpsttype) | ||
205 | { | ||
206 | TCGv_i32 tcg_rmode = NULL; | ||
207 | TCGv_ptr fpst; | ||
208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
209 | return check == 0; | ||
210 | } | ||
211 | |||
212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
406 | -- | ||
407 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFMLALB, BFMLALT | ||
3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour | ||
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | ||
10 | the boolean. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- | ||
16 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
17 | 2 files changed, 17 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/translate-a64.c | ||
22 | +++ b/target/arm/tcg/translate-a64.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, | ||
24 | * an out-of-line helper. | ||
25 | */ | ||
26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
27 | - int rm, int ra, bool is_fp16, int data, | ||
28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, | ||
29 | + int data, | ||
30 | gen_helper_gvec_4_ptr *fn) | ||
31 | { | ||
32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), | ||
35 | vec_full_reg_offset(s, rn), | ||
36 | vec_full_reg_offset(s, rm), | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | ||
38 | } | ||
39 | if (fp_access_check(s)) { | ||
40 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, | ||
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
58 | } | ||
59 | |||
60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
61 | - esz == MO_16, (a->idx << 1) | neg, | ||
62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
63 | + (a->idx << 1) | neg, | ||
64 | fns[esz - 1]); | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/translate-sve.c | ||
90 | +++ b/target/arm/tcg/translate-sve.c | ||
91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, | ||
92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
93 | { | ||
94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | ||
95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); | ||
96 | + a->rd, a->rn, a->rm, a->ra, sel, | ||
97 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
98 | } | ||
99 | |||
100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | ||
101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | ||
102 | { | ||
103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | ||
104 | a->rd, a->rn, a->rm, a->ra, | ||
105 | - (a->index << 1) | sel, FPST_A64); | ||
106 | + (a->index << 1) | sel, | ||
107 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
108 | } | ||
109 | |||
110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so | ||
2 | that instead of zeroing the high elements of a vector register when | ||
3 | we write the output of a scalar operation to it, we instead merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
1 | 6 | ||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/tcg/translate.h | 2 ++ | ||
16 | target/arm/tcg/hflags.c | 9 +++++++++ | ||
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
19 | |||
20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/cpu.h | ||
23 | +++ b/target/arm/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
29 | |||
30 | /* | ||
31 | * Helpers for using the above. Note that only the A64 accessors use | ||
32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/tcg/translate.h | ||
35 | +++ b/target/arm/tcg/translate.h | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
37 | bool nv2_mem_be; | ||
38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
39 | bool fpcr_ah; | ||
40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ | ||
41 | + bool fpcr_nep; | ||
42 | /* | ||
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
64 | } | ||
65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/arm/tcg/translate-a64.c | ||
68 | +++ b/target/arm/tcg/translate-a64.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); | ||
74 | dc->vec_len = 0; | ||
75 | dc->vec_stride = 0; | ||
76 | dc->cp_regs = arm_cpu->cp_regs; | ||
77 | -- | ||
78 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the |
---|---|---|---|
2 | 2 | behaviour of the writeback of the result for most SIMD scalar | |
3 | To prepare for multiple redistributor regions, we introduce | 3 | operations, so that instead of zeroing the upper part of the result |
4 | an array of uint32_t properties that stores the redistributor | 4 | register it merges the upper elements from one of the input |
5 | count of each redistributor region. | 5 | registers. |
6 | 6 | ||
7 | Non accelerated VGICv3 only supports a single redistributor region. | 7 | Provide new functions write_fp_*reg_merging() which can be used |
8 | The capacity of all redist regions is checked against the number of | 8 | instead of the existing write_fp_*reg() functions when we want this |
9 | vcpus. | 9 | "merge the result with one of the input registers if FPCR.NEP is |
10 | 10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). | |
11 | Machvirt is updated to set those properties, i.e. a single | 11 | |
12 | redistributor region with count set to the number of vcpus | 12 | Note that (as documented in the description of the FPCR.NEP bit) |
13 | capped by 123. | 13 | which input register to use as the merge source varies by |
14 | 14 | instruction: for these 2-input scalar operations, the comparison | |
15 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | 15 | instructions take from Rm, not Rn. |
16 | Reviewed-by: Andrew Jones <drjones@redhat.com> | 16 | |
17 | Message-id: 1529072910-16156-4-git-send-email-eric.auger@redhat.com | 17 | We'll extend this to also provide the merging behaviour for |
18 | the remaining scalar insns in subsequent commits. | ||
19 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
19 | --- | 22 | --- |
20 | include/hw/intc/arm_gicv3_common.h | 8 +++++-- | 23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- |
21 | hw/arm/virt.c | 11 ++++++++- | 24 | 1 file changed, 91 insertions(+), 26 deletions(-) |
22 | hw/intc/arm_gicv3.c | 12 +++++++++- | 25 | |
23 | hw/intc/arm_gicv3_common.c | 38 ++++++++++++++++++++++++++---- | 26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
24 | hw/intc/arm_gicv3_kvm.c | 9 +++++-- | ||
25 | 5 files changed, 67 insertions(+), 11 deletions(-) | ||
26 | |||
27 | diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/include/hw/intc/arm_gicv3_common.h | 28 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/include/hw/intc/arm_gicv3_common.h | 29 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ | 30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) |
32 | #define GICV3_MAXIRQ 1020 | 31 | write_fp_dreg(s, reg, tmp); |
33 | #define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL) | 32 | } |
34 | 33 | ||
35 | +#define GICV3_REDIST_SIZE 0x20000 | 34 | +/* |
36 | + | 35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: |
37 | /* Number of SGI target-list bits */ | 36 | + * - if FPCR.NEP == 0, clear the high elements of reg |
38 | #define GICV3_TARGETLIST_BITS 16 | 37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg |
39 | 38 | + * (i.e. merge the result with those high elements) | |
40 | @@ -XXX,XX +XXX,XX @@ struct GICv3State { | 39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). |
41 | /*< public >*/ | 40 | + */ |
42 | 41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, | |
43 | MemoryRegion iomem_dist; /* Distributor */ | 42 | + TCGv_i64 v) |
44 | - MemoryRegion iomem_redist; /* Redistributors */ | 43 | +{ |
45 | + MemoryRegion *iomem_redist; /* Redistributor Regions */ | 44 | + if (!s->fpcr_nep) { |
46 | + uint32_t *redist_region_count; /* redistributor count within each region */ | 45 | + write_fp_dreg(s, reg, v); |
47 | + uint32_t nb_redist_regions; /* number of redist regions */ | ||
48 | |||
49 | uint32_t num_cpu; | ||
50 | uint32_t num_irq; | ||
51 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMGICv3CommonClass { | ||
52 | } ARMGICv3CommonClass; | ||
53 | |||
54 | void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, | ||
55 | - const MemoryRegionOps *ops); | ||
56 | + const MemoryRegionOps *ops, Error **errp); | ||
57 | |||
58 | #endif | ||
59 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/hw/arm/virt.c | ||
62 | +++ b/hw/arm/virt.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) | ||
64 | if (!kvm_irqchip_in_kernel()) { | ||
65 | qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure); | ||
66 | } | ||
67 | + | ||
68 | + if (type == 3) { | ||
69 | + uint32_t redist0_capacity = | ||
70 | + vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; | ||
71 | + uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); | ||
72 | + | ||
73 | + qdev_prop_set_uint32(gicdev, "len-redist-region-count", 1); | ||
74 | + qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count); | ||
75 | + } | ||
76 | qdev_init_nofail(gicdev); | ||
77 | gicbusdev = SYS_BUS_DEVICE(gicdev); | ||
78 | sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base); | ||
79 | @@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine) | ||
80 | * many redistributors we can fit into the memory map. | ||
81 | */ | ||
82 | if (vms->gic_version == 3) { | ||
83 | - virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000; | ||
84 | + virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; | ||
85 | } else { | ||
86 | virt_max_cpus = GIC_NCPU; | ||
87 | } | ||
88 | diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/hw/intc/arm_gicv3.c | ||
91 | +++ b/hw/intc/arm_gicv3.c | ||
92 | @@ -XXX,XX +XXX,XX @@ static void arm_gic_realize(DeviceState *dev, Error **errp) | ||
93 | return; | ||
94 | } | ||
95 | |||
96 | - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); | ||
97 | + if (s->nb_redist_regions != 1) { | ||
98 | + error_setg(errp, "VGICv3 redist region number(%d) not equal to 1", | ||
99 | + s->nb_redist_regions); | ||
100 | + return; | 46 | + return; |
101 | + } | 47 | + } |
102 | + | 48 | + |
103 | + gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err); | 49 | + /* |
104 | + if (local_err) { | 50 | + * Move from mergereg to reg; this sets the high elements and |
105 | + error_propagate(errp, local_err); | 51 | + * clears the bits above 128 as a side effect. |
52 | + */ | ||
53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), | ||
54 | + vec_full_reg_offset(s, mergereg), | ||
55 | + 16, vec_full_reg_size(s)); | ||
56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); | ||
57 | +} | ||
58 | + | ||
59 | +/* | ||
60 | + * Write a single-prec result; the higher elements of the destination | ||
61 | + * are cleared if FPCR.NEP is 0, otherwise copied from mergereg. | ||
62 | + */ | ||
63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, | ||
64 | + TCGv_i32 v) | ||
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
106 | + return; | 68 | + return; |
107 | + } | 69 | + } |
108 | 70 | + | |
109 | gicv3_init_cpuif(s); | 71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
110 | } | 72 | + vec_full_reg_offset(s, mergereg), |
111 | diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c | 73 | + 16, vec_full_reg_size(s)); |
112 | index XXXXXXX..XXXXXXX 100644 | 74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); |
113 | --- a/hw/intc/arm_gicv3_common.c | 75 | +} |
114 | +++ b/hw/intc/arm_gicv3_common.c | 76 | + |
115 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_gicv3 = { | 77 | +/* |
116 | }; | 78 | + * Write a half-prec result; the higher elements of the destination |
117 | 79 | + * are cleared if FPCR.NEP is 0, otherwise copied from mergereg. |
118 | void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, | 80 | + * The caller must ensure that the top 16 bits of v are zero. |
119 | - const MemoryRegionOps *ops) | 81 | + */ |
120 | + const MemoryRegionOps *ops, Error **errp) | 82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, |
121 | { | 83 | + TCGv_i32 v) |
122 | SysBusDevice *sbd = SYS_BUS_DEVICE(s); | 84 | +{ |
123 | + int rdist_capacity = 0; | 85 | + if (!s->fpcr_nep) { |
124 | int i; | 86 | + write_fp_sreg(s, reg, v); |
125 | |||
126 | + for (i = 0; i < s->nb_redist_regions; i++) { | ||
127 | + rdist_capacity += s->redist_region_count[i]; | ||
128 | + } | ||
129 | + if (rdist_capacity < s->num_cpu) { | ||
130 | + error_setg(errp, "Capacity of the redist regions(%d) " | ||
131 | + "is less than number of vcpus(%d)", | ||
132 | + rdist_capacity, s->num_cpu); | ||
133 | + return; | 87 | + return; |
134 | + } | 88 | + } |
135 | + | 89 | + |
136 | /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. | 90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
137 | * GPIO array layout is thus: | 91 | + vec_full_reg_offset(s, mergereg), |
138 | * [0..N-1] spi | 92 | + 16, vec_full_reg_size(s)); |
139 | @@ -XXX,XX +XXX,XX @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, | 93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); |
140 | 94 | +} | |
141 | memory_region_init_io(&s->iomem_dist, OBJECT(s), ops, s, | 95 | + |
142 | "gicv3_dist", 0x10000); | 96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ |
143 | - memory_region_init_io(&s->iomem_redist, OBJECT(s), ops ? &ops[1] : NULL, s, | 97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, |
144 | - "gicv3_redist", 0x20000 * s->num_cpu); | 98 | GVecGen2Fn *gvec_fn, int vece) |
145 | - | 99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { |
146 | sysbus_init_mmio(sbd, &s->iomem_dist); | 100 | } FPScalar; |
147 | - sysbus_init_mmio(sbd, &s->iomem_redist); | 101 | |
148 | + | 102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
149 | + s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions); | 103 | - const FPScalar *f, |
150 | + for (i = 0; i < s->nb_redist_regions; i++) { | 104 | + const FPScalar *f, int mergereg, |
151 | + char *name = g_strdup_printf("gicv3_redist_region[%d]", i); | 105 | ARMFPStatusFlavour fpsttype) |
152 | + | 106 | { |
153 | + memory_region_init_io(&s->iomem_redist[i], OBJECT(s), | 107 | switch (a->esz) { |
154 | + ops ? &ops[1] : NULL, s, name, | 108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, |
155 | + s->redist_region_count[i] * GICV3_REDIST_SIZE); | 109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); |
156 | + sysbus_init_mmio(sbd, &s->iomem_redist[i]); | 110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); |
157 | + g_free(name); | 111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); |
158 | + } | 112 | - write_fp_dreg(s, a->rd, t0); |
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
114 | } | ||
115 | break; | ||
116 | case MO_32: | ||
117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
121 | - write_fp_sreg(s, a->rd, t0); | ||
122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); | ||
123 | } | ||
124 | break; | ||
125 | case MO_16: | ||
126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
132 | } | ||
133 | break; | ||
134 | default: | ||
135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
136 | return true; | ||
159 | } | 137 | } |
160 | 138 | ||
161 | static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) | 139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
162 | @@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) | 140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
163 | } | 141 | + int mergereg) |
142 | { | ||
143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | ||
144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
145 | a->esz == MO_16 ? | ||
146 | FPST_A64_F16 : FPST_A64); | ||
164 | } | 147 | } |
165 | 148 | ||
166 | +static void arm_gicv3_finalize(Object *obj) | 149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) |
167 | +{ | 150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
168 | + GICv3State *s = ARM_GICV3_COMMON(obj); | 151 | + int mergereg) |
169 | + | 152 | { |
170 | + g_free(s->redist_region_count); | 153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); |
171 | +} | 154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, |
172 | + | 155 | + select_ah_fpst(s, a->esz)); |
173 | static void arm_gicv3_common_reset(DeviceState *dev) | 156 | } |
174 | { | 157 | |
175 | GICv3State *s = ARM_GICV3_COMMON(dev); | 158 | static const FPScalar f_scalar_fadd = { |
176 | @@ -XXX,XX +XXX,XX @@ static Property arm_gicv3_common_properties[] = { | 159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { |
177 | DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32), | 160 | gen_helper_vfp_adds, |
178 | DEFINE_PROP_UINT32("revision", GICv3State, revision, 3), | 161 | gen_helper_vfp_addd, |
179 | DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0), | 162 | }; |
180 | + DEFINE_PROP_ARRAY("redist-region-count", GICv3State, nb_redist_regions, | 163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) |
181 | + redist_region_count, qdev_prop_uint32, uint32_t), | 164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) |
182 | DEFINE_PROP_END_OF_LIST(), | 165 | |
183 | }; | 166 | static const FPScalar f_scalar_fsub = { |
184 | 167 | gen_helper_vfp_subh, | |
185 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo arm_gicv3_common_type = { | 168 | gen_helper_vfp_subs, |
186 | .instance_size = sizeof(GICv3State), | 169 | gen_helper_vfp_subd, |
187 | .class_size = sizeof(ARMGICv3CommonClass), | 170 | }; |
188 | .class_init = arm_gicv3_common_class_init, | 171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) |
189 | + .instance_finalize = arm_gicv3_finalize, | 172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) |
190 | .abstract = true, | 173 | |
191 | .interfaces = (InterfaceInfo []) { | 174 | static const FPScalar f_scalar_fdiv = { |
192 | { TYPE_ARM_LINUX_BOOT_IF }, | 175 | gen_helper_vfp_divh, |
193 | diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c | 176 | gen_helper_vfp_divs, |
194 | index XXXXXXX..XXXXXXX 100644 | 177 | gen_helper_vfp_divd, |
195 | --- a/hw/intc/arm_gicv3_kvm.c | 178 | }; |
196 | +++ b/hw/intc/arm_gicv3_kvm.c | 179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) |
197 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | 180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) |
198 | return; | 181 | |
199 | } | 182 | static const FPScalar f_scalar_fmul = { |
200 | 183 | gen_helper_vfp_mulh, | |
201 | - gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); | 184 | gen_helper_vfp_muls, |
202 | + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err); | 185 | gen_helper_vfp_muld, |
203 | + if (local_err) { | 186 | }; |
204 | + error_propagate(errp, local_err); | 187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) |
205 | + return; | 188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) |
206 | + } | 189 | |
207 | 190 | static const FPScalar f_scalar_fmax = { | |
208 | for (i = 0; i < s->num_cpu; i++) { | 191 | gen_helper_vfp_maxh, |
209 | ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); | 192 | gen_helper_vfp_maxs, |
210 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | 193 | gen_helper_vfp_maxd, |
211 | 194 | }; | |
212 | kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | 195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) |
213 | KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); | 196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) |
214 | - kvm_arm_register_device(&s->iomem_redist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | 197 | |
215 | + kvm_arm_register_device(&s->iomem_redist[0], -1, | 198 | static const FPScalar f_scalar_fmin = { |
216 | + KVM_DEV_ARM_VGIC_GRP_ADDR, | 199 | gen_helper_vfp_minh, |
217 | KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); | 200 | gen_helper_vfp_mins, |
218 | 201 | gen_helper_vfp_mind, | |
219 | if (kvm_has_gsi_routing()) { | 202 | }; |
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
220 | -- | 306 | -- |
221 | 2.17.1 | 307 | 2.34.1 |
222 | |||
223 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works | ||
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
1 | 8 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
13 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/translate-a64.c | ||
18 | +++ b/target/arm/tcg/translate-a64.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { | ||
20 | }; | ||
21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
22 | |||
23 | -static const FPScalar1 f_scalar_bfcvt = { | ||
24 | - .gen_s = gen_helper_bfcvt, | ||
25 | -}; | ||
26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | ||
28 | +{ | ||
29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; | ||
30 | + TCGv_i32 t32; | ||
31 | + int check; | ||
32 | + | ||
33 | + if (!dc_isar_feature(aa64_bf16, s)) { | ||
34 | + return false; | ||
35 | + } | ||
36 | + | ||
37 | + check = fp_access_check_scalar_hsd(s, a->esz); | ||
38 | + | ||
39 | + if (check <= 0) { | ||
40 | + return check == 0; | ||
41 | + } | ||
42 | + | ||
43 | + t32 = read_fp_sreg(s, a->rn); | ||
44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); | ||
45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
46 | + return true; | ||
47 | +} | ||
48 | |||
49 | static const FPScalar1 f_scalar_frint32 = { | ||
50 | NULL, | ||
51 | -- | ||
52 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 1-input scalar operations. | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ | ||
7 | 1 file changed, 14 insertions(+), 12 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
14 | case MO_64: | ||
15 | t64 = read_fp_dreg(s, a->rn); | ||
16 | f->gen_d(t64, t64, fpst); | ||
17 | - write_fp_dreg(s, a->rd, t64); | ||
18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); | ||
19 | break; | ||
20 | case MO_32: | ||
21 | t32 = read_fp_sreg(s, a->rn); | ||
22 | f->gen_s(t32, t32, fpst); | ||
23 | - write_fp_sreg(s, a->rd, t32); | ||
24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); | ||
25 | break; | ||
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
99 | + } | ||
100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); | ||
101 | return true; | ||
102 | } | ||
103 | -- | ||
104 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
7 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
14 | } else { | ||
15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); | ||
16 | } | ||
17 | - write_fp_dreg(s, rd, tcg_double); | ||
18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); | ||
19 | break; | ||
20 | |||
21 | case MO_32: | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
40 | -- | ||
41 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) | ||
2 | is always double-to-single and must honour FPCR.NEP. Implement this | ||
3 | directly in a trans function rather than using | ||
4 | do_2misc_narrow_scalar(). | ||
1 | 5 | ||
6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for | ||
7 | the FCVTXN (vector) insn, so we move those down in the file to | ||
8 | where they are used. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ | ||
14 | 1 file changed, 28 insertions(+), 15 deletions(-) | ||
15 | |||
16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/arm/tcg/translate-a64.c | ||
19 | +++ b/target/arm/tcg/translate-a64.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { | ||
21 | }; | ||
22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) | ||
23 | |||
24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | ||
26 | { | ||
27 | - /* | ||
28 | - * 64 bit to 32 bit float conversion | ||
29 | - * with von Neumann rounding (round to odd) | ||
30 | - */ | ||
31 | - TCGv_i32 tmp = tcg_temp_new_i32(); | ||
32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
33 | - tcg_gen_extu_i32_i64(d, tmp); | ||
34 | + if (fp_access_check(s)) { | ||
35 | + /* | ||
36 | + * 64 bit to 32 bit float conversion | ||
37 | + * with von Neumann rounding (round to odd) | ||
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
45 | } | ||
46 | |||
47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
48 | - NULL, | ||
49 | - NULL, | ||
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
53 | - | ||
54 | #undef WRAP_ENV | ||
55 | |||
56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) | ||
57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | ||
58 | tcg_gen_extu_i32_i64(d, tmp); | ||
59 | } | ||
60 | |||
61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
62 | +{ | ||
63 | + /* | ||
64 | + * 64 bit to 32 bit float conversion | ||
65 | + * with von Neumann rounding (round to odd) | ||
66 | + */ | ||
67 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
69 | + tcg_gen_extu_i32_i64(d, tmp); | ||
70 | +} | ||
71 | + | ||
72 | static ArithOneOp * const f_vector_fcvtn[] = { | ||
73 | NULL, | ||
74 | gen_fcvtn_hs, | ||
75 | gen_fcvtn_sd, | ||
76 | }; | ||
77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { | ||
78 | + NULL, | ||
79 | + NULL, | ||
80 | + gen_fcvtxn_sd, | ||
81 | +}; | ||
82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) | ||
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
84 | |||
85 | -- | ||
86 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by | ||
2 | element instructions; these both need to merge the result with the Rn | ||
3 | register when FPCR.NEP is set. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
16 | |||
17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); | ||
18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
25 | |||
26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); | ||
27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) | ||
34 | |||
35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); | ||
36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | -- | ||
43 | 2.34.1 | diff view generated by jsdifflib |
1 | Instantiate and wire up the Memory Protection Controllers | 1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special |
---|---|---|---|
2 | in the MPS2 board itself. | 2 | cases: |
3 | |||
4 | * comparing two zeroes (even of different sign) or comparing a NaN | ||
5 | with anything always returns the second argument (possibly | ||
6 | squashed to zero) | ||
7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 | ||
8 | |||
9 | Implement these semantics in new helper functions and select them at | ||
10 | translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. | ||
11 | (We will convert the other FMAX and FMIN insns in subsequent | ||
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
3 | 15 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20180620132032.28865-9-peter.maydell@linaro.org | ||
7 | --- | 18 | --- |
8 | hw/arm/mps2-tz.c | 71 ++++++++++++++++++++++++++++++------------------ | 19 | target/arm/tcg/helper-a64.h | 7 +++++++ |
9 | 1 file changed, 44 insertions(+), 27 deletions(-) | 20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ |
21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- | ||
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
10 | 23 | ||
11 | diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c | 24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h |
12 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/hw/arm/mps2-tz.c | 26 | --- a/target/arm/tcg/helper-a64.h |
14 | +++ b/hw/arm/mps2-tz.c | 27 | +++ b/target/arm/tcg/helper-a64.h |
15 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) |
16 | #include "hw/timer/cmsdk-apb-timer.h" | 29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) |
17 | #include "hw/misc/mps2-scc.h" | 30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) |
18 | #include "hw/misc/mps2-fpgaio.h" | 31 | |
19 | +#include "hw/misc/tz-mpc.h" | 32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) |
20 | #include "hw/arm/iotkit.h" | 33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) |
21 | #include "hw/devices.h" | 34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) |
22 | #include "net/net.h" | 35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) |
24 | 37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | |
25 | IoTKit iotkit; | 38 | + |
26 | MemoryRegion psram; | 39 | DEF_HELPER_2(exception_return, void, env, i64) |
27 | - MemoryRegion ssram1; | 40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) |
28 | + MemoryRegion ssram[3]; | 41 | |
29 | MemoryRegion ssram1_m; | 42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c |
30 | - MemoryRegion ssram23; | 43 | index XXXXXXX..XXXXXXX 100644 |
31 | MPS2SCC scc; | 44 | --- a/target/arm/tcg/helper-a64.c |
32 | MPS2FPGAIO fpgaio; | 45 | +++ b/target/arm/tcg/helper-a64.c |
33 | TZPPC ppc[5]; | 46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) |
34 | - UnimplementedDeviceState ssram_mpc[3]; | 47 | return r; |
35 | + TZMPC ssram_mpc[3]; | ||
36 | UnimplementedDeviceState spi[5]; | ||
37 | UnimplementedDeviceState i2c[4]; | ||
38 | UnimplementedDeviceState i2s_audio; | ||
39 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
40 | /* Main SYSCLK frequency in Hz */ | ||
41 | #define SYSCLK_FRQ 20000000 | ||
42 | |||
43 | -/* Initialize the auxiliary RAM region @mr and map it into | ||
44 | - * the memory map at @base. | ||
45 | - */ | ||
46 | -static void make_ram(MemoryRegion *mr, const char *name, | ||
47 | - hwaddr base, hwaddr size) | ||
48 | -{ | ||
49 | - memory_region_init_ram(mr, NULL, name, size, &error_fatal); | ||
50 | - memory_region_add_subregion(get_system_memory(), base, mr); | ||
51 | -} | ||
52 | - | ||
53 | /* Create an alias of an entire original MemoryRegion @orig | ||
54 | * located at @base in the memory map. | ||
55 | */ | ||
56 | @@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque, | ||
57 | return sysbus_mmio_get_region(s, 0); | ||
58 | } | 48 | } |
59 | 49 | ||
60 | +static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque, | 50 | +/* |
61 | + const char *name, hwaddr size) | 51 | + * AH=1 min/max have some odd special cases: |
62 | +{ | 52 | + * comparing two zeroes (regardless of sign), (NaN, anything), |
63 | + TZMPC *mpc = opaque; | 53 | + * or (anything, NaN) should return the second argument (possibly |
64 | + int i = mpc - &mms->ssram_mpc[0]; | 54 | + * squashed to zero). |
65 | + MemoryRegion *ssram = &mms->ssram[i]; | 55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. |
66 | + MemoryRegion *upstream; | 56 | + */ |
67 | + char *mpcname = g_strdup_printf("%s-mpc", name); | 57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ |
68 | + static uint32_t ramsize[] = { 0x00400000, 0x00200000, 0x00200000 }; | 58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ |
69 | + static uint32_t rambase[] = { 0x00000000, 0x28000000, 0x28200000 }; | 59 | + { \ |
70 | + | 60 | + bool save; \ |
71 | + memory_region_init_ram(ssram, NULL, name, ramsize[i], &error_fatal); | 61 | + CTYPE r; \ |
72 | + | 62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ |
73 | + init_sysbus_child(OBJECT(mms), mpcname, mpc, | 63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ |
74 | + sizeof(mms->ssram_mpc[0]), TYPE_TZ_MPC); | 64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ |
75 | + object_property_set_link(OBJECT(mpc), OBJECT(ssram), | 65 | + return b; \ |
76 | + "downstream", &error_fatal); | 66 | + } \ |
77 | + object_property_set_bool(OBJECT(mpc), true, "realized", &error_fatal); | 67 | + if (FLOATTYPE ## _is_any_nan(a) || \ |
78 | + /* Map the upstream end of the MPC into system memory */ | 68 | + FLOATTYPE ## _is_any_nan(b)) { \ |
79 | + upstream = sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 1); | 69 | + float_raise(float_flag_invalid, fpst); \ |
80 | + memory_region_add_subregion(get_system_memory(), rambase[i], upstream); | 70 | + return b; \ |
81 | + /* and connect its interrupt to the IoTKit */ | 71 | + } \ |
82 | + qdev_connect_gpio_out_named(DEVICE(mpc), "irq", 0, | 72 | + save = get_flush_to_zero(fpst); \ |
83 | + qdev_get_gpio_in_named(DEVICE(&mms->iotkit), | 73 | + set_flush_to_zero(false, fpst); \ |
84 | + "mpcexp_status", i)); | 74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ |
85 | + | 75 | + set_flush_to_zero(save, fpst); \ |
86 | + /* The first SSRAM is a special case as it has an alias; accesses to | 76 | + return r; \ |
87 | + * the alias region at 0x00400000 must also go to the MPC upstream. | ||
88 | + */ | ||
89 | + if (i == 0) { | ||
90 | + make_ram_alias(&mms->ssram1_m, "mps.ssram1_m", upstream, 0x00400000); | ||
91 | + } | 77 | + } |
92 | + | 78 | + |
93 | + g_free(mpcname); | 79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) |
94 | + /* Return the register interface MR for our caller to map behind the PPC */ | 80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) |
95 | + return sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 0); | 81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) |
82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) | ||
83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) | ||
84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) | ||
85 | + | ||
86 | /* 64-bit versions of the CRC helpers. Note that although the operation | ||
87 | * (and the prototypes of crc32c() and crc32() mean that only the bottom | ||
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
94 | select_ah_fpst(s, a->esz)); | ||
95 | } | ||
96 | |||
97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, | ||
99 | + const FPScalar *fnormal, | ||
100 | + const FPScalar *fah, | ||
101 | + int mergereg) | ||
102 | +{ | ||
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
96 | +} | 104 | +} |
97 | + | 105 | + |
98 | static void mps2tz_common_init(MachineState *machine) | 106 | static const FPScalar f_scalar_fadd = { |
99 | { | 107 | gen_helper_vfp_addh, |
100 | MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine); | 108 | gen_helper_vfp_adds, |
101 | @@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine) | 109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { |
102 | NULL, "mps.ram", 0x01000000); | 110 | gen_helper_vfp_maxs, |
103 | memory_region_add_subregion(system_memory, 0x80000000, &mms->psram); | 111 | gen_helper_vfp_maxd, |
104 | 112 | }; | |
105 | - /* The SSRAM memories should all be behind Memory Protection Controllers, | 113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) |
106 | - * but we don't implement that yet. | 114 | +static const FPScalar f_scalar_fmax_ah = { |
107 | - */ | 115 | + gen_helper_vfp_ah_maxh, |
108 | - make_ram(&mms->ssram1, "mps.ssram1", 0x00000000, 0x00400000); | 116 | + gen_helper_vfp_ah_maxs, |
109 | - make_ram_alias(&mms->ssram1_m, "mps.ssram1_m", &mms->ssram1, 0x00400000); | 117 | + gen_helper_vfp_ah_maxd, |
110 | - | 118 | +}; |
111 | - make_ram(&mms->ssram23, "mps.ssram23", 0x28000000, 0x00400000); | 119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) |
112 | - | 120 | |
113 | /* The overflow IRQs for all UARTs are ORed together. | 121 | static const FPScalar f_scalar_fmin = { |
114 | * Tx, Rx and "combined" IRQs are sent to the NVIC separately. | 122 | gen_helper_vfp_minh, |
115 | * Create the OR gate for this. | 123 | gen_helper_vfp_mins, |
116 | @@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine) | 124 | gen_helper_vfp_mind, |
117 | const PPCInfo ppcs[] = { { | 125 | }; |
118 | .name = "apb_ppcexp0", | 126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) |
119 | .ports = { | 127 | +static const FPScalar f_scalar_fmin_ah = { |
120 | - { "ssram-mpc0", make_unimp_dev, &mms->ssram_mpc[0], | 128 | + gen_helper_vfp_ah_minh, |
121 | - 0x58007000, 0x1000 }, | 129 | + gen_helper_vfp_ah_mins, |
122 | - { "ssram-mpc1", make_unimp_dev, &mms->ssram_mpc[1], | 130 | + gen_helper_vfp_ah_mind, |
123 | - 0x58008000, 0x1000 }, | 131 | +}; |
124 | - { "ssram-mpc2", make_unimp_dev, &mms->ssram_mpc[2], | 132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) |
125 | - 0x58009000, 0x1000 }, | 133 | |
126 | + { "ssram-0", make_mpc, &mms->ssram_mpc[0], 0x58007000, 0x1000 }, | 134 | static const FPScalar f_scalar_fmaxnm = { |
127 | + { "ssram-1", make_mpc, &mms->ssram_mpc[1], 0x58008000, 0x1000 }, | 135 | gen_helper_vfp_maxnumh, |
128 | + { "ssram-2", make_mpc, &mms->ssram_mpc[2], 0x58009000, 0x1000 }, | ||
129 | }, | ||
130 | }, { | ||
131 | .name = "apb_ppcexp1", | ||
132 | -- | 136 | -- |
133 | 2.17.1 | 137 | 2.34.1 |
134 | |||
135 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by | ||
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar vfp_ah_min{h,s,d} and vfp_ah_max{h,s,d} helpers on each element. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- | ||
10 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
11 | 3 files changed, 41 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/tcg/helper-sve.h | ||
16 | +++ b/target/arm/tcg/helper-sve.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
19 | void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
22 | + void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
24 | + void, ptr, ptr, ptr, fpst, i32) | ||
25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, fpst, i32) | ||
27 | + | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, | ||
29 | + void, ptr, ptr, ptr, fpst, i32) | ||
30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
31 | + void, ptr, ptr, ptr, fpst, i32) | ||
32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, fpst, i32) | ||
34 | + | ||
35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
36 | i64, ptr, ptr, fpst, i32) | ||
37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/arm/tcg/translate-a64.c | ||
41 | +++ b/target/arm/tcg/translate-a64.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
43 | FPST_A64_F16 : FPST_A64); | ||
44 | } | ||
45 | |||
46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
47 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
48 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
49 | +{ | ||
50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
51 | +} | ||
52 | + | ||
53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
54 | gen_helper_gvec_3_ptr * const f[3]) | ||
55 | { | ||
56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { | ||
57 | gen_helper_gvec_fmax_s, | ||
58 | gen_helper_gvec_fmax_d, | ||
59 | }; | ||
60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) | ||
61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { | ||
62 | + gen_helper_gvec_ah_fmax_h, | ||
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
101 | |||
102 | -- | ||
103 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for FMAXV and FMINV. These are the | ||
2 | "recursively reduce all lanes of a vector to a scalar result" insns; | ||
3 | we just need to use the _ah_ helper for the reduction step when | ||
4 | FPCR.AH == 1. | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- | ||
10 | 1 file changed, 18 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/translate-a64.c | ||
15 | +++ b/target/arm/tcg/translate-a64.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, | ||
17 | } | ||
18 | |||
19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | ||
20 | - NeonGenTwoSingleOpFn *fn) | ||
21 | + NeonGenTwoSingleOpFn *fnormal, | ||
22 | + NeonGenTwoSingleOpFn *fah) | ||
23 | { | ||
24 | if (fp_access_check(s)) { | ||
25 | MemOp esz = a->esz; | ||
26 | int elts = (a->q ? 16 : 8) >> esz; | ||
27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); | ||
29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | ||
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) | ||
37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) | ||
38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) | ||
39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) | ||
40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, | ||
41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) | ||
42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, | ||
43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) | ||
44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, | ||
45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) | ||
46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, | ||
47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) | ||
48 | |||
49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) | ||
50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) | ||
51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) | ||
52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) | ||
53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, | ||
54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) | ||
55 | +TRANS(FMINNMV_s, do_fp_reduction, a, | ||
56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) | ||
57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) | ||
58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) | ||
59 | |||
60 | /* | ||
61 | * Floating-point Immediate | ||
62 | -- | ||
63 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the pairwise floating | ||
2 | point minimum/maximum insns FMINP and FMAXP. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ | ||
10 | 3 files changed, 45 insertions(+), 4 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { | ||
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, | ||
73 | + const FPScalar *fnormal, | ||
74 | + const FPScalar *fah) | ||
75 | +{ | ||
76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); | ||
77 | +} | ||
78 | + | ||
79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) | ||
80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) | ||
81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) | ||
82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) | ||
83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) | ||
84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) | ||
85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) | ||
86 | |||
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
99 | + | ||
100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) | ||
101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) | ||
102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) | ||
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
108 | -- | ||
109 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV | ||
2 | vector-reduction-to-scalar max/min operations. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, | ||
18 | i64, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, | ||
21 | + i64, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, | ||
23 | + i64, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, | ||
25 | + i64, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, | ||
28 | + i64, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | ||
30 | + i64, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, | ||
32 | + i64, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ | ||
42 | uintptr_t half = n / 2; \ | ||
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
52 | } | ||
53 | |||
54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) | ||
58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) | ||
59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) | ||
60 | |||
61 | /* Identity is floatN_default_nan, without the function call. */ | ||
62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) | ||
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
133 | -- | ||
134 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | The final part of the Memory Protection Controller we need to | 1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip |
---|---|---|---|
2 | implement is actually using the BLK_LUT data programmed by the | 2 | its sign bit. This means we can no longer use gen_vfp_neg*() |
3 | guest to determine whether to block the transaction or not. | 3 | everywhere but must instead generate slightly more complex code when |
4 | 4 | FPCR.AH is set. | |
5 | Since this means we now change transaction mappings when | 5 | |
6 | the guest writes to BLK_LUT, we must also call the IOMMU | 6 | Make this change for the scalar FNEG and for those places in |
7 | notifiers at that point. | 7 | translate-a64.c which were previously directly calling |
8 | gen_vfp_neg*(). | ||
9 | |||
10 | This change in semantics also affects any other instruction whose | ||
11 | pseudocode calls FPNeg(); in following commits we extend this | ||
12 | change to the other affected instructions. | ||
8 | 13 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | Message-id: 20180620132032.28865-5-peter.maydell@linaro.org | ||
12 | --- | 16 | --- |
13 | hw/misc/tz-mpc.c | 53 ++++++++++++++++++++++++++++++++++++++++++-- | 17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- |
14 | hw/misc/trace-events | 1 + | 18 | 1 file changed, 114 insertions(+), 11 deletions(-) |
15 | 2 files changed, 52 insertions(+), 2 deletions(-) | 19 | |
16 | 20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | |
17 | diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/misc/tz-mpc.c | 22 | --- a/target/arm/tcg/translate-a64.c |
20 | +++ b/hw/misc/tz-mpc.c | 23 | +++ b/target/arm/tcg/translate-a64.c |
21 | @@ -XXX,XX +XXX,XX @@ static void tz_mpc_irq_update(TZMPC *s) | 24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, |
22 | qemu_set_irq(s->irq, s->int_stat && s->int_en); | 25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); |
23 | } | 26 | } |
24 | 27 | ||
25 | +static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, | 28 | +/* |
26 | + uint32_t oldlut, uint32_t newlut) | 29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. |
27 | +{ | 30 | + * These functions implement |
28 | + /* Called when the LUT word at lutidx has changed from oldlut to newlut; | 31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) |
29 | + * must call the IOMMU notifiers for the changed blocks. | 32 | + * which for float32 is |
30 | + */ | 33 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) |
31 | + IOMMUTLBEntry entry = { | 34 | + * and similarly for the other float sizes. |
32 | + .addr_mask = s->blocksize - 1, | 35 | + */ |
33 | + }; | 36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) |
34 | + hwaddr addr = lutidx * s->blocksize * 32; | 37 | +{ |
35 | + int i; | 38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); |
36 | + | 39 | + |
37 | + for (i = 0; i < 32; i++, addr += s->blocksize) { | 40 | + gen_vfp_negh(chs_s, s); |
38 | + bool block_is_ns; | 41 | + gen_vfp_absh(abs_s, s); |
39 | + | 42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
40 | + if (!((oldlut ^ newlut) & (1 << i))) { | 43 | + abs_s, tcg_constant_i32(0x7c00), |
41 | + continue; | 44 | + s, chs_s); |
42 | + } | 45 | +} |
43 | + /* This changes the mappings for both the S and the NS space, | 46 | + |
44 | + * so we need to do four notifies: an UNMAP then a MAP for each. | 47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) |
45 | + */ | 48 | +{ |
46 | + block_is_ns = newlut & (1 << i); | 49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); |
47 | + | 50 | + |
48 | + trace_tz_mpc_iommu_notify(addr); | 51 | + gen_vfp_negs(chs_s, s); |
49 | + entry.iova = addr; | 52 | + gen_vfp_abss(abs_s, s); |
50 | + entry.translated_addr = addr; | 53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
51 | + | 54 | + abs_s, tcg_constant_i32(0x7f800000UL), |
52 | + entry.perm = IOMMU_NONE; | 55 | + s, chs_s); |
53 | + memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); | 56 | +} |
54 | + memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); | 57 | + |
55 | + | 58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) |
56 | + entry.perm = IOMMU_RW; | 59 | +{ |
57 | + if (block_is_ns) { | 60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); |
58 | + entry.target_as = &s->blocked_io_as; | 61 | + |
59 | + } else { | 62 | + gen_vfp_negd(chs_s, s); |
60 | + entry.target_as = &s->downstream_as; | 63 | + gen_vfp_absd(abs_s, s); |
61 | + } | 64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, |
62 | + memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); | 65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), |
63 | + if (block_is_ns) { | 66 | + s, chs_s); |
64 | + entry.target_as = &s->downstream_as; | 67 | +} |
65 | + } else { | 68 | + |
66 | + entry.target_as = &s->blocked_io_as; | 69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
67 | + } | 70 | +{ |
68 | + memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); | 71 | + if (dc->fpcr_ah) { |
72 | + gen_vfp_ah_negh(d, s); | ||
73 | + } else { | ||
74 | + gen_vfp_negh(d, s); | ||
69 | + } | 75 | + } |
70 | +} | 76 | +} |
71 | + | 77 | + |
72 | static void tz_mpc_autoinc_idx(TZMPC *s, unsigned access_size) | 78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
73 | { | 79 | +{ |
74 | /* Auto-increment BLK_IDX if necessary */ | 80 | + if (dc->fpcr_ah) { |
75 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_reg_write(void *opaque, hwaddr addr, | 81 | + gen_vfp_ah_negs(d, s); |
76 | s->blk_idx = value % s->blk_max; | 82 | + } else { |
77 | break; | 83 | + gen_vfp_negs(d, s); |
78 | case A_BLK_LUT: | 84 | + } |
79 | + tz_mpc_iommu_notify(s, s->blk_idx, s->blk_lut[s->blk_idx], value); | 85 | +} |
80 | s->blk_lut[s->blk_idx] = value; | 86 | + |
81 | tz_mpc_autoinc_idx(s, size); | 87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) |
82 | break; | 88 | +{ |
83 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry tz_mpc_translate(IOMMUMemoryRegion *iommu, | 89 | + if (dc->fpcr_ah) { |
84 | /* Look at the per-block configuration for this address, and | 90 | + gen_vfp_ah_negd(d, s); |
85 | * return a TLB entry directing the transaction at either | 91 | + } else { |
86 | * downstream_as or blocked_io_as, as appropriate. | 92 | + gen_vfp_negd(d, s); |
87 | - * For the moment, always permit accesses. | 93 | + } |
88 | + * If the LUT cfg_ns bit is 1, only non-secure transactions | 94 | +} |
89 | + * may pass. If the bit is 0, only secure transactions may pass. | 95 | + |
90 | */ | 96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier |
91 | - ok = true; | 97 | * than the 32 bit equivalent. |
92 | + ok = tz_mpc_cfg_ns(s, addr) == (iommu_idx == IOMMU_IDX_NS); | 98 | */ |
93 | 99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | |
94 | trace_tz_mpc_translate(addr, flags, | 100 | gen_vfp_negd(d, d); |
95 | iommu_idx == IOMMU_IDX_S ? "S" : "NS", | 101 | } |
96 | diff --git a/hw/misc/trace-events b/hw/misc/trace-events | 102 | |
97 | index XXXXXXX..XXXXXXX 100644 | 103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
98 | --- a/hw/misc/trace-events | 104 | +{ |
99 | +++ b/hw/misc/trace-events | 105 | + gen_helper_vfp_mulh(d, n, m, s); |
100 | @@ -XXX,XX +XXX,XX @@ tz_mpc_reg_write(uint32_t offset, uint64_t data, unsigned size) "TZ MPC regs wri | 106 | + gen_vfp_ah_negh(d, d); |
101 | tz_mpc_mem_blocked_read(uint64_t addr, unsigned size, bool secure) "TZ MPC blocked read: offset 0x%" PRIx64 " size %u secure %d" | 107 | +} |
102 | tz_mpc_mem_blocked_write(uint64_t addr, uint64_t data, unsigned size, bool secure) "TZ MPC blocked write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d" | 108 | + |
103 | tz_mpc_translate(uint64_t addr, int flags, const char *idx, const char *res) "TZ MPC translate: addr 0x%" PRIx64 " flags 0x%x iommu_idx %s: %s" | 109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
104 | +tz_mpc_iommu_notify(uint64_t addr) "TZ MPC iommu: notifying UNMAP/MAP for 0x%" PRIx64 | 110 | +{ |
105 | 111 | + gen_helper_vfp_muls(d, n, m, s); | |
106 | # hw/misc/tz-ppc.c | 112 | + gen_vfp_ah_negs(d, d); |
107 | tz_ppc_reset(void) "TZ PPC: reset" | 113 | +} |
114 | + | ||
115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
116 | +{ | ||
117 | + gen_helper_vfp_muld(d, n, m, s); | ||
118 | + gen_vfp_ah_negd(d, d); | ||
119 | +} | ||
120 | + | ||
121 | static const FPScalar f_scalar_fnmul = { | ||
122 | gen_fnmul_h, | ||
123 | gen_fnmul_s, | ||
124 | gen_fnmul_d, | ||
125 | }; | ||
126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
127 | +static const FPScalar f_scalar_ah_fnmul = { | ||
128 | + gen_fnmul_ah_h, | ||
129 | + gen_fnmul_ah_s, | ||
130 | + gen_fnmul_ah_d, | ||
131 | +}; | ||
132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fcmeq = { | ||
135 | gen_helper_advsimd_ceq_f16, | ||
136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
137 | |||
138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); | ||
139 | if (neg) { | ||
140 | - gen_vfp_negd(t1, t1); | ||
141 | + gen_vfp_maybe_ah_negd(s, t1, t1); | ||
142 | } | ||
143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
146 | |||
147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); | ||
148 | if (neg) { | ||
149 | - gen_vfp_negs(t1, t1); | ||
150 | + gen_vfp_maybe_ah_negs(s, t1, t1); | ||
151 | } | ||
152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
155 | |||
156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); | ||
157 | if (neg) { | ||
158 | - gen_vfp_negh(t1, t1); | ||
159 | + gen_vfp_maybe_ah_negh(s, t1, t1); | ||
160 | } | ||
161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
162 | fpstatus_ptr(FPST_A64_F16)); | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); | ||
165 | |||
166 | if (neg_a) { | ||
167 | - gen_vfp_negd(ta, ta); | ||
168 | + gen_vfp_maybe_ah_negd(s, ta, ta); | ||
169 | } | ||
170 | if (neg_n) { | ||
171 | - gen_vfp_negd(tn, tn); | ||
172 | + gen_vfp_maybe_ah_negd(s, tn, tn); | ||
173 | } | ||
174 | fpst = fpstatus_ptr(FPST_A64); | ||
175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); | ||
178 | |||
179 | if (neg_a) { | ||
180 | - gen_vfp_negs(ta, ta); | ||
181 | + gen_vfp_maybe_ah_negs(s, ta, ta); | ||
182 | } | ||
183 | if (neg_n) { | ||
184 | - gen_vfp_negs(tn, tn); | ||
185 | + gen_vfp_maybe_ah_negs(s, tn, tn); | ||
186 | } | ||
187 | fpst = fpstatus_ptr(FPST_A64); | ||
188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); | ||
191 | |||
192 | if (neg_a) { | ||
193 | - gen_vfp_negh(ta, ta); | ||
194 | + gen_vfp_maybe_ah_negh(s, ta, ta); | ||
195 | } | ||
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
203 | return true; | ||
204 | } | ||
205 | |||
206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, | ||
207 | + const FPScalar1Int *fnormal, | ||
208 | + const FPScalar1Int *fah) | ||
209 | +{ | ||
210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); | ||
211 | +} | ||
212 | + | ||
213 | static const FPScalar1Int f_scalar_fmov = { | ||
214 | tcg_gen_mov_i32, | ||
215 | tcg_gen_mov_i32, | ||
216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { | ||
217 | gen_vfp_negs, | ||
218 | gen_vfp_negd, | ||
219 | }; | ||
220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
221 | +static const FPScalar1Int f_scalar_ah_fneg = { | ||
222 | + gen_vfp_ah_negh, | ||
223 | + gen_vfp_ah_negs, | ||
224 | + gen_vfp_ah_negd, | ||
225 | +}; | ||
226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) | ||
227 | |||
228 | typedef struct FPScalar1 { | ||
229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
108 | -- | 230 | -- |
109 | 2.17.1 | 231 | 2.34.1 |
110 | |||
111 | diff view generated by jsdifflib |
1 | Implement the Arm TrustZone Memory Protection Controller, which sits | 1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should |
---|---|---|---|
2 | in front of RAM and allows secure software to configure it to either | 2 | not change its sign bit. This means we can no longer use |
3 | pass through or reject transactions. | 3 | gen_vfp_abs*() everywhere but must instead generate slightly more |
4 | complex code when FPCR.AH is set. | ||
4 | 5 | ||
5 | We implement the MPC as a QEMU IOMMU, which will direct transactions | 6 | Implement these semantics for scalar FABS and FABD. This change also |
6 | either through to the devices and memory behind it or to a special | 7 | affects all other instructions whose pseudocode calls FPAbs(); we |
7 | "never works" AddressSpace if they are blocked. | 8 | will extend the change to those instructions in following commits. |
8 | |||
9 | This initial commit implements the skeleton of the device: | ||
10 | * it always permits accesses | ||
11 | * it doesn't implement most of the registers | ||
12 | * it doesn't implement the interrupt or other behaviour | ||
13 | for blocked transactions | ||
14 | 9 | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
17 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
18 | Message-id: 20180620132032.28865-2-peter.maydell@linaro.org | ||
19 | --- | 12 | --- |
20 | hw/misc/Makefile.objs | 1 + | 13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- |
21 | include/hw/misc/tz-mpc.h | 70 ++++++ | 14 | 1 file changed, 67 insertions(+), 2 deletions(-) |
22 | hw/misc/tz-mpc.c | 399 ++++++++++++++++++++++++++++++++ | ||
23 | MAINTAINERS | 2 + | ||
24 | default-configs/arm-softmmu.mak | 1 + | ||
25 | hw/misc/trace-events | 7 + | ||
26 | 6 files changed, 480 insertions(+) | ||
27 | create mode 100644 include/hw/misc/tz-mpc.h | ||
28 | create mode 100644 hw/misc/tz-mpc.c | ||
29 | 15 | ||
30 | diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
31 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/hw/misc/Makefile.objs | 18 | --- a/target/arm/tcg/translate-a64.c |
33 | +++ b/hw/misc/Makefile.objs | 19 | +++ b/target/arm/tcg/translate-a64.c |
34 | @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_MIPS_ITU) += mips_itu.o | 20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) |
35 | obj-$(CONFIG_MPS2_FPGAIO) += mps2-fpgaio.o | 21 | s, chs_s); |
36 | obj-$(CONFIG_MPS2_SCC) += mps2-scc.o | 22 | } |
37 | 23 | ||
38 | +obj-$(CONFIG_TZ_MPC) += tz-mpc.o | ||
39 | obj-$(CONFIG_TZ_PPC) += tz-ppc.o | ||
40 | obj-$(CONFIG_IOTKIT_SECCTL) += iotkit-secctl.o | ||
41 | |||
42 | diff --git a/include/hw/misc/tz-mpc.h b/include/hw/misc/tz-mpc.h | ||
43 | new file mode 100644 | ||
44 | index XXXXXXX..XXXXXXX | ||
45 | --- /dev/null | ||
46 | +++ b/include/hw/misc/tz-mpc.h | ||
47 | @@ -XXX,XX +XXX,XX @@ | ||
48 | +/* | 24 | +/* |
49 | + * ARM AHB5 TrustZone Memory Protection Controller emulation | 25 | + * These functions implement |
50 | + * | 26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) |
51 | + * Copyright (c) 2018 Linaro Limited | 27 | + * which for float32 is |
52 | + * Written by Peter Maydell | 28 | + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) |
53 | + * | 29 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) |
54 | + * This program is free software; you can redistribute it and/or modify | ||
55 | + * it under the terms of the GNU General Public License version 2 or | ||
56 | + * (at your option) any later version. | ||
57 | + */ | 30 | + */ |
31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) | ||
32 | +{ | ||
33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
58 | + | 34 | + |
59 | +/* This is a model of the TrustZone memory protection controller (MPC). | 35 | + gen_vfp_absh(abs_s, s); |
60 | + * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM | 36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
61 | + * (DDI 0571G): | 37 | + abs_s, tcg_constant_i32(0x7c00), |
62 | + * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g | 38 | + s, abs_s); |
63 | + * | ||
64 | + * The MPC sits in front of memory and allows secure software to | ||
65 | + * configure it to either pass through or reject transactions. | ||
66 | + * Rejected transactions may be configured to either be aborted, or to | ||
67 | + * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction. | ||
68 | + * | ||
69 | + * The MPC has a register interface which the guest uses to configure it. | ||
70 | + * | ||
71 | + * QEMU interface: | ||
72 | + * + sysbus MMIO region 0: MemoryRegion for the MPC's config registers | ||
73 | + * + sysbus MMIO region 1: MemoryRegion for the upstream end of the MPC | ||
74 | + * + Property "downstream": MemoryRegion defining the downstream memory | ||
75 | + * + Named GPIO output "irq": set for a transaction-failed interrupt | ||
76 | + */ | ||
77 | + | ||
78 | +#ifndef TZ_MPC_H | ||
79 | +#define TZ_MPC_H | ||
80 | + | ||
81 | +#include "hw/sysbus.h" | ||
82 | + | ||
83 | +#define TYPE_TZ_MPC "tz-mpc" | ||
84 | +#define TZ_MPC(obj) OBJECT_CHECK(TZMPC, (obj), TYPE_TZ_MPC) | ||
85 | + | ||
86 | +#define TZ_NUM_PORTS 16 | ||
87 | + | ||
88 | +#define TYPE_TZ_MPC_IOMMU_MEMORY_REGION "tz-mpc-iommu-memory-region" | ||
89 | + | ||
90 | +typedef struct TZMPC TZMPC; | ||
91 | + | ||
92 | +struct TZMPC { | ||
93 | + /*< private >*/ | ||
94 | + SysBusDevice parent_obj; | ||
95 | + | ||
96 | + /*< public >*/ | ||
97 | + | ||
98 | + qemu_irq irq; | ||
99 | + | ||
100 | + /* Properties */ | ||
101 | + MemoryRegion *downstream; | ||
102 | + | ||
103 | + hwaddr blocksize; | ||
104 | + uint32_t blk_max; | ||
105 | + | ||
106 | + /* MemoryRegions exposed to user */ | ||
107 | + MemoryRegion regmr; | ||
108 | + IOMMUMemoryRegion upstream; | ||
109 | + | ||
110 | + /* MemoryRegion used internally */ | ||
111 | + MemoryRegion blocked_io; | ||
112 | + | ||
113 | + AddressSpace downstream_as; | ||
114 | + AddressSpace blocked_io_as; | ||
115 | +}; | ||
116 | + | ||
117 | +#endif | ||
118 | diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c | ||
119 | new file mode 100644 | ||
120 | index XXXXXXX..XXXXXXX | ||
121 | --- /dev/null | ||
122 | +++ b/hw/misc/tz-mpc.c | ||
123 | @@ -XXX,XX +XXX,XX @@ | ||
124 | +/* | ||
125 | + * ARM AHB5 TrustZone Memory Protection Controller emulation | ||
126 | + * | ||
127 | + * Copyright (c) 2018 Linaro Limited | ||
128 | + * Written by Peter Maydell | ||
129 | + * | ||
130 | + * This program is free software; you can redistribute it and/or modify | ||
131 | + * it under the terms of the GNU General Public License version 2 or | ||
132 | + * (at your option) any later version. | ||
133 | + */ | ||
134 | + | ||
135 | +#include "qemu/osdep.h" | ||
136 | +#include "qemu/log.h" | ||
137 | +#include "qapi/error.h" | ||
138 | +#include "trace.h" | ||
139 | +#include "hw/sysbus.h" | ||
140 | +#include "hw/registerfields.h" | ||
141 | +#include "hw/misc/tz-mpc.h" | ||
142 | + | ||
143 | +/* Our IOMMU has two IOMMU indexes, one for secure transactions and one for | ||
144 | + * non-secure transactions. | ||
145 | + */ | ||
146 | +enum { | ||
147 | + IOMMU_IDX_S, | ||
148 | + IOMMU_IDX_NS, | ||
149 | + IOMMU_NUM_INDEXES, | ||
150 | +}; | ||
151 | + | ||
152 | +/* Config registers */ | ||
153 | +REG32(CTRL, 0x00) | ||
154 | +REG32(BLK_MAX, 0x10) | ||
155 | +REG32(BLK_CFG, 0x14) | ||
156 | +REG32(BLK_IDX, 0x18) | ||
157 | +REG32(BLK_LUT, 0x1c) | ||
158 | +REG32(INT_STAT, 0x20) | ||
159 | +REG32(INT_CLEAR, 0x24) | ||
160 | +REG32(INT_EN, 0x28) | ||
161 | +REG32(INT_INFO1, 0x2c) | ||
162 | +REG32(INT_INFO2, 0x30) | ||
163 | +REG32(INT_SET, 0x34) | ||
164 | +REG32(PIDR4, 0xfd0) | ||
165 | +REG32(PIDR5, 0xfd4) | ||
166 | +REG32(PIDR6, 0xfd8) | ||
167 | +REG32(PIDR7, 0xfdc) | ||
168 | +REG32(PIDR0, 0xfe0) | ||
169 | +REG32(PIDR1, 0xfe4) | ||
170 | +REG32(PIDR2, 0xfe8) | ||
171 | +REG32(PIDR3, 0xfec) | ||
172 | +REG32(CIDR0, 0xff0) | ||
173 | +REG32(CIDR1, 0xff4) | ||
174 | +REG32(CIDR2, 0xff8) | ||
175 | +REG32(CIDR3, 0xffc) | ||
176 | + | ||
177 | +static const uint8_t tz_mpc_idregs[] = { | ||
178 | + 0x04, 0x00, 0x00, 0x00, | ||
179 | + 0x60, 0xb8, 0x1b, 0x00, | ||
180 | + 0x0d, 0xf0, 0x05, 0xb1, | ||
181 | +}; | ||
182 | + | ||
183 | +static MemTxResult tz_mpc_reg_read(void *opaque, hwaddr addr, | ||
184 | + uint64_t *pdata, | ||
185 | + unsigned size, MemTxAttrs attrs) | ||
186 | +{ | ||
187 | + uint64_t r; | ||
188 | + uint32_t offset = addr & ~0x3; | ||
189 | + | ||
190 | + if (!attrs.secure && offset < A_PIDR4) { | ||
191 | + /* NS accesses can only see the ID registers */ | ||
192 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
193 | + "TZ MPC register read: NS access to offset 0x%x\n", | ||
194 | + offset); | ||
195 | + r = 0; | ||
196 | + goto read_out; | ||
197 | + } | ||
198 | + | ||
199 | + switch (offset) { | ||
200 | + case A_PIDR4: | ||
201 | + case A_PIDR5: | ||
202 | + case A_PIDR6: | ||
203 | + case A_PIDR7: | ||
204 | + case A_PIDR0: | ||
205 | + case A_PIDR1: | ||
206 | + case A_PIDR2: | ||
207 | + case A_PIDR3: | ||
208 | + case A_CIDR0: | ||
209 | + case A_CIDR1: | ||
210 | + case A_CIDR2: | ||
211 | + case A_CIDR3: | ||
212 | + r = tz_mpc_idregs[(offset - A_PIDR4) / 4]; | ||
213 | + break; | ||
214 | + case A_INT_CLEAR: | ||
215 | + case A_INT_SET: | ||
216 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
217 | + "TZ MPC register read: write-only offset 0x%x\n", | ||
218 | + offset); | ||
219 | + r = 0; | ||
220 | + break; | ||
221 | + default: | ||
222 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
223 | + "TZ MPC register read: bad offset 0x%x\n", offset); | ||
224 | + r = 0; | ||
225 | + break; | ||
226 | + } | ||
227 | + | ||
228 | + if (size != 4) { | ||
229 | + /* None of our registers are read-sensitive (except BLK_LUT, | ||
230 | + * which can special case the "size not 4" case), so just | ||
231 | + * pull the right bytes out of the word read result. | ||
232 | + */ | ||
233 | + r = extract32(r, (addr & 3) * 8, size * 8); | ||
234 | + } | ||
235 | + | ||
236 | +read_out: | ||
237 | + trace_tz_mpc_reg_read(addr, r, size); | ||
238 | + *pdata = r; | ||
239 | + return MEMTX_OK; | ||
240 | +} | 39 | +} |
241 | + | 40 | + |
242 | +static MemTxResult tz_mpc_reg_write(void *opaque, hwaddr addr, | 41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) |
243 | + uint64_t value, | ||
244 | + unsigned size, MemTxAttrs attrs) | ||
245 | +{ | 42 | +{ |
246 | + uint32_t offset = addr & ~0x3; | 43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); |
247 | + | 44 | + |
248 | + trace_tz_mpc_reg_write(addr, value, size); | 45 | + gen_vfp_abss(abs_s, s); |
249 | + | 46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
250 | + if (!attrs.secure && offset < A_PIDR4) { | 47 | + abs_s, tcg_constant_i32(0x7f800000UL), |
251 | + /* NS accesses can only see the ID registers */ | 48 | + s, abs_s); |
252 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
253 | + "TZ MPC register write: NS access to offset 0x%x\n", | ||
254 | + offset); | ||
255 | + return MEMTX_OK; | ||
256 | + } | ||
257 | + | ||
258 | + if (size != 4) { | ||
259 | + /* Expand the byte or halfword write to a full word size. | ||
260 | + * In most cases we can do this with zeroes; the exceptions | ||
261 | + * are CTRL, BLK_IDX and BLK_LUT. | ||
262 | + */ | ||
263 | + uint32_t oldval; | ||
264 | + | ||
265 | + switch (offset) { | ||
266 | + /* As we add support for registers which need expansions | ||
267 | + * other than zeroes we'll fill in cases here. | ||
268 | + */ | ||
269 | + default: | ||
270 | + oldval = 0; | ||
271 | + break; | ||
272 | + } | ||
273 | + value = deposit32(oldval, (addr & 3) * 8, size * 8, value); | ||
274 | + } | ||
275 | + | ||
276 | + switch (offset) { | ||
277 | + case A_PIDR4: | ||
278 | + case A_PIDR5: | ||
279 | + case A_PIDR6: | ||
280 | + case A_PIDR7: | ||
281 | + case A_PIDR0: | ||
282 | + case A_PIDR1: | ||
283 | + case A_PIDR2: | ||
284 | + case A_PIDR3: | ||
285 | + case A_CIDR0: | ||
286 | + case A_CIDR1: | ||
287 | + case A_CIDR2: | ||
288 | + case A_CIDR3: | ||
289 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
290 | + "TZ MPC register write: read-only offset 0x%x\n", offset); | ||
291 | + break; | ||
292 | + default: | ||
293 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
294 | + "TZ MPC register write: bad offset 0x%x\n", offset); | ||
295 | + break; | ||
296 | + } | ||
297 | + | ||
298 | + return MEMTX_OK; | ||
299 | +} | 49 | +} |
300 | + | 50 | + |
301 | +static const MemoryRegionOps tz_mpc_reg_ops = { | 51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) |
302 | + .read_with_attrs = tz_mpc_reg_read, | 52 | +{ |
303 | + .write_with_attrs = tz_mpc_reg_write, | 53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); |
304 | + .endianness = DEVICE_LITTLE_ENDIAN, | ||
305 | + .valid.min_access_size = 1, | ||
306 | + .valid.max_access_size = 4, | ||
307 | + .impl.min_access_size = 1, | ||
308 | + .impl.max_access_size = 4, | ||
309 | +}; | ||
310 | + | 54 | + |
311 | +/* Accesses only reach these read and write functions if the MPC is | 55 | + gen_vfp_absd(abs_s, s); |
312 | + * blocking them; non-blocked accesses go directly to the downstream | 56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, |
313 | + * memory region without passing through this code. | 57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), |
314 | + */ | 58 | + s, abs_s); |
315 | +static MemTxResult tz_mpc_mem_blocked_read(void *opaque, hwaddr addr, | ||
316 | + uint64_t *pdata, | ||
317 | + unsigned size, MemTxAttrs attrs) | ||
318 | +{ | ||
319 | + trace_tz_mpc_mem_blocked_read(addr, size, attrs.secure); | ||
320 | + | ||
321 | + *pdata = 0; | ||
322 | + return MEMTX_OK; | ||
323 | +} | 59 | +} |
324 | + | 60 | + |
325 | +static MemTxResult tz_mpc_mem_blocked_write(void *opaque, hwaddr addr, | 61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
326 | + uint64_t value, | 62 | { |
327 | + unsigned size, MemTxAttrs attrs) | 63 | if (dc->fpcr_ah) { |
64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
65 | gen_vfp_absd(d, d); | ||
66 | } | ||
67 | |||
68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
328 | +{ | 69 | +{ |
329 | + trace_tz_mpc_mem_blocked_write(addr, value, size, attrs.secure); | 70 | + gen_helper_vfp_subh(d, n, m, s); |
330 | + | 71 | + gen_vfp_ah_absh(d, d); |
331 | + return MEMTX_OK; | ||
332 | +} | 72 | +} |
333 | + | 73 | + |
334 | +static const MemoryRegionOps tz_mpc_mem_blocked_ops = { | 74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
335 | + .read_with_attrs = tz_mpc_mem_blocked_read, | ||
336 | + .write_with_attrs = tz_mpc_mem_blocked_write, | ||
337 | + .endianness = DEVICE_LITTLE_ENDIAN, | ||
338 | + .valid.min_access_size = 1, | ||
339 | + .valid.max_access_size = 8, | ||
340 | + .impl.min_access_size = 1, | ||
341 | + .impl.max_access_size = 8, | ||
342 | +}; | ||
343 | + | ||
344 | +static IOMMUTLBEntry tz_mpc_translate(IOMMUMemoryRegion *iommu, | ||
345 | + hwaddr addr, IOMMUAccessFlags flags, | ||
346 | + int iommu_idx) | ||
347 | +{ | 75 | +{ |
348 | + TZMPC *s = TZ_MPC(container_of(iommu, TZMPC, upstream)); | 76 | + gen_helper_vfp_subs(d, n, m, s); |
349 | + bool ok; | 77 | + gen_vfp_ah_abss(d, d); |
350 | + | ||
351 | + IOMMUTLBEntry ret = { | ||
352 | + .iova = addr & ~(s->blocksize - 1), | ||
353 | + .translated_addr = addr & ~(s->blocksize - 1), | ||
354 | + .addr_mask = s->blocksize - 1, | ||
355 | + .perm = IOMMU_RW, | ||
356 | + }; | ||
357 | + | ||
358 | + /* Look at the per-block configuration for this address, and | ||
359 | + * return a TLB entry directing the transaction at either | ||
360 | + * downstream_as or blocked_io_as, as appropriate. | ||
361 | + * For the moment, always permit accesses. | ||
362 | + */ | ||
363 | + ok = true; | ||
364 | + | ||
365 | + trace_tz_mpc_translate(addr, flags, | ||
366 | + iommu_idx == IOMMU_IDX_S ? "S" : "NS", | ||
367 | + ok ? "pass" : "block"); | ||
368 | + | ||
369 | + ret.target_as = ok ? &s->downstream_as : &s->blocked_io_as; | ||
370 | + return ret; | ||
371 | +} | 78 | +} |
372 | + | 79 | + |
373 | +static int tz_mpc_attrs_to_index(IOMMUMemoryRegion *iommu, MemTxAttrs attrs) | 80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
374 | +{ | 81 | +{ |
375 | + /* We treat unspecified attributes like secure. Transactions with | 82 | + gen_helper_vfp_subd(d, n, m, s); |
376 | + * unspecified attributes come from places like | 83 | + gen_vfp_ah_absd(d, d); |
377 | + * cpu_physical_memory_write_rom() for initial image load, and we want | ||
378 | + * those to pass through the from-reset "everything is secure" config. | ||
379 | + * All the real during-emulation transactions from the CPU will | ||
380 | + * specify attributes. | ||
381 | + */ | ||
382 | + return (attrs.unspecified || attrs.secure) ? IOMMU_IDX_S : IOMMU_IDX_NS; | ||
383 | +} | 84 | +} |
384 | + | 85 | + |
385 | +static int tz_mpc_num_indexes(IOMMUMemoryRegion *iommu) | 86 | static const FPScalar f_scalar_fabd = { |
386 | +{ | 87 | gen_fabd_h, |
387 | + return IOMMU_NUM_INDEXES; | 88 | gen_fabd_s, |
388 | +} | 89 | gen_fabd_d, |
389 | + | 90 | }; |
390 | +static void tz_mpc_reset(DeviceState *dev) | 91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) |
391 | +{ | 92 | +static const FPScalar f_scalar_ah_fabd = { |
392 | +} | 93 | + gen_fabd_ah_h, |
393 | + | 94 | + gen_fabd_ah_s, |
394 | +static void tz_mpc_init(Object *obj) | 95 | + gen_fabd_ah_d, |
395 | +{ | ||
396 | + DeviceState *dev = DEVICE(obj); | ||
397 | + TZMPC *s = TZ_MPC(obj); | ||
398 | + | ||
399 | + qdev_init_gpio_out_named(dev, &s->irq, "irq", 1); | ||
400 | +} | ||
401 | + | ||
402 | +static void tz_mpc_realize(DeviceState *dev, Error **errp) | ||
403 | +{ | ||
404 | + Object *obj = OBJECT(dev); | ||
405 | + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); | ||
406 | + TZMPC *s = TZ_MPC(dev); | ||
407 | + uint64_t size; | ||
408 | + | ||
409 | + /* We can't create the upstream end of the port until realize, | ||
410 | + * as we don't know the size of the MR used as the downstream until then. | ||
411 | + * We insist on having a downstream, to avoid complicating the code | ||
412 | + * with handling the "don't know how big this is" case. It's easy | ||
413 | + * enough for the user to create an unimplemented_device as downstream | ||
414 | + * if they have nothing else to plug into this. | ||
415 | + */ | ||
416 | + if (!s->downstream) { | ||
417 | + error_setg(errp, "MPC 'downstream' link not set"); | ||
418 | + return; | ||
419 | + } | ||
420 | + | ||
421 | + size = memory_region_size(s->downstream); | ||
422 | + | ||
423 | + memory_region_init_iommu(&s->upstream, sizeof(s->upstream), | ||
424 | + TYPE_TZ_MPC_IOMMU_MEMORY_REGION, | ||
425 | + obj, "tz-mpc-upstream", size); | ||
426 | + | ||
427 | + /* In real hardware the block size is configurable. In QEMU we could | ||
428 | + * make it configurable but will need it to be at least as big as the | ||
429 | + * target page size so we can execute out of the resulting MRs. Guest | ||
430 | + * software is supposed to check the block size using the BLK_CFG | ||
431 | + * register, so make it fixed at the page size. | ||
432 | + */ | ||
433 | + s->blocksize = memory_region_iommu_get_min_page_size(&s->upstream); | ||
434 | + if (size % s->blocksize != 0) { | ||
435 | + error_setg(errp, | ||
436 | + "MPC 'downstream' size %" PRId64 | ||
437 | + " is not a multiple of %" HWADDR_PRIx " bytes", | ||
438 | + size, s->blocksize); | ||
439 | + object_unref(OBJECT(&s->upstream)); | ||
440 | + return; | ||
441 | + } | ||
442 | + | ||
443 | + /* BLK_MAX is the max value of BLK_IDX, which indexes an array of 32-bit | ||
444 | + * words, each bit of which indicates one block. | ||
445 | + */ | ||
446 | + s->blk_max = DIV_ROUND_UP(size / s->blocksize, 32); | ||
447 | + | ||
448 | + memory_region_init_io(&s->regmr, obj, &tz_mpc_reg_ops, | ||
449 | + s, "tz-mpc-regs", 0x1000); | ||
450 | + sysbus_init_mmio(sbd, &s->regmr); | ||
451 | + | ||
452 | + sysbus_init_mmio(sbd, MEMORY_REGION(&s->upstream)); | ||
453 | + | ||
454 | + /* This memory region is not exposed to users of this device as a | ||
455 | + * sysbus MMIO region, but is instead used internally as something | ||
456 | + * that our IOMMU translate function might direct accesses to. | ||
457 | + */ | ||
458 | + memory_region_init_io(&s->blocked_io, obj, &tz_mpc_mem_blocked_ops, | ||
459 | + s, "tz-mpc-blocked-io", size); | ||
460 | + | ||
461 | + address_space_init(&s->downstream_as, s->downstream, | ||
462 | + "tz-mpc-downstream"); | ||
463 | + address_space_init(&s->blocked_io_as, &s->blocked_io, | ||
464 | + "tz-mpc-blocked-io"); | ||
465 | +} | ||
466 | + | ||
467 | +static const VMStateDescription tz_mpc_vmstate = { | ||
468 | + .name = "tz-mpc", | ||
469 | + .version_id = 1, | ||
470 | + .minimum_version_id = 1, | ||
471 | + .fields = (VMStateField[]) { | ||
472 | + VMSTATE_END_OF_LIST() | ||
473 | + } | ||
474 | +}; | 96 | +}; |
475 | + | 97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) |
476 | +static Property tz_mpc_properties[] = { | 98 | |
477 | + DEFINE_PROP_LINK("downstream", TZMPC, downstream, | 99 | static const FPScalar f_scalar_frecps = { |
478 | + TYPE_MEMORY_REGION, MemoryRegion *), | 100 | gen_helper_recpsf_f16, |
479 | + DEFINE_PROP_END_OF_LIST(), | 101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { |
102 | gen_vfp_abss, | ||
103 | gen_vfp_absd, | ||
104 | }; | ||
105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
106 | +static const FPScalar1Int f_scalar_ah_fabs = { | ||
107 | + gen_vfp_ah_absh, | ||
108 | + gen_vfp_ah_abss, | ||
109 | + gen_vfp_ah_absd, | ||
480 | +}; | 110 | +}; |
481 | + | 111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) |
482 | +static void tz_mpc_class_init(ObjectClass *klass, void *data) | 112 | |
483 | +{ | 113 | static const FPScalar1Int f_scalar_fneg = { |
484 | + DeviceClass *dc = DEVICE_CLASS(klass); | 114 | gen_vfp_negh, |
485 | + | ||
486 | + dc->realize = tz_mpc_realize; | ||
487 | + dc->vmsd = &tz_mpc_vmstate; | ||
488 | + dc->reset = tz_mpc_reset; | ||
489 | + dc->props = tz_mpc_properties; | ||
490 | +} | ||
491 | + | ||
492 | +static const TypeInfo tz_mpc_info = { | ||
493 | + .name = TYPE_TZ_MPC, | ||
494 | + .parent = TYPE_SYS_BUS_DEVICE, | ||
495 | + .instance_size = sizeof(TZMPC), | ||
496 | + .instance_init = tz_mpc_init, | ||
497 | + .class_init = tz_mpc_class_init, | ||
498 | +}; | ||
499 | + | ||
500 | +static void tz_mpc_iommu_memory_region_class_init(ObjectClass *klass, | ||
501 | + void *data) | ||
502 | +{ | ||
503 | + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); | ||
504 | + | ||
505 | + imrc->translate = tz_mpc_translate; | ||
506 | + imrc->attrs_to_index = tz_mpc_attrs_to_index; | ||
507 | + imrc->num_indexes = tz_mpc_num_indexes; | ||
508 | +} | ||
509 | + | ||
510 | +static const TypeInfo tz_mpc_iommu_memory_region_info = { | ||
511 | + .name = TYPE_TZ_MPC_IOMMU_MEMORY_REGION, | ||
512 | + .parent = TYPE_IOMMU_MEMORY_REGION, | ||
513 | + .class_init = tz_mpc_iommu_memory_region_class_init, | ||
514 | +}; | ||
515 | + | ||
516 | +static void tz_mpc_register_types(void) | ||
517 | +{ | ||
518 | + type_register_static(&tz_mpc_info); | ||
519 | + type_register_static(&tz_mpc_iommu_memory_region_info); | ||
520 | +} | ||
521 | + | ||
522 | +type_init(tz_mpc_register_types); | ||
523 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
524 | index XXXXXXX..XXXXXXX 100644 | ||
525 | --- a/MAINTAINERS | ||
526 | +++ b/MAINTAINERS | ||
527 | @@ -XXX,XX +XXX,XX @@ F: hw/char/cmsdk-apb-uart.c | ||
528 | F: include/hw/char/cmsdk-apb-uart.h | ||
529 | F: hw/misc/tz-ppc.c | ||
530 | F: include/hw/misc/tz-ppc.h | ||
531 | +F: hw/misc/tz-mpc.c | ||
532 | +F: include/hw/misc/tz-mpc.h | ||
533 | |||
534 | ARM cores | ||
535 | M: Peter Maydell <peter.maydell@linaro.org> | ||
536 | diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak | ||
537 | index XXXXXXX..XXXXXXX 100644 | ||
538 | --- a/default-configs/arm-softmmu.mak | ||
539 | +++ b/default-configs/arm-softmmu.mak | ||
540 | @@ -XXX,XX +XXX,XX @@ CONFIG_CMSDK_APB_UART=y | ||
541 | CONFIG_MPS2_FPGAIO=y | ||
542 | CONFIG_MPS2_SCC=y | ||
543 | |||
544 | +CONFIG_TZ_MPC=y | ||
545 | CONFIG_TZ_PPC=y | ||
546 | CONFIG_IOTKIT=y | ||
547 | CONFIG_IOTKIT_SECCTL=y | ||
548 | diff --git a/hw/misc/trace-events b/hw/misc/trace-events | ||
549 | index XXXXXXX..XXXXXXX 100644 | ||
550 | --- a/hw/misc/trace-events | ||
551 | +++ b/hw/misc/trace-events | ||
552 | @@ -XXX,XX +XXX,XX @@ mos6522_set_sr_int(void) "set sr_int" | ||
553 | mos6522_write(uint64_t addr, uint64_t val) "reg=0x%"PRIx64 " val=0x%"PRIx64 | ||
554 | mos6522_read(uint64_t addr, unsigned val) "reg=0x%"PRIx64 " val=0x%x" | ||
555 | |||
556 | +# hw/misc/tz-mpc.c | ||
557 | +tz_mpc_reg_read(uint32_t offset, uint64_t data, unsigned size) "TZ MPC regs read: offset 0x%x data 0x%" PRIx64 " size %u" | ||
558 | +tz_mpc_reg_write(uint32_t offset, uint64_t data, unsigned size) "TZ MPC regs write: offset 0x%x data 0x%" PRIx64 " size %u" | ||
559 | +tz_mpc_mem_blocked_read(uint64_t addr, unsigned size, bool secure) "TZ MPC blocked read: offset 0x%" PRIx64 " size %u secure %d" | ||
560 | +tz_mpc_mem_blocked_write(uint64_t addr, uint64_t data, unsigned size, bool secure) "TZ MPC blocked write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d" | ||
561 | +tz_mpc_translate(uint64_t addr, int flags, const char *idx, const char *res) "TZ MPC translate: addr 0x%" PRIx64 " flags 0x%x iommu_idx %s: %s" | ||
562 | + | ||
563 | # hw/misc/tz-ppc.c | ||
564 | tz_ppc_reset(void) "TZ PPC: reset" | ||
565 | tz_ppc_cfg_nonsec(int n, int level) "TZ PPC: cfg_nonsec[%d] = %d" | ||
566 | -- | 115 | -- |
567 | 2.17.1 | 116 | 2.34.1 |
568 | |||
569 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Split the handling of vector FABD so that it calls a different set | ||
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 4 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ | ||
11 | 3 files changed, 33 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/helper.h | ||
16 | +++ b/target/arm/helper.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
52 | } | ||
53 | |||
54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) | ||
56 | +{ | ||
57 | + float16 r = float16_sub(op1, op2, stat); | ||
58 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
59 | +} | ||
60 | + | ||
61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) | ||
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
66 | + | ||
67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) | ||
68 | +{ | ||
69 | + float64 r = float64_sub(op1, op2, stat); | ||
70 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
71 | +} | ||
72 | + | ||
73 | /* | ||
74 | * Reciprocal step. These are the AArch32 version which uses a | ||
75 | * non-fused multiply-and-subtract. | ||
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign | ||
2 | of a NaN" semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 7 +++++++ | ||
8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 2 +- | ||
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
36 | } | ||
37 | |||
38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) | ||
40 | +{ | ||
41 | + float16 r = float16_sub(op1, op2, stat); | ||
42 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
43 | +} | ||
44 | + | ||
45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) | ||
46 | +{ | ||
47 | + float32 r = float32_sub(op1, op2, stat); | ||
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
49 | +} | ||
50 | + | ||
51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) | ||
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
65 | { | ||
66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
79 | -- | ||
80 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
1 | 6 | ||
7 | The construction of neg_imag and neg_real was done to make it easy | ||
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
13 | |||
14 | Note that there was a naming issue with neg_imag and neg_real. | ||
15 | They were named backward, with neg_imag being non-zero for rot=1, | ||
16 | and vice versa. This was combined with reversed usage within the | ||
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ | ||
23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- | ||
24 | target/arm/tcg/translate-sve.c | 2 +- | ||
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
26 | |||
27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/vec_internal.h | ||
30 | +++ b/target/arm/tcg/vec_internal.h | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
41 | */ | ||
42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
43 | |||
44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
45 | +{ | ||
46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
47 | +} | ||
48 | + | ||
49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | ||
50 | +{ | ||
51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | ||
52 | +} | ||
53 | + | ||
54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | ||
55 | +{ | ||
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
58 | + | ||
59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | ||
60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/sve_helper.c | ||
63 | +++ b/target/arm/tcg/sve_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
65 | { | ||
66 | intptr_t j, i = simd_oprsz(desc); | ||
67 | uint64_t *g = vg; | ||
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
164 | -- | ||
165 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into | ||
3 | the SIMD data field passed to the helper and using that to decide | ||
4 | whether to negate the values. | ||
1 | 5 | ||
6 | The construction of neg_imag and neg_real was done to make it easy | ||
7 | to apply both in parallel with two simple logical operations. This | ||
8 | changed with FPCR.AH, which is more complex than that. Switch to | ||
9 | an approach closer to the pseudocode, where we extract the rot | ||
10 | parameter from the SIMD data word and negate the appropriate | ||
11 | input value. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | target/arm/tcg/translate-a64.c | 10 +++++-- | ||
17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- | ||
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { | ||
25 | gen_helper_gvec_fcadds, | ||
26 | gen_helper_gvec_fcaddd, | ||
27 | }; | ||
28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) | ||
29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) | ||
30 | +/* | ||
31 | + * Encode FPCR.AH into the data so the helper knows whether the | ||
32 | + * negations it does should avoid flipping the sign bit on a NaN | ||
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
135 | -- | ||
136 | 2.34.1 | diff view generated by jsdifflib |
1 | Implement the missing registers for the TZ MPC. | 1 | Handle the FPCR.AH semantics that we do not change the sign of an |
---|---|---|---|
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
5 | |||
6 | Since the extra helper functions would be very repetitive if written | ||
7 | out longhand, we condense them and the existing non-AH helpers into | ||
8 | being emitted via macros. | ||
2 | 9 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Message-id: 20180620132032.28865-3-peter.maydell@linaro.org | ||
6 | --- | 12 | --- |
7 | include/hw/misc/tz-mpc.h | 10 +++ | 13 | target/arm/tcg/helper-a64.h | 6 ++ |
8 | hw/misc/tz-mpc.c | 140 ++++++++++++++++++++++++++++++++++++++- | 14 | target/arm/tcg/vec_internal.h | 18 ++++++ |
9 | 2 files changed, 147 insertions(+), 3 deletions(-) | 15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- |
16 | target/arm/tcg/translate-a64.c | 25 +++++-- | ||
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
10 | 18 | ||
11 | diff --git a/include/hw/misc/tz-mpc.h b/include/hw/misc/tz-mpc.h | 19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h |
12 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/include/hw/misc/tz-mpc.h | 21 | --- a/target/arm/tcg/helper-a64.h |
14 | +++ b/include/hw/misc/tz-mpc.h | 22 | +++ b/target/arm/tcg/helper-a64.h |
15 | @@ -XXX,XX +XXX,XX @@ struct TZMPC { | 23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) |
16 | 24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | |
17 | /*< public >*/ | 25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
18 | 26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | |
19 | + /* State */ | 27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
20 | + uint32_t ctrl; | 28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
21 | + uint32_t blk_idx; | 29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
22 | + uint32_t int_stat; | 30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
23 | + uint32_t int_en; | 31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
24 | + uint32_t int_info1; | 32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
25 | + uint32_t int_info2; | 33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) |
26 | + | 34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) |
27 | + uint32_t *blk_lut; | 35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) |
28 | + | 36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
29 | qemu_irq irq; | 37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
30 | 38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | |
31 | /* Properties */ | 39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h |
32 | diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c | 40 | index XXXXXXX..XXXXXXX 100644 |
33 | index XXXXXXX..XXXXXXX 100644 | 41 | --- a/target/arm/tcg/vec_internal.h |
34 | --- a/hw/misc/tz-mpc.c | 42 | +++ b/target/arm/tcg/vec_internal.h |
35 | +++ b/hw/misc/tz-mpc.c | 43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, |
36 | @@ -XXX,XX +XXX,XX @@ enum { | 44 | */ |
37 | 45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | |
38 | /* Config registers */ | 46 | |
39 | REG32(CTRL, 0x00) | 47 | +/* |
40 | + FIELD(CTRL, SEC_RESP, 4, 1) | 48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. |
41 | + FIELD(CTRL, AUTOINC, 8, 1) | 49 | + */ |
42 | + FIELD(CTRL, LOCKDOWN, 31, 1) | 50 | +static inline float16 float16_ah_chs(float16 a) |
43 | REG32(BLK_MAX, 0x10) | ||
44 | REG32(BLK_CFG, 0x14) | ||
45 | REG32(BLK_IDX, 0x18) | ||
46 | REG32(BLK_LUT, 0x1c) | ||
47 | REG32(INT_STAT, 0x20) | ||
48 | + FIELD(INT_STAT, IRQ, 0, 1) | ||
49 | REG32(INT_CLEAR, 0x24) | ||
50 | + FIELD(INT_CLEAR, IRQ, 0, 1) | ||
51 | REG32(INT_EN, 0x28) | ||
52 | + FIELD(INT_EN, IRQ, 0, 1) | ||
53 | REG32(INT_INFO1, 0x2c) | ||
54 | REG32(INT_INFO2, 0x30) | ||
55 | REG32(INT_SET, 0x34) | ||
56 | + FIELD(INT_SET, IRQ, 0, 1) | ||
57 | REG32(PIDR4, 0xfd0) | ||
58 | REG32(PIDR5, 0xfd4) | ||
59 | REG32(PIDR6, 0xfd8) | ||
60 | @@ -XXX,XX +XXX,XX @@ static const uint8_t tz_mpc_idregs[] = { | ||
61 | 0x0d, 0xf0, 0x05, 0xb1, | ||
62 | }; | ||
63 | |||
64 | +static void tz_mpc_irq_update(TZMPC *s) | ||
65 | +{ | 51 | +{ |
66 | + qemu_set_irq(s->irq, s->int_stat && s->int_en); | 52 | + return float16_is_any_nan(a) ? a : float16_chs(a); |
67 | +} | 53 | +} |
68 | + | 54 | + |
69 | +static void tz_mpc_autoinc_idx(TZMPC *s, unsigned access_size) | 55 | +static inline float32 float32_ah_chs(float32 a) |
70 | +{ | 56 | +{ |
71 | + /* Auto-increment BLK_IDX if necessary */ | 57 | + return float32_is_any_nan(a) ? a : float32_chs(a); |
72 | + if (access_size == 4 && (s->ctrl & R_CTRL_AUTOINC_MASK)) { | ||
73 | + s->blk_idx++; | ||
74 | + s->blk_idx %= s->blk_max; | ||
75 | + } | ||
76 | +} | 58 | +} |
77 | + | 59 | + |
78 | static MemTxResult tz_mpc_reg_read(void *opaque, hwaddr addr, | 60 | +static inline float64 float64_ah_chs(float64 a) |
79 | uint64_t *pdata, | 61 | +{ |
80 | unsigned size, MemTxAttrs attrs) | 62 | + return float64_is_any_nan(a) ? a : float64_chs(a); |
81 | { | ||
82 | + TZMPC *s = TZ_MPC(opaque); | ||
83 | uint64_t r; | ||
84 | uint32_t offset = addr & ~0x3; | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_reg_read(void *opaque, hwaddr addr, | ||
87 | } | ||
88 | |||
89 | switch (offset) { | ||
90 | + case A_CTRL: | ||
91 | + r = s->ctrl; | ||
92 | + break; | ||
93 | + case A_BLK_MAX: | ||
94 | + r = s->blk_max; | ||
95 | + break; | ||
96 | + case A_BLK_CFG: | ||
97 | + /* We are never in "init in progress state", so this just indicates | ||
98 | + * the block size. s->blocksize == (1 << BLK_CFG + 5), so | ||
99 | + * BLK_CFG == ctz32(s->blocksize) - 5 | ||
100 | + */ | ||
101 | + r = ctz32(s->blocksize) - 5; | ||
102 | + break; | ||
103 | + case A_BLK_IDX: | ||
104 | + r = s->blk_idx; | ||
105 | + break; | ||
106 | + case A_BLK_LUT: | ||
107 | + r = s->blk_lut[s->blk_idx]; | ||
108 | + tz_mpc_autoinc_idx(s, size); | ||
109 | + break; | ||
110 | + case A_INT_STAT: | ||
111 | + r = s->int_stat; | ||
112 | + break; | ||
113 | + case A_INT_EN: | ||
114 | + r = s->int_en; | ||
115 | + break; | ||
116 | + case A_INT_INFO1: | ||
117 | + r = s->int_info1; | ||
118 | + break; | ||
119 | + case A_INT_INFO2: | ||
120 | + r = s->int_info2; | ||
121 | + break; | ||
122 | case A_PIDR4: | ||
123 | case A_PIDR5: | ||
124 | case A_PIDR6: | ||
125 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_reg_write(void *opaque, hwaddr addr, | ||
126 | uint64_t value, | ||
127 | unsigned size, MemTxAttrs attrs) | ||
128 | { | ||
129 | + TZMPC *s = TZ_MPC(opaque); | ||
130 | uint32_t offset = addr & ~0x3; | ||
131 | |||
132 | trace_tz_mpc_reg_write(addr, value, size); | ||
133 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_reg_write(void *opaque, hwaddr addr, | ||
134 | uint32_t oldval; | ||
135 | |||
136 | switch (offset) { | ||
137 | - /* As we add support for registers which need expansions | ||
138 | - * other than zeroes we'll fill in cases here. | ||
139 | - */ | ||
140 | + case A_CTRL: | ||
141 | + oldval = s->ctrl; | ||
142 | + break; | ||
143 | + case A_BLK_IDX: | ||
144 | + oldval = s->blk_idx; | ||
145 | + break; | ||
146 | + case A_BLK_LUT: | ||
147 | + oldval = s->blk_lut[s->blk_idx]; | ||
148 | + break; | ||
149 | default: | ||
150 | oldval = 0; | ||
151 | break; | ||
152 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_reg_write(void *opaque, hwaddr addr, | ||
153 | value = deposit32(oldval, (addr & 3) * 8, size * 8, value); | ||
154 | } | ||
155 | |||
156 | + if ((s->ctrl & R_CTRL_LOCKDOWN_MASK) && | ||
157 | + (offset == A_CTRL || offset == A_BLK_LUT || offset == A_INT_EN)) { | ||
158 | + /* Lockdown mode makes these three registers read-only, and | ||
159 | + * the only way out of it is to reset the device. | ||
160 | + */ | ||
161 | + qemu_log_mask(LOG_GUEST_ERROR, "TZ MPC register write to offset 0x%x " | ||
162 | + "while MPC is in lockdown mode\n", offset); | ||
163 | + return MEMTX_OK; | ||
164 | + } | ||
165 | + | ||
166 | switch (offset) { | ||
167 | + case A_CTRL: | ||
168 | + /* We don't implement the 'data gating' feature so all other bits | ||
169 | + * are reserved and we make them RAZ/WI. | ||
170 | + */ | ||
171 | + s->ctrl = value & (R_CTRL_SEC_RESP_MASK | | ||
172 | + R_CTRL_AUTOINC_MASK | | ||
173 | + R_CTRL_LOCKDOWN_MASK); | ||
174 | + break; | ||
175 | + case A_BLK_IDX: | ||
176 | + s->blk_idx = value % s->blk_max; | ||
177 | + break; | ||
178 | + case A_BLK_LUT: | ||
179 | + s->blk_lut[s->blk_idx] = value; | ||
180 | + tz_mpc_autoinc_idx(s, size); | ||
181 | + break; | ||
182 | + case A_INT_CLEAR: | ||
183 | + if (value & R_INT_CLEAR_IRQ_MASK) { | ||
184 | + s->int_stat = 0; | ||
185 | + tz_mpc_irq_update(s); | ||
186 | + } | ||
187 | + break; | ||
188 | + case A_INT_EN: | ||
189 | + s->int_en = value & R_INT_EN_IRQ_MASK; | ||
190 | + tz_mpc_irq_update(s); | ||
191 | + break; | ||
192 | + case A_INT_SET: | ||
193 | + if (value & R_INT_SET_IRQ_MASK) { | ||
194 | + s->int_stat = R_INT_STAT_IRQ_MASK; | ||
195 | + tz_mpc_irq_update(s); | ||
196 | + } | ||
197 | + break; | ||
198 | case A_PIDR4: | ||
199 | case A_PIDR5: | ||
200 | case A_PIDR6: | ||
201 | @@ -XXX,XX +XXX,XX @@ static int tz_mpc_num_indexes(IOMMUMemoryRegion *iommu) | ||
202 | |||
203 | static void tz_mpc_reset(DeviceState *dev) | ||
204 | { | ||
205 | + TZMPC *s = TZ_MPC(dev); | ||
206 | + | ||
207 | + s->ctrl = 0x00000100; | ||
208 | + s->blk_idx = 0; | ||
209 | + s->int_stat = 0; | ||
210 | + s->int_en = 1; | ||
211 | + s->int_info1 = 0; | ||
212 | + s->int_info2 = 0; | ||
213 | + | ||
214 | + memset(s->blk_lut, 0, s->blk_max * sizeof(uint32_t)); | ||
215 | } | ||
216 | |||
217 | static void tz_mpc_init(Object *obj) | ||
218 | @@ -XXX,XX +XXX,XX @@ static void tz_mpc_realize(DeviceState *dev, Error **errp) | ||
219 | "tz-mpc-downstream"); | ||
220 | address_space_init(&s->blocked_io_as, &s->blocked_io, | ||
221 | "tz-mpc-blocked-io"); | ||
222 | + | ||
223 | + s->blk_lut = g_new(uint32_t, s->blk_max); | ||
224 | +} | 63 | +} |
225 | + | 64 | + |
226 | +static int tz_mpc_post_load(void *opaque, int version_id) | 65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) |
227 | +{ | 66 | { |
228 | + TZMPC *s = TZ_MPC(opaque); | 67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); |
229 | + | 68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c |
230 | + /* Check the incoming data doesn't point blk_idx off the end of blk_lut. */ | 69 | index XXXXXXX..XXXXXXX 100644 |
231 | + if (s->blk_idx >= s->blk_max) { | 70 | --- a/target/arm/tcg/helper-a64.c |
232 | + return -1; | 71 | +++ b/target/arm/tcg/helper-a64.c |
233 | + } | 72 | @@ -XXX,XX +XXX,XX @@ |
234 | + return 0; | 73 | #ifdef CONFIG_USER_ONLY |
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
235 | } | 82 | } |
236 | 83 | ||
237 | static const VMStateDescription tz_mpc_vmstate = { | 84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 |
238 | .name = "tz-mpc", | 85 | +/* |
239 | .version_id = 1, | 86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 |
240 | .minimum_version_id = 1, | 87 | * versions, these do a fully fused multiply-add or |
241 | + .post_load = tz_mpc_post_load, | 88 | * multiply-add-and-halve. |
242 | .fields = (VMStateField[]) { | 89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. |
243 | + VMSTATE_UINT32(ctrl, TZMPC), | 90 | */ |
244 | + VMSTATE_UINT32(blk_idx, TZMPC), | 91 | - |
245 | + VMSTATE_UINT32(int_stat, TZMPC), | 92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) |
246 | + VMSTATE_UINT32(int_en, TZMPC), | 93 | -{ |
247 | + VMSTATE_UINT32(int_info1, TZMPC), | 94 | - a = float16_squash_input_denormal(a, fpst); |
248 | + VMSTATE_UINT32(int_info2, TZMPC), | 95 | - b = float16_squash_input_denormal(b, fpst); |
249 | + VMSTATE_VARRAY_UINT32(blk_lut, TZMPC, blk_max, | 96 | - |
250 | + 0, vmstate_info_uint32, uint32_t), | 97 | - a = float16_chs(a); |
251 | VMSTATE_END_OF_LIST() | 98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || |
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
252 | } | 112 | } |
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
253 | }; | 232 | }; |
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
254 | -- | 258 | -- |
255 | 2.17.1 | 259 | 2.34.1 |
256 | |||
257 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in the vector versions of FRECPS and FRSQRTS, by implementing | ||
3 | new vector wrappers that call the _ah_ scalar helpers. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- | ||
10 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
11 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/helper-sve.h | ||
17 | +++ b/target/arm/tcg/helper-sve.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
20 | void, ptr, ptr, ptr, fpst, i32) | ||
21 | |||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, fpst, i32) | ||
28 | + | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, | ||
34 | + void, ptr, ptr, ptr, fpst, i32) | ||
35 | + | ||
36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
37 | void, ptr, ptr, ptr, fpst, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
45 | } | ||
46 | |||
47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
48 | - gen_helper_gvec_3_ptr * const f[3]) | ||
49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
50 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
52 | { | ||
53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, | ||
55 | select_ah_fpst(s, a->esz)); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
59 | gen_helper_gvec_recps_s, | ||
60 | gen_helper_gvec_recps_d, | ||
61 | }; | ||
62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { | ||
64 | + gen_helper_gvec_ah_recps_h, | ||
65 | + gen_helper_gvec_ah_recps_s, | ||
66 | + gen_helper_gvec_ah_recps_d, | ||
67 | +}; | ||
68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) | ||
69 | |||
70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
71 | gen_helper_gvec_rsqrts_h, | ||
72 | gen_helper_gvec_rsqrts_s, | ||
73 | gen_helper_gvec_rsqrts_d, | ||
74 | }; | ||
75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { | ||
77 | + gen_helper_gvec_ah_rsqrts_h, | ||
78 | + gen_helper_gvec_ah_rsqrts_s, | ||
79 | + gen_helper_gvec_ah_rsqrts_d, | ||
80 | +}; | ||
81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) | ||
82 | |||
83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
84 | gen_helper_gvec_faddp_h, | ||
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/tcg/translate-sve.c | ||
88 | +++ b/target/arm/tcg/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
90 | NULL, gen_helper_gvec_##name##_h, \ | ||
91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
92 | }; \ | ||
93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ | ||
95 | + NULL, gen_helper_gvec_ah_##name##_h, \ | ||
96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ | ||
97 | + }; \ | ||
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
114 | + | ||
115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) | ||
116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) | ||
117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) | ||
118 | + | ||
119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS | ||
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] | ||
8 | fixed] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/helper.h | ||
20 | +++ b/target/arm/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
23 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | + | ||
32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | + | ||
39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
40 | void, ptr, ptr, ptr, ptr, i32) | ||
41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/translate-a64.c | ||
45 | +++ b/target/arm/tcg/translate-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
47 | |||
48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
49 | { | ||
50 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
51 | - gen_helper_gvec_fmla_idx_h, | ||
52 | - gen_helper_gvec_fmla_idx_s, | ||
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/arm/tcg/translate-sve.c | ||
80 | +++ b/target/arm/tcg/translate-sve.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
82 | *** SVE Floating Point Multiply-Add Indexed Group | ||
83 | */ | ||
84 | |||
85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
86 | -{ | ||
87 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
88 | - NULL, | ||
89 | - gen_helper_gvec_fmla_idx_h, | ||
90 | - gen_helper_gvec_fmla_idx_s, | ||
91 | - gen_helper_gvec_fmla_idx_d, | ||
92 | - }; | ||
93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
94 | - (a->index << 1) | sub, | ||
95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
96 | -} | ||
97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
98 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
100 | +}; | ||
101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
104 | |||
105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
108 | + { NULL, NULL }, | ||
109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
112 | +}; | ||
113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
114 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
115 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | *** SVE Floating Point Multiply Indexed Group | ||
120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/arm/tcg/vec_helper.c | ||
123 | +++ b/target/arm/tcg/vec_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
125 | |||
126 | #undef DO_FMUL_IDX | ||
127 | |||
128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
131 | float_status *stat, uint32_t desc) \ | ||
132 | { \ | ||
133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ | ||
137 | + intptr_t idx = simd_data(desc); \ | ||
138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
141 | TYPE mm = m[H(i + idx)]; \ | ||
142 | for (j = 0; j < segment; j++) { \ | ||
143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ | ||
144 | - mm, a[i + j], 0, stat); \ | ||
145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
150 | } | ||
151 | |||
152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
158 | + | ||
159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
162 | + | ||
163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
166 | |||
167 | #undef DO_FMLA_IDX | ||
168 | |||
169 | -- | ||
170 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in FMLS (vector), by implementing a new set of helpers for | ||
3 | the AH=1 case. | ||
1 | 4 | ||
5 | The float_muladd_negate_product flag produces the same result | ||
6 | as negating either of the multiplication operands, assuming | ||
7 | neither of the operands is a NaN. But since FEAT_AFP does not | ||
8 | negate NaNs, this behaviour is exactly what we need. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/helper.h | 4 ++++ | ||
14 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ | ||
16 | 3 files changed, 32 insertions(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/helper.h | ||
21 | +++ b/target/arm/helper.h | ||
22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
25 | |||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
29 | + | ||
30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, | ||
31 | void, ptr, ptr, ptr, fpst, i32) | ||
32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, | ||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/translate-a64.c | ||
36 | +++ b/target/arm/tcg/translate-a64.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { | ||
38 | gen_helper_gvec_vfms_s, | ||
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
57 | } | ||
58 | |||
59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, | ||
60 | + float_status *stat) | ||
61 | +{ | ||
62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
63 | +} | ||
64 | + | ||
65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, | ||
66 | + float_status *stat) | ||
67 | +{ | ||
68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
69 | +} | ||
70 | + | ||
71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
72 | + float_status *stat) | ||
73 | +{ | ||
74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
75 | +} | ||
76 | + | ||
77 | #define DO_MULADD(NAME, FUNC, TYPE) \ | ||
78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ | ||
79 | float_status *stat, uint32_t desc) \ | ||
80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) | ||
81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) | ||
82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) | ||
83 | |||
84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) | ||
85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) | ||
86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) | ||
87 | + | ||
88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. | ||
89 | * For AdvSIMD, there is of course only one such vector segment. | ||
90 | */ | ||
91 | -- | ||
92 | 2.34.1 | diff view generated by jsdifflib |
1 | Wire up the one MPC that is part of the IoTKit itself. For the | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the |
---|---|---|---|
2 | moment we don't wire up its interrupt line. | 2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case |
3 | which end up passing muladd negation flags to the do_fmla_zpzzz_* functions | ||
4 | that do the work. | ||
5 | |||
6 | The float*_muladd functions have a flags argument that can | ||
7 | perform optional negation of various operands. We don't use | ||
8 | that for "normal" arm fmla, because the muladd flags are not | ||
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
11 | |||
12 | The non-AH helpers pass in a zero flags argument and control the | ||
13 | negation via the neg1 and neg3 arguments; the AH helpers always pass | ||
14 | in neg1 and neg3 as zero and control the negation via the flags | ||
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
3 | 17 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20180620132032.28865-7-peter.maydell@linaro.org | ||
7 | --- | 20 | --- |
8 | include/hw/arm/iotkit.h | 2 ++ | 21 | target/arm/tcg/helper-sve.h | 21 ++++++++ |
9 | hw/arm/iotkit.c | 38 +++++++++++++++++++++++++++----------- | 22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- |
10 | 2 files changed, 29 insertions(+), 11 deletions(-) | 23 | target/arm/tcg/translate-sve.c | 18 ++++--- |
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
11 | 25 | ||
12 | diff --git a/include/hw/arm/iotkit.h b/include/hw/arm/iotkit.h | 26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
13 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/include/hw/arm/iotkit.h | 28 | --- a/target/arm/tcg/helper-sve.h |
15 | +++ b/include/hw/arm/iotkit.h | 29 | +++ b/target/arm/tcg/helper-sve.h |
16 | @@ -XXX,XX +XXX,XX @@ | 30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, |
17 | #include "hw/arm/armv7m.h" | 31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, |
18 | #include "hw/misc/iotkit-secctl.h" | 32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
19 | #include "hw/misc/tz-ppc.h" | 33 | |
20 | +#include "hw/misc/tz-mpc.h" | 34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, |
21 | #include "hw/timer/cmsdk-apb-timer.h" | 35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
22 | #include "hw/misc/unimp.h" | 36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, |
23 | #include "hw/or-irq.h" | 37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
24 | @@ -XXX,XX +XXX,XX @@ typedef struct IoTKit { | 38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, |
25 | IoTKitSecCtl secctl; | 39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
26 | TZPPC apb_ppc0; | 40 | + |
27 | TZPPC apb_ppc1; | 41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, |
28 | + TZMPC mpc; | 42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
29 | CMSDKAPBTIMER timer0; | 43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, |
30 | CMSDKAPBTIMER timer1; | 44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
31 | qemu_or_irq ppc_irq_orgate; | 45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, |
32 | diff --git a/hw/arm/iotkit.c b/hw/arm/iotkit.c | 46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
47 | + | ||
48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, | ||
49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | 59 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/hw/arm/iotkit.c | 60 | --- a/target/arm/tcg/sve_helper.c |
35 | +++ b/hw/arm/iotkit.c | 61 | +++ b/target/arm/tcg/sve_helper.c |
36 | @@ -XXX,XX +XXX,XX @@ static void iotkit_init(Object *obj) | 62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) |
37 | TYPE_TZ_PPC); | 63 | |
38 | init_sysbus_child(obj, "apb-ppc1", &s->apb_ppc1, sizeof(s->apb_ppc1), | 64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
39 | TYPE_TZ_PPC); | 65 | float_status *status, uint32_t desc, |
40 | + init_sysbus_child(obj, "mpc", &s->mpc, sizeof(s->mpc), TYPE_TZ_MPC); | 66 | - uint16_t neg1, uint16_t neg3) |
41 | init_sysbus_child(obj, "timer0", &s->timer0, sizeof(s->timer0), | 67 | + uint16_t neg1, uint16_t neg3, int flags) |
42 | TYPE_CMSDK_APB_TIMER); | 68 | { |
43 | init_sysbus_child(obj, "timer1", &s->timer1, sizeof(s->timer1), | 69 | intptr_t i = simd_oprsz(desc); |
44 | @@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp) | 70 | uint64_t *g = vg; |
45 | */ | 71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
46 | make_alias(s, &s->alias3, "alias 3", 0x50000000, 0x10000000, 0x40000000); | 72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; |
47 | 73 | e2 = *(uint16_t *)(vm + H1_2(i)); | |
48 | - /* This RAM should be behind a Memory Protection Controller, but we | 74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; |
49 | - * don't implement that yet. | 75 | - r = float16_muladd(e1, e2, e3, 0, status); |
50 | - */ | 76 | + r = float16_muladd(e1, e2, e3, flags, status); |
51 | - memory_region_init_ram(&s->sram0, NULL, "iotkit.sram0", 0x00008000, &err); | 77 | *(uint16_t *)(vd + H1_2(i)) = r; |
52 | - if (err) { | 78 | } |
53 | - error_propagate(errp, err); | 79 | } while (i & 63); |
54 | - return; | 80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, |
55 | - } | 81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
56 | - memory_region_add_subregion(&s->container, 0x20000000, &s->sram0); | 82 | void *vg, float_status *status, uint32_t desc) |
57 | 83 | { | |
58 | /* Security controller */ | 84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); |
59 | object_property_set_bool(OBJECT(&s->secctl), true, "realized", &err); | 85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); |
60 | @@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp) | 86 | } |
61 | qdev_connect_gpio_out_named(dev_secctl, "sec_resp_cfg", 0, | 87 | |
62 | qdev_get_gpio_in(dev_splitter, 0)); | 88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
63 | 89 | void *vg, float_status *status, uint32_t desc) | |
64 | + /* This RAM lives behind the Memory Protection Controller */ | 90 | { |
65 | + memory_region_init_ram(&s->sram0, NULL, "iotkit.sram0", 0x00008000, &err); | 91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); |
66 | + if (err) { | 92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); |
67 | + error_propagate(errp, err); | 93 | } |
68 | + return; | 94 | |
69 | + } | 95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
70 | + object_property_set_link(OBJECT(&s->mpc), OBJECT(&s->sram0), | 96 | void *vg, float_status *status, uint32_t desc) |
71 | + "downstream", &err); | 97 | { |
72 | + if (err) { | 98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); |
73 | + error_propagate(errp, err); | 99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); |
74 | + return; | 100 | } |
75 | + } | 101 | |
76 | + object_property_set_bool(OBJECT(&s->mpc), true, "realized", &err); | 102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
77 | + if (err) { | 103 | void *vg, float_status *status, uint32_t desc) |
78 | + error_propagate(errp, err); | 104 | { |
79 | + return; | 105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); |
80 | + } | 106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); |
81 | + /* Map the upstream end of the MPC into the right place... */ | 107 | +} |
82 | + memory_region_add_subregion(&s->container, 0x20000000, | 108 | + |
83 | + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mpc), | 109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
84 | + 1)); | 110 | + void *vg, float_status *status, uint32_t desc) |
85 | + /* ...and its register interface */ | 111 | +{ |
86 | + memory_region_add_subregion(&s->container, 0x50083000, | 112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
87 | + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mpc), | 113 | + float_muladd_negate_product); |
88 | + 0)); | 114 | +} |
89 | + | 115 | + |
90 | /* Devices behind APB PPC0: | 116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
91 | * 0x40000000: timer0 | 117 | + void *vg, float_status *status, uint32_t desc) |
92 | * 0x40001000: timer1 | 118 | +{ |
93 | @@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp) | 119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
94 | create_unimplemented_device("NS watchdog", 0x40081000, 0x1000); | 120 | + float_muladd_negate_product | float_muladd_negate_c); |
95 | create_unimplemented_device("S watchdog", 0x50081000, 0x1000); | 121 | +} |
96 | 122 | + | |
97 | - create_unimplemented_device("SRAM0 MPC", 0x50083000, 0x1000); | 123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
98 | - | 124 | + void *vg, float_status *status, uint32_t desc) |
99 | for (i = 0; i < ARRAY_SIZE(s->ppc_irq_splitter); i++) { | 125 | +{ |
100 | Object *splitter = OBJECT(&s->ppc_irq_splitter[i]); | 126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, |
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
101 | 298 | ||
102 | -- | 299 | -- |
103 | 2.17.1 | 300 | 2.34.1 |
104 | |||
105 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when | ||
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field | ||
3 | and use that to determine whether to do the negation. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- | ||
9 | target/arm/tcg/translate-sve.c | 4 ++-- | ||
10 | 2 files changed, 17 insertions(+), 5 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/sve_helper.c | ||
15 | +++ b/target/arm/tcg/sve_helper.c | ||
16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) | ||
17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
18 | { | ||
19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; | ||
20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
21 | uint16_t *d = vd, *n = vn, *m = vm; | ||
22 | for (i = 0; i < opr_sz; i += 1) { | ||
23 | uint16_t nn = n[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) | ||
25 | if (mm & 1) { | ||
26 | nn = float16_one; | ||
27 | } | ||
28 | - d[i] = nn ^ (mm & 2) << 14; | ||
29 | + if (mm & 2) { | ||
30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | ||
31 | + } | ||
32 | + d[i] = nn; | ||
33 | } | ||
34 | } | ||
35 | |||
36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
37 | { | ||
38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; | ||
39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
40 | uint32_t *d = vd, *n = vn, *m = vm; | ||
41 | for (i = 0; i < opr_sz; i += 1) { | ||
42 | uint32_t nn = n[i]; | ||
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
44 | if (mm & 1) { | ||
45 | nn = float32_one; | ||
46 | } | ||
47 | - d[i] = nn ^ (mm & 2) << 30; | ||
48 | + if (mm & 2) { | ||
49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | ||
50 | + } | ||
51 | + d[i] = nn; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
56 | { | ||
57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; | ||
58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
59 | uint64_t *d = vd, *n = vn, *m = vm; | ||
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
92 | |||
93 | /* | ||
94 | *** SVE Predicate Logical Operations Group | ||
95 | -- | ||
96 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when | ||
2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, | ||
3 | so we can select the correct behaviour. | ||
1 | 4 | ||
5 | Because the operand is known to be negative, negating the operand | ||
6 | is the same as taking the absolute value. Defer this to the muladd | ||
7 | operation via flags, so that it happens after NaN detection, which | ||
8 | is correct for FPCR.AH. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- | ||
14 | target/arm/tcg/translate-sve.c | 3 ++- | ||
15 | 2 files changed, 35 insertions(+), 10 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/tcg/sve_helper.c | ||
20 | +++ b/target/arm/tcg/sve_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, | ||
22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | ||
23 | }; | ||
24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); | ||
25 | - intptr_t x = simd_data(desc); | ||
26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
28 | float16 *d = vd, *n = vn, *m = vm; | ||
29 | + | ||
30 | for (i = 0; i < opr_sz; i++) { | ||
31 | float16 mm = m[i]; | ||
32 | intptr_t xx = x; | ||
33 | + int flags = 0; | ||
34 | + | ||
35 | if (float16_is_neg(mm)) { | ||
36 | - mm = float16_abs(mm); | ||
37 | + if (fpcr_ah) { | ||
38 | + flags = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + mm = float16_abs(mm); | ||
41 | + } | ||
42 | xx += 8; | ||
43 | } | ||
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, | ||
50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, | ||
51 | }; | ||
52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); | ||
53 | - intptr_t x = simd_data(desc); | ||
54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
56 | float32 *d = vd, *n = vn, *m = vm; | ||
57 | + | ||
58 | for (i = 0; i < opr_sz; i++) { | ||
59 | float32 mm = m[i]; | ||
60 | intptr_t xx = x; | ||
61 | + int flags = 0; | ||
62 | + | ||
63 | if (float32_is_neg(mm)) { | ||
64 | - mm = float32_abs(mm); | ||
65 | + if (fpcr_ah) { | ||
66 | + flags = float_muladd_negate_product; | ||
67 | + } else { | ||
68 | + mm = float32_abs(mm); | ||
69 | + } | ||
70 | xx += 8; | ||
71 | } | ||
72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); | ||
73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, | ||
78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, | ||
79 | }; | ||
80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); | ||
81 | - intptr_t x = simd_data(desc); | ||
82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
84 | float64 *d = vd, *n = vn, *m = vm; | ||
85 | + | ||
86 | for (i = 0; i < opr_sz; i++) { | ||
87 | float64 mm = m[i]; | ||
88 | intptr_t xx = x; | ||
89 | + int flags = 0; | ||
90 | + | ||
91 | if (float64_is_neg(mm)) { | ||
92 | - mm = float64_abs(mm); | ||
93 | + if (fpcr_ah) { | ||
94 | + flags = float_muladd_negate_product; | ||
95 | + } else { | ||
96 | + mm = float64_abs(mm); | ||
97 | + } | ||
98 | xx += 8; | ||
99 | } | ||
100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | ||
101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/target/arm/tcg/translate-sve.c | ||
108 | +++ b/target/arm/tcg/translate-sve.c | ||
109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | ||
110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, | ||
111 | }; | ||
112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | ||
113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, | ||
114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, | ||
115 | + a->imm | (s->fpcr_ah << 3), | ||
116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | -- | ||
120 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH | ||
4 | is set. Handle this by passing FPCR.AH to the helper via the | ||
5 | SIMD data field, and use this to select whether to do the | ||
6 | negation via XOR or via the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 40 insertions(+), 28 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
23 | |||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - a->rot, fn[a->esz]); | ||
27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); | ||
28 | return true; | ||
29 | } | ||
30 | |||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | - uint32_t neg_real = flip ^ neg_imag; | ||
41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
43 | + uint32_t negf_real = flip ^ negf_imag; | ||
44 | + float16 negx_imag, negx_real; | ||
45 | uintptr_t i; | ||
46 | |||
47 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
48 | - neg_real <<= 15; | ||
49 | - neg_imag <<= 15; | ||
50 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
51 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
55 | |||
56 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
57 | float16 e2 = n[H2(i + flip)]; | ||
58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
60 | float16 e4 = e2; | ||
61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
63 | |||
64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
68 | } | ||
69 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
72 | uintptr_t opr_sz = simd_oprsz(desc); | ||
73 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
76 | - uint32_t neg_real = flip ^ neg_imag; | ||
77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
79 | + uint32_t negf_real = flip ^ negf_imag; | ||
80 | + float32 negx_imag, negx_real; | ||
81 | uintptr_t i; | ||
82 | |||
83 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
84 | - neg_real <<= 31; | ||
85 | - neg_imag <<= 31; | ||
86 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
87 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
91 | |||
92 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
93 | float32 e2 = n[H4(i + flip)]; | ||
94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
96 | float32 e4 = e2; | ||
97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
99 | |||
100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
104 | } | ||
105 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
108 | uintptr_t opr_sz = simd_oprsz(desc); | ||
109 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
112 | - uint64_t neg_real = flip ^ neg_imag; | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_real, negx_imag; | ||
117 | uintptr_t i; | ||
118 | |||
119 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
120 | - neg_real <<= 63; | ||
121 | - neg_imag <<= 63; | ||
122 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
127 | |||
128 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
129 | float64 e2 = n[i + flip]; | ||
130 | - float64 e1 = m[i + flip] ^ neg_real; | ||
131 | + float64 e1 = m[i + flip] ^ negx_real; | ||
132 | float64 e4 = e2; | ||
133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
135 | |||
136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
140 | } | ||
141 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
142 | } | ||
143 | -- | ||
144 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in FCMLA by index mustn't negate a NaN when | ||
4 | FPCR.AH is set. Use the same approach as vector FCMLA of | ||
5 | passing in FPCR.AH and using it to select whether to negate | ||
6 | by XOR or by the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
23 | if (fp_access_check(s)) { | ||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - (a->idx << 2) | a->rot, fn); | ||
27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); | ||
28 | } | ||
29 | return true; | ||
30 | } | ||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
42 | - uint32_t neg_real = flip ^ neg_imag; | ||
43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
44 | + uint32_t negf_real = flip ^ negf_imag; | ||
45 | intptr_t elements = opr_sz / sizeof(float16); | ||
46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
47 | + float16 negx_imag, negx_real; | ||
48 | intptr_t i, j; | ||
49 | |||
50 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
51 | - neg_real <<= 15; | ||
52 | - neg_imag <<= 15; | ||
53 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
54 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
58 | |||
59 | for (i = 0; i < elements; i += eltspersegment) { | ||
60 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
61 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
62 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
64 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
66 | |||
67 | for (j = i; j < i + eltspersegment; j += 2) { | ||
68 | float16 e2 = n[H2(j + flip)]; | ||
69 | float16 e4 = e2; | ||
70 | |||
71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
75 | } | ||
76 | } | ||
77 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
79 | uintptr_t opr_sz = simd_oprsz(desc); | ||
80 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
85 | - uint32_t neg_real = flip ^ neg_imag; | ||
86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
87 | + uint32_t negf_real = flip ^ negf_imag; | ||
88 | intptr_t elements = opr_sz / sizeof(float32); | ||
89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
90 | + float32 negx_imag, negx_real; | ||
91 | intptr_t i, j; | ||
92 | |||
93 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
94 | - neg_real <<= 31; | ||
95 | - neg_imag <<= 31; | ||
96 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
97 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
101 | |||
102 | for (i = 0; i < elements; i += eltspersegment) { | ||
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Zheng Xiang <xiang.zheng@linaro.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The elements of kvm_devices_head list are freed in kvm_arm_machine_init_done(), | 3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is |
4 | but we still access these illegal memory in kvm_arm_devlistener_del(). | 4 | set. Use the same approach as we did for A64 FCMLA of passing in |
5 | FPCR.AH and using it to select whether to negate by XOR or by the | ||
6 | muladd negate_product flag. | ||
5 | 7 | ||
6 | This will cause segment fault when booting guest with MALLOC_PERTURB_=1. | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | 9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org | |
8 | Signed-off-by: Zheng Xiang <xiang.zheng@linaro.org> | ||
9 | Message-id: 20180619075821.9884-1-zhengxiang9@huawei.com | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 12 | --- |
13 | target/arm/kvm.c | 1 + | 13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- |
14 | 1 file changed, 1 insertion(+) | 14 | target/arm/tcg/translate-sve.c | 2 +- |
15 | 2 files changed, 43 insertions(+), 28 deletions(-) | ||
15 | 16 | ||
16 | diff --git a/target/arm/kvm.c b/target/arm/kvm.c | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/kvm.c | 19 | --- a/target/arm/tcg/sve_helper.c |
19 | +++ b/target/arm/kvm.c | 20 | +++ b/target/arm/tcg/sve_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data) | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, |
21 | kvm_arm_set_device_addr(kd); | 22 | void *vg, float_status *status, uint32_t desc) |
22 | } | 23 | { |
23 | memory_region_unref(kd->mr); | 24 | intptr_t j, i = simd_oprsz(desc); |
24 | + QSLIST_REMOVE_HEAD(&kvm_devices_head, entries); | 25 | - unsigned rot = simd_data(desc); |
25 | g_free(kd); | 26 | - bool flip = rot & 1; |
26 | } | 27 | - float16 neg_imag, neg_real; |
27 | memory_listener_unregister(&devlistener); | 28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); |
29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/arm/tcg/translate-sve.c | ||
165 | +++ b/target/arm/tcg/translate-sve.c | ||
166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | ||
167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | ||
168 | }; | ||
169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | ||
170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | ||
171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), | ||
172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
173 | |||
174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
28 | -- | 175 | -- |
29 | 2.17.1 | 176 | 2.34.1 |
30 | |||
31 | diff view generated by jsdifflib |
1 | From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The ZynqMP has Cortex-R5Fs with the optional FPU enabled. | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN |
4 | in FMLSL by element and vector, using the usual trick of | ||
5 | negating by XOR when AH=0 and by muladd flags when AH=1. | ||
4 | 6 | ||
5 | Reviewed-by: KONRAD Frederic <frederic.konrad@adacore.com> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
6 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 9 | FPCR.AH value via the SIMD data word. |
8 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 10 | |
9 | Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20180529124707.3025-3-edgar.iglesias@gmail.com | 12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org |
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 16 | --- |
13 | hw/arm/xlnx-zcu102.c | 2 +- | 17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- |
14 | hw/arm/xlnx-zynqmp.c | 2 +- | 18 | 1 file changed, 46 insertions(+), 25 deletions(-) |
15 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
16 | 19 | ||
17 | diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/arm/xlnx-zcu102.c | 22 | --- a/target/arm/tcg/vec_helper.c |
20 | +++ b/hw/arm/xlnx-zcu102.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ static void xlnx_zcu102_machine_class_init(ObjectClass *oc, void *data) | 24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
25 | */ | ||
26 | |||
27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
28 | - uint32_t desc, bool fz16) | ||
29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
22 | { | 30 | { |
23 | MachineClass *mc = MACHINE_CLASS(oc); | 31 | intptr_t i, oprsz = simd_oprsz(desc); |
24 | 32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | |
25 | - mc->desc = "Xilinx ZynqMP ZCU102 board with 4xA53s and 2xR5s based on " \ | 33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
26 | + mc->desc = "Xilinx ZynqMP ZCU102 board with 4xA53s and 2xR5Fs based on " \ | 34 | int is_q = oprsz == 16; |
27 | "the value of smp"; | 35 | uint64_t n_4, m_4; |
28 | mc->init = xlnx_zcu102_init; | 36 | |
29 | mc->block_default_type = IF_IDE; | 37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ |
30 | diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c | 38 | - n_4 = load4_f16(vn, is_q, is_2); |
31 | index XXXXXXX..XXXXXXX 100644 | 39 | + /* |
32 | --- a/hw/arm/xlnx-zynqmp.c | 40 | + * Pre-load all of the f16 data, avoiding overlap issues. |
33 | +++ b/hw/arm/xlnx-zynqmp.c | 41 | + * Negate all inputs for AH=0 FMLSL at once. |
34 | @@ -XXX,XX +XXX,XX @@ static void xlnx_zynqmp_create_rpu(XlnxZynqMPState *s, const char *boot_cpu, | 42 | + */ |
35 | char *name; | 43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
36 | 44 | m_4 = load4_f16(vm, is_q, is_2); | |
37 | object_initialize(&s->rpu_cpu[i], sizeof(s->rpu_cpu[i]), | 45 | |
38 | - "cortex-r5-" TYPE_ARM_CPU); | 46 | - /* Negate all inputs for FMLSL at once. */ |
39 | + "cortex-r5f-" TYPE_ARM_CPU); | 47 | - if (is_s) { |
40 | object_property_add_child(OBJECT(s), "rpu-cpu[*]", | 48 | - n_4 ^= 0x8000800080008000ull; |
41 | OBJECT(&s->rpu_cpu[i]), &error_abort); | 49 | - } |
50 | - | ||
51 | for (i = 0; i < oprsz / 4; i++) { | ||
52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); | ||
54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
56 | } | ||
57 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
58 | } | ||
59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
61 | CPUARMState *env, uint32_t desc) | ||
62 | { | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
66 | + | ||
67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
69 | } | ||
70 | |||
71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
72 | CPUARMState *env, uint32_t desc) | ||
73 | { | ||
74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint64_t negx = 0; | ||
77 | + int negf = 0; | ||
78 | + | ||
79 | + if (is_s) { | ||
80 | + if (env->vfp.fpcr & FPCR_AH) { | ||
81 | + negf = float_muladd_negate_product; | ||
82 | + } else { | ||
83 | + negx = 0x8000800080008000ull; | ||
84 | + } | ||
85 | + } | ||
86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | } | ||
92 | |||
93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
94 | - uint32_t desc, bool fz16) | ||
95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
96 | { | ||
97 | intptr_t i, oprsz = simd_oprsz(desc); | ||
98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
101 | int is_q = oprsz == 16; | ||
102 | uint64_t n_4; | ||
103 | float32 m_1; | ||
104 | |||
105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
106 | - n_4 = load4_f16(vn, is_q, is_2); | ||
107 | - | ||
108 | - /* Negate all inputs for FMLSL at once. */ | ||
109 | - if (is_s) { | ||
110 | - n_4 ^= 0x8000800080008000ull; | ||
111 | - } | ||
112 | - | ||
113 | + /* | ||
114 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
115 | + * Negate all inputs for AH=0 FMLSL at once. | ||
116 | + */ | ||
117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); | ||
119 | |||
120 | for (i = 0; i < oprsz / 4; i++) { | ||
121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
124 | } | ||
125 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
126 | } | ||
127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
129 | CPUARMState *env, uint32_t desc) | ||
130 | { | ||
131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
134 | + | ||
135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
137 | } | ||
138 | |||
139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
140 | CPUARMState *env, uint32_t desc) | ||
141 | { | ||
142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, | ||
143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
144 | + uint64_t negx = 0; | ||
145 | + int negf = 0; | ||
146 | + | ||
147 | + if (is_s) { | ||
148 | + if (env->vfp.fpcr & FPCR_AH) { | ||
149 | + negf = float_muladd_negate_product; | ||
150 | + } else { | ||
151 | + negx = 0x8000800080008000ull; | ||
152 | + } | ||
153 | + } | ||
154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
156 | } | ||
42 | 157 | ||
43 | -- | 158 | -- |
44 | 2.17.1 | 159 | 2.34.1 |
45 | |||
46 | diff view generated by jsdifflib |
1 | From: Julia Suvorova <jusual@mail.ru> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Unlike ARMv7-M, ARMv6-M and ARMv8-M Baseline only supports naturally | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | aligned memory accesses for load/store instructions. | 4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 |
5 | and by muladd flags when AH=1. | ||
5 | 6 | ||
6 | Signed-off-by: Julia Suvorova <jusual@mail.ru> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
7 | Message-id: 20180622080138.17702-3-jusual@mail.ru | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 16 | --- |
11 | target/arm/translate.c | 18 ++++++++++++++++-- | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
12 | 1 file changed, 16 insertions(+), 2 deletions(-) | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
13 | 19 | ||
14 | diff --git a/target/arm/translate.c b/target/arm/translate.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/translate.c | 22 | --- a/target/arm/tcg/vec_helper.c |
17 | +++ b/target/arm/translate.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op) | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
19 | static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, | 25 | CPUARMState *env, uint32_t desc) |
20 | int index, TCGMemOp opc) | ||
21 | { | 26 | { |
22 | - TCGv addr = gen_aa32_addr(s, a32, opc); | 27 | intptr_t i, j, oprsz = simd_oprsz(desc); |
23 | + TCGv addr; | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
32 | float_status *status = &env->vfp.fp_status_a64; | ||
33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
34 | + int negx = 0, negf = 0; | ||
24 | + | 35 | + |
25 | + if (arm_dc_feature(s, ARM_FEATURE_M) && | 36 | + if (is_s) { |
26 | + !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { | 37 | + if (env->vfp.fpcr & FPCR_AH) { |
27 | + opc |= MO_ALIGN; | 38 | + negf = float_muladd_negate_product; |
39 | + } else { | ||
40 | + negx = 0x8000; | ||
41 | + } | ||
28 | + } | 42 | + } |
29 | + | 43 | |
30 | + addr = gen_aa32_addr(s, a32, opc); | 44 | for (i = 0; i < oprsz; i += 16) { |
31 | tcg_gen_qemu_ld_i32(val, addr, index, opc); | 45 | float16 mm_16 = *(float16 *)(vm + i + idx); |
32 | tcg_temp_free(addr); | 46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); |
33 | } | 47 | |
34 | @@ -XXX,XX +XXX,XX @@ static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, | 48 | for (j = 0; j < 16; j += sizeof(float32)) { |
35 | static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, | 49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; |
36 | int index, TCGMemOp opc) | 50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; |
37 | { | 51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); |
38 | - TCGv addr = gen_aa32_addr(s, a32, opc); | 52 | float32 aa = *(float32 *)(va + H1_4(i + j)); |
39 | + TCGv addr; | 53 | |
40 | + | 54 | *(float32 *)(vd + H1_4(i + j)) = |
41 | + if (arm_dc_feature(s, ARM_FEATURE_M) && | 55 | - float32_muladd(nn, mm, aa, 0, status); |
42 | + !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { | 56 | + float32_muladd(nn, mm, aa, negf, status); |
43 | + opc |= MO_ALIGN; | 57 | } |
44 | + } | 58 | } |
45 | + | ||
46 | + addr = gen_aa32_addr(s, a32, opc); | ||
47 | tcg_gen_qemu_st_i32(val, addr, index, opc); | ||
48 | tcg_temp_free(addr); | ||
49 | } | 59 | } |
50 | -- | 60 | -- |
51 | 2.17.1 | 61 | 2.34.1 |
52 | |||
53 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Let's check if KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION is supported. | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE |
4 | If not, we check the number of redist region is equal to 1 and use the | 4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 |
5 | legacy KVM_VGIC_V3_ADDR_TYPE_REDIST attribute. Otherwise we use | 5 | and by muladd flags when AH=1. |
6 | the new attribute and allow to register multiple regions to the | ||
7 | KVM device. | ||
8 | 6 | ||
9 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
8 | look directly at env->vfp.fpcr and don't need to pass in the | ||
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org | ||
13 | [PMM: tweaked commit message] | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
12 | Message-id: 1529072910-16156-5-git-send-email-eric.auger@redhat.com | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 16 | --- |
15 | hw/intc/arm_gicv3_kvm.c | 37 ++++++++++++++++++++++++++++++++++--- | 17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- |
16 | 1 file changed, 34 insertions(+), 3 deletions(-) | 18 | 1 file changed, 12 insertions(+), 3 deletions(-) |
17 | 19 | ||
18 | diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/hw/intc/arm_gicv3_kvm.c | 22 | --- a/target/arm/tcg/vec_helper.c |
21 | +++ b/hw/intc/arm_gicv3_kvm.c | 23 | +++ b/target/arm/tcg/vec_helper.c |
22 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
25 | CPUARMState *env, uint32_t desc) | ||
23 | { | 26 | { |
24 | GICv3State *s = KVM_ARM_GICV3(dev); | 27 | intptr_t i, oprsz = simd_oprsz(desc); |
25 | KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s); | 28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; |
26 | + bool multiple_redist_region_allowed; | 29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
27 | Error *local_err = NULL; | 30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
28 | int i; | 31 | float_status *status = &env->vfp.fp_status_a64; |
29 | 32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | |
30 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | 33 | + int negx = 0, negf = 0; |
31 | return; | ||
32 | } | ||
33 | |||
34 | + multiple_redist_region_allowed = | ||
35 | + kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
36 | + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION); | ||
37 | + | 34 | + |
38 | + if (!multiple_redist_region_allowed && s->nb_redist_regions > 1) { | 35 | + if (is_s) { |
39 | + error_setg(errp, "Multiple VGICv3 redistributor regions are not " | 36 | + if (env->vfp.fpcr & FPCR_AH) { |
40 | + "supported by this host kernel"); | 37 | + negf = float_muladd_negate_product; |
41 | + error_append_hint(errp, "A maximum of %d VCPUs can be used", | 38 | + } else { |
42 | + s->redist_region_count[0]); | 39 | + negx = 0x8000; |
43 | + return; | ||
44 | + } | ||
45 | + | ||
46 | kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, | ||
47 | 0, &s->num_irq, true, &error_abort); | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | ||
50 | |||
51 | kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
52 | KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); | ||
53 | - kvm_arm_register_device(&s->iomem_redist[0], -1, | ||
54 | - KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
55 | - KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); | ||
56 | + | ||
57 | + if (!multiple_redist_region_allowed) { | ||
58 | + kvm_arm_register_device(&s->iomem_redist[0], -1, | ||
59 | + KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
60 | + KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); | ||
61 | + } else { | ||
62 | + /* we register regions in reverse order as "devices" are inserted at | ||
63 | + * the head of a QSLIST and the list is then popped from the head | ||
64 | + * onwards by kvm_arm_machine_init_done() | ||
65 | + */ | ||
66 | + for (i = s->nb_redist_regions - 1; i >= 0; i--) { | ||
67 | + /* Address mask made of the rdist region index and count */ | ||
68 | + uint64_t addr_ormask = | ||
69 | + i | ((uint64_t)s->redist_region_count[i] << 52); | ||
70 | + | ||
71 | + kvm_arm_register_device(&s->iomem_redist[i], -1, | ||
72 | + KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
73 | + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, | ||
74 | + s->dev_fd, addr_ormask); | ||
75 | + } | 40 | + } |
76 | + } | 41 | + } |
77 | 42 | ||
78 | if (kvm_has_gsi_routing()) { | 43 | for (i = 0; i < oprsz; i += sizeof(float32)) { |
79 | /* set up irq routing */ | 44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; |
45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
49 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
50 | |||
51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
53 | } | ||
54 | } | ||
55 | |||
80 | -- | 56 | -- |
81 | 2.17.1 | 57 | 2.34.1 |
82 | |||
83 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we | ||
2 | can enable FEAT_AFP for '-cpu max', and document that we support it. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | docs/system/arm/emulation.rst | 1 + | ||
8 | target/arm/tcg/cpu64.c | 1 + | ||
9 | 2 files changed, 2 insertions(+) | ||
10 | |||
11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/docs/system/arm/emulation.rst | ||
14 | +++ b/docs/system/arm/emulation.rst | ||
15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | ||
16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) | ||
17 | - FEAT_AdvSIMD (Advanced SIMD Extension) | ||
18 | - FEAT_AES (AESD and AESE instructions) | ||
19 | +- FEAT_AFP (Alternate floating-point behavior) | ||
20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) | ||
21 | - FEAT_ASID16 (16 bit ASID) | ||
22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) | ||
23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/tcg/cpu64.c | ||
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ | ||
29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ | ||
30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ | ||
31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ | ||
32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ | ||
33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ | ||
34 | cpu->isar.id_aa64mmfr1 = t; | ||
35 | -- | ||
36 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | FEAT_RPRES implements an "increased precision" variant of the single |
---|---|---|---|
2 | precision FRECPE and FRSQRTE instructions from an 8 bit to a 12 | ||
3 | bit mantissa. This applies only when FPCR.AH == 1. Note that the | ||
4 | halfprec and double versions of these insns retain the 8 bit | ||
5 | precision regardless. | ||
2 | 6 | ||
3 | Update our kernel headers to mainline commit | 7 | In this commit we add all the plumbing to make these instructions |
4 | b357bf6023a948cf6a9472f07a1b0caac0e4f8e8 | 8 | call a new helper function when the increased-precision is in |
5 | ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm") | 9 | effect. In the following commit we will provide the actual change |
10 | in behaviour in the helpers. | ||
6 | 11 | ||
7 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
8 | Message-id: 1529072910-16156-2-git-send-email-eric.auger@redhat.com | ||
9 | [PMM: clarified commit message] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | 14 | --- |
12 | include/standard-headers/linux/pci_regs.h | 8 ++++++++ | 15 | target/arm/cpu-features.h | 5 +++++ |
13 | include/standard-headers/linux/virtio_gpu.h | 1 + | 16 | target/arm/helper.h | 4 ++++ |
14 | include/standard-headers/linux/virtio_net.h | 3 +++ | 17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- |
15 | linux-headers/asm-arm/kvm.h | 1 + | 18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- |
16 | linux-headers/asm-arm/unistd-common.h | 1 + | 19 | target/arm/tcg/vec_helper.c | 2 ++ |
17 | linux-headers/asm-arm64/kvm.h | 1 + | 20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- |
18 | linux-headers/asm-generic/unistd.h | 4 +++- | 21 | 6 files changed, 85 insertions(+), 8 deletions(-) |
19 | linux-headers/asm-powerpc/unistd.h | 1 + | ||
20 | linux-headers/asm-x86/unistd_32.h | 2 ++ | ||
21 | linux-headers/asm-x86/unistd_64.h | 2 ++ | ||
22 | linux-headers/asm-x86/unistd_x32.h | 2 ++ | ||
23 | linux-headers/linux/kvm.h | 5 +++-- | ||
24 | linux-headers/linux/psp-sev.h | 12 ++++++++++++ | ||
25 | linux-headers/LICENSES/exceptions/Linux-syscall-note | 2 +- | ||
26 | linux-headers/LICENSES/preferred/GPL-2.0 | 6 ++++++ | ||
27 | 15 files changed, 47 insertions(+), 4 deletions(-) | ||
28 | 22 | ||
29 | diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h | 23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
30 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/include/standard-headers/linux/pci_regs.h | 25 | --- a/target/arm/cpu-features.h |
32 | +++ b/include/standard-headers/linux/pci_regs.h | 26 | +++ b/target/arm/cpu-features.h |
33 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) |
34 | #define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */ | 28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); |
35 | #define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */ | 29 | } |
36 | #define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */ | 30 | |
37 | +#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ | 31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) |
38 | +#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ | 32 | +{ |
39 | #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ | 33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); |
40 | #define PCI_EXP_DEVSTA 10 /* Device Status */ | 34 | +} |
41 | #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ | 35 | + |
42 | @@ -XXX,XX +XXX,XX @@ | 36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) |
43 | #define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ | 37 | { |
44 | #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ | 38 | /* We always set the AdvSIMD and FP fields identically. */ |
45 | #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ | 39 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
46 | +#define PCI_EXP_LNKCTL2_TLS 0x000f | 40 | index XXXXXXX..XXXXXXX 100644 |
47 | +#define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ | 41 | --- a/target/arm/helper.h |
48 | +#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ | 42 | +++ b/target/arm/helper.h |
49 | +#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ | 43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) |
50 | +#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ | 44 | |
51 | #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ | 45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
52 | #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ | 46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
53 | #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ | 47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
54 | @@ -XXX,XX +XXX,XX @@ | 48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
55 | #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ | 49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
56 | 50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | |
57 | #define PCI_EXP_DPC_CTL 6 /* DPC control */ | 51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
58 | +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ | 52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
59 | #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ | 53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) |
60 | #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ | 54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) |
61 | 55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | |
62 | diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h | 56 | |
63 | index XXXXXXX..XXXXXXX 100644 | 57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
64 | --- a/include/standard-headers/linux/virtio_gpu.h | 58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
65 | +++ b/include/standard-headers/linux/virtio_gpu.h | 59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
66 | @@ -XXX,XX +XXX,XX @@ struct virtio_gpu_cmd_submit { | 60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
67 | }; | 61 | |
68 | 62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | |
69 | #define VIRTIO_GPU_CAPSET_VIRGL 1 | 63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
70 | +#define VIRTIO_GPU_CAPSET_VIRGL2 2 | 64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
71 | 65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) | |
72 | /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ | 66 | |
73 | struct virtio_gpu_get_capset_info { | 67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
74 | diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h | 68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
75 | index XXXXXXX..XXXXXXX 100644 | 69 | index XXXXXXX..XXXXXXX 100644 |
76 | --- a/include/standard-headers/linux/virtio_net.h | 70 | --- a/target/arm/tcg/translate-a64.c |
77 | +++ b/include/standard-headers/linux/virtio_net.h | 71 | +++ b/target/arm/tcg/translate-a64.c |
78 | @@ -XXX,XX +XXX,XX @@ | 72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { |
79 | * Steering */ | 73 | gen_helper_recpe_f32, |
80 | #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ | 74 | gen_helper_recpe_f64, |
81 | 75 | }; | |
82 | +#define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device | 76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) |
83 | + * with the same MAC. | 77 | +static const FPScalar1 f_scalar_frecpe_rpres = { |
84 | + */ | 78 | + gen_helper_recpe_f16, |
85 | #define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ | 79 | + gen_helper_recpe_rpres_f32, |
86 | 80 | + gen_helper_recpe_f64, | |
87 | #ifndef VIRTIO_NET_NO_LEGACY | 81 | +}; |
88 | diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h | 82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, |
89 | index XXXXXXX..XXXXXXX 100644 | 83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? |
90 | --- a/linux-headers/asm-arm/kvm.h | 84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) |
91 | +++ b/linux-headers/asm-arm/kvm.h | 85 | |
92 | @@ -XXX,XX +XXX,XX @@ struct kvm_regs { | 86 | static const FPScalar1 f_scalar_frecpx = { |
93 | #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 | 87 | gen_helper_frecpx_f16, |
94 | #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 | 88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { |
95 | #define KVM_VGIC_ITS_ADDR_TYPE 4 | 89 | gen_helper_rsqrte_f32, |
96 | +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 | 90 | gen_helper_rsqrte_f64, |
97 | 91 | }; | |
98 | #define KVM_VGIC_V3_DIST_SIZE SZ_64K | 92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) |
99 | #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) | 93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { |
100 | diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h | 94 | + gen_helper_rsqrte_f16, |
101 | index XXXXXXX..XXXXXXX 100644 | 95 | + gen_helper_rsqrte_rpres_f32, |
102 | --- a/linux-headers/asm-arm/unistd-common.h | 96 | + gen_helper_rsqrte_f64, |
103 | +++ b/linux-headers/asm-arm/unistd-common.h | 97 | +}; |
104 | @@ -XXX,XX +XXX,XX @@ | 98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, |
105 | #define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) | 99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? |
106 | #define __NR_pkey_free (__NR_SYSCALL_BASE + 396) | 100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) |
107 | #define __NR_statx (__NR_SYSCALL_BASE + 397) | 101 | |
108 | +#define __NR_rseq (__NR_SYSCALL_BASE + 398) | 102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) |
109 | 103 | { | |
110 | #endif /* _ASM_ARM_UNISTD_COMMON_H */ | 104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { |
111 | diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h | 105 | gen_helper_gvec_frecpe_s, |
112 | index XXXXXXX..XXXXXXX 100644 | 106 | gen_helper_gvec_frecpe_d, |
113 | --- a/linux-headers/asm-arm64/kvm.h | 107 | }; |
114 | +++ b/linux-headers/asm-arm64/kvm.h | 108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) |
115 | @@ -XXX,XX +XXX,XX @@ struct kvm_regs { | 109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { |
116 | #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 | 110 | + gen_helper_gvec_frecpe_h, |
117 | #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 | 111 | + gen_helper_gvec_frecpe_rpres_s, |
118 | #define KVM_VGIC_ITS_ADDR_TYPE 4 | 112 | + gen_helper_gvec_frecpe_d, |
119 | +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 | 113 | +}; |
120 | 114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | |
121 | #define KVM_VGIC_V3_DIST_SIZE SZ_64K | 115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) |
122 | #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) | 116 | |
123 | diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h | 117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { |
124 | index XXXXXXX..XXXXXXX 100644 | 118 | gen_helper_gvec_frsqrte_h, |
125 | --- a/linux-headers/asm-generic/unistd.h | 119 | gen_helper_gvec_frsqrte_s, |
126 | +++ b/linux-headers/asm-generic/unistd.h | 120 | gen_helper_gvec_frsqrte_d, |
127 | @@ -XXX,XX +XXX,XX @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) | 121 | }; |
128 | __SYSCALL(__NR_pkey_free, sys_pkey_free) | 122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) |
129 | #define __NR_statx 291 | 123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { |
130 | __SYSCALL(__NR_statx, sys_statx) | 124 | + gen_helper_gvec_frsqrte_h, |
131 | +#define __NR_io_pgetevents 292 | 125 | + gen_helper_gvec_frsqrte_rpres_s, |
132 | +__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) | 126 | + gen_helper_gvec_frsqrte_d, |
133 | 127 | +}; | |
134 | #undef __NR_syscalls | 128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, |
135 | -#define __NR_syscalls 292 | 129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) |
136 | +#define __NR_syscalls 293 | 130 | |
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
137 | 162 | ||
138 | /* | 163 | /* |
139 | * 32 bit systems traditionally used different | 164 | *** SVE Floating Point Compare with Zero Group |
140 | diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h | 165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
141 | index XXXXXXX..XXXXXXX 100644 | 166 | index XXXXXXX..XXXXXXX 100644 |
142 | --- a/linux-headers/asm-powerpc/unistd.h | 167 | --- a/target/arm/tcg/vec_helper.c |
143 | +++ b/linux-headers/asm-powerpc/unistd.h | 168 | +++ b/target/arm/tcg/vec_helper.c |
144 | @@ -XXX,XX +XXX,XX @@ | 169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ |
145 | #define __NR_pkey_alloc 384 | 170 | |
146 | #define __NR_pkey_free 385 | 171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) |
147 | #define __NR_pkey_mprotect 386 | 172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) |
148 | +#define __NR_rseq 387 | 173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) |
149 | 174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | |
150 | #endif /* _ASM_POWERPC_UNISTD_H_ */ | 175 | |
151 | diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h | 176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) |
152 | index XXXXXXX..XXXXXXX 100644 | 177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) |
153 | --- a/linux-headers/asm-x86/unistd_32.h | 178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) |
154 | +++ b/linux-headers/asm-x86/unistd_32.h | 179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) |
155 | @@ -XXX,XX +XXX,XX @@ | 180 | |
156 | #define __NR_pkey_free 382 | 181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) |
157 | #define __NR_statx 383 | 182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
158 | #define __NR_arch_prctl 384 | 183 | index XXXXXXX..XXXXXXX 100644 |
159 | +#define __NR_io_pgetevents 385 | 184 | --- a/target/arm/vfp_helper.c |
160 | +#define __NR_rseq 386 | 185 | +++ b/target/arm/vfp_helper.c |
161 | 186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | |
162 | #endif /* _ASM_X86_UNISTD_32_H */ | 187 | return make_float16(f16_val); |
163 | diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h | 188 | } |
164 | index XXXXXXX..XXXXXXX 100644 | 189 | |
165 | --- a/linux-headers/asm-x86/unistd_64.h | 190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) |
166 | +++ b/linux-headers/asm-x86/unistd_64.h | 191 | +/* |
167 | @@ -XXX,XX +XXX,XX @@ | 192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant |
168 | #define __NR_pkey_alloc 330 | 193 | + * which is used when FPCR.AH == 1. |
169 | #define __NR_pkey_free 331 | ||
170 | #define __NR_statx 332 | ||
171 | +#define __NR_io_pgetevents 333 | ||
172 | +#define __NR_rseq 334 | ||
173 | |||
174 | #endif /* _ASM_X86_UNISTD_64_H */ | ||
175 | diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h | ||
176 | index XXXXXXX..XXXXXXX 100644 | ||
177 | --- a/linux-headers/asm-x86/unistd_x32.h | ||
178 | +++ b/linux-headers/asm-x86/unistd_x32.h | ||
179 | @@ -XXX,XX +XXX,XX @@ | ||
180 | #define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) | ||
181 | #define __NR_pkey_free (__X32_SYSCALL_BIT + 331) | ||
182 | #define __NR_statx (__X32_SYSCALL_BIT + 332) | ||
183 | +#define __NR_io_pgetevents (__X32_SYSCALL_BIT + 333) | ||
184 | +#define __NR_rseq (__X32_SYSCALL_BIT + 334) | ||
185 | #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) | ||
186 | #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) | ||
187 | #define __NR_ioctl (__X32_SYSCALL_BIT + 514) | ||
188 | diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h | ||
189 | index XXXXXXX..XXXXXXX 100644 | ||
190 | --- a/linux-headers/linux/kvm.h | ||
191 | +++ b/linux-headers/linux/kvm.h | ||
192 | @@ -XXX,XX +XXX,XX @@ struct kvm_ioeventfd { | ||
193 | }; | ||
194 | |||
195 | #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) | ||
196 | -#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) | ||
197 | +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) | ||
198 | #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) | ||
199 | #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ | ||
200 | - KVM_X86_DISABLE_EXITS_HTL | \ | ||
201 | + KVM_X86_DISABLE_EXITS_HLT | \ | ||
202 | KVM_X86_DISABLE_EXITS_PAUSE) | ||
203 | |||
204 | /* for KVM_ENABLE_CAP */ | ||
205 | @@ -XXX,XX +XXX,XX @@ struct kvm_ppc_resize_hpt { | ||
206 | #define KVM_CAP_S390_BPB 152 | ||
207 | #define KVM_CAP_GET_MSR_FEATURES 153 | ||
208 | #define KVM_CAP_HYPERV_EVENTFD 154 | ||
209 | +#define KVM_CAP_HYPERV_TLBFLUSH 155 | ||
210 | |||
211 | #ifdef KVM_CAP_IRQ_ROUTING | ||
212 | |||
213 | diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h | ||
214 | index XXXXXXX..XXXXXXX 100644 | ||
215 | --- a/linux-headers/linux/psp-sev.h | ||
216 | +++ b/linux-headers/linux/psp-sev.h | ||
217 | @@ -XXX,XX +XXX,XX @@ enum { | ||
218 | SEV_PDH_GEN, | ||
219 | SEV_PDH_CERT_EXPORT, | ||
220 | SEV_PEK_CERT_IMPORT, | ||
221 | + SEV_GET_ID, | ||
222 | |||
223 | SEV_MAX, | ||
224 | }; | ||
225 | @@ -XXX,XX +XXX,XX @@ struct sev_user_data_pdh_cert_export { | ||
226 | __u32 cert_chain_len; /* In/Out */ | ||
227 | } __attribute__((packed)); | ||
228 | |||
229 | +/** | ||
230 | + * struct sev_user_data_get_id - GET_ID command parameters | ||
231 | + * | ||
232 | + * @socket1: Buffer to pass unique ID of first socket | ||
233 | + * @socket2: Buffer to pass unique ID of second socket | ||
234 | + */ | 194 | + */ |
235 | +struct sev_user_data_get_id { | 195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) |
236 | + __u8 socket1[64]; /* Out */ | 196 | { |
237 | + __u8 socket2[64]; /* Out */ | 197 | float32 f32 = float32_squash_input_denormal(input, fpst); |
238 | +} __attribute__((packed)); | 198 | uint32_t f32_val = float32_val(f32); |
239 | + | 199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) |
240 | /** | 200 | return make_float32(f32_val); |
241 | * struct sev_issue_cmd - SEV ioctl parameters | 201 | } |
242 | * | 202 | |
243 | diff --git a/linux-headers/LICENSES/exceptions/Linux-syscall-note b/linux-headers/LICENSES/exceptions/Linux-syscall-note | 203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) |
244 | index XXXXXXX..XXXXXXX 100644 | 204 | +{ |
245 | --- a/linux-headers/LICENSES/exceptions/Linux-syscall-note | 205 | + return do_recpe_f32(input, fpst, false); |
246 | +++ b/linux-headers/LICENSES/exceptions/Linux-syscall-note | 206 | +} |
247 | @@ -XXX,XX +XXX,XX @@ | 207 | + |
248 | SPDX-Exception-Identifier: Linux-syscall-note | 208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) |
249 | SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html | 209 | +{ |
250 | -SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+ | 210 | + return do_recpe_f32(input, fpst, true); |
251 | +SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+, GPL-2.0-only, GPL-2.0-or-later | 211 | +} |
252 | Usage-Guide: | 212 | + |
253 | This exception is used together with one of the above SPDX-Licenses | 213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) |
254 | to mark user space API (uapi) header files so they can be included | 214 | { |
255 | diff --git a/linux-headers/LICENSES/preferred/GPL-2.0 b/linux-headers/LICENSES/preferred/GPL-2.0 | 215 | float64 f64 = float64_squash_input_denormal(input, fpst); |
256 | index XXXXXXX..XXXXXXX 100644 | 216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) |
257 | --- a/linux-headers/LICENSES/preferred/GPL-2.0 | 217 | return make_float16(val); |
258 | +++ b/linux-headers/LICENSES/preferred/GPL-2.0 | 218 | } |
259 | @@ -XXX,XX +XXX,XX @@ | 219 | |
260 | Valid-License-Identifier: GPL-2.0 | 220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) |
261 | +Valid-License-Identifier: GPL-2.0-only | 221 | +/* |
262 | Valid-License-Identifier: GPL-2.0+ | 222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant |
263 | +Valid-License-Identifier: GPL-2.0-or-later | 223 | + * which is used when FPCR.AH == 1. |
264 | SPDX-URL: https://spdx.org/licenses/GPL-2.0.html | 224 | + */ |
265 | Usage-Guide: | 225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) |
266 | To use this license in source code, put one of the following SPDX | 226 | { |
267 | @@ -XXX,XX +XXX,XX @@ Usage-Guide: | 227 | float32 f32 = float32_squash_input_denormal(input, s); |
268 | guidelines in the licensing rules documentation. | 228 | uint32_t val = float32_val(f32); |
269 | For 'GNU General Public License (GPL) version 2 only' use: | 229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) |
270 | SPDX-License-Identifier: GPL-2.0 | 230 | return make_float32(val); |
271 | + or | 231 | } |
272 | + SPDX-License-Identifier: GPL-2.0-only | 232 | |
273 | For 'GNU General Public License (GPL) version 2 or any later version' use: | 233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) |
274 | SPDX-License-Identifier: GPL-2.0+ | 234 | +{ |
275 | + or | 235 | + return do_rsqrte_f32(input, s, false); |
276 | + SPDX-License-Identifier: GPL-2.0-or-later | 236 | +} |
277 | License-Text: | 237 | + |
278 | 238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | |
279 | GNU GENERAL PUBLIC LICENSE | 239 | +{ |
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
280 | -- | 246 | -- |
281 | 2.17.1 | 247 | 2.34.1 |
282 | |||
283 | diff view generated by jsdifflib |
1 | The interrupt outputs from the MPC in the IoTKit and the expansion | 1 | Implement the increased precision variation of FRECPE. In the |
---|---|---|---|
2 | MPCs in the board must be wired up to the security controller, and | 2 | pseudocode this corresponds to the handling of the |
3 | also all ORed together to produce a single line to the NVIC. | 3 | "increasedprecision" boolean in the FPRecipEstimate() and |
4 | RecipEstimate() functions. | ||
4 | 5 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20180620132032.28865-8-peter.maydell@linaro.org | ||
8 | --- | 8 | --- |
9 | include/hw/arm/iotkit.h | 6 ++++ | 9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ |
10 | hw/arm/iotkit.c | 74 +++++++++++++++++++++++++++++++++++++++++ | 10 | 1 file changed, 46 insertions(+), 8 deletions(-) |
11 | 2 files changed, 80 insertions(+) | ||
12 | 11 | ||
13 | diff --git a/include/hw/arm/iotkit.h b/include/hw/arm/iotkit.h | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/hw/arm/iotkit.h | 14 | --- a/target/arm/vfp_helper.c |
16 | +++ b/include/hw/arm/iotkit.h | 15 | +++ b/target/arm/vfp_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
18 | * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable | 17 | return r; |
19 | * + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear | ||
20 | * + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status | ||
21 | + * Controlling each of the 16 expansion MPCs which a system using the IoTKit | ||
22 | + * might provide: | ||
23 | + * + named GPIO inputs mpcexp_status[0..15] | ||
24 | */ | ||
25 | |||
26 | #ifndef IOTKIT_H | ||
27 | @@ -XXX,XX +XXX,XX @@ typedef struct IoTKit { | ||
28 | qemu_or_irq ppc_irq_orgate; | ||
29 | SplitIRQ sec_resp_splitter; | ||
30 | SplitIRQ ppc_irq_splitter[NUM_PPCS]; | ||
31 | + SplitIRQ mpc_irq_splitter[IOTS_NUM_EXP_MPC + IOTS_NUM_MPC]; | ||
32 | + qemu_or_irq mpc_irq_orgate; | ||
33 | |||
34 | UnimplementedDeviceState dualtimer; | ||
35 | UnimplementedDeviceState s32ktimer; | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct IoTKit { | ||
37 | qemu_irq nsc_cfg_in; | ||
38 | |||
39 | qemu_irq irq_status_in[NUM_EXTERNAL_PPCS]; | ||
40 | + qemu_irq mpcexp_status_in[IOTS_NUM_EXP_MPC]; | ||
41 | |||
42 | uint32_t nsccfg; | ||
43 | |||
44 | diff --git a/hw/arm/iotkit.c b/hw/arm/iotkit.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/hw/arm/iotkit.c | ||
47 | +++ b/hw/arm/iotkit.c | ||
48 | @@ -XXX,XX +XXX,XX @@ static void iotkit_init(Object *obj) | ||
49 | init_sysbus_child(obj, "apb-ppc1", &s->apb_ppc1, sizeof(s->apb_ppc1), | ||
50 | TYPE_TZ_PPC); | ||
51 | init_sysbus_child(obj, "mpc", &s->mpc, sizeof(s->mpc), TYPE_TZ_MPC); | ||
52 | + object_initialize(&s->mpc_irq_orgate, sizeof(s->mpc_irq_orgate), | ||
53 | + TYPE_OR_IRQ); | ||
54 | + object_property_add_child(obj, "mpc-irq-orgate", | ||
55 | + OBJECT(&s->mpc_irq_orgate), &error_abort); | ||
56 | + for (i = 0; i < ARRAY_SIZE(s->mpc_irq_splitter); i++) { | ||
57 | + char *name = g_strdup_printf("mpc-irq-splitter-%d", i); | ||
58 | + SplitIRQ *splitter = &s->mpc_irq_splitter[i]; | ||
59 | + | ||
60 | + object_initialize(splitter, sizeof(*splitter), TYPE_SPLIT_IRQ); | ||
61 | + object_property_add_child(obj, name, OBJECT(splitter), &error_abort); | ||
62 | + g_free(name); | ||
63 | + } | ||
64 | init_sysbus_child(obj, "timer0", &s->timer0, sizeof(s->timer0), | ||
65 | TYPE_CMSDK_APB_TIMER); | ||
66 | init_sysbus_child(obj, "timer1", &s->timer1, sizeof(s->timer1), | ||
67 | @@ -XXX,XX +XXX,XX @@ static void iotkit_exp_irq(void *opaque, int n, int level) | ||
68 | qemu_set_irq(s->exp_irqs[n], level); | ||
69 | } | 18 | } |
70 | 19 | ||
71 | +static void iotkit_mpcexp_status(void *opaque, int n, int level) | 20 | +/* |
21 | + * Increased precision version: | ||
22 | + * input is a 13 bit fixed point number | ||
23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. | ||
24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 | ||
25 | + */ | ||
26 | +static int recip_estimate_incprec(int input) | ||
72 | +{ | 27 | +{ |
73 | + IoTKit *s = IOTKIT(opaque); | 28 | + int a, b, r; |
74 | + qemu_set_irq(s->mpcexp_status_in[n], level); | 29 | + assert(2048 <= input && input < 4096); |
30 | + a = (input * 2) + 1; | ||
31 | + /* | ||
32 | + * The pseudocode expresses this as an operation on infinite | ||
33 | + * precision reals where it calculates 2^25 / a and then looks | ||
34 | + * at the error between that and the rounded-down-to-integer | ||
35 | + * value to see if it should instead round up. We instead | ||
36 | + * follow the same approach as the pseudocode for the 8-bit | ||
37 | + * precision version, and calculate (2 * (2^25 / a)) as an | ||
38 | + * integer so we can do the "add one and halve" to round it. | ||
39 | + * So the 1 << 26 here is correct. | ||
40 | + */ | ||
41 | + b = (1 << 26) / a; | ||
42 | + r = (b + 1) >> 1; | ||
43 | + assert(4096 <= r && r < 8192); | ||
44 | + return r; | ||
75 | +} | 45 | +} |
76 | + | 46 | + |
77 | static void iotkit_realize(DeviceState *dev, Error **errp) | 47 | /* |
48 | * Common wrapper to call recip_estimate | ||
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) | ||
51 | * callee. | ||
52 | */ | ||
53 | |||
54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) | ||
55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, | ||
56 | + bool increasedprecision) | ||
78 | { | 57 | { |
79 | IoTKit *s = IOTKIT(dev); | 58 | uint32_t scaled, estimate; |
80 | @@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp) | 59 | uint64_t result_frac; |
81 | sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mpc), | 60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) |
82 | 0)); | 61 | } |
83 | 62 | } | |
84 | + /* We must OR together lines from the MPC splitters to go to the NVIC */ | 63 | |
85 | + object_property_set_int(OBJECT(&s->mpc_irq_orgate), | 64 | - /* scaled = UInt('1':fraction<51:44>) */ |
86 | + IOTS_NUM_EXP_MPC + IOTS_NUM_MPC, "num-lines", &err); | 65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); |
87 | + if (err) { | 66 | - estimate = recip_estimate(scaled); |
88 | + error_propagate(errp, err); | 67 | + if (increasedprecision) { |
89 | + return; | 68 | + /* scaled = UInt('1':fraction<51:41>) */ |
69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
70 | + estimate = recip_estimate_incprec(scaled); | ||
71 | + } else { | ||
72 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
74 | + estimate = recip_estimate(scaled); | ||
90 | + } | 75 | + } |
91 | + object_property_set_bool(OBJECT(&s->mpc_irq_orgate), true, | 76 | |
92 | + "realized", &err); | 77 | result_exp = exp_off - *exp; |
93 | + if (err) { | 78 | - result_frac = deposit64(0, 44, 8, estimate); |
94 | + error_propagate(errp, err); | 79 | + if (increasedprecision) { |
95 | + return; | 80 | + result_frac = deposit64(0, 40, 12, estimate); |
81 | + } else { | ||
82 | + result_frac = deposit64(0, 44, 8, estimate); | ||
96 | + } | 83 | + } |
97 | + qdev_connect_gpio_out(DEVICE(&s->mpc_irq_orgate), 0, | 84 | if (result_exp == 0) { |
98 | + qdev_get_gpio_in(DEVICE(&s->armv7m), 9)); | 85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); |
99 | + | 86 | } else if (result_exp == -1) { |
100 | /* Devices behind APB PPC0: | 87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) |
101 | * 0x40000000: timer0 | ||
102 | * 0x40001000: timer1 | ||
103 | @@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp) | ||
104 | g_free(gpioname); | ||
105 | } | 88 | } |
106 | 89 | ||
107 | + /* Wire up the splitters for the MPC IRQs */ | 90 | f64_frac = call_recip_estimate(&f16_exp, 29, |
108 | + for (i = 0; i < IOTS_NUM_EXP_MPC + IOTS_NUM_MPC; i++) { | 91 | - ((uint64_t) f16_frac) << (52 - 10)); |
109 | + SplitIRQ *splitter = &s->mpc_irq_splitter[i]; | 92 | + ((uint64_t) f16_frac) << (52 - 10), false); |
110 | + DeviceState *dev_splitter = DEVICE(splitter); | 93 | |
111 | + | 94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ |
112 | + object_property_set_int(OBJECT(splitter), 2, "num-lines", &err); | 95 | f16_val = deposit32(0, 15, 1, f16_sign); |
113 | + if (err) { | 96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) |
114 | + error_propagate(errp, err); | 97 | } |
115 | + return; | 98 | |
116 | + } | 99 | f64_frac = call_recip_estimate(&f32_exp, 253, |
117 | + object_property_set_bool(OBJECT(splitter), true, "realized", &err); | 100 | - ((uint64_t) f32_frac) << (52 - 23)); |
118 | + if (err) { | 101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); |
119 | + error_propagate(errp, err); | 102 | |
120 | + return; | 103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ |
121 | + } | 104 | f32_val = deposit32(0, 31, 1, f32_sign); |
122 | + | 105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) |
123 | + if (i < IOTS_NUM_EXP_MPC) { | 106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); |
124 | + /* Splitter input is from GPIO input line */ | 107 | } |
125 | + s->mpcexp_status_in[i] = qdev_get_gpio_in(dev_splitter, 0); | 108 | |
126 | + qdev_connect_gpio_out(dev_splitter, 0, | 109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); |
127 | + qdev_get_gpio_in_named(dev_secctl, | 110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); |
128 | + "mpcexp_status", i)); | 111 | |
129 | + } else { | 112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ |
130 | + /* Splitter input is from our own MPC */ | 113 | f64_val = deposit64(0, 63, 1, f64_sign); |
131 | + qdev_connect_gpio_out_named(DEVICE(&s->mpc), "irq", 0, | ||
132 | + qdev_get_gpio_in(dev_splitter, 0)); | ||
133 | + qdev_connect_gpio_out(dev_splitter, 0, | ||
134 | + qdev_get_gpio_in_named(dev_secctl, | ||
135 | + "mpc_status", 0)); | ||
136 | + } | ||
137 | + | ||
138 | + qdev_connect_gpio_out(dev_splitter, 1, | ||
139 | + qdev_get_gpio_in(DEVICE(&s->mpc_irq_orgate), i)); | ||
140 | + } | ||
141 | + /* Create GPIO inputs which will pass the line state for our | ||
142 | + * mpcexp_irq inputs to the correct splitter devices. | ||
143 | + */ | ||
144 | + qdev_init_gpio_in_named(dev, iotkit_mpcexp_status, "mpcexp_status", | ||
145 | + IOTS_NUM_EXP_MPC); | ||
146 | + | ||
147 | iotkit_forward_sec_resp_cfg(s); | ||
148 | |||
149 | system_clock_scale = NANOSECONDS_PER_SECOND / s->mainclk_frq; | ||
150 | -- | 114 | -- |
151 | 2.17.1 | 115 | 2.34.1 |
152 | |||
153 | diff view generated by jsdifflib |
1 | The MPC is guest-configurable for whether blocked accesses: | 1 | Implement the increased precision variation of FRSQRTE. In the |
---|---|---|---|
2 | * should be RAZ/WI or cause a bus error | 2 | pseudocode this corresponds to the handling of the |
3 | * should generate an interrupt or not | 3 | "increasedprecision" boolean in the FPRSqrtEstimate() and |
4 | 4 | RecipSqrtEstimate() functions. | |
5 | Implement this behaviour in the blocked-access handlers. | ||
6 | 5 | ||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20180620132032.28865-4-peter.maydell@linaro.org | ||
10 | --- | 8 | --- |
11 | hw/misc/tz-mpc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- | 9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- |
12 | 1 file changed, 48 insertions(+), 2 deletions(-) | 10 | 1 file changed, 64 insertions(+), 13 deletions(-) |
13 | 11 | ||
14 | diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/misc/tz-mpc.c | 14 | --- a/target/arm/vfp_helper.c |
17 | +++ b/hw/misc/tz-mpc.c | 15 | +++ b/target/arm/vfp_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ REG32(INT_EN, 0x28) | 16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) |
19 | FIELD(INT_EN, IRQ, 0, 1) | 17 | return estimate; |
20 | REG32(INT_INFO1, 0x2c) | 18 | } |
21 | REG32(INT_INFO2, 0x30) | 19 | |
22 | + FIELD(INT_INFO2, HMASTER, 0, 16) | 20 | +static int do_recip_sqrt_estimate_incprec(int a) |
23 | + FIELD(INT_INFO2, HNONSEC, 16, 1) | ||
24 | + FIELD(INT_INFO2, CFG_NS, 17, 1) | ||
25 | REG32(INT_SET, 0x34) | ||
26 | FIELD(INT_SET, IRQ, 0, 1) | ||
27 | REG32(PIDR4, 0xfd0) | ||
28 | @@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps tz_mpc_reg_ops = { | ||
29 | .impl.max_access_size = 4, | ||
30 | }; | ||
31 | |||
32 | +static inline bool tz_mpc_cfg_ns(TZMPC *s, hwaddr addr) | ||
33 | +{ | 21 | +{ |
34 | + /* Return the cfg_ns bit from the LUT for the specified address */ | 22 | + /* |
35 | + hwaddr blknum = addr / s->blocksize; | 23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate |
36 | + hwaddr blkword = blknum / 32; | 24 | + * in terms of an infinite-precision floating point calculation of a |
37 | + uint32_t blkbit = 1U << (blknum % 32); | 25 | + * square root. We implement this using the same kind of pure integer |
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
27 | + */ | ||
28 | + int64_t b, estimate; | ||
29 | |||
30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
31 | + assert(1024 <= a && a < 4096); | ||
32 | + if (a < 2048) { | ||
33 | + a = a * 2 + 1; | ||
34 | + } else { | ||
35 | + a = (a >> 1) << 1; | ||
36 | + a = (a + 1) * 2; | ||
37 | + } | ||
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
38 | + | 43 | + |
39 | + /* This would imply the address was larger than the size we | 44 | + assert(4096 <= estimate && estimate < 8192); |
40 | + * defined this memory region to be, so it can't happen. | 45 | + |
41 | + */ | 46 | + return estimate; |
42 | + assert(blkword < s->blk_max); | ||
43 | + return s->blk_lut[blkword] & blkbit; | ||
44 | +} | 47 | +} |
45 | + | 48 | + |
46 | +static MemTxResult tz_mpc_handle_block(TZMPC *s, hwaddr addr, MemTxAttrs attrs) | 49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, |
47 | +{ | 50 | + bool increasedprecision) |
48 | + /* Handle a blocked transaction: raise IRQ, capture info, etc */ | 51 | { |
49 | + if (!s->int_stat) { | 52 | int estimate; |
50 | + /* First blocked transfer: capture information into INT_INFO1 and | 53 | uint32_t scaled; |
51 | + * INT_INFO2. Subsequent transfers are still blocked but don't | 54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) |
52 | + * capture information until the guest clears the interrupt. | 55 | frac = extract64(frac, 0, 51) << 1; |
53 | + */ | 56 | } |
54 | + | 57 | |
55 | + s->int_info1 = addr; | 58 | - if (*exp & 1) { |
56 | + s->int_info2 = 0; | 59 | - /* scaled = UInt('01':fraction<51:45>) */ |
57 | + s->int_info2 = FIELD_DP32(s->int_info2, INT_INFO2, HMASTER, | 60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); |
58 | + attrs.requester_id & 0xffff); | 61 | + if (increasedprecision) { |
59 | + s->int_info2 = FIELD_DP32(s->int_info2, INT_INFO2, HNONSEC, | 62 | + if (*exp & 1) { |
60 | + ~attrs.secure); | 63 | + /* scaled = UInt('01':fraction<51:42>) */ |
61 | + s->int_info2 = FIELD_DP32(s->int_info2, INT_INFO2, CFG_NS, | 64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); |
62 | + tz_mpc_cfg_ns(s, addr)); | 65 | + } else { |
63 | + s->int_stat |= R_INT_STAT_IRQ_MASK; | 66 | + /* scaled = UInt('1':fraction<51:41>) */ |
64 | + tz_mpc_irq_update(s); | 67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); |
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
65 | + } | 90 | + } |
66 | + | ||
67 | + /* Generate bus error if desired; otherwise RAZ/WI */ | ||
68 | + return (s->ctrl & R_CTRL_SEC_RESP_MASK) ? MEMTX_ERROR : MEMTX_OK; | ||
69 | +} | ||
70 | + | ||
71 | /* Accesses only reach these read and write functions if the MPC is | ||
72 | * blocking them; non-blocked accesses go directly to the downstream | ||
73 | * memory region without passing through this code. | ||
74 | @@ -XXX,XX +XXX,XX @@ static MemTxResult tz_mpc_mem_blocked_read(void *opaque, hwaddr addr, | ||
75 | uint64_t *pdata, | ||
76 | unsigned size, MemTxAttrs attrs) | ||
77 | { | ||
78 | + TZMPC *s = TZ_MPC(opaque); | ||
79 | + | ||
80 | trace_tz_mpc_mem_blocked_read(addr, size, attrs.secure); | ||
81 | |||
82 | *pdata = 0; | ||
83 | - return MEMTX_OK; | ||
84 | + return tz_mpc_handle_block(s, addr, attrs); | ||
85 | } | 91 | } |
86 | 92 | ||
87 | static MemTxResult tz_mpc_mem_blocked_write(void *opaque, hwaddr addr, | 93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) |
88 | uint64_t value, | 94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) |
89 | unsigned size, MemTxAttrs attrs) | 95 | |
90 | { | 96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); |
91 | + TZMPC *s = TZ_MPC(opaque); | 97 | |
92 | + | 98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); |
93 | trace_tz_mpc_mem_blocked_write(addr, value, size, attrs.secure); | 99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); |
94 | 100 | ||
95 | - return MEMTX_OK; | 101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ |
96 | + return tz_mpc_handle_block(s, addr, attrs); | 102 | val = deposit32(0, 15, 1, f16_sign); |
103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
104 | |||
105 | f64_frac = ((uint64_t) f32_frac) << 29; | ||
106 | |||
107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); | ||
108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); | ||
109 | |||
110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ | ||
111 | + /* | ||
112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) | ||
113 | + * or for increased precision | ||
114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) | ||
115 | + */ | ||
116 | val = deposit32(0, 31, 1, f32_sign); | ||
117 | val = deposit32(val, 23, 8, f32_exp); | ||
118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
119 | + if (rpres) { | ||
120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); | ||
121 | + } else { | ||
122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
123 | + } | ||
124 | return make_float32(val); | ||
97 | } | 125 | } |
98 | 126 | ||
99 | static const MemoryRegionOps tz_mpc_mem_blocked_ops = { | 127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) |
128 | return float64_zero; | ||
129 | } | ||
130 | |||
131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); | ||
132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | ||
133 | |||
134 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ | ||
135 | val = deposit64(0, 61, 1, f64_sign); | ||
100 | -- | 136 | -- |
101 | 2.17.1 | 137 | 2.34.1 |
102 | |||
103 | diff view generated by jsdifflib |
1 | checkpatch reminds us that statics shouldn't be zero-initialized: | 1 | Now the emulation is complete, we can enable FEAT_RPRES for the 'max' |
---|---|---|---|
2 | 2 | CPU type. | |
3 | ERROR: do not initialise statics to 0 or NULL | ||
4 | #35: FILE: vl.c:157: | ||
5 | +static int num_serial_hds = 0; | ||
6 | |||
7 | ERROR: do not initialise statics to 0 or NULL | ||
8 | #36: FILE: vl.c:158: | ||
9 | +static Chardev **serial_hds = NULL; | ||
10 | |||
11 | I forgot to fix this in 6af2692e86f9fdfb3d; do so now. | ||
12 | 3 | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | Reviewed-by: Thomas Huth <thuth@redhat.com> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | Message-id: 20180426140253.3918-1-peter.maydell@linaro.org | ||
16 | --- | 6 | --- |
17 | vl.c | 4 ++-- | 7 | docs/system/arm/emulation.rst | 1 + |
18 | 1 file changed, 2 insertions(+), 2 deletions(-) | 8 | target/arm/tcg/cpu64.c | 1 + |
9 | 2 files changed, 2 insertions(+) | ||
19 | 10 | ||
20 | diff --git a/vl.c b/vl.c | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
21 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/vl.c | 13 | --- a/docs/system/arm/emulation.rst |
23 | +++ b/vl.c | 14 | +++ b/docs/system/arm/emulation.rst |
24 | @@ -XXX,XX +XXX,XX @@ QEMUClockType rtc_clock; | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
25 | int vga_interface_type = VGA_NONE; | 16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) |
26 | static DisplayOptions dpy; | 17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) |
27 | int no_frame; | 18 | - FEAT_RNG (Random number generator) |
28 | -static int num_serial_hds = 0; | 19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) |
29 | -static Chardev **serial_hds = NULL; | 20 | - FEAT_S2FWB (Stage 2 forced Write-Back) |
30 | +static int num_serial_hds; | 21 | - FEAT_SB (Speculation Barrier) |
31 | +static Chardev **serial_hds; | 22 | - FEAT_SEL2 (Secure EL2) |
32 | Chardev *parallel_hds[MAX_PARALLEL_PORTS]; | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
33 | Chardev *virtcon_hds[MAX_VIRTIO_CONSOLES]; | 24 | index XXXXXXX..XXXXXXX 100644 |
34 | int win2k_install_hack = 0; | 25 | --- a/target/arm/tcg/cpu64.c |
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | cpu->isar.id_aa64isar1 = t; | ||
29 | |||
30 | t = cpu->isar.id_aa64isar2; | ||
31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ | ||
32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ | ||
33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ | ||
34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ | ||
35 | -- | 35 | -- |
36 | 2.17.1 | 36 | 2.34.1 |
37 | |||
38 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute, the attribute | 3 | Move ARMFPStatusFlavour to cpu.h with which to index |
4 | data pointed to by kvm_device_attr.addr is a OR of the | 4 | this array. For now, place the array in an anonymous |
5 | redistributor region address and other fields such as the index | 5 | union with the existing structures. Adjust the order |
6 | of the redistributor region and the number of redistributors the | 6 | of the existing structures to match the enum. |
7 | region can contain. | 7 | |
8 | 8 | Simplify fpstatus_ptr() using the new array. | |
9 | The existing machine init done notifier framework sets the address | 9 | |
10 | field to the actual address of the device and does not allow to OR | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | this value with other fields. | 11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
12 | 12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org | |
13 | This patch extends the KVMDevice struct with a new kda_addr_ormask | ||
14 | member. Its value is passed at registration time and OR'ed with the | ||
15 | resolved address on kvm_arm_set_device_addr(). | ||
16 | |||
17 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
18 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Message-id: 1529072910-16156-3-git-send-email-eric.auger@redhat.com | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | --- | 14 | --- |
22 | target/arm/kvm_arm.h | 3 ++- | 15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- |
23 | hw/intc/arm_gic_kvm.c | 4 ++-- | 16 | target/arm/tcg/translate.h | 64 +------------------- |
24 | hw/intc/arm_gicv3_its_kvm.c | 2 +- | 17 | 2 files changed, 70 insertions(+), 113 deletions(-) |
25 | hw/intc/arm_gicv3_kvm.c | 4 ++-- | 18 | |
26 | target/arm/kvm.c | 10 +++++++++- | 19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
27 | 5 files changed, 16 insertions(+), 7 deletions(-) | ||
28 | |||
29 | diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/target/arm/kvm_arm.h | 21 | --- a/target/arm/cpu.h |
32 | +++ b/target/arm/kvm_arm.h | 22 | +++ b/target/arm/cpu.h |
33 | @@ -XXX,XX +XXX,XX @@ int kvm_arm_vcpu_init(CPUState *cs); | 23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; |
34 | * @group: device control API group for setting addresses | 24 | |
35 | * @attr: device control API address type | 25 | typedef struct NVICState NVICState; |
36 | * @dev_fd: device control device file descriptor (or -1 if not supported) | 26 | |
37 | + * @addr_ormask: value to be OR'ed with resolved address | 27 | +/* |
28 | + * Enum for indexing vfp.fp_status[]. | ||
29 | + * | ||
30 | + * FPST_A32: is the "normal" fp status for AArch32 insns | ||
31 | + * FPST_A64: is the "normal" fp status for AArch64 insns | ||
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest and is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/tcg/translate.h | ||
159 | +++ b/target/arm/tcg/translate.h | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | ||
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
162 | } | ||
163 | |||
164 | -/* | ||
165 | - * Enum for argument to fpstatus_ptr(). | ||
166 | - */ | ||
167 | -typedef enum ARMFPStatusFlavour { | ||
168 | - FPST_A32, | ||
169 | - FPST_A64, | ||
170 | - FPST_A32_F16, | ||
171 | - FPST_A64_F16, | ||
172 | - FPST_AH, | ||
173 | - FPST_AH_F16, | ||
174 | - FPST_STD, | ||
175 | - FPST_STD_F16, | ||
176 | -} ARMFPStatusFlavour; | ||
177 | - | ||
178 | /** | ||
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
38 | * | 180 | * |
39 | * Remember the memory region @mr, and when it is mapped by the | 181 | * We have multiple softfloat float_status fields in the Arm CPU state struct |
40 | * machine model, tell the kernel that base address using the | 182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has |
41 | @@ -XXX,XX +XXX,XX @@ int kvm_arm_vcpu_init(CPUState *cs); | 183 | * been set up to point to the requested field in the CPU state struct. |
42 | * address at the point where machine init is complete. | 184 | - * The options are: |
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
43 | */ | 204 | */ |
44 | void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, | 205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) |
45 | - uint64_t attr, int dev_fd); | ||
46 | + uint64_t attr, int dev_fd, uint64_t addr_ormask); | ||
47 | |||
48 | /** | ||
49 | * kvm_arm_init_cpreg_list: | ||
50 | diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/hw/intc/arm_gic_kvm.c | ||
53 | +++ b/hw/intc/arm_gic_kvm.c | ||
54 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) | ||
55 | | KVM_VGIC_V2_ADDR_TYPE_DIST, | ||
56 | KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
57 | KVM_VGIC_V2_ADDR_TYPE_DIST, | ||
58 | - s->dev_fd); | ||
59 | + s->dev_fd, 0); | ||
60 | /* CPU interface for current core. Unlike arm_gic, we don't | ||
61 | * provide the "interface for core #N" memory regions, because | ||
62 | * cores with a VGIC don't have those. | ||
63 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) | ||
64 | | KVM_VGIC_V2_ADDR_TYPE_CPU, | ||
65 | KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
66 | KVM_VGIC_V2_ADDR_TYPE_CPU, | ||
67 | - s->dev_fd); | ||
68 | + s->dev_fd, 0); | ||
69 | |||
70 | if (kvm_has_gsi_routing()) { | ||
71 | /* set up irq routing */ | ||
72 | diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/hw/intc/arm_gicv3_its_kvm.c | ||
75 | +++ b/hw/intc/arm_gicv3_its_kvm.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp) | ||
77 | |||
78 | /* register the base address */ | ||
79 | kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
80 | - KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd); | ||
81 | + KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd, 0); | ||
82 | |||
83 | gicv3_its_init_mmio(s, NULL); | ||
84 | |||
85 | diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/hw/intc/arm_gicv3_kvm.c | ||
88 | +++ b/hw/intc/arm_gicv3_kvm.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) | ||
90 | KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true, &error_abort); | ||
91 | |||
92 | kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
93 | - KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd); | ||
94 | + KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); | ||
95 | kvm_arm_register_device(&s->iomem_redist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, | ||
96 | - KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd); | ||
97 | + KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); | ||
98 | |||
99 | if (kvm_has_gsi_routing()) { | ||
100 | /* set up irq routing */ | ||
101 | diff --git a/target/arm/kvm.c b/target/arm/kvm.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/arm/kvm.c | ||
104 | +++ b/target/arm/kvm.c | ||
105 | @@ -XXX,XX +XXX,XX @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) | ||
106 | * We use a MemoryListener to track mapping and unmapping of | ||
107 | * the regions during board creation, so the board models don't | ||
108 | * need to do anything special for the KVM case. | ||
109 | + * | ||
110 | + * Sometimes the address must be OR'ed with some other fields | ||
111 | + * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION). | ||
112 | + * @kda_addr_ormask aims at storing the value of those fields. | ||
113 | */ | ||
114 | typedef struct KVMDevice { | ||
115 | struct kvm_arm_device_addr kda; | ||
116 | struct kvm_device_attr kdattr; | ||
117 | + uint64_t kda_addr_ormask; | ||
118 | MemoryRegion *mr; | ||
119 | QSLIST_ENTRY(KVMDevice) entries; | ||
120 | int dev_fd; | ||
121 | @@ -XXX,XX +XXX,XX @@ static void kvm_arm_set_device_addr(KVMDevice *kd) | ||
122 | */ | ||
123 | if (kd->dev_fd >= 0) { | ||
124 | uint64_t addr = kd->kda.addr; | ||
125 | + | ||
126 | + addr |= kd->kda_addr_ormask; | ||
127 | attr->addr = (uintptr_t)&addr; | ||
128 | ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); | ||
129 | } else { | ||
130 | @@ -XXX,XX +XXX,XX @@ static Notifier notify = { | ||
131 | }; | ||
132 | |||
133 | void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, | ||
134 | - uint64_t attr, int dev_fd) | ||
135 | + uint64_t attr, int dev_fd, uint64_t addr_ormask) | ||
136 | { | 206 | { |
137 | KVMDevice *kd; | 207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); |
138 | 208 | - int offset; | |
139 | @@ -XXX,XX +XXX,XX @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, | 209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); |
140 | kd->kdattr.group = group; | 210 | |
141 | kd->kdattr.attr = attr; | 211 | - switch (flavour) { |
142 | kd->dev_fd = dev_fd; | 212 | - case FPST_A32: |
143 | + kd->kda_addr_ormask = addr_ormask; | 213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); |
144 | QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); | 214 | - break; |
145 | memory_region_ref(kd->mr); | 215 | - case FPST_A64: |
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
236 | - default: | ||
237 | - g_assert_not_reached(); | ||
238 | - } | ||
239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | ||
240 | return statusptr; | ||
146 | } | 241 | } |
147 | -- | 242 | -- |
148 | 2.17.1 | 243 | 2.34.1 |
149 | 244 | ||
150 | 245 | diff view generated by jsdifflib |
1 | From: Julia Suvorova <jusual@mail.ru> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This feature is intended to distinguish ARMv8-M variants: Baseline and | 3 | Replace with fp_status[FPST_STD_F16]. |
4 | Mainline. ARMv7-M compatibility requires the Main Extension. ARMv6-M | ||
5 | compatibility is provided by all ARMv8-M implementations. | ||
6 | 4 | ||
7 | Signed-off-by: Julia Suvorova <jusual@mail.ru> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20180622080138.17702-2-jusual@mail.ru | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org |
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 9 | --- |
12 | target/arm/cpu.h | 1 + | 10 | target/arm/cpu.h | 1 - |
13 | target/arm/cpu.c | 3 +++ | 11 | target/arm/cpu.c | 4 ++-- |
14 | 2 files changed, 4 insertions(+) | 12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ |
13 | target/arm/vfp_helper.c | 8 ++++---- | ||
14 | 4 files changed, 18 insertions(+), 19 deletions(-) | ||
15 | 15 | ||
16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/cpu.h | 18 | --- a/target/arm/cpu.h |
19 | +++ b/target/arm/cpu.h | 19 | +++ b/target/arm/cpu.h |
20 | @@ -XXX,XX +XXX,XX @@ enum arm_features { | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
21 | ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */ | 21 | float_status ah_fp_status; |
22 | ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ | 22 | float_status ah_fp_status_f16; |
23 | ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */ | 23 | float_status standard_fp_status; |
24 | + ARM_FEATURE_M_MAIN, /* M profile Main Extension */ | 24 | - float_status standard_fp_status_f16; |
25 | }; | 25 | }; |
26 | 26 | }; | |
27 | static inline int arm_feature(CPUARMState *env, int feature) | 27 | |
28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
29 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/target/arm/cpu.c | 30 | --- a/target/arm/cpu.c |
31 | +++ b/target/arm/cpu.c | 31 | +++ b/target/arm/cpu.c |
32 | @@ -XXX,XX +XXX,XX @@ static void cortex_m3_initfn(Object *obj) | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
33 | ARMCPU *cpu = ARM_CPU(obj); | 33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); |
34 | set_feature(&cpu->env, ARM_FEATURE_V7); | 34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
35 | set_feature(&cpu->env, ARM_FEATURE_M); | 35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); |
36 | + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); | 36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); |
37 | cpu->midr = 0x410fc231; | 37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
38 | cpu->pmsav7_dregion = 8; | 38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
39 | cpu->id_pfr0 = 0x00000030; | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
40 | @@ -XXX,XX +XXX,XX @@ static void cortex_m4_initfn(Object *obj) | 40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
41 | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | |
42 | set_feature(&cpu->env, ARM_FEATURE_V7); | 42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
43 | set_feature(&cpu->env, ARM_FEATURE_M); | 43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); |
44 | + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); | 44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
45 | set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); | 45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); |
46 | cpu->midr = 0x410fc240; /* r0p0 */ | 46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); |
47 | cpu->pmsav7_dregion = 8; | 47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); |
48 | @@ -XXX,XX +XXX,XX @@ static void cortex_m33_initfn(Object *obj) | 48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
49 | 49 | index XXXXXXX..XXXXXXX 100644 | |
50 | set_feature(&cpu->env, ARM_FEATURE_V8); | 50 | --- a/target/arm/tcg/mve_helper.c |
51 | set_feature(&cpu->env, ARM_FEATURE_M); | 51 | +++ b/target/arm/tcg/mve_helper.c |
52 | + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); | 52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
53 | set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); | 53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
54 | set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); | 54 | continue; \ |
55 | cpu->midr = 0x410fd213; /* r0p3 */ | 55 | } \ |
56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
58 | &env->vfp.standard_fp_status; \ | ||
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
194 | } | ||
195 | if (changed & FPCR_FZ) { | ||
56 | -- | 196 | -- |
57 | 2.17.1 | 197 | 2.34.1 |
58 | 198 | ||
59 | 199 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | With this patch, virt-3.0 machine uses a new 256MB ECAM region | 3 | Replace with fp_status[FPST_STD]. |
4 | by default instead of the legacy 16MB one, if highmem is set | 4 | |
5 | (LPAE supported by the guest) and (!firmware_loaded || aarch64). | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | |
7 | Indeed aarch32 mode FW may not support this high ECAM region. | 7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org |
8 | |||
9 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Reviewed-by: Laszlo Ersek <lersek@redhat.com> | ||
11 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
12 | Message-id: 1529072910-16156-11-git-send-email-eric.auger@redhat.com | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 9 | --- |
15 | include/hw/arm/virt.h | 1 + | 10 | target/arm/cpu.h | 1 - |
16 | hw/arm/virt.c | 10 ++++++++++ | 11 | target/arm/cpu.c | 8 ++++---- |
17 | 2 files changed, 11 insertions(+) | 12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- |
18 | 13 | target/arm/tcg/vec_helper.c | 4 ++-- | |
19 | diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h | 14 | target/arm/vfp_helper.c | 4 ++-- |
20 | index XXXXXXX..XXXXXXX 100644 | 15 | 5 files changed, 22 insertions(+), 23 deletions(-) |
21 | --- a/include/hw/arm/virt.h | 16 | |
22 | +++ b/include/hw/arm/virt.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 18 | index XXXXXXX..XXXXXXX 100644 |
24 | bool no_pmu; | 19 | --- a/target/arm/cpu.h |
25 | bool claim_edge_triggered_timers; | 20 | +++ b/target/arm/cpu.h |
26 | bool smbios_old_sys_ver; | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
27 | + bool no_highmem_ecam; | 22 | float_status fp_status_f16_a64; |
28 | } VirtMachineClass; | 23 | float_status ah_fp_status; |
29 | 24 | float_status ah_fp_status_f16; | |
30 | typedef struct { | 25 | - float_status standard_fp_status; |
31 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 26 | }; |
32 | index XXXXXXX..XXXXXXX 100644 | 27 | }; |
33 | --- a/hw/arm/virt.c | 28 | |
34 | +++ b/hw/arm/virt.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
35 | @@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine) | 30 | index XXXXXXX..XXXXXXX 100644 |
36 | int n, virt_max_cpus; | 31 | --- a/target/arm/cpu.c |
37 | MemoryRegion *ram = g_new(MemoryRegion, 1); | 32 | +++ b/target/arm/cpu.c |
38 | bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0); | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
39 | + bool aarch64 = true; | 34 | env->sau.ctrl = 0; |
40 | |||
41 | /* We can probe only here because during property set | ||
42 | * KVM is not available yet | ||
43 | @@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine) | ||
44 | numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), | ||
45 | &error_fatal); | ||
46 | |||
47 | + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); | ||
48 | + | ||
49 | if (!vms->secure) { | ||
50 | object_property_set_bool(cpuobj, false, "has_el3", NULL); | ||
51 | } | ||
52 | @@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine) | ||
53 | create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); | ||
54 | } | 35 | } |
55 | 36 | ||
56 | + vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64); | 37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); |
57 | + | 38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
58 | create_rtc(vms, pic); | 39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
59 | 40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); | |
60 | create_pcie(vms, pic); | 41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); |
61 | @@ -XXX,XX +XXX,XX @@ static void virt_3_0_instance_init(Object *obj) | 42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
62 | "Set GIC version. " | 43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
63 | "Valid values are 2, 3 and host", NULL); | 44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
64 | 45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | |
65 | + vms->highmem_ecam = !vmc->no_highmem_ecam; | 46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); |
66 | + | 47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
67 | if (vmc->no_its) { | 48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); |
68 | vms->its = false; | 49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
69 | } else { | 50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); |
70 | @@ -XXX,XX +XXX,XX @@ static void virt_2_12_instance_init(Object *obj) | 51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
71 | 52 | index XXXXXXX..XXXXXXX 100644 | |
72 | static void virt_machine_2_12_options(MachineClass *mc) | 53 | --- a/target/arm/tcg/mve_helper.c |
73 | { | 54 | +++ b/target/arm/tcg/mve_helper.c |
74 | + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); | 55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
75 | + | 56 | continue; \ |
76 | virt_machine_3_0_options(mc); | 57 | } \ |
77 | SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_12); | 58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
78 | + vmc->no_highmem_ecam = true; | 59 | - &env->vfp.standard_fp_status; \ |
60 | + &env->vfp.fp_status[FPST_STD]; \ | ||
61 | if (!(mask & 1)) { \ | ||
62 | /* We need the result but without updating flags */ \ | ||
63 | scratch_fpst = *fpst; \ | ||
64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
65 | continue; \ | ||
66 | } \ | ||
67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
79 | } | 192 | } |
80 | DEFINE_VIRT_MACHINE(2, 12) | 193 | |
81 | 194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | |
195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
197 | |||
198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
201 | } | ||
202 | |||
203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/target/arm/vfp_helper.c | ||
206 | +++ b/target/arm/vfp_helper.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
208 | uint32_t a32_flags = 0, a64_flags = 0; | ||
209 | |||
210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
213 | /* FZ16 does not generate an input denormal exception. */ | ||
214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
215 | & ~float_flag_input_denormal_flushed); | ||
216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
82 | -- | 225 | -- |
83 | 2.17.1 | 226 | 2.34.1 |
84 | 227 | ||
85 | 228 | diff view generated by jsdifflib |
1 | The xen pci_assign_dev_load_option_rom() currently creates a RAM | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | memory region with memory_region_init_ram_nomigrate(), and then | ||
3 | manually registers it with vmstate_register_ram(). In fact for | ||
4 | its only callsite, the 'owner' pointer we use for the init call | ||
5 | and the '&dev->qdev' pointer we use for the vmstate_register_ram() | ||
6 | call refer to the same object. Simplify the function to only | ||
7 | take a pointer to the device once instead of twice, and use | ||
8 | memory_region_init_ram() which automatically does the vmstate | ||
9 | register for us. | ||
10 | 2 | ||
11 | Acked-by: Anthony PERARD <anthony.perard@citrix.com> | 3 | Replace with fp_status[FPST_AH_F16]. |
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 9 | --- |
14 | hw/xen/xen_pt.h | 2 +- | 10 | target/arm/cpu.h | 3 +-- |
15 | hw/xen/xen_pt_graphics.c | 2 +- | 11 | target/arm/cpu.c | 2 +- |
16 | hw/xen/xen_pt_load_rom.c | 6 +++--- | 12 | target/arm/vfp_helper.c | 10 +++++----- |
17 | 3 files changed, 5 insertions(+), 5 deletions(-) | 13 | 3 files changed, 7 insertions(+), 8 deletions(-) |
18 | 14 | ||
19 | diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
20 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/xen/xen_pt.h | 17 | --- a/target/arm/cpu.h |
22 | +++ b/hw/xen/xen_pt.h | 18 | +++ b/target/arm/cpu.h |
23 | @@ -XXX,XX +XXX,XX @@ static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar) | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
20 | * behaviour when FPCR.AH == 1: they don't update cumulative | ||
21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
23 | - * which means we need an ah_fp_status_f16 as well. | ||
24 | + * which means we need an FPST_AH_F16 as well. | ||
25 | * | ||
26 | * To avoid having to transfer exception bits around, we simply | ||
27 | * say that the FPSCR cumulative exception flags are the logical | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_f16_a32; | ||
30 | float_status fp_status_f16_a64; | ||
31 | float_status ah_fp_status; | ||
32 | - float_status ah_fp_status_f16; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
46 | |||
47 | #ifndef CONFIG_USER_ONLY | ||
48 | if (kvm_enabled()) { | ||
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/vfp_helper.c | ||
52 | +++ b/target/arm/vfp_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
56 | /* | ||
57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
59 | * they are used for insns that must not set the cumulative exception bits. | ||
60 | */ | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
24 | } | 68 | } |
25 | 69 | ||
26 | extern void *pci_assign_dev_load_option_rom(PCIDevice *dev, | 70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) |
27 | - struct Object *owner, int *size, | 71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
28 | + int *size, | 72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
29 | unsigned int domain, | 73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
30 | unsigned int bus, unsigned int slot, | 74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
31 | unsigned int function); | 75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
32 | diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c | 76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
33 | index XXXXXXX..XXXXXXX 100644 | 77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
34 | --- a/hw/xen/xen_pt_graphics.c | 78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); |
35 | +++ b/hw/xen/xen_pt_graphics.c | 79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
36 | @@ -XXX,XX +XXX,XX @@ int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) | 80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); |
37 | static void *get_vgabios(XenPCIPassthroughState *s, int *size, | 81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
38 | XenHostPCIDevice *dev) | 82 | } |
39 | { | 83 | if (changed & FPCR_FZ) { |
40 | - return pci_assign_dev_load_option_rom(&s->dev, OBJECT(&s->dev), size, | 84 | bool ftz_enabled = val & FPCR_FZ; |
41 | + return pci_assign_dev_load_option_rom(&s->dev, size, | 85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
42 | dev->domain, dev->bus, | 86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
43 | dev->dev, dev->func); | 87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
44 | } | 88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); |
45 | diff --git a/hw/xen/xen_pt_load_rom.c b/hw/xen/xen_pt_load_rom.c | 89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); |
46 | index XXXXXXX..XXXXXXX 100644 | 90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
47 | --- a/hw/xen/xen_pt_load_rom.c | 91 | } |
48 | +++ b/hw/xen/xen_pt_load_rom.c | 92 | if (changed & FPCR_AH) { |
49 | @@ -XXX,XX +XXX,XX @@ | 93 | bool ah_enabled = val & FPCR_AH; |
50 | * load the corresponding ROM data to RAM. If an error occurs while loading an | ||
51 | * option ROM, we just ignore that option ROM and continue with the next one. | ||
52 | */ | ||
53 | -void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, | ||
54 | +void *pci_assign_dev_load_option_rom(PCIDevice *dev, | ||
55 | int *size, unsigned int domain, | ||
56 | unsigned int bus, unsigned int slot, | ||
57 | unsigned int function) | ||
58 | @@ -XXX,XX +XXX,XX @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, | ||
59 | uint8_t val; | ||
60 | struct stat st; | ||
61 | void *ptr = NULL; | ||
62 | + Object *owner = OBJECT(dev); | ||
63 | |||
64 | /* If loading ROM from file, pci handles it */ | ||
65 | if (dev->romfile || !dev->rom_bar) { | ||
66 | @@ -XXX,XX +XXX,XX @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, | ||
67 | fseek(fp, 0, SEEK_SET); | ||
68 | |||
69 | snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); | ||
70 | - memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size, &error_abort); | ||
71 | - vmstate_register_ram(&dev->rom, &dev->qdev); | ||
72 | + memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort); | ||
73 | ptr = memory_region_get_ram_ptr(&dev->rom); | ||
74 | memset(ptr, 0xff, st.st_size); | ||
75 | |||
76 | -- | 94 | -- |
77 | 2.17.1 | 95 | 2.34.1 |
78 | 96 | ||
79 | 97 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add virt-3.0 machine type. | 3 | Replace with fp_status[FPST_AH]. |
4 | 4 | ||
5 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Laszlo Ersek <lersek@redhat.com> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Reviewed-by: Andrew Jones <drjones@redhat.com> | 7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org |
8 | Message-id: 1529072910-16156-10-git-send-email-eric.auger@redhat.com | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | hw/arm/virt.c | 15 +++++++++++++-- | 10 | target/arm/cpu.h | 3 +-- |
12 | 1 file changed, 13 insertions(+), 2 deletions(-) | 11 | target/arm/cpu.c | 6 +++--- |
12 | target/arm/vfp_helper.c | 6 +++--- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
13 | 14 | ||
14 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/arm/virt.c | 17 | --- a/target/arm/cpu.h |
17 | +++ b/hw/arm/virt.c | 18 | +++ b/target/arm/cpu.h |
18 | @@ -XXX,XX +XXX,XX @@ type_init(machvirt_machine_init); | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
19 | #define VIRT_COMPAT_2_12 \ | 20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
20 | HW_COMPAT_2_12 | 21 | * using a fixed value for it. |
21 | 22 | * | |
22 | -static void virt_2_12_instance_init(Object *obj) | 23 | - * The ah_fp_status is needed because some insns have different |
23 | +static void virt_3_0_instance_init(Object *obj) | 24 | + * FPST_AH is needed because some insns have different |
24 | { | 25 | * behaviour when FPCR.AH == 1: they don't update cumulative |
25 | VirtMachineState *vms = VIRT_MACHINE(obj); | 26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
26 | VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); | 27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
27 | @@ -XXX,XX +XXX,XX @@ static void virt_2_12_instance_init(Object *obj) | 28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
28 | vms->irqmap = a15irqmap; | 29 | float_status fp_status_a64; |
30 | float_status fp_status_f16_a32; | ||
31 | float_status fp_status_f16_a64; | ||
32 | - float_status ah_fp_status; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
29 | } | 73 | } |
30 | 74 | ||
31 | +static void virt_machine_3_0_options(MachineClass *mc) | 75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
32 | +{ | 76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); |
33 | +} | 77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); |
34 | +DEFINE_VIRT_MACHINE_AS_LATEST(3, 0) | 78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); |
35 | + | 79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); |
36 | +static void virt_2_12_instance_init(Object *obj) | 80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); |
37 | +{ | 81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
38 | + virt_3_0_instance_init(obj); | 82 | } |
39 | +} | 83 | if (changed & FPCR_AH) { |
40 | + | ||
41 | static void virt_machine_2_12_options(MachineClass *mc) | ||
42 | { | ||
43 | + virt_machine_3_0_options(mc); | ||
44 | SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_12); | ||
45 | } | ||
46 | -DEFINE_VIRT_MACHINE_AS_LATEST(2, 12) | ||
47 | +DEFINE_VIRT_MACHINE(2, 12) | ||
48 | |||
49 | #define VIRT_COMPAT_2_11 \ | ||
50 | HW_COMPAT_2_11 | ||
51 | -- | 84 | -- |
52 | 2.17.1 | 85 | 2.34.1 |
53 | 86 | ||
54 | 87 | diff view generated by jsdifflib |
1 | From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add the Cortex-R5F with the optional FPU enabled. | 3 | Replace with fp_status[FPST_A64_F16]. |
4 | 4 | ||
5 | Reviewed-by: KONRAD Frederic <frederic.konrad@adacore.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org |
8 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
10 | Message-id: 20180529124707.3025-2-edgar.iglesias@gmail.com | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 9 | --- |
13 | target/arm/cpu.c | 9 +++++++++ | 10 | target/arm/cpu.h | 1 - |
14 | 1 file changed, 9 insertions(+) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 9 ++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 14 insertions(+), 16 deletions(-) | ||
15 | 16 | ||
17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/cpu.h | ||
20 | +++ b/target/arm/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
22 | float_status fp_status_a32; | ||
23 | float_status fp_status_a64; | ||
24 | float_status fp_status_f16_a32; | ||
25 | - float_status fp_status_f16_a64; | ||
26 | }; | ||
27 | }; | ||
28 | |||
16 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
17 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/cpu.c | 31 | --- a/target/arm/cpu.c |
19 | +++ b/target/arm/cpu.c | 32 | +++ b/target/arm/cpu.c |
20 | @@ -XXX,XX +XXX,XX @@ static void cortex_r5_initfn(Object *obj) | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
21 | define_arm_cp_regs(cpu, cortexr5_cp_reginfo); | 34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | } | ||
61 | } | ||
62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
22 | } | 65 | } |
23 | 66 | ||
24 | +static void cortex_r5f_initfn(Object *obj) | 67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
25 | +{ | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
26 | + ARMCPU *cpu = ARM_CPU(obj); | 69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
27 | + | 70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
28 | + cortex_r5_initfn(obj); | 71 | float_status *status = &env->vfp.fp_status_a64; |
29 | + set_feature(&cpu->env, ARM_FEATURE_VFP3); | 72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); |
30 | +} | 73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
31 | + | 74 | int negx = 0, negf = 0; |
32 | static const ARMCPRegInfo cortexa8_cp_reginfo[] = { | 75 | |
33 | { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0, | 76 | if (is_s) { |
34 | .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, | 77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
35 | @@ -XXX,XX +XXX,XX @@ static const ARMCPUInfo arm_cpus[] = { | 78 | } |
36 | { .name = "cortex-m33", .initfn = cortex_m33_initfn, | 79 | } |
37 | .class_init = arm_v7m_class_init }, | 80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
38 | { .name = "cortex-r5", .initfn = cortex_r5_initfn }, | 81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
39 | + { .name = "cortex-r5f", .initfn = cortex_r5f_initfn }, | 82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
40 | { .name = "cortex-a7", .initfn = cortex_a7_initfn }, | 83 | } |
41 | { .name = "cortex-a8", .initfn = cortex_a8_initfn }, | 84 | |
42 | { .name = "cortex-a9", .initfn = cortex_a9_initfn }, | 85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
99 | - | ||
100 | for (i = 0; i < oprsz; i += 16) { | ||
101 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/vfp_helper.c | ||
106 | +++ b/target/arm/vfp_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
108 | & ~float_flag_input_denormal_flushed); | ||
109 | |||
110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
131 | } | ||
132 | if (changed & FPCR_FZ16) { | ||
133 | bool ftz_enabled = val & FPCR_FZ16; | ||
134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
144 | } | ||
145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
165 | } | ||
166 | /* | ||
43 | -- | 167 | -- |
44 | 2.17.1 | 168 | 2.34.1 |
45 | 169 | ||
46 | 170 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This patch defines a new ECAM region located after the 256GB limit. | 3 | Replace with fp_status[FPST_A32_F16]. |
4 | 4 | ||
5 | The virt machine state is augmented with a new highmem_ecam field | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | which guards the usage of this new ECAM region instead of the legacy | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | 16MB one. With the highmem ECAM region, up to 256 PCIe buses can be | 7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org |
8 | used. | ||
9 | |||
10 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Reviewed-by: Laszlo Ersek <lersek@redhat.com> | ||
12 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
13 | Message-id: 1529072910-16156-9-git-send-email-eric.auger@redhat.com | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
15 | --- | 9 | --- |
16 | include/hw/arm/virt.h | 4 ++++ | 10 | target/arm/cpu.h | 1 - |
17 | hw/arm/virt-acpi-build.c | 21 +++++++++++++-------- | 11 | target/arm/cpu.c | 2 +- |
18 | hw/arm/virt.c | 12 ++++++++---- | 12 | target/arm/tcg/vec_helper.c | 4 ++-- |
19 | 3 files changed, 25 insertions(+), 12 deletions(-) | 13 | target/arm/vfp_helper.c | 14 +++++++------- |
14 | 4 files changed, 10 insertions(+), 11 deletions(-) | ||
20 | 15 | ||
21 | diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
22 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/include/hw/arm/virt.h | 18 | --- a/target/arm/cpu.h |
24 | +++ b/include/hw/arm/virt.h | 19 | +++ b/target/arm/cpu.h |
25 | @@ -XXX,XX +XXX,XX @@ enum { | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
26 | VIRT_PCIE_MMIO, | 21 | struct { |
27 | VIRT_PCIE_PIO, | 22 | float_status fp_status_a32; |
28 | VIRT_PCIE_ECAM, | 23 | float_status fp_status_a64; |
29 | + VIRT_PCIE_ECAM_HIGH, | 24 | - float_status fp_status_f16_a32; |
30 | VIRT_PLATFORM_BUS, | 25 | }; |
31 | VIRT_PCIE_MMIO_HIGH, | 26 | }; |
32 | VIRT_GPIO, | 27 | |
33 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
34 | FWCfgState *fw_cfg; | ||
35 | bool secure; | ||
36 | bool highmem; | ||
37 | + bool highmem_ecam; | ||
38 | bool its; | ||
39 | bool virt; | ||
40 | int32_t gic_version; | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
42 | int psci_conduit; | ||
43 | } VirtMachineState; | ||
44 | |||
45 | +#define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) | ||
46 | + | ||
47 | #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") | ||
48 | #define VIRT_MACHINE(obj) \ | ||
49 | OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) | ||
50 | diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/hw/arm/virt-acpi-build.c | 30 | --- a/target/arm/cpu.c |
53 | +++ b/hw/arm/virt-acpi-build.c | 31 | +++ b/target/arm/cpu.c |
54 | @@ -XXX,XX +XXX,XX @@ static void acpi_dsdt_add_virtio(Aml *scope, | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
55 | } | 51 | } |
56 | 52 | ||
57 | static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, | 53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
58 | - uint32_t irq, bool use_highmem) | 54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
59 | + uint32_t irq, bool use_highmem, bool highmem_ecam) | 55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
60 | { | 56 | |
61 | + int ecam_id = VIRT_ECAM_ID(highmem_ecam); | 57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
62 | Aml *method, *crs, *ifctx, *UUID, *ifctx1, *elsectx, *buf; | 58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
63 | int i, bus_no; | 59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
64 | hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base; | 60 | } |
65 | hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size; | 61 | |
66 | hwaddr base_pio = memmap[VIRT_PCIE_PIO].base; | 62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
67 | hwaddr size_pio = memmap[VIRT_PCIE_PIO].size; | 63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
68 | - hwaddr base_ecam = memmap[VIRT_PCIE_ECAM].base; | ||
69 | - hwaddr size_ecam = memmap[VIRT_PCIE_ECAM].size; | ||
70 | + hwaddr base_ecam = memmap[ecam_id].base; | ||
71 | + hwaddr size_ecam = memmap[ecam_id].size; | ||
72 | int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN; | ||
73 | |||
74 | Aml *dev = aml_device("%s", "PCI0"); | ||
75 | @@ -XXX,XX +XXX,XX @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, | ||
76 | aml_append(dev, aml_name_decl("_CCA", aml_int(1))); | ||
77 | |||
78 | /* Declare the PCI Routing Table. */ | ||
79 | - Aml *rt_pkg = aml_package(nr_pcie_buses * PCI_NUM_PINS); | ||
80 | + Aml *rt_pkg = aml_varpackage(nr_pcie_buses * PCI_NUM_PINS); | ||
81 | for (bus_no = 0; bus_no < nr_pcie_buses; bus_no++) { | ||
82 | for (i = 0; i < PCI_NUM_PINS; i++) { | ||
83 | int gsi = (i + bus_no) % PCI_NUM_PINS; | ||
84 | @@ -XXX,XX +XXX,XX @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, | ||
85 | Aml *dev_res0 = aml_device("%s", "RES0"); | ||
86 | aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02"))); | ||
87 | crs = aml_resource_template(); | ||
88 | - aml_append(crs, aml_memory32_fixed(base_ecam, size_ecam, AML_READ_WRITE)); | ||
89 | + aml_append(crs, | ||
90 | + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, | ||
91 | + AML_NON_CACHEABLE, AML_READ_WRITE, 0x0000, base_ecam, | ||
92 | + base_ecam + size_ecam - 1, 0x0000, size_ecam)); | ||
93 | aml_append(dev_res0, aml_name_decl("_CRS", crs)); | ||
94 | aml_append(dev, dev_res0); | ||
95 | aml_append(scope, dev); | ||
96 | @@ -XXX,XX +XXX,XX @@ build_mcfg(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) | ||
97 | { | ||
98 | AcpiTableMcfg *mcfg; | ||
99 | const MemMapEntry *memmap = vms->memmap; | ||
100 | + int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam); | ||
101 | int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]); | ||
102 | int mcfg_start = table_data->len; | ||
103 | |||
104 | mcfg = acpi_data_push(table_data, len); | ||
105 | - mcfg->allocation[0].address = cpu_to_le64(memmap[VIRT_PCIE_ECAM].base); | ||
106 | + mcfg->allocation[0].address = cpu_to_le64(memmap[ecam_id].base); | ||
107 | |||
108 | /* Only a single allocation so no need to play with segments */ | ||
109 | mcfg->allocation[0].pci_segment = cpu_to_le16(0); | ||
110 | mcfg->allocation[0].start_bus_number = 0; | ||
111 | - mcfg->allocation[0].end_bus_number = (memmap[VIRT_PCIE_ECAM].size | ||
112 | + mcfg->allocation[0].end_bus_number = (memmap[ecam_id].size | ||
113 | / PCIE_MMCFG_SIZE_MIN) - 1; | ||
114 | |||
115 | build_header(linker, table_data, (void *)(table_data->data + mcfg_start), | ||
116 | @@ -XXX,XX +XXX,XX @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) | ||
117 | acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO], | ||
118 | (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); | ||
119 | acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE), | ||
120 | - vms->highmem); | ||
121 | + vms->highmem, vms->highmem_ecam); | ||
122 | acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], | ||
123 | (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); | ||
124 | acpi_dsdt_add_power_button(scope); | ||
125 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | ||
126 | index XXXXXXX..XXXXXXX 100644 | 64 | index XXXXXXX..XXXXXXX 100644 |
127 | --- a/hw/arm/virt.c | 65 | --- a/target/arm/vfp_helper.c |
128 | +++ b/hw/arm/virt.c | 66 | +++ b/target/arm/vfp_helper.c |
129 | @@ -XXX,XX +XXX,XX @@ static const MemMapEntry a15memmap[] = { | 67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) |
130 | [VIRT_MEM] = { 0x40000000, RAMLIMIT_BYTES }, | 68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); |
131 | /* Additional 64 MB redist region (can contain up to 512 redistributors) */ | 69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); |
132 | [VIRT_GIC_REDIST2] = { 0x4000000000ULL, 0x4000000 }, | 70 | /* FZ16 does not generate an input denormal exception. */ |
133 | + [VIRT_PCIE_ECAM_HIGH] = { 0x4010000000ULL, 0x10000000 }, | 71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) |
134 | /* Second PCIe window, 512GB wide at the 512GB boundary */ | 72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) |
135 | [VIRT_PCIE_MMIO_HIGH] = { 0x8000000000ULL, 0x8000000000ULL }, | 73 | & ~float_flag_input_denormal_flushed); |
136 | }; | 74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) |
137 | @@ -XXX,XX +XXX,XX @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) | 75 | & ~float_flag_input_denormal_flushed); |
138 | hwaddr size_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].size; | 76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) |
139 | hwaddr base_pio = vms->memmap[VIRT_PCIE_PIO].base; | 77 | */ |
140 | hwaddr size_pio = vms->memmap[VIRT_PCIE_PIO].size; | 78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); |
141 | - hwaddr base_ecam = vms->memmap[VIRT_PCIE_ECAM].base; | 79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); |
142 | - hwaddr size_ecam = vms->memmap[VIRT_PCIE_ECAM].size; | 80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); |
143 | + hwaddr base_ecam, size_ecam; | 81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); |
144 | hwaddr base = base_mmio; | 82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); |
145 | - int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN; | 83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); |
146 | + int nr_pcie_buses; | 84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); |
147 | int irq = vms->irqmap[VIRT_PCIE]; | 85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
148 | MemoryRegion *mmio_alias; | 86 | } |
149 | MemoryRegion *mmio_reg; | 87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); |
150 | @@ -XXX,XX +XXX,XX @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) | 88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); |
151 | MemoryRegion *ecam_reg; | 89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); |
152 | DeviceState *dev; | 90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); |
153 | char *nodename; | 91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); |
154 | - int i; | 92 | } |
155 | + int i, ecam_id; | 93 | if (changed & FPCR_FZ16) { |
156 | PCIHostState *pci; | 94 | bool ftz_enabled = val & FPCR_FZ16; |
157 | 95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | |
158 | dev = qdev_create(NULL, TYPE_GPEX_HOST); | 96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
159 | qdev_init_nofail(dev); | 97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
160 | 98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | |
161 | + ecam_id = VIRT_ECAM_ID(vms->highmem_ecam); | 99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
162 | + base_ecam = vms->memmap[ecam_id].base; | 100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); |
163 | + size_ecam = vms->memmap[ecam_id].size; | 101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); |
164 | + nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN; | 102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); |
165 | /* Map only the first size_ecam bytes of ECAM space */ | 103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); |
166 | ecam_alias = g_new0(MemoryRegion, 1); | 104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); |
167 | ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); | 105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) |
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
115 | softfloat_to_vfp_compare(env, \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) | ||
119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
122 | #undef DO_VFP_cmp | ||
168 | -- | 123 | -- |
169 | 2.17.1 | 124 | 2.34.1 |
170 | 125 | ||
171 | 126 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | With a VGICv3 KVM device, if the number of vcpus exceeds the | 3 | Replace with fp_status[FPST_A64]. |
4 | capacity of the legacy redistributor region (123 redistributors), | ||
5 | we now attempt to register a second redistributor region. Up to | ||
7 | 512 redistributors can fit in the latter, on top of the 123 allowed | |
7 | by the legacy redistributor region. | ||
8 | 4 | ||
9 | Registering this second redistributor region is possible if the | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | host kernel supports the following VGICv3 KVM device group/attribute: | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
11 | KVM_DEV_ARM_VGIC_GRP_ADDR/KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION. | 7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org |
12 | |||
13 | In case the host kernel does not support the registration of several | ||
14 | redistributor regions and the requested number of vcpus exceeds the | ||
15 | capacity of the legacy redistributor region, the GICv3 device | ||
16 | initialization fails with a proper error message and qemu exits. | ||
17 | |||
18 | At the moment the max number of vcpus is still capped by the | |
19 | virt machine class max_cpus. | ||
20 | |||
21 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
22 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
23 | Message-id: 1529072910-16156-8-git-send-email-eric.auger@redhat.com | ||
24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
25 | --- | 9 | --- |
26 | hw/arm/virt.c | 18 +++++++++++++++++- | 10 | target/arm/cpu.h | 1 - |
27 | 1 file changed, 17 insertions(+), 1 deletion(-) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 10 +++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 15 insertions(+), 16 deletions(-) | ||
28 | 16 | ||
29 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
30 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/hw/arm/virt.c | 19 | --- a/target/arm/cpu.h |
32 | +++ b/hw/arm/virt.c | 20 | +++ b/target/arm/cpu.h |
33 | @@ -XXX,XX +XXX,XX @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
34 | SysBusDevice *gicbusdev; | 22 | float_status fp_status[FPST_COUNT]; |
35 | const char *gictype; | 23 | struct { |
36 | int type = vms->gic_version, i; | 24 | float_status fp_status_a32; |
37 | + uint32_t nb_redist_regions = 0; | 25 | - float_status fp_status_a64; |
38 | 26 | }; | |
39 | gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); | 27 | }; |
40 | 28 | ||
41 | @@ -XXX,XX +XXX,XX @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
42 | vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; | 30 | index XXXXXXX..XXXXXXX 100644 |
43 | uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); | 31 | --- a/target/arm/cpu.c |
44 | 32 | +++ b/target/arm/cpu.c | |
45 | - qdev_prop_set_uint32(gicdev, "len-redist-region-count", 1); | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
46 | + nb_redist_regions = virt_gicv3_redist_region_count(vms); | 34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); |
47 | + | 35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); |
48 | + qdev_prop_set_uint32(gicdev, "len-redist-region-count", | 36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
49 | + nb_redist_regions); | 37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
50 | qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count); | 38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); |
51 | + | 39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); |
52 | + if (nb_redist_regions == 2) { | 40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); |
53 | + uint32_t redist1_capacity = | 41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); |
54 | + vms->memmap[VIRT_GIC_REDIST2].size / GICV3_REDIST_SIZE; | 42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c |
55 | + | 43 | index XXXXXXX..XXXXXXX 100644 |
56 | + qdev_prop_set_uint32(gicdev, "redist-region-count[1]", | 44 | --- a/target/arm/tcg/sme_helper.c |
57 | + MIN(smp_cpus - redist0_count, redist1_capacity)); | 45 | +++ b/target/arm/tcg/sme_helper.c |
58 | + } | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, |
47 | * round-to-odd -- see above. | ||
48 | */ | ||
49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
50 | - fpst_std = env->vfp.fp_status_a64; | ||
51 | + fpst_std = env->vfp.fp_status[FPST_A64]; | ||
52 | set_default_nan_mode(true, &fpst_std); | ||
53 | set_default_nan_mode(true, &fpst_f16); | ||
54 | fpst_odd = fpst_std; | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
59 | } | 62 | } |
60 | qdev_init_nofail(gicdev); | 63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
61 | gicbusdev = SYS_BUS_DEVICE(gicdev); | 64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
62 | sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base); | 65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
63 | if (type == 3) { | 66 | } |
64 | sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base); | 67 | |
65 | + if (nb_redist_regions == 2) { | 68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
66 | + sysbus_mmio_map(gicbusdev, 2, vms->memmap[VIRT_GIC_REDIST2].base); | 69 | intptr_t i, oprsz = simd_oprsz(desc); |
67 | + } | 70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
68 | } else { | 71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
69 | sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base); | 72 | - float_status *status = &env->vfp.fp_status_a64; |
73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
75 | int negx = 0, negf = 0; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | negx = 0x8000800080008000ull; | ||
79 | } | ||
70 | } | 80 | } |
71 | @@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine) | 81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
72 | */ | 96 | */ |
73 | if (vms->gic_version == 3) { | 97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; |
74 | virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; | 98 | |
75 | + virt_max_cpus += vms->memmap[VIRT_GIC_REDIST2].size / GICV3_REDIST_SIZE; | 99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; |
76 | } else { | 100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; |
77 | virt_max_cpus = GIC_NCPU; | 101 | set_default_nan_mode(true, statusp); |
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/arm/vfp_helper.c | ||
107 | +++ b/target/arm/vfp_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
110 | & ~float_flag_input_denormal_flushed); | ||
111 | |||
112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); | ||
114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
116 | /* | ||
117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
118 | * be the architecturally up-to-date exception flag information first. | ||
119 | */ | ||
120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
127 | break; | ||
128 | } | ||
129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
134 | } | ||
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
166 | } else { | ||
167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
170 | } | ||
78 | } | 171 | } |
79 | -- | 172 | -- |
80 | 2.17.1 | 173 | 2.34.1 |
81 | 174 | ||
82 | 175 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | virt 3.0 now allows up to 512 vcpus whereas for earlier machine | 3 | Replace with fp_status[FPST_A32]. As this was the last of the |
4 | types, max_cpus was set to 255 and any attempt to start the | 4 | old structures, we can remove the anonymous union and struct. |
5 | machine with vcpus > 255 was rejected at a very early stage, | ||
6 | in vl.c/main level. | ||
7 | 5 | ||
8 | 512 is the max supported by KVM. However, the actual vcpu count | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | that can be achieved depends on other parameters such as the | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | acceleration mode, the vgic version, the host kernel version. | 8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org |
11 | Those are discovered later on. | 9 | [PMM: tweak to account for change to is_ebf()] |
12 | |||
13 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
14 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
15 | Message-id: 1529072910-16156-12-git-send-email-eric.auger@redhat.com | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | --- | 11 | --- |
18 | hw/arm/virt.c | 7 ++++--- | 12 | target/arm/cpu.h | 7 +------ |
19 | 1 file changed, 4 insertions(+), 3 deletions(-) | 13 | target/arm/cpu.c | 2 +- |
14 | target/arm/tcg/vec_helper.c | 2 +- | ||
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
20 | 17 | ||
21 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
22 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/hw/arm/virt.c | 20 | --- a/target/arm/cpu.h |
24 | +++ b/hw/arm/virt.c | 21 | +++ b/target/arm/cpu.h |
25 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_class_init(ObjectClass *oc, void *data) | 22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
26 | HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); | 23 | uint32_t scratch[8]; |
27 | 24 | ||
28 | mc->init = machvirt_init; | 25 | /* There are a number of distinct float control structures. */ |
29 | - /* Start max_cpus at the maximum QEMU supports. We'll further restrict | 26 | - union { |
30 | - * it later in machvirt_init, where we have more information about the | 27 | - float_status fp_status[FPST_COUNT]; |
31 | + /* Start with max_cpus set to 512, which is the maximum supported by KVM. | 28 | - struct { |
32 | + * The value may be reduced later when we have more information about the | 29 | - float_status fp_status_a32; |
33 | * configuration of the particular instance. | 30 | - }; |
31 | - }; | ||
32 | + float_status fp_status[FPST_COUNT]; | ||
33 | |||
34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | ||
46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
34 | */ | 54 | */ |
35 | - mc->max_cpus = 255; | 55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; |
36 | + mc->max_cpus = 512; | 56 | |
37 | machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC); | 57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; |
38 | machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); | 58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; |
39 | machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); | 59 | set_default_nan_mode(true, statusp); |
40 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_2_12_options(MachineClass *mc) | 60 | |
41 | virt_machine_3_0_options(mc); | 61 | if (ebf) { |
42 | SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_12); | 62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
43 | vmc->no_highmem_ecam = true; | 63 | index XXXXXXX..XXXXXXX 100644 |
44 | + mc->max_cpus = 255; | 64 | --- a/target/arm/vfp_helper.c |
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
87 | } | ||
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
94 | } | ||
95 | if (changed & FPCR_FZ) { | ||
96 | bool ftz_enabled = val & FPCR_FZ; | ||
97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
105 | /* | ||
106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
45 | } | 117 | } |
46 | DEFINE_VIRT_MACHINE(2, 12) | 118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
47 | 134 | ||
48 | -- | 135 | -- |
49 | 2.17.1 | 136 | 2.34.1 |
50 | 137 | ||
51 | 138 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This patch allows the creation of a GICv3 node with 1 or 2 | 3 | Select on index instead of pointer. |
4 | redistributor regions depending on the number of smp_cpus. | 4 | No functional change. |
5 | The second redistributor region is located just after the | ||
6 | existing RAM region, at 256GB and contains up to 512 vcpus. | ||
7 | 5 | ||
8 | Please refer to kernel documentation for further node details: | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | 8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org | |
11 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
12 | Reviewed-by: Andrew Jones <drjones@redhat.com> | ||
13 | Message-id: 1529072910-16156-6-git-send-email-eric.auger@redhat.com | ||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
15 | --- | 10 | --- |
16 | include/hw/arm/virt.h | 14 ++++++++++++++ | 11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ |
17 | hw/arm/virt.c | 29 ++++++++++++++++++++++++----- | 12 | 1 file changed, 14 insertions(+), 26 deletions(-) |
18 | 2 files changed, 38 insertions(+), 5 deletions(-) | ||
19 | 13 | ||
20 | diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h | 14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
21 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/hw/arm/virt.h | 16 | --- a/target/arm/tcg/mve_helper.c |
23 | +++ b/include/hw/arm/virt.h | 17 | +++ b/target/arm/tcg/mve_helper.c |
24 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
25 | #include "qemu/notify.h" | 19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
26 | #include "hw/boards.h" | 20 | continue; \ |
27 | #include "hw/arm/arm.h" | 21 | } \ |
28 | +#include "sysemu/kvm.h" | 22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
29 | +#include "hw/intc/arm_gicv3_common.h" | 23 | - &env->vfp.fp_status[FPST_STD]; \ |
30 | 24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | |
31 | #define NUM_GICV2M_SPIS 64 | 25 | if (!(mask & 1)) { \ |
32 | #define NUM_VIRTIO_TRANSPORTS 32 | 26 | /* We need the result but without updating flags */ \ |
33 | @@ -XXX,XX +XXX,XX @@ enum { | 27 | scratch_fpst = *fpst; \ |
34 | VIRT_GIC_V2M, | 28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
35 | VIRT_GIC_ITS, | 29 | r[e] = 0; \ |
36 | VIRT_GIC_REDIST, | 30 | continue; \ |
37 | + VIRT_GIC_REDIST2, | 31 | } \ |
38 | VIRT_SMMU, | 32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
39 | VIRT_UART, | 33 | - &env->vfp.fp_status[FPST_STD]; \ |
40 | VIRT_MMIO, | 34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
41 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 35 | if (!(tm & 1)) { \ |
42 | 36 | /* We need the result but without updating flags */ \ | |
43 | void virt_acpi_setup(VirtMachineState *vms); | 37 | scratch_fpst = *fpst; \ |
44 | 38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | |
45 | +/* Return the number of used redistributor regions */ | 39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
46 | +static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) | 40 | continue; \ |
47 | +{ | 41 | } \ |
48 | + uint32_t redist0_capacity = | 42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
49 | + vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; | 43 | - &env->vfp.fp_status[FPST_STD]; \ |
50 | + | 44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
51 | + assert(vms->gic_version == 3); | 45 | if (!(mask & 1)) { \ |
52 | + | 46 | /* We need the result but without updating flags */ \ |
53 | + return vms->smp_cpus > redist0_capacity ? 2 : 1; | 47 | scratch_fpst = *fpst; \ |
54 | +} | 48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) |
55 | + | 49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ |
56 | #endif /* QEMU_ARM_VIRT_H */ | 50 | continue; \ |
57 | diff --git a/hw/arm/virt.c b/hw/arm/virt.c | 51 | } \ |
58 | index XXXXXXX..XXXXXXX 100644 | 52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
59 | --- a/hw/arm/virt.c | 53 | - &env->vfp.fp_status[FPST_STD]; \ |
60 | +++ b/hw/arm/virt.c | 54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
61 | @@ -XXX,XX +XXX,XX @@ static const MemMapEntry a15memmap[] = { | 55 | fpst1 = fpst0; \ |
62 | [VIRT_PCIE_PIO] = { 0x3eff0000, 0x00010000 }, | 56 | if (!(mask & 1)) { \ |
63 | [VIRT_PCIE_ECAM] = { 0x3f000000, 0x01000000 }, | 57 | scratch_fpst = *fpst0; \ |
64 | [VIRT_MEM] = { 0x40000000, RAMLIMIT_BYTES }, | 58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) |
65 | + /* Additional 64 MB redist region (can contain up to 512 redistributors) */ | 59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
66 | + [VIRT_GIC_REDIST2] = { 0x4000000000ULL, 0x4000000 }, | 60 | continue; \ |
67 | /* Second PCIe window, 512GB wide at the 512GB boundary */ | 61 | } \ |
68 | [VIRT_PCIE_MMIO_HIGH] = { 0x8000000000ULL, 0x8000000000ULL }, | 62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
69 | }; | 63 | - &env->vfp.fp_status[FPST_STD]; \ |
70 | @@ -XXX,XX +XXX,XX @@ static void fdt_add_gic_node(VirtMachineState *vms) | 64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
71 | qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2); | 65 | if (!(mask & 1)) { \ |
72 | qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0); | 66 | /* We need the result but without updating flags */ \ |
73 | if (vms->gic_version == 3) { | 67 | scratch_fpst = *fpst; \ |
74 | + int nb_redist_regions = virt_gicv3_redist_region_count(vms); | 68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) |
75 | + | 69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
76 | qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible", | 70 | continue; \ |
77 | "arm,gic-v3"); | 71 | } \ |
78 | - qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg", | 72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
79 | - 2, vms->memmap[VIRT_GIC_DIST].base, | 73 | - &env->vfp.fp_status[FPST_STD]; \ |
80 | - 2, vms->memmap[VIRT_GIC_DIST].size, | 74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
81 | - 2, vms->memmap[VIRT_GIC_REDIST].base, | 75 | if (!(mask & 1)) { \ |
82 | - 2, vms->memmap[VIRT_GIC_REDIST].size); | 76 | /* We need the result but without updating flags */ \ |
83 | + | 77 | scratch_fpst = *fpst; \ |
84 | + qemu_fdt_setprop_cell(vms->fdt, "/intc", | 78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) |
85 | + "#redistributor-regions", nb_redist_regions); | 79 | unsigned e; \ |
86 | + | 80 | TYPE *m = vm; \ |
87 | + if (nb_redist_regions == 1) { | 81 | TYPE ra = (TYPE)ra_in; \ |
88 | + qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg", | 82 | - float_status *fpst = (ESIZE == 2) ? \ |
89 | + 2, vms->memmap[VIRT_GIC_DIST].base, | 83 | - &env->vfp.fp_status[FPST_STD_F16] : \ |
90 | + 2, vms->memmap[VIRT_GIC_DIST].size, | 84 | - &env->vfp.fp_status[FPST_STD]; \ |
91 | + 2, vms->memmap[VIRT_GIC_REDIST].base, | 85 | + float_status *fpst = \ |
92 | + 2, vms->memmap[VIRT_GIC_REDIST].size); | 86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
93 | + } else { | 87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ |
94 | + qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg", | 88 | if (mask & 1) { \ |
95 | + 2, vms->memmap[VIRT_GIC_DIST].base, | 89 | TYPE v = m[H##ESIZE(e)]; \ |
96 | + 2, vms->memmap[VIRT_GIC_DIST].size, | 90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) |
97 | + 2, vms->memmap[VIRT_GIC_REDIST].base, | 91 | if ((mask & emask) == 0) { \ |
98 | + 2, vms->memmap[VIRT_GIC_REDIST].size, | 92 | continue; \ |
99 | + 2, vms->memmap[VIRT_GIC_REDIST2].base, | 93 | } \ |
100 | + 2, vms->memmap[VIRT_GIC_REDIST2].size); | 94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
101 | + } | 95 | - &env->vfp.fp_status[FPST_STD]; \ |
102 | + | 96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
103 | if (vms->virt) { | 97 | if (!(mask & (1 << (e * ESIZE)))) { \ |
104 | qemu_fdt_setprop_cells(vms->fdt, "/intc", "interrupts", | 98 | /* We need the result but without updating flags */ \ |
105 | GIC_FDT_IRQ_TYPE_PPI, ARCH_GICV3_MAINT_IRQ, | 99 | scratch_fpst = *fpst; \ |
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
106 | -- | 142 | -- |
107 | 2.17.1 | 143 | 2.34.1 |
108 | 144 | ||
109 | 145 | diff view generated by jsdifflib |
1 | From: Eric Auger <eric.auger@redhat.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Depending on the number of smp_cpus we now register one or two | 3 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. |
4 | GICR structures. | ||
5 | 4 | ||
6 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Andrew Jones <drjones@redhat.com> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Message-id: 1529072910-16156-7-git-send-email-eric.auger@redhat.com | 7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org |
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | hw/arm/virt-acpi-build.c | 9 +++++++++ | 10 | target/arm/vfp_helper.c | 10 +++++----- |
12 | 1 file changed, 9 insertions(+) | 11 | 1 file changed, 5 insertions(+), 5 deletions(-) |
13 | 12 | ||
14 | diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c | 13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/arm/virt-acpi-build.c | 15 | --- a/target/arm/vfp_helper.c |
17 | +++ b/hw/arm/virt-acpi-build.c | 16 | +++ b/target/arm/vfp_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) | 17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
19 | 18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | |
20 | if (vms->gic_version == 3) { | 19 | { \ |
21 | AcpiMadtGenericTranslator *gic_its; | 20 | softfloat_to_vfp_compare(env, \ |
22 | + int nb_redist_regions = virt_gicv3_redist_region_count(vms); | 21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
23 | AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data, | 22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
24 | sizeof *gicr); | 23 | } \ |
25 | 24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | |
26 | @@ -XXX,XX +XXX,XX @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) | 25 | { \ |
27 | gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base); | 26 | softfloat_to_vfp_compare(env, \ |
28 | gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size); | 27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ |
29 | 28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ | |
30 | + if (nb_redist_regions == 2) { | 29 | } |
31 | + gicr = acpi_data_push(table_data, sizeof(*gicr)); | 30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
32 | + gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR; | 31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) |
33 | + gicr->length = sizeof(*gicr); | 32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) |
34 | + gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST2].base); | 33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) |
35 | + gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST2].size); | 34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) |
36 | + } | 35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) |
37 | + | 36 | #undef DO_VFP_cmp |
38 | if (its_class_name() && !vmc->no_its) { | 37 | |
39 | gic_its = acpi_data_push(table_data, sizeof *gic_its); | 38 | /* Integer to float and float to integer conversions */ |
40 | gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR; | ||
41 | -- | 39 | -- |
42 | 2.17.1 | 40 | 2.34.1 |
43 | 41 | ||
44 | 42 | diff view generated by jsdifflib |
1 | From: Julia Suvorova <jusual@mail.ru> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The arrays were made static, "if" was simplified because V7M and V8M | 3 | Read the bit from the source, rather than from the proxy via |
4 | define V6 feature. | 4 | get_flush_inputs_to_zero. This makes it clear that it does |
5 | not matter which of the float_status structures is used. | ||
5 | 6 | ||
6 | Signed-off-by: Julia Suvorova <jusual@mail.ru> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org |
8 | Message-id: 20180618214604.6777-1-jusual@mail.ru | ||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 11 | --- |
12 | target/arm/translate.c | 27 +++++++++++++-------------- | 12 | target/arm/tcg/vec_helper.c | 12 ++++++------ |
13 | 1 file changed, 13 insertions(+), 14 deletions(-) | 13 | 1 file changed, 6 insertions(+), 6 deletions(-) |
14 | 14 | ||
15 | diff --git a/target/arm/translate.c b/target/arm/translate.c | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/translate.c | 17 | --- a/target/arm/tcg/vec_helper.c |
18 | +++ b/target/arm/translate.c | 18 | +++ b/target/arm/tcg/vec_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) | 19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
20 | !arm_dc_feature(s, ARM_FEATURE_V7)) { | 20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
21 | int i; | 21 | |
22 | bool found = false; | 22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
23 | - const uint32_t armv6m_insn[] = {0xf3808000 /* msr */, | 23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
24 | - 0xf3b08040 /* dsb */, | 24 | + env->vfp.fpcr & FPCR_FZ16); |
25 | - 0xf3b08050 /* dmb */, | 25 | } |
26 | - 0xf3b08060 /* isb */, | 26 | |
27 | - 0xf3e08000 /* mrs */, | 27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
28 | - 0xf000d000 /* bl */}; | 28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
29 | - const uint32_t armv6m_mask[] = {0xffe0d000, | 29 | } |
30 | - 0xfff0d0f0, | 30 | } |
31 | - 0xfff0d0f0, | 31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
32 | - 0xfff0d0f0, | 32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
33 | - 0xffe0d000, | 33 | + env->vfp.fpcr & FPCR_FZ16); |
34 | - 0xf800d000}; | 34 | } |
35 | + static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */, | 35 | |
36 | + 0xf3b08040 /* dsb */, | 36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
37 | + 0xf3b08050 /* dmb */, | 37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
38 | + 0xf3b08060 /* isb */, | 38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
39 | + 0xf3e08000 /* mrs */, | 39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
40 | + 0xf000d000 /* bl */}; | 40 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
41 | + static const uint32_t armv6m_mask[] = {0xffe0d000, | 41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
42 | + 0xfff0d0f0, | 42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
43 | + 0xfff0d0f0, | 43 | int negx = 0, negf = 0; |
44 | + 0xfff0d0f0, | 44 | |
45 | + 0xffe0d000, | 45 | if (is_s) { |
46 | + 0xf800d000}; | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
47 | 47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | |
48 | for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) { | 48 | |
49 | if ((insn & armv6m_mask[i]) == armv6m_insn[i]) { | 49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
50 | @@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) | 50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
51 | break; | 51 | + env->vfp.fpcr & FPCR_FZ16); |
52 | case 3: /* Special control operations. */ | 52 | } |
53 | if (!arm_dc_feature(s, ARM_FEATURE_V7) && | 53 | |
54 | - !(arm_dc_feature(s, ARM_FEATURE_V6) && | 54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
55 | - arm_dc_feature(s, ARM_FEATURE_M))) { | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
56 | + !arm_dc_feature(s, ARM_FEATURE_M)) { | 56 | } |
57 | goto illegal_op; | 57 | } |
58 | } | 58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
59 | op = (insn >> 4) & 0xf; | 59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
60 | + env->vfp.fpcr & FPCR_FZ16); | ||
61 | } | ||
62 | |||
63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
67 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
70 | int negx = 0, negf = 0; | ||
71 | |||
72 | if (is_s) { | ||
60 | -- | 73 | -- |
61 | 2.17.1 | 74 | 2.34.1 |
62 | |||
63 | diff view generated by jsdifflib |
1 | From: Amol Surati <suratiamol@gmail.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | When either GICD_IPRIORITYR or GICR_IPRIORITYR is read as a 32-bit | 3 | Sink common code from the callers into do_fmlal |
4 | register, the post left-shift operator in the for loop causes an | 4 | and do_fmlal_idx. Reorder the arguments to minimize |
5 | extra shift after the least significant byte has been placed. | 5 | the re-sorting from the caller's arguments. |
6 | 6 | ||
7 | The 32-bit value actually returned is therefore the expected value | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | shifted left by 8 bits. | 8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org |
9 | |||
10 | Signed-off-by: Amol Surati <suratiamol@gmail.com> | ||
11 | Message-id: 20180614054857.26248-1-suratiamol@gmail.com | ||
12 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 11 | --- |
15 | hw/intc/arm_gicv3_dist.c | 3 ++- | 12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ |
16 | hw/intc/arm_gicv3_redist.c | 3 ++- | 13 | 1 file changed, 16 insertions(+), 12 deletions(-) |
17 | 2 files changed, 4 insertions(+), 2 deletions(-) | ||
18 | 14 | ||
19 | diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
20 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/intc/arm_gicv3_dist.c | 17 | --- a/target/arm/tcg/vec_helper.c |
22 | +++ b/hw/intc/arm_gicv3_dist.c | 18 | +++ b/target/arm/tcg/vec_helper.c |
23 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, | 19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
24 | int i, irq = offset - GICD_IPRIORITYR; | 20 | * as there is not yet SVE versions that might use blocking. |
25 | uint32_t value = 0; | 21 | */ |
26 | 22 | ||
27 | - for (i = irq + 3; i >= irq; i--, value <<= 8) { | 23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
28 | + for (i = irq + 3; i >= irq; i--) { | 24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
29 | + value <<= 8; | 25 | +static void do_fmlal(float32 *d, void *vn, void *vm, |
30 | value |= gicd_read_ipriorityr(s, attrs, i); | 26 | + CPUARMState *env, uint32_t desc, |
27 | + ARMFPStatusFlavour fpst_idx, | ||
28 | + uint64_t negx, int negf) | ||
29 | { | ||
30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
42 | } | ||
43 | |||
44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
46 | negx = 0x8000800080008000ull; | ||
31 | } | 47 | } |
32 | *data = value; | 48 | } |
33 | diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c | 49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
34 | index XXXXXXX..XXXXXXX 100644 | 50 | - env->vfp.fpcr & FPCR_FZ16); |
35 | --- a/hw/intc/arm_gicv3_redist.c | 51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); |
36 | +++ b/hw/intc/arm_gicv3_redist.c | 52 | } |
37 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicr_readl(GICv3CPUState *cs, hwaddr offset, | 53 | |
38 | int i, irq = offset - GICR_IPRIORITYR; | 54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
39 | uint32_t value = 0; | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
40 | 56 | } | |
41 | - for (i = irq + 3; i >= irq; i--, value <<= 8) { | 57 | } |
42 | + for (i = irq + 3; i >= irq; i--) { | 58 | |
43 | + value <<= 8; | 59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
44 | value |= gicr_read_ipriorityr(cs, attrs, i); | 60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, | ||
62 | + CPUARMState *env, uint32_t desc, | ||
63 | + ARMFPStatusFlavour fpst_idx, | ||
64 | + uint64_t negx, int negf) | ||
65 | { | ||
66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; | ||
67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
68 | intptr_t i, oprsz = simd_oprsz(desc); | ||
69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
74 | |||
75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
76 | - env->vfp.fpcr & FPCR_FZ16); | ||
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
78 | } | ||
79 | |||
80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
82 | negx = 0x8000800080008000ull; | ||
45 | } | 83 | } |
46 | *data = value; | 84 | } |
85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
86 | - env->vfp.fpcr & FPCR_FZ16); | ||
87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
88 | } | ||
89 | |||
90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
47 | -- | 91 | -- |
48 | 2.17.1 | 92 | 2.34.1 |
49 | |||
50 | diff view generated by jsdifflib |