1 | The following changes since commit b367db48126d4ee14579af6cf5cdbffeb9496627: | 1 | Hi; this pullreq contains only my FEAT_AFP/FEAT_RPRES patches |
---|---|---|---|
2 | (plus a fix for a target/alpha latent bug that would otherwise | ||
3 | be revealed by the fpu changes), because 68 patches is already | ||
4 | longer than I prefer to send in at one time... | ||
2 | 5 | ||
3 | Merge remote-tracking branch 'remotes/aperard/tags/pull-xen-20220127' into staging (2022-01-28 11:05:29 +0000) | 6 | thanks |
7 | -- PMM | ||
8 | |||
9 | The following changes since commit ffaf7f0376f8040ce9068d71ae9ae8722505c42e: | ||
10 | |||
11 | Merge tag 'pull-10.0-testing-and-gdstub-updates-100225-1' of https://gitlab.com/stsquad/qemu into staging (2025-02-10 13:26:17 -0500) | ||
4 | 12 | ||
5 | are available in the Git repository at: | 13 | are available in the Git repository at: |
6 | 14 | ||
7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220128 | 15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250211 |
8 | 16 | ||
9 | for you to fetch changes up to 2c023d3675a3ffb54fc30504dcd715bc6f6e234f: | 17 | for you to fetch changes up to ca4c34e07d1388df8e396520b5e7d60883cd3690: |
10 | 18 | ||
11 | target/arm: Use correct entrypoint for SVC taken from Hyp to Hyp (2022-01-28 14:30:36 +0000) | 19 | target/arm: Sink fp_status and fpcr access into do_fmlal* (2025-02-11 16:22:08 +0000) |
12 | 20 | ||
13 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
14 | target-arm queue: | 22 | target-arm queue: |
15 | * Update copyright dates to 2022 | 23 | * target/alpha: Don't corrupt error_code with unknown softfloat flags |
16 | * hw/armv7m: Fix broken VMStateDescription | 24 | * target/arm: Implement FEAT_AFP and FEAT_RPRES |
17 | * hw/char/exynos4210_uart: Fix crash on trying to load VM state | ||
18 | * rtc: Move RTC function prototypes to their own header | ||
19 | * xlnx-versal-virt: Support PMC SLCR | ||
20 | * xlnx-versal-virt: Support OSPI flash memory controller | ||
21 | * scripts: Explain the difference between linux-headers and standard-headers | ||
22 | * target/arm: Log CPU index in 'Taking exception' log | ||
23 | * arm_gicv3_its: Various bugfixes and cleanups | ||
24 | * arm_gicv3_its: Implement the missing MOVI and MOVALL commands | ||
25 | * ast2600: Fix address mapping of second SPI controller | ||
26 | * target/arm: Use correct entrypoint for SVC taken from Hyp to Hyp | ||
27 | 25 | ||
28 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
29 | Andrew Baumann (1): | 27 | Peter Maydell (49): |
30 | MAINTAINERS: Remove myself (for raspi). | 28 | target/alpha: Don't corrupt error_code with unknown softfloat flags |
29 | fpu: Add float_class_denormal | ||
30 | fpu: Implement float_flag_input_denormal_used | ||
31 | fpu: allow flushing of output denormals to be after rounding | ||
32 | target/arm: Define FPCR AH, FIZ, NEP bits | ||
33 | target/arm: Implement FPCR.FIZ handling | ||
34 | target/arm: Adjust FP behaviour for FPCR.AH = 1 | ||
35 | target/arm: Adjust exception flag handling for AH = 1 | ||
36 | target/arm: Add FPCR.AH to tbflags | ||
37 | target/arm: Set up float_status to use for FPCR.AH=1 behaviour | ||
38 | target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
39 | target/arm: Use FPST_FPCR_AH for BFCVT* insns | ||
40 | target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns | ||
41 | target/arm: Add FPCR.NEP to TBFLAGS | ||
42 | target/arm: Define and use new write_fp_*reg_merging() functions | ||
43 | target/arm: Handle FPCR.NEP for 3-input scalar operations | ||
44 | target/arm: Handle FPCR.NEP for BFCVT scalar | ||
45 | target/arm: Handle FPCR.NEP for 1-input scalar operations | ||
46 | target/arm: Handle FPCR.NEP in do_cvtf_scalar() | ||
47 | target/arm: Handle FPCR.NEP for scalar FABS and FNEG | ||
48 | target/arm: Handle FPCR.NEP for FCVTXN (scalar) | ||
49 | target/arm: Handle FPCR.NEP for FMUL, FMULX scalar by element | ||
50 | target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX | ||
51 | target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX | ||
52 | target/arm: Implement FPCR.AH semantics for FMAXV and FMINV | ||
53 | target/arm: Implement FPCR.AH semantics for FMINP and FMAXP | ||
54 | target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV | ||
55 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate | ||
56 | target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector | ||
57 | target/arm: Implement FPCR.AH handling of negation of NaN | ||
58 | target/arm: Implement FPCR.AH handling for scalar FABS and FABD | ||
59 | target/arm: Handle FPCR.AH in vector FABD | ||
60 | target/arm: Handle FPCR.AH in SVE FNEG | ||
61 | target/arm: Handle FPCR.AH in SVE FABS | ||
62 | target/arm: Handle FPCR.AH in SVE FABD | ||
63 | target/arm: Handle FPCR.AH in negation steps in SVE FCADD | ||
64 | target/arm: Handle FPCR.AH in negation steps in FCADD | ||
65 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns | ||
66 | target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns | ||
67 | target/arm: Handle FPCR.AH in negation step in FMLS (indexed) | ||
68 | target/arm: Handle FPCR.AH in negation in FMLS (vector) | ||
69 | target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) | ||
70 | target/arm: Handle FPCR.AH in SVE FTSSEL | ||
71 | target/arm: Handle FPCR.AH in SVE FTMAD | ||
72 | target/arm: Enable FEAT_AFP for '-cpu max' | ||
73 | target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper | ||
74 | target/arm: Implement increased precision FRECPE | ||
75 | target/arm: Implement increased precision FRSQRTE | ||
76 | target/arm: Enable FEAT_RPRES for -cpu max | ||
31 | 77 | ||
32 | Cédric Le Goater (1): | 78 | Richard Henderson (19): |
33 | hw/arm: ast2600: Fix address mapping of second SPI controller | 79 | target/arm: Handle FPCR.AH in vector FCMLA |
80 | target/arm: Handle FPCR.AH in FCMLA by index | ||
81 | target/arm: Handle FPCR.AH in SVE FCMLA | ||
82 | target/arm: Handle FPCR.AH in FMLSL (by element and vector) | ||
83 | target/arm: Handle FPCR.AH in SVE FMLSL (indexed) | ||
84 | target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) | ||
85 | target/arm: Introduce CPUARMState.vfp.fp_status[] | ||
86 | target/arm: Remove standard_fp_status_f16 | ||
87 | target/arm: Remove standard_fp_status | ||
88 | target/arm: Remove ah_fp_status_f16 | ||
89 | target/arm: Remove ah_fp_status | ||
90 | target/arm: Remove fp_status_f16_a64 | ||
91 | target/arm: Remove fp_status_f16_a32 | ||
92 | target/arm: Remove fp_status_a64 | ||
93 | target/arm: Remove fp_status_a32 | ||
94 | target/arm: Simplify fp_status indexing in mve_helper.c | ||
95 | target/arm: Simplify DO_VFP_cmp in vfp_helper.c | ||
96 | target/arm: Read fz16 from env->vfp.fpcr | ||
97 | target/arm: Sink fp_status and fpcr access into do_fmlal* | ||
34 | 98 | ||
35 | Francisco Iglesias (10): | 99 | docs/system/arm/emulation.rst | 2 + |
36 | hw/misc: Add a model of Versal's PMC SLCR | 100 | include/fpu/softfloat-helpers.h | 11 + |
37 | hw/arm/xlnx-versal: 'Or' the interrupts from the BBRAM and RTC models | 101 | include/fpu/softfloat-types.h | 25 ++ |
38 | hw/arm/xlnx-versal: Connect Versal's PMC SLCR | 102 | target/arm/cpu-features.h | 10 + |
39 | include/hw/dma/xlnx_csu_dma: Add in missing includes in the header | 103 | target/arm/cpu.h | 97 +++-- |
40 | hw/dma/xlnx_csu_dma: Support starting a read transfer through a class method | 104 | target/arm/helper.h | 26 ++ |
41 | hw/ssi: Add a model of Xilinx Versal's OSPI flash memory controller | 105 | target/arm/internals.h | 6 + |
42 | hw/arm/xlnx-versal: Connect the OSPI flash memory controller model | 106 | target/arm/tcg/helper-a64.h | 13 + |
43 | hw/block/m25p80: Add support for Micron Xccela flash mt35xu01g | 107 | target/arm/tcg/helper-sve.h | 120 ++++++ |
44 | hw/arm/xlnx-versal-virt: Connect mt35xu01g flashes to the OSPI | 108 | target/arm/tcg/translate-a64.h | 13 + |
45 | MAINTAINERS: Add an entry for Xilinx Versal OSPI | 109 | target/arm/tcg/translate.h | 54 +-- |
46 | 110 | target/arm/tcg/vec_internal.h | 35 ++ | |
47 | Peter Maydell (20): | 111 | target/mips/fpu_helper.h | 6 + |
48 | Update copyright dates to 2022 | 112 | fpu/softfloat.c | 66 +++- |
49 | hw/armv7m: Fix broken VMStateDescription | 113 | target/alpha/cpu.c | 7 + |
50 | hw/char/exynos4210_uart: Fix crash on trying to load VM state | 114 | target/alpha/fpu_helper.c | 2 + |
51 | rtc: Move RTC function prototypes to their own header | 115 | target/arm/cpu.c | 46 +-- |
52 | scripts: Explain the difference between linux-headers and standard-headers | 116 | target/arm/helper.c | 2 +- |
53 | target/arm: Log CPU index in 'Taking exception' log | 117 | target/arm/tcg/cpu64.c | 2 + |
54 | hw/intc/arm_gicv3_its: Add tracepoints | 118 | target/arm/tcg/helper-a64.c | 151 ++++---- |
55 | hw/intc/arm_gicv3: Initialise dma_as in GIC, not ITS | 119 | target/arm/tcg/hflags.c | 13 + |
56 | hw/intc/arm_gicv3_its: Don't clear GITS_CREADR when GITS_CTLR.ENABLED is set | 120 | target/arm/tcg/mve_helper.c | 44 +-- |
57 | hw/intc/arm_gicv3_its: Don't clear GITS_CWRITER on writes to GITS_CBASER | 121 | target/arm/tcg/sme_helper.c | 4 +- |
58 | hw/intc/arm_gicv3: Honour GICD_CTLR.EnableGrp1NS for LPIs | 122 | target/arm/tcg/sve_helper.c | 367 ++++++++++++++----- |
59 | hw/intc/arm_gicv3_its: Sort ITS command list into numeric order | 123 | target/arm/tcg/translate-a64.c | 782 ++++++++++++++++++++++++++++++++-------- |
60 | hw/intc/arm_gicv3_redist: Remove unnecessary zero checks | 124 | target/arm/tcg/translate-sve.c | 193 +++++++--- |
61 | hw/intc/arm_gicv3: Set GICR_CTLR.CES if LPIs are supported | 125 | target/arm/tcg/vec_helper.c | 387 ++++++++++++++------ |
62 | hw/intc/arm_gicv3_its: Provide read accessor for translation_ops | 126 | target/arm/vfp_helper.c | 374 +++++++++++++++---- |
63 | hw/intc/arm_gicv3_its: Make GITS_BASER<n> RAZ/WI for unimplemented registers | 127 | target/hppa/fpu_helper.c | 11 + |
64 | hw/intc/arm_gicv3_its: Check table bounds against correct limit | 128 | target/i386/tcg/fpu_helper.c | 8 + |
65 | hw/intc/arm_gicv3_its: Implement MOVALL | 129 | target/mips/msa.c | 9 + |
66 | hw/intc/arm_gicv3_its: Implement MOVI | 130 | target/ppc/cpu_init.c | 3 + |
67 | target/arm: Use correct entrypoint for SVC taken from Hyp to Hyp | 131 | target/rx/cpu.c | 8 + |
68 | 132 | target/sh4/cpu.c | 8 + | |
69 | docs/conf.py | 2 +- | 133 | target/tricore/helper.c | 1 + |
70 | hw/intc/gicv3_internal.h | 43 +- | 134 | tests/fp/fp-bench.c | 1 + |
71 | include/hw/arm/xlnx-versal.h | 30 +- | 135 | fpu/softfloat-parts.c.inc | 127 +++++-- |
72 | include/hw/dma/xlnx_csu_dma.h | 24 +- | 136 | 37 files changed, 2325 insertions(+), 709 deletions(-) |
73 | include/hw/intc/arm_gicv3_its_common.h | 1 - | ||
74 | include/hw/misc/xlnx-versal-pmc-iou-slcr.h | 78 ++ | ||
75 | include/hw/ssi/xlnx-versal-ospi.h | 111 ++ | ||
76 | include/qemu-common.h | 5 +- | ||
77 | include/sysemu/rtc.h | 58 + | ||
78 | target/arm/internals.h | 2 +- | ||
79 | hw/arm/armv7m.c | 4 +- | ||
80 | hw/arm/aspeed_ast2600.c | 2 +- | ||
81 | hw/arm/omap1.c | 2 +- | ||
82 | hw/arm/pxa2xx.c | 2 +- | ||
83 | hw/arm/strongarm.c | 2 +- | ||
84 | hw/arm/xlnx-versal-virt.c | 25 +- | ||
85 | hw/arm/xlnx-versal.c | 190 ++- | ||
86 | hw/block/m25p80.c | 2 + | ||
87 | hw/char/exynos4210_uart.c | 2 +- | ||
88 | hw/dma/xlnx_csu_dma.c | 17 + | ||
89 | hw/intc/arm_gicv3.c | 1 + | ||
90 | hw/intc/arm_gicv3_common.c | 9 + | ||
91 | hw/intc/arm_gicv3_its.c | 258 +++- | ||
92 | hw/intc/arm_gicv3_redist.c | 115 +- | ||
93 | hw/misc/mac_via.c | 2 +- | ||
94 | hw/misc/macio/cuda.c | 2 +- | ||
95 | hw/misc/macio/pmu.c | 2 +- | ||
96 | hw/misc/xlnx-versal-pmc-iou-slcr.c | 1446 ++++++++++++++++++++++ | ||
97 | hw/ppc/spapr_rtc.c | 2 +- | ||
98 | hw/rtc/allwinner-rtc.c | 2 +- | ||
99 | hw/rtc/aspeed_rtc.c | 2 +- | ||
100 | hw/rtc/ds1338.c | 2 +- | ||
101 | hw/rtc/exynos4210_rtc.c | 2 +- | ||
102 | hw/rtc/goldfish_rtc.c | 2 +- | ||
103 | hw/rtc/m41t80.c | 2 +- | ||
104 | hw/rtc/m48t59.c | 2 +- | ||
105 | hw/rtc/mc146818rtc.c | 2 +- | ||
106 | hw/rtc/pl031.c | 2 +- | ||
107 | hw/rtc/twl92230.c | 2 +- | ||
108 | hw/rtc/xlnx-zynqmp-rtc.c | 2 +- | ||
109 | hw/s390x/tod-tcg.c | 2 +- | ||
110 | hw/scsi/megasas.c | 2 +- | ||
111 | hw/ssi/xlnx-versal-ospi.c | 1853 ++++++++++++++++++++++++++++ | ||
112 | net/dump.c | 2 +- | ||
113 | softmmu/rtc.c | 2 +- | ||
114 | target/arm/helper.c | 13 +- | ||
115 | target/arm/m_helper.c | 2 +- | ||
116 | MAINTAINERS | 7 +- | ||
117 | hw/intc/trace-events | 8 + | ||
118 | hw/misc/meson.build | 5 +- | ||
119 | hw/ssi/meson.build | 1 + | ||
120 | scripts/update-linux-headers.sh | 16 + | ||
121 | 52 files changed, 4300 insertions(+), 74 deletions(-) | ||
122 | create mode 100644 include/hw/misc/xlnx-versal-pmc-iou-slcr.h | ||
123 | create mode 100644 include/hw/ssi/xlnx-versal-ospi.h | ||
124 | create mode 100644 include/sysemu/rtc.h | ||
125 | create mode 100644 hw/misc/xlnx-versal-pmc-iou-slcr.c | ||
126 | create mode 100644 hw/ssi/xlnx-versal-ospi.c | ||
127 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | In do_cvttq() we set env->error_code with what is supposed to be a |
---|---|---|---|
2 | set of FPCR exception bit values. However, if the set of float | ||
3 | exception flags we get back from softfloat for the conversion | ||
4 | includes a flag which is not one of the three we expect here | ||
5 | (invalid_cvti, invalid, inexact) then we will fall through the | ||
6 | if-ladder and set env->error_code to the unconverted softfloat | ||
7 | exception_flag value. This will then cause us to take a spurious | ||
8 | exception. | ||
2 | 9 | ||
3 | Add support for Micron Xccela flash mt35xu01g. | 10 | This is harmless now, but when we add new floating point exception |
11 | flags to softfloat it will cause problems. Add an else clause to the | ||
12 | if-ladder to make it ignore any float exception flags it doesn't care | ||
13 | about. | ||
4 | 14 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 15 | Specifically, without this fix, 'make check-tcg' will fail for Alpha |
6 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | 16 | when the commit adding float_flag_input_denormal_used lands. |
7 | Message-id: 20220121161141.14389-9-francisco.iglesias@xilinx.com | 17 | |
18 | |||
19 | Fixes: aa3bad5b59e7 ("target/alpha: Use float64_to_int64_modulo for CVTTQ") | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | --- | 23 | --- |
10 | hw/block/m25p80.c | 2 ++ | 24 | target/alpha/fpu_helper.c | 2 ++ |
11 | 1 file changed, 2 insertions(+) | 25 | 1 file changed, 2 insertions(+) |
12 | 26 | ||
13 | diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c | 27 | diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/hw/block/m25p80.c | 29 | --- a/target/alpha/fpu_helper.c |
16 | +++ b/hw/block/m25p80.c | 30 | +++ b/target/alpha/fpu_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ static const FlashPartInfo known_devices[] = { | 31 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode) |
18 | { INFO("n25q512a", 0x20ba20, 0, 64 << 10, 1024, ER_4K) }, | 32 | exc = FPCR_INV; |
19 | { INFO("n25q512ax3", 0x20ba20, 0x1000, 64 << 10, 1024, ER_4K) }, | 33 | } else if (exc & float_flag_inexact) { |
20 | { INFO("mt25ql512ab", 0x20ba20, 0x1044, 64 << 10, 1024, ER_4K | ER_32K) }, | 34 | exc = FPCR_INE; |
21 | + { INFO_STACKED("mt35xu01g", 0x2c5b1b, 0x104100, 128 << 10, 1024, | 35 | + } else { |
22 | + ER_4K | ER_32K, 2) }, | 36 | + exc = 0; |
23 | { INFO_STACKED("n25q00", 0x20ba21, 0x1000, 64 << 10, 2048, ER_4K, 4) }, | 37 | } |
24 | { INFO_STACKED("n25q00a", 0x20bb21, 0x1000, 64 << 10, 2048, ER_4K, 4) }, | 38 | } |
25 | { INFO_STACKED("mt25ql01g", 0x20ba21, 0x1040, 64 << 10, 2048, ER_4K, 2) }, | 39 | env->error_code = exc; |
26 | -- | 40 | -- |
27 | 2.25.1 | 41 | 2.34.1 |
28 | 42 | ||
29 | 43 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently in softfloat we canonicalize input denormals and so the | ||
2 | code that implements floating point operations does not need to care | ||
3 | whether the input value was originally normal or denormal. However, | ||
4 | both x86 and Arm FEAT_AFP require that an exception flag is set if: | ||
5 | * an input is denormal | ||
6 | * that input is not squashed to zero | ||
7 | * that input is actually used in the calculation (e.g. we | ||
8 | did not find the other input was a NaN) | ||
1 | 9 | ||
10 | So we need to track that the input was a non-squashed denormal. To | ||
11 | do this we add a new value to the FloatClass enum. In this commit we | ||
12 | add the value and adjust the code everywhere that looks at FloatClass | ||
13 | values so that the new float_class_denormal behaves identically to | ||
14 | float_class_normal. We will add the code that does the "raise a new | ||
15 | float exception flag if an input was an unsquashed denormal and we | ||
16 | used it" in a subsequent commit. | ||
17 | |||
18 | There should be no behavioural change in this commit. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | --- | ||
23 | fpu/softfloat.c | 32 ++++++++++++++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 40 ++++++++++++++++++++++++--------------- | ||
25 | 2 files changed, 54 insertions(+), 18 deletions(-) | ||
26 | |||
27 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/fpu/softfloat.c | ||
30 | +++ b/fpu/softfloat.c | ||
31 | @@ -XXX,XX +XXX,XX @@ float64_gen2(float64 xa, float64 xb, float_status *s, | ||
32 | /* | ||
33 | * Classify a floating point number. Everything above float_class_qnan | ||
34 | * is a NaN so cls >= float_class_qnan is any NaN. | ||
35 | + * | ||
36 | + * Note that we canonicalize denormals, so most code should treat | ||
37 | + * class_normal and class_denormal identically. | ||
38 | */ | ||
39 | |||
40 | typedef enum __attribute__ ((__packed__)) { | ||
41 | float_class_unclassified, | ||
42 | float_class_zero, | ||
43 | float_class_normal, | ||
44 | + float_class_denormal, /* input was a non-squashed denormal */ | ||
45 | float_class_inf, | ||
46 | float_class_qnan, /* all NaNs from here */ | ||
47 | float_class_snan, | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__ ((__packed__)) { | ||
49 | enum { | ||
50 | float_cmask_zero = float_cmask(float_class_zero), | ||
51 | float_cmask_normal = float_cmask(float_class_normal), | ||
52 | + float_cmask_denormal = float_cmask(float_class_denormal), | ||
53 | float_cmask_inf = float_cmask(float_class_inf), | ||
54 | float_cmask_qnan = float_cmask(float_class_qnan), | ||
55 | float_cmask_snan = float_cmask(float_class_snan), | ||
56 | |||
57 | float_cmask_infzero = float_cmask_zero | float_cmask_inf, | ||
58 | float_cmask_anynan = float_cmask_qnan | float_cmask_snan, | ||
59 | + float_cmask_anynorm = float_cmask_normal | float_cmask_denormal, | ||
60 | }; | ||
61 | |||
62 | /* Flags for parts_minmax. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) | ||
64 | return c == float_class_qnan; | ||
65 | } | ||
66 | |||
67 | +/* | ||
68 | + * Return true if the float_cmask has only normals in it | ||
69 | + * (including input denormals that were canonicalized) | ||
70 | + */ | ||
71 | +static inline bool cmask_is_only_normals(int cmask) | ||
72 | +{ | ||
73 | + return !(cmask & ~float_cmask_anynorm); | ||
74 | +} | ||
75 | + | ||
76 | +static inline bool is_anynorm(FloatClass c) | ||
77 | +{ | ||
78 | + return float_cmask(c) & float_cmask_anynorm; | ||
79 | +} | ||
80 | + | ||
81 | /* | ||
82 | * Structure holding all of the decomposed parts of a float. | ||
83 | * The exponent is unbiased and the fraction is normalized. | ||
84 | @@ -XXX,XX +XXX,XX @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, | ||
85 | */ | ||
86 | switch (p->cls) { | ||
87 | case float_class_normal: | ||
88 | + case float_class_denormal: | ||
89 | if (unlikely(p->exp == 0)) { | ||
90 | /* | ||
91 | * The result is denormal for float32, but can be represented | ||
92 | @@ -XXX,XX +XXX,XX @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, | ||
93 | |||
94 | switch (p->cls) { | ||
95 | case float_class_normal: | ||
96 | + case float_class_denormal: | ||
97 | if (s->floatx80_rounding_precision == floatx80_precision_x) { | ||
98 | parts_uncanon_normal(p, s, fmt); | ||
99 | frac = p->frac_hi; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
101 | break; | ||
102 | |||
103 | case float_class_normal: | ||
104 | + case float_class_denormal: | ||
105 | case float_class_zero: | ||
106 | break; | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
109 | a->sign = b->sign; | ||
110 | a->exp = b->exp; | ||
111 | |||
112 | - if (a->cls == float_class_normal) { | ||
113 | + if (is_anynorm(a->cls)) { | ||
114 | frac_truncjam(a, b); | ||
115 | } else if (is_nan(a->cls)) { | ||
116 | /* Discard the low bits of the NaN. */ | ||
117 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode, | ||
118 | return int128_zero(); | ||
119 | |||
120 | case float_class_normal: | ||
121 | + case float_class_denormal: | ||
122 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
123 | flags = float_flag_inexact; | ||
124 | } | ||
125 | @@ -XXX,XX +XXX,XX @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode, | ||
126 | return int128_zero(); | ||
127 | |||
128 | case float_class_normal: | ||
129 | + case float_class_denormal: | ||
130 | if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) { | ||
131 | flags = float_flag_inexact; | ||
132 | if (p.cls == float_class_zero) { | ||
133 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
134 | float32_unpack_canonical(&xp, a, status); | ||
135 | if (unlikely(xp.cls != float_class_normal)) { | ||
136 | switch (xp.cls) { | ||
137 | + case float_class_denormal: | ||
138 | + break; | ||
139 | case float_class_snan: | ||
140 | case float_class_qnan: | ||
141 | parts_return_nan(&xp, status); | ||
142 | @@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status) | ||
143 | case float_class_zero: | ||
144 | return float32_one; | ||
145 | default: | ||
146 | - break; | ||
147 | + g_assert_not_reached(); | ||
148 | } | ||
149 | - g_assert_not_reached(); | ||
150 | } | ||
151 | |||
152 | float_raise(float_flag_inexact, status); | ||
153 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fpu/softfloat-parts.c.inc | ||
156 | +++ b/fpu/softfloat-parts.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | ||
158 | frac_clear(p); | ||
159 | } else { | ||
160 | int shift = frac_normalize(p); | ||
161 | - p->cls = float_class_normal; | ||
162 | + p->cls = float_class_denormal; | ||
163 | p->exp = fmt->frac_shift - fmt->exp_bias | ||
164 | - shift + !fmt->m68k_denormal; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
167 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | ||
168 | const FloatFmt *fmt) | ||
169 | { | ||
170 | - if (likely(p->cls == float_class_normal)) { | ||
171 | + if (likely(is_anynorm(p->cls))) { | ||
172 | parts_uncanon_normal(p, s, fmt); | ||
173 | } else { | ||
174 | switch (p->cls) { | ||
175 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
176 | |||
177 | if (a->sign != b_sign) { | ||
178 | /* Subtraction */ | ||
179 | - if (likely(ab_mask == float_cmask_normal)) { | ||
180 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
181 | if (parts_sub_normal(a, b)) { | ||
182 | return a; | ||
183 | } | ||
184 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
185 | } | ||
186 | } else { | ||
187 | /* Addition */ | ||
188 | - if (likely(ab_mask == float_cmask_normal)) { | ||
189 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
190 | parts_add_normal(a, b); | ||
191 | return a; | ||
192 | } | ||
193 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
194 | } | ||
195 | |||
196 | if (b->cls == float_class_zero) { | ||
197 | - g_assert(a->cls == float_class_normal); | ||
198 | + g_assert(is_anynorm(a->cls)); | ||
199 | return a; | ||
200 | } | ||
201 | |||
202 | g_assert(a->cls == float_class_zero); | ||
203 | - g_assert(b->cls == float_class_normal); | ||
204 | + g_assert(is_anynorm(b->cls)); | ||
205 | return_b: | ||
206 | b->sign = b_sign; | ||
207 | return b; | ||
208 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
209 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
210 | bool sign = a->sign ^ b->sign; | ||
211 | |||
212 | - if (likely(ab_mask == float_cmask_normal)) { | ||
213 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
214 | FloatPartsW tmp; | ||
215 | |||
216 | frac_mulw(&tmp, a, b); | ||
217 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
218 | a->sign ^= 1; | ||
219 | } | ||
220 | |||
221 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
222 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
223 | if (unlikely(ab_mask == float_cmask_infzero)) { | ||
224 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | ||
225 | goto d_nan; | ||
226 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
227 | } | ||
228 | |||
229 | g_assert(ab_mask & float_cmask_zero); | ||
230 | - if (c->cls == float_class_normal) { | ||
231 | + if (is_anynorm(c->cls)) { | ||
232 | *a = *c; | ||
233 | goto return_normal; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
236 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
237 | bool sign = a->sign ^ b->sign; | ||
238 | |||
239 | - if (likely(ab_mask == float_cmask_normal)) { | ||
240 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
241 | a->sign = sign; | ||
242 | a->exp -= b->exp + frac_div(a, b); | ||
243 | return a; | ||
244 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
245 | { | ||
246 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
247 | |||
248 | - if (likely(ab_mask == float_cmask_normal)) { | ||
249 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
250 | frac_modrem(a, b, mod_quot); | ||
251 | return a; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
254 | |||
255 | if (unlikely(a->cls != float_class_normal)) { | ||
256 | switch (a->cls) { | ||
257 | + case float_class_denormal: | ||
258 | + break; | ||
259 | case float_class_snan: | ||
260 | case float_class_qnan: | ||
261 | parts_return_nan(a, status); | ||
262 | @@ -XXX,XX +XXX,XX @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | ||
263 | case float_class_inf: | ||
264 | break; | ||
265 | case float_class_normal: | ||
266 | + case float_class_denormal: | ||
267 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | ||
268 | float_raise(float_flag_inexact, s); | ||
269 | } | ||
270 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | ||
271 | return 0; | ||
272 | |||
273 | case float_class_normal: | ||
274 | + case float_class_denormal: | ||
275 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
276 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
277 | flags = float_flag_inexact; | ||
278 | @@ -XXX,XX +XXX,XX @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | ||
279 | return 0; | ||
280 | |||
281 | case float_class_normal: | ||
282 | + case float_class_denormal: | ||
283 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
284 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | ||
285 | flags = float_flag_inexact; | ||
286 | @@ -XXX,XX +XXX,XX @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | ||
287 | return 0; | ||
288 | |||
289 | case float_class_normal: | ||
290 | + case float_class_denormal: | ||
291 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | ||
292 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | ||
293 | flags = float_flag_inexact; | ||
294 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
295 | a_exp = a->exp; | ||
296 | b_exp = b->exp; | ||
297 | |||
298 | - if (unlikely(ab_mask != float_cmask_normal)) { | ||
299 | + if (unlikely(!cmask_is_only_normals(ab_mask))) { | ||
300 | switch (a->cls) { | ||
301 | case float_class_normal: | ||
302 | + case float_class_denormal: | ||
303 | break; | ||
304 | case float_class_inf: | ||
305 | a_exp = INT16_MAX; | ||
306 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
307 | } | ||
308 | switch (b->cls) { | ||
309 | case float_class_normal: | ||
310 | + case float_class_denormal: | ||
311 | break; | ||
312 | case float_class_inf: | ||
313 | b_exp = INT16_MAX; | ||
314 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
315 | { | ||
316 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
317 | |||
318 | - if (likely(ab_mask == float_cmask_normal)) { | ||
319 | + if (likely(cmask_is_only_normals(ab_mask))) { | ||
320 | FloatRelation cmp; | ||
321 | |||
322 | if (a->sign != b->sign) { | ||
323 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
324 | case float_class_inf: | ||
325 | break; | ||
326 | case float_class_normal: | ||
327 | + case float_class_denormal: | ||
328 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
329 | break; | ||
330 | default: | ||
331 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
332 | |||
333 | if (unlikely(a->cls != float_class_normal)) { | ||
334 | switch (a->cls) { | ||
335 | + case float_class_denormal: | ||
336 | + break; | ||
337 | case float_class_snan: | ||
338 | case float_class_qnan: | ||
339 | parts_return_nan(a, s); | ||
340 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
341 | } | ||
342 | return; | ||
343 | default: | ||
344 | - break; | ||
345 | + g_assert_not_reached(); | ||
346 | } | ||
347 | - g_assert_not_reached(); | ||
348 | } | ||
349 | if (unlikely(a->sign)) { | ||
350 | goto d_nan; | ||
351 | -- | ||
352 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For the x86 and the Arm FEAT_AFP semantics, we need to be able to | ||
2 | tell the target code that the FPU operation has used an input | ||
3 | denormal. Implement this; when it happens we set the new | ||
4 | float_flag_input_denormal_used. | ||
1 | 5 | ||
6 | Note that we only set this when an input denormal is actually used by | ||
7 | the operation: if the operation results in Invalid Operation or | ||
8 | Divide By Zero or the result is a NaN because some other input was a | ||
9 | NaN then we never needed to look at the input denormal and do not set | ||
10 | input_denormal_used. | |
11 | |||
12 | We mostly do not need to adjust the hardfloat codepaths to deal with | ||
13 | this flag, because almost all hardfloat operations are already gated | ||
14 | on the input not being a denormal, and will fall back to softfloat | ||
15 | for a denormal input. The only exception is the comparison | ||
16 | operations, where we need to add the check for input denormals, which | ||
17 | must now fall back to softfloat where they did not before. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | include/fpu/softfloat-types.h | 7 ++++ | ||
23 | fpu/softfloat.c | 38 +++++++++++++++++--- | ||
24 | fpu/softfloat-parts.c.inc | 68 ++++++++++++++++++++++++++++++++++- | ||
25 | 3 files changed, 107 insertions(+), 6 deletions(-) | ||
26 | |||
27 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/fpu/softfloat-types.h | ||
30 | +++ b/include/fpu/softfloat-types.h | ||
31 | @@ -XXX,XX +XXX,XX @@ enum { | ||
32 | float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ | ||
33 | float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ | ||
34 | float_flag_invalid_snan = 0x2000, /* any operand was snan */ | ||
35 | + /* | ||
36 | + * An input was denormal and we used it (without flushing it to zero). | ||
37 | + * Not set if we do not actually use the denormal input (e.g. | ||
38 | + * because some other input was a NaN, or because the operation | ||
39 | + * wasn't actually carried out (divide-by-zero; invalid)) | ||
40 | + */ | ||
41 | + float_flag_input_denormal_used = 0x4000, | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/fpu/softfloat.c | ||
48 | +++ b/fpu/softfloat.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s) | ||
50 | float16_params_ahp.frac_size + 1); | ||
51 | break; | ||
52 | |||
53 | - case float_class_normal: | ||
54 | case float_class_denormal: | ||
55 | + float_raise(float_flag_input_denormal_used, s); | ||
56 | + break; | ||
57 | + case float_class_normal: | ||
58 | case float_class_zero: | ||
59 | break; | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s) | ||
62 | if (is_nan(a->cls)) { | ||
63 | parts_return_nan(a, s); | ||
64 | } | ||
65 | + if (a->cls == float_class_denormal) { | ||
66 | + float_raise(float_flag_input_denormal_used, s); | ||
67 | + } | ||
68 | } | ||
69 | |||
70 | static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) | ||
72 | if (is_nan(a->cls)) { | ||
73 | parts_return_nan(a, s); | ||
74 | } | ||
75 | + if (a->cls == float_class_denormal) { | ||
76 | + float_raise(float_flag_input_denormal_used, s); | ||
77 | + } | ||
78 | } | ||
79 | |||
80 | #define parts_float_to_float(P, S) \ | ||
81 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, | ||
82 | a->sign = b->sign; | ||
83 | a->exp = b->exp; | ||
84 | |||
85 | - if (is_anynorm(a->cls)) { | ||
86 | + switch (a->cls) { | ||
87 | + case float_class_denormal: | ||
88 | + float_raise(float_flag_input_denormal_used, s); | ||
89 | + /* fall through */ | ||
90 | + case float_class_normal: | ||
91 | frac_truncjam(a, b); | ||
92 | - } else if (is_nan(a->cls)) { | ||
93 | + break; | ||
94 | + case float_class_snan: | ||
95 | + case float_class_qnan: | ||
96 | /* Discard the low bits of the NaN. */ | ||
97 | a->frac = b->frac_hi; | ||
98 | parts_return_nan(a, s); | ||
99 | + break; | ||
100 | + default: | ||
101 | + break; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, | ||
106 | if (is_nan(a->cls)) { | ||
107 | parts_return_nan(a, s); | ||
108 | } | ||
109 | + if (a->cls == float_class_denormal) { | ||
110 | + float_raise(float_flag_input_denormal_used, s); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | float32 float16_to_float32(float16 a, bool ieee, float_status *s) | ||
115 | @@ -XXX,XX +XXX,XX @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) | ||
116 | goto soft; | ||
117 | } | ||
118 | |||
119 | - float32_input_flush2(&ua.s, &ub.s, s); | ||
120 | + if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) { | ||
121 | + /* We may need to set the input_denormal_used flag */ | ||
122 | + goto soft; | ||
123 | + } | ||
124 | + | ||
125 | if (isgreaterequal(ua.h, ub.h)) { | ||
126 | if (isgreater(ua.h, ub.h)) { | ||
127 | return float_relation_greater; | ||
128 | @@ -XXX,XX +XXX,XX @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) | ||
129 | goto soft; | ||
130 | } | ||
131 | |||
132 | - float64_input_flush2(&ua.s, &ub.s, s); | ||
133 | + if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) { | ||
134 | + /* We may need to set the input_denormal_used flag */ | ||
135 | + goto soft; | ||
136 | + } | ||
137 | + | ||
138 | if (isgreaterequal(ua.h, ub.h)) { | ||
139 | if (isgreater(ua.h, ub.h)) { | ||
140 | return float_relation_greater; | ||
141 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/fpu/softfloat-parts.c.inc | ||
144 | +++ b/fpu/softfloat-parts.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | ||
146 | bool b_sign = b->sign ^ subtract; | ||
147 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
148 | |||
149 | + /* | ||
150 | + * For addition and subtraction, we will consume an | ||
151 | + * input denormal unless the other input is a NaN. | ||
152 | + */ | ||
153 | + if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) == | ||
154 | + float_cmask_denormal) { | ||
155 | + float_raise(float_flag_input_denormal_used, s); | ||
156 | + } | ||
157 | + | ||
158 | if (a->sign != b_sign) { | ||
159 | /* Subtraction */ | ||
160 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
161 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
162 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
163 | FloatPartsW tmp; | ||
164 | |||
165 | + if (ab_mask & float_cmask_denormal) { | ||
166 | + float_raise(float_flag_input_denormal_used, s); | ||
167 | + } | ||
168 | + | ||
169 | frac_mulw(&tmp, a, b); | ||
170 | frac_truncjam(a, &tmp); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | ||
173 | } | ||
174 | |||
175 | /* Multiply by 0 or Inf */ | ||
176 | + if (ab_mask & float_cmask_denormal) { | ||
177 | + float_raise(float_flag_input_denormal_used, s); | ||
178 | + } | ||
179 | + | ||
180 | if (ab_mask & float_cmask_inf) { | ||
181 | a->cls = float_class_inf; | ||
182 | a->sign = sign; | ||
183 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, | ||
184 | if (flags & float_muladd_negate_result) { | ||
185 | a->sign ^= 1; | ||
186 | } | ||
187 | + | ||
188 | + /* | ||
189 | + * All result types except for "return the default NaN | ||
190 | + * because this is an Invalid Operation" go through here; | ||
191 | + * this matches the set of cases where we consumed a | ||
192 | + * denormal input. | ||
193 | + */ | ||
194 | + if (abc_mask & float_cmask_denormal) { | ||
195 | + float_raise(float_flag_input_denormal_used, s); | ||
196 | + } | ||
197 | return a; | ||
198 | |||
199 | return_sub_zero: | ||
200 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
201 | bool sign = a->sign ^ b->sign; | ||
202 | |||
203 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
204 | + if (ab_mask & float_cmask_denormal) { | ||
205 | + float_raise(float_flag_input_denormal_used, s); | ||
206 | + } | ||
207 | a->sign = sign; | ||
208 | a->exp -= b->exp + frac_div(a, b); | ||
209 | return a; | ||
210 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | ||
211 | return parts_pick_nan(a, b, s); | ||
212 | } | ||
213 | |||
214 | + if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) { | ||
215 | + float_raise(float_flag_input_denormal_used, s); | ||
216 | + } | ||
217 | + | ||
218 | a->sign = sign; | ||
219 | |||
220 | /* Inf / X */ | ||
221 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
222 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | ||
223 | |||
224 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
225 | + if (ab_mask & float_cmask_denormal) { | ||
226 | + float_raise(float_flag_input_denormal_used, s); | ||
227 | + } | ||
228 | frac_modrem(a, b, mod_quot); | ||
229 | return a; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | ||
232 | return a; | ||
233 | } | ||
234 | |||
235 | + if (ab_mask & float_cmask_denormal) { | ||
236 | + float_raise(float_flag_input_denormal_used, s); | ||
237 | + } | ||
238 | + | ||
239 | /* N % Inf; 0 % N */ | ||
240 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | ||
241 | return a; | ||
242 | @@ -XXX,XX +XXX,XX @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status, | ||
243 | if (unlikely(a->cls != float_class_normal)) { | ||
244 | switch (a->cls) { | ||
245 | case float_class_denormal: | ||
246 | + if (!a->sign) { | ||
247 | + /* -ve denormal will be InvalidOperation */ | ||
248 | + float_raise(float_flag_input_denormal_used, status); | ||
249 | + } | ||
250 | break; | ||
251 | case float_class_snan: | ||
252 | case float_class_qnan: | ||
253 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
254 | if ((flags & (minmax_isnum | minmax_isnumber)) | ||
255 | && !(ab_mask & float_cmask_snan) | ||
256 | && (ab_mask & ~float_cmask_qnan)) { | ||
257 | + if (ab_mask & float_cmask_denormal) { | ||
258 | + float_raise(float_flag_input_denormal_used, s); | ||
259 | + } | ||
260 | return is_nan(a->cls) ? b : a; | ||
261 | } | ||
262 | |||
263 | @@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | ||
264 | return parts_pick_nan(a, b, s); | ||
265 | } | ||
266 | |||
267 | + if (ab_mask & float_cmask_denormal) { | ||
268 | + float_raise(float_flag_input_denormal_used, s); | ||
269 | + } | ||
270 | + | ||
271 | a_exp = a->exp; | ||
272 | b_exp = b->exp; | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
275 | if (likely(cmask_is_only_normals(ab_mask))) { | ||
276 | FloatRelation cmp; | ||
277 | |||
278 | + if (ab_mask & float_cmask_denormal) { | ||
279 | + float_raise(float_flag_input_denormal_used, s); | ||
280 | + } | ||
281 | + | ||
282 | if (a->sign != b->sign) { | ||
283 | goto a_sign; | ||
284 | } | ||
285 | @@ -XXX,XX +XXX,XX @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | ||
286 | return float_relation_unordered; | ||
287 | } | ||
288 | |||
289 | + if (ab_mask & float_cmask_denormal) { | ||
290 | + float_raise(float_flag_input_denormal_used, s); | ||
291 | + } | ||
292 | + | ||
293 | if (ab_mask & float_cmask_zero) { | ||
294 | if (ab_mask == float_cmask_zero) { | ||
295 | return float_relation_equal; | ||
296 | @@ -XXX,XX +XXX,XX @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | ||
297 | case float_class_zero: | ||
298 | case float_class_inf: | ||
299 | break; | ||
300 | - case float_class_normal: | ||
301 | case float_class_denormal: | ||
302 | + float_raise(float_flag_input_denormal_used, s); | ||
303 | + /* fall through */ | ||
304 | + case float_class_normal: | ||
305 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | ||
306 | break; | ||
307 | default: | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | ||
309 | if (unlikely(a->cls != float_class_normal)) { | ||
310 | switch (a->cls) { | ||
311 | case float_class_denormal: | ||
312 | + if (!a->sign) { | ||
313 | + /* -ve denormal will be InvalidOperation */ | ||
314 | + float_raise(float_flag_input_denormal_used, s); | ||
315 | + } | ||
316 | break; | ||
317 | case float_class_snan: | ||
318 | case float_class_qnan: | ||
319 | -- | ||
320 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Currently we handle flushing of output denormals in uncanon_normal | |
2 | always before we deal with rounding. This works for architectures | ||
3 | that detect tininess before rounding, but is usually not the right | ||
4 | place when the architecture detects tininess after rounding. For | ||
5 | example, for x86 the SDM states that the MXCSR FTZ control bit causes | ||
6 | outputs to be flushed to zero "when it detects a floating-point | ||
7 | underflow condition". This means that we mustn't flush to zero if | ||
8 | the input is such that after rounding it is no longer tiny. | ||
9 | |||
10 | At least one of our guest architectures does underflow detection | ||
11 | after rounding but flushing of denormals before rounding (MIPS MSA); | ||
12 | this means we need to have a config knob for this that is separate | ||
13 | from our existing tininess_before_rounding setting. | ||
14 | |||
15 | Add an ftz_detection flag. For consistency with | ||
16 | tininess_before_rounding, we make it default to "detect ftz after | ||
17 | rounding"; this means that we need to explicitly set the flag to | ||
18 | "detect ftz before rounding" on every existing architecture that sets | ||
19 | flush_to_zero, so that this commit has no behaviour change. | ||
20 | (This means more code change here but for the long term a less | ||
21 | confusing API.) | ||
22 | |||
23 | For several architectures the current behaviour is either | ||
24 | definitely or possibly wrong; annotate those with TODO comments. | ||
25 | These architectures are definitely wrong (and should detect | ||
26 | ftz after rounding): | ||
27 | * x86 | ||
28 | * Alpha | ||
29 | |||
30 | For these architectures the spec is unclear: | ||
31 | * MIPS (for non-MSA) | ||
32 | * RX | ||
33 | * SH4 | ||
34 | |||
35 | PA-RISC makes ftz detection IMPDEF, but we aren't setting the | ||
36 | "tininess before rounding" setting that we ought to. | ||
37 | |||
38 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
39 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
40 | --- | ||
41 | include/fpu/softfloat-helpers.h | 11 +++++++++++ | ||
42 | include/fpu/softfloat-types.h | 18 ++++++++++++++++++ | ||
43 | target/mips/fpu_helper.h | 6 ++++++ | ||
44 | target/alpha/cpu.c | 7 +++++++ | ||
45 | target/arm/cpu.c | 1 + | ||
46 | target/hppa/fpu_helper.c | 11 +++++++++++ | ||
47 | target/i386/tcg/fpu_helper.c | 8 ++++++++ | ||
48 | target/mips/msa.c | 9 +++++++++ | ||
49 | target/ppc/cpu_init.c | 3 +++ | ||
50 | target/rx/cpu.c | 8 ++++++++ | ||
51 | target/sh4/cpu.c | 8 ++++++++ | ||
52 | target/tricore/helper.c | 1 + | ||
53 | tests/fp/fp-bench.c | 1 + | ||
54 | fpu/softfloat-parts.c.inc | 21 +++++++++++++++------ | ||
55 | 14 files changed, 107 insertions(+), 6 deletions(-) | ||
56 | |||
57 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/fpu/softfloat-helpers.h | ||
60 | +++ b/include/fpu/softfloat-helpers.h | ||
61 | @@ -XXX,XX +XXX,XX @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) | ||
62 | status->flush_inputs_to_zero = val; | ||
63 | } | ||
64 | |||
65 | +static inline void set_float_ftz_detection(FloatFTZDetection d, | ||
66 | + float_status *status) | ||
67 | +{ | ||
68 | + status->ftz_detection = d; | ||
69 | +} | ||
70 | + | ||
71 | static inline void set_default_nan_mode(bool val, float_status *status) | ||
72 | { | ||
73 | status->default_nan_mode = val; | ||
74 | @@ -XXX,XX +XXX,XX @@ static inline bool get_default_nan_mode(const float_status *status) | ||
75 | return status->default_nan_mode; | ||
76 | } | ||
77 | |||
78 | +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) | ||
79 | +{ | ||
80 | + return status->ftz_detection; | ||
81 | +} | ||
82 | + | ||
83 | #endif /* SOFTFLOAT_HELPERS_H */ | ||
84 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/fpu/softfloat-types.h | ||
87 | +++ b/include/fpu/softfloat-types.h | ||
88 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
89 | float_infzeronan_suppress_invalid = (1 << 7), | ||
90 | } FloatInfZeroNaNRule; | ||
91 | |||
92 | +/* | ||
93 | + * When flush_to_zero is set, should we detect denormal results to | ||
94 | + * be flushed before or after rounding? For most architectures this | ||
95 | + * should be set to match the tininess_before_rounding setting, | ||
96 | + * but a few architectures, e.g. MIPS MSA, detect FTZ before | ||
97 | + * rounding but tininess after rounding. | ||
98 | + * | ||
99 | + * This enum is arranged so that the default if the target doesn't | ||
100 | + * configure it matches the default for tininess_before_rounding | ||
101 | + * (i.e. "after rounding"). | ||
102 | + */ | ||
103 | +typedef enum __attribute__((__packed__)) { | ||
104 | + float_ftz_after_rounding = 0, | ||
105 | + float_ftz_before_rounding = 1, | ||
106 | +} FloatFTZDetection; | ||
107 | + | ||
108 | /* | ||
109 | * Floating Point Status. Individual architectures may maintain | ||
110 | * several versions of float_status for different functions. The | ||
111 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | ||
112 | bool tininess_before_rounding; | ||
113 | /* should denormalised results go to zero and set output_denormal_flushed? */ | ||
114 | bool flush_to_zero; | ||
115 | + /* do we detect and flush denormal results before or after rounding? */ | ||
116 | + FloatFTZDetection ftz_detection; | ||
117 | /* should denormalised inputs go to zero and set input_denormal_flushed? */ | ||
118 | bool flush_inputs_to_zero; | ||
119 | bool default_nan_mode; | ||
120 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/mips/fpu_helper.h | ||
123 | +++ b/target/mips/fpu_helper.h | ||
124 | @@ -XXX,XX +XXX,XX @@ static inline void fp_reset(CPUMIPSState *env) | ||
125 | */ | ||
126 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
127 | &env->active_fpu.fp_status); | ||
128 | + /* | ||
129 | + * TODO: the spec doesn't say clearly whether FTZ happens before | |
130 | + * or after rounding for normal FPU operations. | ||
131 | + */ | ||
132 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
133 | + &env->active_fpu.fp_status); | ||
134 | } | ||
135 | |||
136 | /* MSA */ | ||
137 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/alpha/cpu.c | ||
140 | +++ b/target/alpha/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
142 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
143 | /* Default NaN: sign bit clear, msb frac bit set */ | ||
144 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
145 | + /* | ||
146 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
147 | + * section 4.7.7.11 says that we flush to zero for underflow cases, so | ||
148 | + * this should be float_ftz_after_rounding to match the | ||
149 | + * tininess_after_rounding (which is specified in section 4.7.5). | ||
150 | + */ | ||
151 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
152 | #if defined(CONFIG_USER_ONLY) | ||
153 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
154 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
155 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/target/arm/cpu.c | ||
158 | +++ b/target/arm/cpu.c | ||
159 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
160 | static void arm_set_default_fp_behaviours(float_status *s) | ||
161 | { | ||
162 | set_float_detect_tininess(float_tininess_before_rounding, s); | ||
163 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
164 | set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
165 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
166 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
167 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/hppa/fpu_helper.c | ||
170 | +++ b/target/hppa/fpu_helper.c | ||
171 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
172 | set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status); | ||
173 | /* Default NaN: sign bit clear, msb-1 frac bit set */ | ||
174 | set_float_default_nan_pattern(0b00100000, &env->fp_status); | ||
175 | + /* | ||
176 | + * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing | ||
177 | + * enabled by FPSR.D happens before or after rounding. We pick "before" | ||
178 | + * for consistency with tininess detection. | ||
179 | + */ | ||
180 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
181 | + /* | ||
182 | + * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should | ||
183 | + * detect tininess before rounding, but we don't set that here so we | ||
184 | + * get the default tininess after rounding. | ||
185 | + */ | ||
186 | } | ||
187 | |||
188 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
189 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/i386/tcg/fpu_helper.c | ||
192 | +++ b/target/i386/tcg/fpu_helper.c | ||
193 | @@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env) | ||
194 | set_float_default_nan_pattern(0b11000000, &env->fp_status); | ||
195 | set_float_default_nan_pattern(0b11000000, &env->mmx_status); | ||
196 | set_float_default_nan_pattern(0b11000000, &env->sse_status); | ||
197 | + /* | ||
198 | + * TODO: x86 does flush-to-zero detection after rounding (the SDM | ||
199 | + * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush | ||
200 | + * when we detect underflow, which x86 does after rounding). | ||
201 | + */ | ||
202 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
203 | + set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); | ||
204 | + set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); | ||
205 | } | ||
206 | |||
207 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
208 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/mips/msa.c | ||
211 | +++ b/target/mips/msa.c | ||
212 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
213 | /* tininess detected after rounding.*/ | ||
214 | set_float_detect_tininess(float_tininess_after_rounding, | ||
215 | &env->active_tc.msa_fp_status); | ||
216 | + /* | ||
217 | + * MSACSR.FS detects tiny results to flush to zero before rounding | ||
218 | + * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD | ||
219 | + * Architecture Module, Revision 1.1" section 3.5.4), even though it | ||
220 | + * detects tininess after rounding for underflow purposes (section 3.4.2 | ||
221 | + * table 3.3). | ||
222 | + */ | ||
223 | + set_float_ftz_detection(float_ftz_before_rounding, | ||
224 | + &env->active_tc.msa_fp_status); | ||
225 | |||
226 | /* | ||
227 | * According to MIPS specifications, if one of the two operands is | ||
228 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
229 | index XXXXXXX..XXXXXXX 100644 | ||
230 | --- a/target/ppc/cpu_init.c | ||
231 | +++ b/target/ppc/cpu_init.c | ||
232 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
233 | /* tininess for underflow is detected before rounding */ | ||
234 | set_float_detect_tininess(float_tininess_before_rounding, | ||
235 | &env->fp_status); | ||
236 | + /* Similarly for flush-to-zero */ | ||
237 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
238 | + | ||
239 | /* | ||
240 | * PowerPC propagation rules: | ||
241 | * 1. A if it sNaN or qNaN | ||
242 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/rx/cpu.c | ||
245 | +++ b/target/rx/cpu.c | ||
246 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
247 | set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
248 | /* Default NaN value: sign bit clear, set frac msb */ | ||
249 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
250 | + /* | ||
251 | + * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear | ||
252 | + * on whether flush-to-zero should happen before or after rounding, but | ||
253 | + * section 1.3.2 says that it happens when underflow is detected, and | ||
254 | + * implies that underflow is detected after rounding. So this may not | ||
255 | + * be the correct setting. | ||
256 | + */ | ||
257 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
258 | } | ||
259 | |||
260 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
261 | diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/target/sh4/cpu.c | ||
264 | +++ b/target/sh4/cpu.c | ||
265 | @@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type) | ||
266 | set_default_nan_mode(1, &env->fp_status); | ||
267 | /* sign bit clear, set all frac bits other than msb */ | ||
268 | set_float_default_nan_pattern(0b00111111, &env->fp_status); | ||
269 | + /* | ||
270 | + * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether | ||
271 | + * it detects tininess before or after rounding. Section 6.4 is clear | ||
272 | + * that flush-to-zero happens when the result underflows, though, so | ||
273 | + * either this should be "detect ftz after rounding" or else we should | ||
274 | + * be setting "detect tininess before rounding". | ||
275 | + */ | ||
276 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
277 | } | ||
278 | |||
279 | static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) | ||
280 | diff --git a/target/tricore/helper.c b/target/tricore/helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/tricore/helper.c | ||
283 | +++ b/target/tricore/helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env) | ||
285 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
286 | set_flush_to_zero(1, &env->fp_status); | ||
287 | set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); | ||
288 | + set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); | ||
289 | set_default_nan_mode(1, &env->fp_status); | ||
290 | /* Default NaN pattern: sign bit clear, frac msb set */ | ||
291 | set_float_default_nan_pattern(0b01000000, &env->fp_status); | ||
292 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/fp/fp-bench.c | ||
295 | +++ b/tests/fp/fp-bench.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
297 | set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status); | ||
298 | set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status); | ||
299 | set_float_default_nan_pattern(0b01000000, &soft_status); | ||
300 | + set_float_ftz_detection(float_ftz_before_rounding, &soft_status); | ||
301 | |||
302 | f = bench_funcs[operation][precision]; | ||
303 | g_assert(f); | ||
304 | diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc | ||
305 | index XXXXXXX..XXXXXXX 100644 | ||
306 | --- a/fpu/softfloat-parts.c.inc | ||
307 | +++ b/fpu/softfloat-parts.c.inc | ||
308 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
309 | p->frac_lo &= ~round_mask; | ||
310 | } | ||
311 | frac_shr(p, frac_shift); | ||
312 | - } else if (s->flush_to_zero) { | ||
313 | + } else if (s->flush_to_zero && | ||
314 | + s->ftz_detection == float_ftz_before_rounding) { | ||
315 | flags |= float_flag_output_denormal_flushed; | ||
316 | p->cls = float_class_zero; | ||
317 | exp = 0; | ||
318 | @@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | ||
319 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; | ||
320 | frac_shr(p, frac_shift); | ||
321 | |||
322 | - if (is_tiny && (flags & float_flag_inexact)) { | ||
323 | - flags |= float_flag_underflow; | ||
324 | - } | ||
325 | - if (exp == 0 && frac_eqz(p)) { | ||
326 | - p->cls = float_class_zero; | ||
327 | + if (is_tiny) { | ||
328 | + if (s->flush_to_zero) { | ||
329 | + assert(s->ftz_detection == float_ftz_after_rounding); | ||
330 | + flags |= float_flag_output_denormal_flushed; | ||
331 | + p->cls = float_class_zero; | ||
332 | + exp = 0; | ||
333 | + frac_clear(p); | ||
334 | + } else if (flags & float_flag_inexact) { | ||
335 | + flags |= float_flag_underflow; | ||
336 | + } | ||
337 | + if (exp == 0 && frac_eqz(p)) { | ||
338 | + p->cls = float_class_zero; | ||
339 | + } | ||
340 | } | ||
341 | } | ||
342 | p->exp = exp; | ||
343 | -- | ||
344 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The Armv8.7 FEAT_AFP feature defines three new control bits in | ||
2 | the FPCR: | ||
3 | * FPCR.AH: "alternate floating point mode"; this changes floating | ||
4 | point behaviour in a variety of ways, including: | ||
5 | - the sign of a default NaN is 1, not 0 | ||
6 | - if FPCR.FZ is also 1, results that are still denormal after | |
7 | rounding with an unbounded exponent are flushed to zero | |
8 | - FPCR.FZ does not cause denormalized inputs to be flushed to zero | ||
9 | - miscellaneous other corner-case behaviour changes | ||
10 | * FPCR.FIZ: flush denormalized numbers to zero on input for | ||
11 | most instructions | ||
12 | * FPCR.NEP: makes scalar SIMD operations merge the result with | ||
13 | higher vector elements in one of the source registers, instead | ||
14 | of zeroing the higher elements of the destination | ||
1 | 15 | ||
16 | This commit defines the new bits in the FPCR, and allows them to be | ||
17 | read or written when FEAT_AFP is implemented. Actual behaviour | ||
18 | changes will be implemented in subsequent commits. | ||
19 | |||
20 | Note that these are the first FPCR bits which don't appear in the | ||
21 | AArch32 FPSCR view of the register, and which share bit positions | ||
22 | with FPSR bits. | ||
23 | |||
24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | --- | ||
27 | target/arm/cpu-features.h | 5 +++++ | ||
28 | target/arm/cpu.h | 3 +++ | ||
29 | target/arm/vfp_helper.c | 11 ++++++++--- | ||
30 | 3 files changed, 16 insertions(+), 3 deletions(-) | ||
31 | |||
32 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/cpu-features.h | ||
35 | +++ b/target/arm/cpu-features.h | ||
36 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) | ||
37 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; | ||
38 | } | ||
39 | |||
40 | +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) | ||
41 | +{ | ||
42 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; | ||
43 | +} | ||
44 | + | ||
45 | static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | ||
46 | { | ||
47 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | ||
48 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/cpu.h | ||
51 | +++ b/target/arm/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); | ||
53 | */ | ||
54 | |||
55 | /* FPCR bits */ | ||
56 | +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ | ||
57 | +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ | ||
58 | +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ | ||
59 | #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ | ||
60 | #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ | ||
61 | #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
67 | if (!cpu_isar_feature(any_fp16, cpu)) { | ||
68 | val &= ~FPCR_FZ16; | ||
69 | } | ||
70 | + if (!cpu_isar_feature(aa64_afp, cpu)) { | ||
71 | + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); | ||
72 | + } | ||
73 | |||
74 | if (!cpu_isar_feature(aa64_ebf16, cpu)) { | ||
75 | val &= ~FPCR_EBF; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) | ||
77 | * We don't implement trapped exception handling, so the | ||
78 | * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) | ||
79 | * | ||
80 | - * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF | ||
81 | - * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits | ||
82 | + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, | ||
83 | + * FIZ, AH, and NEP. | ||
84 | + * Len, Stride and LTPSIZE we just handled. Store those bits | ||
85 | * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI | ||
86 | * bits. | ||
87 | */ | ||
88 | - val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF; | ||
89 | + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | | ||
90 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; | ||
91 | env->vfp.fpcr &= ~mask; | ||
92 | env->vfp.fpcr |= val; | ||
93 | } | ||
94 | -- | ||
95 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects | ||
2 | flushing of single and double precision denormal inputs to zero for | ||
3 | AArch64 floating point instructions. (For half-precision, the | ||
4 | existing FPCR.FZ16 control remains the only one.) | ||
1 | 5 | ||
6 | FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal | ||
7 | only because of FPCR.FIZ then we should *not* set the cumulative | ||
8 | exception bit FPSR.IDC. | ||
9 | |||
10 | FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only | ||
11 | applies when FPCR.AH is 0. | ||
12 | |||
13 | We can implement this by setting the "flush inputs to zero" state | ||
14 | appropriately when FPCR is written, and by not reflecting the | ||
15 | float_flag_input_denormal status flag into FPSR reads when it is the | ||
16 | result only of FPCR.FIZ. | ||
17 | |||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | --- | ||
21 | target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++------- | ||
22 | 1 file changed, 50 insertions(+), 10 deletions(-) | ||
23 | |||
24 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/vfp_helper.c | ||
27 | +++ b/target/arm/vfp_helper.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
29 | |||
30 | static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
31 | { | ||
32 | - uint32_t i = 0; | ||
33 | + uint32_t a32_flags = 0, a64_flags = 0; | ||
34 | |||
35 | - i |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
36 | - i |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
37 | - i |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
38 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
39 | + a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
40 | /* FZ16 does not generate an input denormal exception. */ | ||
41 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
42 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
43 | & ~float_flag_input_denormal_flushed); | ||
44 | - i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
45 | + a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
46 | & ~float_flag_input_denormal_flushed); | ||
47 | - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
48 | + | ||
49 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
50 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
51 | & ~float_flag_input_denormal_flushed); | ||
52 | - return vfp_exceptbits_from_host(i); | ||
53 | + /* | ||
54 | + * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
55 | + * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
56 | + * precedence and IDC is set (see the FPUnpackBase pseudocode). | ||
57 | + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). | ||
58 | + * We only do this for the a64 flags because FIZ has no effect | ||
59 | + * on AArch32 even if it is set. | ||
60 | + */ | ||
61 | + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
62 | + a64_flags &= ~float_flag_input_denormal_flushed; | ||
63 | + } | ||
64 | + return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
65 | } | ||
66 | |||
67 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
68 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
69 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
70 | } | ||
71 | |||
72 | +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
73 | +{ | ||
74 | + /* | ||
75 | + * Synchronize any pending exception-flag information in the | ||
76 | + * float_status values into env->vfp.fpsr, and then clear out | ||
77 | + * the float_status data. | ||
78 | + */ | ||
79 | + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); | ||
80 | + vfp_clear_float_status_exc_flags(env); | ||
81 | +} | ||
82 | + | ||
83 | static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
84 | { | ||
85 | uint64_t changed = env->vfp.fpcr; | ||
86 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
87 | if (changed & FPCR_FZ) { | ||
88 | bool ftz_enabled = val & FPCR_FZ; | ||
89 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
90 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
91 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
92 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
93 | + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
94 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
95 | + } | ||
96 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
97 | + /* | ||
98 | + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or | ||
99 | + * both FPCR.AH = 0 and FPCR.FZ = 1. | ||
100 | + */ | ||
101 | + bool fitz_enabled = (val & FPCR_FIZ) || | ||
102 | + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
103 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
104 | } | ||
105 | if (changed & FPCR_DN) { | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
109 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
110 | } | ||
111 | + /* | ||
112 | + * If any bits changed that we look at in vfp_get_fpsr_from_host(), | ||
113 | + * we must sync the float_status flags into vfp.fpsr now (under the | ||
114 | + * old regime) before we update vfp.fpcr. | ||
115 | + */ | ||
116 | + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
117 | + vfp_sync_and_clear_float_status_exc_flags(env); | ||
118 | + } | ||
119 | } | ||
120 | |||
121 | #else | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
1 | In an SMP system it can be unclear which CPU is taking an exception; | 1 | When FPCR.AH is set, various behaviours of AArch64 floating point |
---|---|---|---|
2 | add the CPU index (which is the same value used in the TCG 'Trace | 2 | operations which are controlled by softfloat config settings change: |
3 | %d:' logging) to the "Taking exception" log line to clarify it. | 3 | * tininess and ftz detection before/after rounding |
4 | * NaN propagation order | ||
5 | * result of 0 * Inf + NaN | ||
6 | * default NaN value | ||
7 | |||
8 | When the guest changes the value of the AH bit, switch these config | ||
9 | settings on the fp_status_a64 and fp_status_f16_a64 float_status | ||
10 | fields. | ||
11 | |||
12 | This requires us to make the arm_set_default_fp_behaviours() function | ||
13 | global, since we now need to call it from cpu.c and vfp_helper.c; we | ||
14 | move it to vfp_helper.c so it can be next to the new | ||
15 | arm_set_ah_fp_behaviours(). | ||
4 | 16 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220122182444.724087-2-peter.maydell@linaro.org | ||
9 | --- | 19 | --- |
10 | target/arm/internals.h | 2 +- | 20 | target/arm/internals.h | 4 +++ |
11 | target/arm/helper.c | 9 ++++++--- | 21 | target/arm/cpu.c | 23 ---------------- |
12 | target/arm/m_helper.c | 2 +- | 22 | target/arm/vfp_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++- |
13 | 3 files changed, 8 insertions(+), 5 deletions(-) | 23 | 3 files changed, 61 insertions(+), 24 deletions(-) |
14 | 24 | ||
15 | diff --git a/target/arm/internals.h b/target/arm/internals.h | 25 | diff --git a/target/arm/internals.h b/target/arm/internals.h |
16 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/internals.h | 27 | --- a/target/arm/internals.h |
18 | +++ b/target/arm/internals.h | 28 | +++ b/target/arm/internals.h |
19 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address, | 29 | @@ -XXX,XX +XXX,XX @@ uint64_t gt_virt_cnt_offset(CPUARMState *env); |
20 | ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) | 30 | * all EL1" scope; this covers stage 1 and stage 2. |
21 | __attribute__((nonnull)); | 31 | */ |
22 | 32 | int alle1_tlbmask(CPUARMState *env); | |
23 | -void arm_log_exception(int idx); | 33 | + |
24 | +void arm_log_exception(CPUState *cs); | 34 | +/* Set the float_status behaviour to match the Arm defaults */ |
25 | 35 | +void arm_set_default_fp_behaviours(float_status *s); | |
26 | #endif /* !CONFIG_USER_ONLY */ | 36 | + |
27 | 37 | #endif | |
28 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 38 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
29 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/target/arm/helper.c | 40 | --- a/target/arm/cpu.c |
31 | +++ b/target/arm/helper.c | 41 | +++ b/target/arm/cpu.c |
32 | @@ -XXX,XX +XXX,XX @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, | 42 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, |
33 | return target_el; | 43 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); |
34 | } | 44 | } |
35 | 45 | ||
36 | -void arm_log_exception(int idx) | 46 | -/* |
37 | +void arm_log_exception(CPUState *cs) | 47 | - * Set the float_status behaviour to match the Arm defaults: |
48 | - * * tininess-before-rounding | ||
49 | - * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
50 | - * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
51 | - * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
52 | - * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
53 | - * but note that for QEMU muladd is a * b + c, whereas for | ||
54 | - * the pseudocode function the arguments are in the order c, a, b. | ||
55 | - * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
56 | - * and the input NaN if it is signalling | ||
57 | - * * Default NaN has sign bit clear, msb frac bit set | ||
58 | - */ | ||
59 | -static void arm_set_default_fp_behaviours(float_status *s) | ||
60 | -{ | ||
61 | - set_float_detect_tininess(float_tininess_before_rounding, s); | ||
62 | - set_float_ftz_detection(float_ftz_before_rounding, s); | ||
63 | - set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
64 | - set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
65 | - set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
66 | - set_float_default_nan_pattern(0b01000000, s); | ||
67 | -} | ||
68 | - | ||
69 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
38 | { | 70 | { |
39 | + int idx = cs->exception_index; | 71 | /* Reset a single ARMCPRegInfo register */ |
72 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/arm/vfp_helper.c | ||
75 | +++ b/target/arm/vfp_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #include "exec/helper-proto.h" | ||
78 | #include "internals.h" | ||
79 | #include "cpu-features.h" | ||
80 | +#include "fpu/softfloat.h" | ||
81 | #ifdef CONFIG_TCG | ||
82 | #include "qemu/log.h" | ||
83 | -#include "fpu/softfloat.h" | ||
84 | #endif | ||
85 | |||
86 | /* VFP support. We follow the convention used for VFP instructions: | ||
87 | Single precision routines have a "s" suffix, double precision a | ||
88 | "d" suffix. */ | ||
89 | |||
90 | +/* | ||
91 | + * Set the float_status behaviour to match the Arm defaults: | ||
92 | + * * tininess-before-rounding | ||
93 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
94 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
95 | + * * 3-input NaN propagation prefers SNaN over QNaN, and then | ||
96 | + * operand C over A over B (see FPProcessNaNs3() pseudocode, | ||
97 | + * but note that for QEMU muladd is a * b + c, whereas for | ||
98 | + * the pseudocode function the arguments are in the order c, a, b. | ||
99 | + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, | ||
100 | + * and the input NaN if it is signalling | ||
101 | + * * Default NaN has sign bit clear, msb frac bit set | ||
102 | + */ | ||
103 | +void arm_set_default_fp_behaviours(float_status *s) | ||
104 | +{ | ||
105 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
106 | + set_float_ftz_detection(float_ftz_before_rounding, s); | ||
107 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
108 | + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); | ||
109 | + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); | ||
110 | + set_float_default_nan_pattern(0b01000000, s); | ||
111 | +} | ||
40 | + | 112 | + |
41 | if (qemu_loglevel_mask(CPU_LOG_INT)) { | 113 | +/* |
42 | const char *exc = NULL; | 114 | + * Set the float_status behaviour to match the FEAT_AFP |
43 | static const char * const excnames[] = { | 115 | + * FPCR.AH=1 requirements: |
44 | @@ -XXX,XX +XXX,XX @@ void arm_log_exception(int idx) | 116 | + * * tininess-after-rounding |
45 | if (!exc) { | 117 | + * * 2-input NaN propagation prefers the first NaN |
46 | exc = "unknown"; | 118 | + * * 3-input NaN propagation prefers a over b over c |
47 | } | 119 | + * * 0 * Inf + NaN always returns the input NaN and doesn't |
48 | - qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc); | 120 | + * set Invalid for a QNaN |
49 | + qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s] on CPU %d\n", | 121 | + * * default NaN has sign bit set, msb frac bit set |
50 | + idx, exc, cs->cpu_index); | 122 | + */ |
123 | +static void arm_set_ah_fp_behaviours(float_status *s) | ||
124 | +{ | ||
125 | + set_float_detect_tininess(float_tininess_after_rounding, s); | ||
126 | + set_float_ftz_detection(float_ftz_after_rounding, s); | ||
127 | + set_float_2nan_prop_rule(float_2nan_prop_ab, s); | ||
128 | + set_float_3nan_prop_rule(float_3nan_prop_abc, s); | ||
129 | + set_float_infzeronan_rule(float_infzeronan_dnan_never | | ||
130 | + float_infzeronan_suppress_invalid, s); | ||
131 | + set_float_default_nan_pattern(0b11000000, s); | ||
132 | +} | ||
133 | + | ||
134 | #ifdef CONFIG_TCG | ||
135 | |||
136 | /* Convert host exception flags to vfp form. */ | ||
137 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
138 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
139 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
51 | } | 140 | } |
52 | } | 141 | + if (changed & FPCR_AH) { |
53 | 142 | + bool ah_enabled = val & FPCR_AH; | |
54 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_do_interrupt(CPUState *cs) | 143 | + |
55 | 144 | + if (ah_enabled) { | |
56 | assert(!arm_feature(env, ARM_FEATURE_M)); | 145 | + /* Change behaviours for A64 FP operations */ |
57 | 146 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | |
58 | - arm_log_exception(cs->exception_index); | 147 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); |
59 | + arm_log_exception(cs); | 148 | + } else { |
60 | qemu_log_mask(CPU_LOG_INT, "...from EL%d to EL%d\n", arm_current_el(env), | 149 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); |
61 | new_el); | 150 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); |
62 | if (qemu_loglevel_mask(CPU_LOG_INT) | 151 | + } |
63 | diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c | 152 | + } |
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/m_helper.c | ||
66 | +++ b/target/arm/m_helper.c | ||
67 | @@ -XXX,XX +XXX,XX @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) | ||
68 | uint32_t lr; | ||
69 | bool ignore_stackfaults; | ||
70 | |||
71 | - arm_log_exception(cs->exception_index); | ||
72 | + arm_log_exception(cs); | ||
73 | |||
74 | /* | 153 | /* |
75 | * For exceptions we just mark as pending on the NVIC, and let that | 154 | * If any bits changed that we look at in vfp_get_fpsr_from_host(), |
155 | * we must sync the float_status flags into vfp.fpsr now (under the | ||
76 | -- | 156 | -- |
77 | 2.25.1 | 157 | 2.34.1 |
78 | |||
79 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH = 1, some of the cumulative exception flags in the FPSR | ||
2 | behave slightly differently for A64 operations: | ||
3 | * IDC is set when a denormal input is used without flushing | ||
4 | * IXC (Inexact) is set when an output denormal is flushed to zero | ||
1 | 5 | ||
6 | Update vfp_get_fpsr_from_host() to do this. | ||
7 | |||
8 | Note that because half-precision operations never set IDC, we now | ||
9 | need to add float_flag_input_denormal_used to the set we mask out of | ||
10 | fp_status_f16_a64. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/vfp_helper.c | 17 ++++++++++++++--- | ||
16 | 1 file changed, 14 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/vfp_helper.c | ||
21 | +++ b/target/arm/vfp_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void arm_set_ah_fp_behaviours(float_status *s) | ||
23 | #ifdef CONFIG_TCG | ||
24 | |||
25 | /* Convert host exception flags to vfp form. */ | ||
26 | -static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
27 | +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) | ||
28 | { | ||
29 | uint32_t target_bits = 0; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits) | ||
32 | if (host_bits & float_flag_input_denormal_flushed) { | ||
33 | target_bits |= FPSR_IDC; | ||
34 | } | ||
35 | + /* | ||
36 | + * With FPCR.AH, IDC is set when an input denormal is used, | ||
37 | + * and flushing an output denormal to zero sets both IXC and UFC. | ||
38 | + */ | ||
39 | + if (ah && (host_bits & float_flag_input_denormal_used)) { | ||
40 | + target_bits |= FPSR_IDC; | ||
41 | + } | ||
42 | + if (ah && (host_bits & float_flag_output_denormal_flushed)) { | ||
43 | + target_bits |= FPSR_IXC; | ||
44 | + } | ||
45 | return target_bits; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
49 | |||
50 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
51 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
52 | - & ~float_flag_input_denormal_flushed); | ||
53 | + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
54 | /* | ||
55 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
56 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { | ||
59 | a64_flags &= ~float_flag_input_denormal_flushed; | ||
60 | } | ||
61 | - return vfp_exceptbits_from_host(a32_flags | a64_flags); | ||
62 | + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | | ||
63 | + vfp_exceptbits_from_host(a32_flags, false); | ||
64 | } | ||
65 | |||
66 | static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | -- | ||
68 | 2.34.1 | diff view generated by jsdifflib |
1 | The exception caused by an SVC instruction may be taken to AArch32 | 1 | We are going to need to generate different code in some cases when |
---|---|---|---|
2 | Hyp mode for two reasons: | 2 | FPCR.AH is 1. For example: |
3 | * HCR.TGE indicates that exceptions from EL0 should trap to EL2 | 3 | * Floating point neg and abs must not flip the sign bit of NaNs |
4 | * we were already in Hyp mode | 4 | * some insns (FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, and various |
5 | BFCVT and BFM bfloat16 ops) need to use a different float_status | ||
6 | to the usual one | ||
5 | 7 | ||
6 | The entrypoint in the vector table to be used differs in these two | 8 | Encode FPCR.AH into the A64 tbflags, so we can refer to it at |
7 | cases: for an exception routed to Hyp mode from EL0, we enter at the | 9 | translate time. |
8 | common 0x14 "hyp trap" entrypoint. For SVC from Hyp mode to Hyp | ||
9 | mode, we enter at the 0x08 (svc/hvc trap) entrypoint. | ||
10 | In the v8A Arm ARM pseudocode this is done in AArch32.TakeSVCException. | ||
11 | 10 | ||
12 | QEMU incorrectly routed both of these exceptions to the 0x14 | 11 | Because we now have a bit in FPCR that affects codegen, we can't mark |
13 | entrypoint. Correct the entrypoint for SVC from Hyp to Hyp by making | 12 | the AArch64 FPCR register as being SUPPRESS_TB_END any more; writes |
14 | use of the existing logic which handles "normal entrypoint for | 13 | to it will now end the TB and trigger a regeneration of hflags. |
15 | Hyp-to-Hyp, otherwise 0x14" for traps like UNDEF and data/prefetch | ||
16 | aborts (reproduced here since it's outside the visible context | ||
17 | in the diff for this commit): | ||
18 | |||
19 | if (arm_current_el(env) != 2 && addr < 0x14) { | ||
20 | addr = 0x14; | ||
21 | } | ||
22 | 14 | ||
23 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
24 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
26 | Message-id: 20220117131953.3936137-1-peter.maydell@linaro.org | ||
27 | --- | 17 | --- |
28 | target/arm/helper.c | 4 ++-- | 18 | target/arm/cpu.h | 1 + |
29 | 1 file changed, 2 insertions(+), 2 deletions(-) | 19 | target/arm/tcg/translate.h | 2 ++ |
20 | target/arm/helper.c | 2 +- | ||
21 | target/arm/tcg/hflags.c | 4 ++++ | ||
22 | target/arm/tcg/translate-a64.c | 1 + | ||
23 | 5 files changed, 9 insertions(+), 1 deletion(-) | ||
30 | 24 | ||
25 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/target/arm/cpu.h | ||
28 | +++ b/target/arm/cpu.h | ||
29 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2, 34, 1) | ||
30 | FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
31 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
32 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
33 | +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
34 | |||
35 | /* | ||
36 | * Helpers for using the above. Note that only the A64 accessors use | ||
37 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate.h | ||
40 | +++ b/target/arm/tcg/translate.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
42 | bool nv2_mem_e20; | ||
43 | /* True if NV2 enabled and NV2 RAM accesses are big-endian */ | ||
44 | bool nv2_mem_be; | ||
45 | + /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
46 | + bool fpcr_ah; | ||
47 | /* | ||
48 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
49 | * < 0, set by the current instruction. | ||
31 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 50 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
32 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/target/arm/helper.c | 52 | --- a/target/arm/helper.c |
34 | +++ b/target/arm/helper.c | 53 | +++ b/target/arm/helper.c |
35 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) | 54 | @@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = { |
36 | * separately here. | 55 | .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, |
37 | * | 56 | { .name = "FPCR", .state = ARM_CP_STATE_AA64, |
38 | * The vector table entry used is always the 0x14 Hyp mode entry point, | 57 | .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, |
39 | - * unless this is an UNDEF/HVC/abort taken from Hyp to Hyp. | 58 | - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, |
40 | + * unless this is an UNDEF/SVC/HVC/abort taken from Hyp to Hyp. | 59 | + .access = PL0_RW, .type = ARM_CP_FPU, |
41 | * The offset applied to the preferred return address is always zero | 60 | .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, |
42 | * (see DDI0487C.a section G1.12.3). | 61 | { .name = "FPSR", .state = ARM_CP_STATE_AA64, |
43 | * PSTATE A/I/F masks are set based only on the SCR.EA/IRQ/FIQ values. | 62 | .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, |
44 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) | 63 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c |
45 | addr = 0x04; | 64 | index XXXXXXX..XXXXXXX 100644 |
46 | break; | 65 | --- a/target/arm/tcg/hflags.c |
47 | case EXCP_SWI: | 66 | +++ b/target/arm/tcg/hflags.c |
48 | - addr = 0x14; | 67 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, |
49 | + addr = 0x08; | 68 | DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); |
50 | break; | 69 | } |
51 | case EXCP_BKPT: | 70 | |
52 | /* Fall through to prefetch abort. */ | 71 | + if (env->vfp.fpcr & FPCR_AH) { |
72 | + DP_TBFLAG_A64(flags, AH, 1); | ||
73 | + } | ||
74 | + | ||
75 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
76 | } | ||
77 | |||
78 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/target/arm/tcg/translate-a64.c | ||
81 | +++ b/target/arm/tcg/translate-a64.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
83 | dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); | ||
84 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
85 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
86 | + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
87 | dc->vec_len = 0; | ||
88 | dc->vec_stride = 0; | ||
89 | dc->cp_regs = arm_cpu->cp_regs; | ||
53 | -- | 90 | -- |
54 | 2.25.1 | 91 | 2.34.1 |
55 | |||
56 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | When FPCR.AH is 1, the behaviour of some instructions changes: | |
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT | ||
3 | * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
4 | * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which | ||
5 | QEMU does not yet implement) | ||
6 | * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS | ||
7 | |||
8 | The behaviour change is: | ||
9 | * the instructions do not update the FPSR cumulative exception flags | ||
10 | * trapped floating point exceptions are disabled (a no-op for QEMU, | ||
11 | which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE}) | ||
12 | * rounding is always round-to-nearest-even regardless of FPCR.RMode | ||
13 | * denormalized inputs and outputs are always flushed to zero, as if | ||
14 | FPCR.{FZ,FIZ} is {1,1} | ||
15 | * FPCR.FZ16 is still honoured for half-precision inputs | ||
16 | |||
17 | (See the Arm ARM DDI0487L.a section A1.5.9.) | ||
18 | |||
19 | We can provide all these behaviours with another pair of float_status fields | ||
20 | which we use only for these insns, when FPCR.AH is 1. These float_status | ||
21 | fields will always have: | ||
22 | * flush_to_zero and flush_inputs_to_zero set for the non-F16 field | ||
23 | * rounding mode set to round-to-nearest-even | ||
24 | and so the only FPCR fields they need to honour are DN and FZ16. | ||
25 | |||
26 | In this commit we only define the new fp_status fields and give them | ||
27 | the required behaviour when FPSR is updated. In subsequent commits | ||
28 | we will arrange to use these new fp_status fields for the instructions | ||
29 | that should be affected by FPCR.AH in this way. | ||
30 | |||
31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
32 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
33 | --- | ||
34 | target/arm/cpu.h | 15 +++++++++++++++ | ||
35 | target/arm/internals.h | 2 ++ | ||
36 | target/arm/tcg/translate.h | 14 ++++++++++++++ | ||
37 | target/arm/cpu.c | 4 ++++ | ||
38 | target/arm/vfp_helper.c | 13 ++++++++++++- | ||
39 | 5 files changed, 47 insertions(+), 1 deletion(-) | ||
40 | |||
41 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/cpu.h | ||
44 | +++ b/target/arm/cpu.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
46 | * standard_fp_status : the ARM "Standard FPSCR Value" | ||
47 | * standard_fp_status_fp16 : used for half-precision | ||
48 | * calculations with the ARM "Standard FPSCR Value" | ||
49 | + * ah_fp_status: used for the A64 insns which change behaviour | ||
50 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
51 | + * and the reciprocal and square root estimate/step insns) | ||
52 | + * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
53 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
54 | + * and the reciprocal and square root estimate/step insns); | ||
55 | + * for half-precision | ||
56 | * | ||
57 | * Half-precision operations are governed by a separate | ||
58 | * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
60 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
61 | * using a fixed value for it. | ||
62 | * | ||
63 | + * The ah_fp_status is needed because some insns have different | ||
64 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
65 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
66 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
67 | + * which means we need an ah_fp_status_f16 as well. | ||
68 | + * | ||
69 | * To avoid having to transfer exception bits around, we simply | ||
70 | * say that the FPSCR cumulative exception flags are the logical | ||
71 | * OR of the flags in the four fp statuses. This relies on the | ||
72 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
73 | float_status fp_status_f16_a64; | ||
74 | float_status standard_fp_status; | ||
75 | float_status standard_fp_status_f16; | ||
76 | + float_status ah_fp_status; | ||
77 | + float_status ah_fp_status_f16; | ||
78 | |||
79 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
80 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
81 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/arm/internals.h | ||
84 | +++ b/target/arm/internals.h | ||
85 | @@ -XXX,XX +XXX,XX @@ int alle1_tlbmask(CPUARMState *env); | ||
86 | |||
87 | /* Set the float_status behaviour to match the Arm defaults */ | ||
88 | void arm_set_default_fp_behaviours(float_status *s); | ||
89 | +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ | ||
90 | +void arm_set_ah_fp_behaviours(float_status *s); | ||
91 | |||
92 | #endif | ||
93 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/tcg/translate.h | ||
96 | +++ b/target/arm/tcg/translate.h | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
98 | FPST_A64, | ||
99 | FPST_A32_F16, | ||
100 | FPST_A64_F16, | ||
101 | + FPST_AH, | ||
102 | + FPST_AH_F16, | ||
103 | FPST_STD, | ||
104 | FPST_STD_F16, | ||
105 | } ARMFPStatusFlavour; | ||
106 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMFPStatusFlavour { | ||
107 | * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
108 | * FPST_A64_F16 | ||
109 | * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
110 | + * FPST_AH: | ||
111 | + * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
112 | + * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
113 | + * estimate/step insns) | ||
114 | + * FPST_AH_F16: | ||
115 | + * ditto, but for half-precision operations | ||
116 | * FPST_STD | ||
117 | * for A32/T32 Neon operations using the "standard FPSCR value" | ||
118 | * FPST_STD_F16 | ||
119 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
120 | case FPST_A64_F16: | ||
121 | offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
122 | break; | ||
123 | + case FPST_AH: | ||
124 | + offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
125 | + break; | ||
126 | + case FPST_AH_F16: | ||
127 | + offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
128 | + break; | ||
129 | case FPST_STD: | ||
130 | offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
131 | break; | ||
132 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/cpu.c | ||
135 | +++ b/target/arm/cpu.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
137 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
138 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
139 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
140 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
141 | + set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
142 | + set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
143 | + arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
144 | |||
145 | #ifndef CONFIG_USER_ONLY | ||
146 | if (kvm_enabled()) { | ||
147 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/arm/vfp_helper.c | ||
150 | +++ b/target/arm/vfp_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ void arm_set_default_fp_behaviours(float_status *s) | ||
152 | * set Invalid for a QNaN | ||
153 | * * default NaN has sign bit set, msb frac bit set | ||
154 | */ | ||
155 | -static void arm_set_ah_fp_behaviours(float_status *s) | ||
156 | +void arm_set_ah_fp_behaviours(float_status *s) | ||
157 | { | ||
158 | set_float_detect_tininess(float_tininess_after_rounding, s); | ||
159 | set_float_ftz_detection(float_ftz_after_rounding, s); | ||
160 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
161 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
162 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
163 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
164 | + /* | ||
165 | + * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
166 | + * they are used for insns that must not set the cumulative exception bits. | ||
167 | + */ | ||
168 | + | ||
169 | /* | ||
170 | * Flushing an input denormal *only* because FPCR.FIZ == 1 does | ||
171 | * not set FPSR.IDC; if FPCR.FZ is also set then this takes | ||
172 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
173 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
174 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
175 | set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
176 | + set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
177 | + set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
178 | } | ||
179 | |||
180 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
181 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
182 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
183 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
185 | + set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
186 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
187 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
188 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
189 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
190 | } | ||
191 | if (changed & FPCR_FZ) { | ||
192 | bool ftz_enabled = val & FPCR_FZ; | ||
193 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
194 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
195 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
196 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
197 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
198 | + set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
199 | } | ||
200 | if (changed & FPCR_AH) { | ||
201 | bool ah_enabled = val & FPCR_AH; | ||
202 | -- | ||
203 | 2.34.1 | diff view generated by jsdifflib |
1 | The ITS-related parts of the redistributor code make some checks for | 1 | For the instructions FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS, use |
---|---|---|---|
2 | whether registers like GICR_PROPBASER and GICR_PENDBASER are zero. | 2 | FPST_AH or FPST_AH_F16 when FPCR.AH is 1, so that they get |
3 | There is no requirement in the specification for treating zeroes in | 3 | the required behaviour changes. |
4 | these address registers specially -- they contain guest physical | ||
5 | addresses and it is entirely valid (if unusual) for the guest to | ||
6 | choose to put the tables they address at guest physical address zero. | ||
7 | We use these values only to calculate guest addresses, and attempts | ||
8 | by the guest to use a bad address will be handled by the | ||
9 | address_space_* functions which we use to do the loads and stores. | ||
10 | |||
11 | Remove the unnecessary checks. | ||
12 | 4 | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | Message-id: 20220122182444.724087-9-peter.maydell@linaro.org | ||
16 | --- | 7 | --- |
17 | hw/intc/arm_gicv3_redist.c | 8 +++----- | 8 | target/arm/tcg/translate-a64.h | 13 ++++ |
18 | 1 file changed, 3 insertions(+), 5 deletions(-) | 9 | target/arm/tcg/translate-a64.c | 119 +++++++++++++++++++++++++-------- |
10 | target/arm/tcg/translate-sve.c | 30 ++++++--- | ||
11 | 3 files changed, 127 insertions(+), 35 deletions(-) | ||
19 | 12 | ||
20 | diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c | 13 | diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h |
21 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/intc/arm_gicv3_redist.c | 15 | --- a/target/arm/tcg/translate-a64.h |
23 | +++ b/hw/intc/arm_gicv3_redist.c | 16 | +++ b/target/arm/tcg/translate-a64.h |
24 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_update_lpi_only(GICv3CPUState *cs) | 17 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) |
25 | idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), | 18 | return ret; |
26 | GICD_TYPER_IDBITS); | 19 | } |
27 | 20 | ||
28 | - if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || | 21 | +/* |
29 | - !cs->gicr_pendbaser) { | 22 | + * Return the ARMFPStatusFlavour to use based on element size and |
30 | + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS)) { | 23 | + * whether FPCR.AH is set. |
31 | return; | 24 | + */ |
25 | +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) | ||
26 | +{ | ||
27 | + if (s->fpcr_ah) { | ||
28 | + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; | ||
29 | + } else { | ||
30 | + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | bool disas_sve(DisasContext *, uint32_t); | ||
35 | bool disas_sme(DisasContext *, uint32_t); | ||
36 | |||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, | ||
42 | * an out-of-line helper. | ||
43 | */ | ||
44 | static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
45 | - int rm, bool is_fp16, int data, | ||
46 | + int rm, ARMFPStatusFlavour fpsttype, int data, | ||
47 | gen_helper_gvec_3_ptr *fn) | ||
48 | { | ||
49 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
50 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
51 | tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), | ||
52 | vec_full_reg_offset(s, rn), | ||
53 | vec_full_reg_offset(s, rm), fpst, | ||
54 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
55 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); | ||
56 | } FPScalar; | ||
57 | |||
58 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
59 | +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
60 | + const FPScalar *f, | ||
61 | + ARMFPStatusFlavour fpsttype) | ||
62 | { | ||
63 | switch (a->esz) { | ||
64 | case MO_64: | ||
65 | if (fp_access_check(s)) { | ||
66 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
67 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
68 | - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
69 | + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
70 | write_fp_dreg(s, a->rd, t0); | ||
71 | } | ||
72 | break; | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
74 | if (fp_access_check(s)) { | ||
75 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
76 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
77 | - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); | ||
78 | + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
79 | write_fp_sreg(s, a->rd, t0); | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
83 | if (fp_access_check(s)) { | ||
84 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
85 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
86 | - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); | ||
87 | + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
88 | write_fp_sreg(s, a->rd, t0); | ||
89 | } | ||
90 | break; | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
96 | +{ | ||
97 | + return do_fp3_scalar_with_fpsttype(s, a, f, | ||
98 | + a->esz == MO_16 ? | ||
99 | + FPST_A64_F16 : FPST_A64); | ||
100 | +} | ||
101 | + | ||
102 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
103 | +{ | ||
104 | + return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
105 | +} | ||
106 | + | ||
107 | static const FPScalar f_scalar_fadd = { | ||
108 | gen_helper_vfp_addh, | ||
109 | gen_helper_vfp_adds, | ||
110 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
111 | gen_helper_recpsf_f32, | ||
112 | gen_helper_recpsf_f64, | ||
113 | }; | ||
114 | -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) | ||
115 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
116 | |||
117 | static const FPScalar f_scalar_frsqrts = { | ||
118 | gen_helper_rsqrtsf_f16, | ||
119 | gen_helper_rsqrtsf_f32, | ||
120 | gen_helper_rsqrtsf_f64, | ||
121 | }; | ||
122 | -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) | ||
123 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
124 | |||
125 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
126 | const FPScalar *f, bool swap) | ||
127 | @@ -XXX,XX +XXX,XX @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) | ||
128 | TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) | ||
129 | TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) | ||
130 | |||
131 | -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
132 | - gen_helper_gvec_3_ptr * const fns[3]) | ||
133 | +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, | ||
134 | + int data, | ||
135 | + gen_helper_gvec_3_ptr * const fns[3], | ||
136 | + ARMFPStatusFlavour fpsttype) | ||
137 | { | ||
138 | MemOp esz = a->esz; | ||
139 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, | ||
141 | return check == 0; | ||
32 | } | 142 | } |
33 | 143 | ||
34 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) | 144 | - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, |
35 | idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), | 145 | - esz == MO_16, data, fns[esz - 1]); |
36 | GICD_TYPER_IDBITS); | 146 | + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, |
37 | 147 | + data, fns[esz - 1]); | |
38 | - if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || | 148 | return true; |
39 | - !cs->gicr_pendbaser || (irq > (1ULL << (idbits + 1)) - 1) || | 149 | } |
40 | - irq < GICV3_LPI_INTID_START) { | 150 | |
41 | + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || | 151 | +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
42 | + (irq > (1ULL << (idbits + 1)) - 1) || irq < GICV3_LPI_INTID_START) { | 152 | + gen_helper_gvec_3_ptr * const fns[3]) |
43 | return; | 153 | +{ |
154 | + return do_fp3_vector_with_fpsttype(s, a, data, fns, | ||
155 | + a->esz == MO_16 ? | ||
156 | + FPST_A64_F16 : FPST_A64); | ||
157 | +} | ||
158 | + | ||
159 | +static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
160 | + gen_helper_gvec_3_ptr * const f[3]) | ||
161 | +{ | ||
162 | + return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
163 | + select_ah_fpst(s, a->esz)); | ||
164 | +} | ||
165 | + | ||
166 | static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { | ||
167 | gen_helper_gvec_fadd_h, | ||
168 | gen_helper_gvec_fadd_s, | ||
169 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
170 | gen_helper_gvec_recps_s, | ||
171 | gen_helper_gvec_recps_d, | ||
172 | }; | ||
173 | -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) | ||
174 | +TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
175 | |||
176 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
177 | gen_helper_gvec_rsqrts_h, | ||
178 | gen_helper_gvec_rsqrts_s, | ||
179 | gen_helper_gvec_rsqrts_d, | ||
180 | }; | ||
181 | -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) | ||
182 | +TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
183 | |||
184 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
185 | gen_helper_gvec_faddp_h, | ||
186 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, | ||
44 | } | 187 | } |
45 | 188 | ||
189 | gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, | ||
190 | - esz == MO_16, a->idx, fns[esz - 1]); | ||
191 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
192 | + a->idx, fns[esz - 1]); | ||
193 | return true; | ||
194 | } | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1 { | ||
197 | void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); | ||
198 | } FPScalar1; | ||
199 | |||
200 | -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
201 | - const FPScalar1 *f, int rmode) | ||
202 | +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
203 | + const FPScalar1 *f, int rmode, | ||
204 | + ARMFPStatusFlavour fpsttype) | ||
205 | { | ||
206 | TCGv_i32 tcg_rmode = NULL; | ||
207 | TCGv_ptr fpst; | ||
208 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
209 | return check == 0; | ||
210 | } | ||
211 | |||
212 | - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
213 | + fpst = fpstatus_ptr(fpsttype); | ||
214 | if (rmode >= 0) { | ||
215 | tcg_rmode = gen_set_rmode(rmode, fpst); | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, | ||
222 | + const FPScalar1 *f, int rmode) | ||
223 | +{ | ||
224 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, | ||
225 | + a->esz == MO_16 ? | ||
226 | + FPST_A64_F16 : FPST_A64); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, | ||
230 | + const FPScalar1 *f, int rmode) | ||
231 | +{ | ||
232 | + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); | ||
233 | +} | ||
234 | + | ||
235 | static const FPScalar1 f_scalar_fsqrt = { | ||
236 | gen_helper_vfp_sqrth, | ||
237 | gen_helper_vfp_sqrts, | ||
238 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { | ||
239 | gen_helper_recpe_f32, | ||
240 | gen_helper_recpe_f64, | ||
241 | }; | ||
242 | -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) | ||
243 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) | ||
244 | |||
245 | static const FPScalar1 f_scalar_frecpx = { | ||
246 | gen_helper_frecpx_f16, | ||
247 | gen_helper_frecpx_f32, | ||
248 | gen_helper_frecpx_f64, | ||
249 | }; | ||
250 | -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) | ||
251 | +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) | ||
252 | |||
253 | static const FPScalar1 f_scalar_frsqrte = { | ||
254 | gen_helper_rsqrte_f16, | ||
255 | gen_helper_rsqrte_f32, | ||
256 | gen_helper_rsqrte_f64, | ||
257 | }; | ||
258 | -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) | ||
259 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) | ||
260 | |||
261 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
262 | { | ||
263 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, | ||
264 | &f_scalar_frint64, FPROUNDING_ZERO) | ||
265 | TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) | ||
266 | |||
267 | -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
268 | - int rd, int rn, int data, | ||
269 | - gen_helper_gvec_2_ptr * const fns[3]) | ||
270 | +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, | ||
271 | + bool is_q, int rd, int rn, int data, | ||
272 | + gen_helper_gvec_2_ptr * const fns[3], | ||
273 | + ARMFPStatusFlavour fpsttype) | ||
274 | { | ||
275 | int check = fp_access_check_vector_hsd(s, is_q, esz); | ||
276 | TCGv_ptr fpst; | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
278 | return check == 0; | ||
279 | } | ||
280 | |||
281 | - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
282 | + fpst = fpstatus_ptr(fpsttype); | ||
283 | tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), | ||
284 | vec_full_reg_offset(s, rn), fpst, | ||
285 | is_q ? 16 : 8, vec_full_reg_size(s), | ||
286 | @@ -XXX,XX +XXX,XX @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
287 | return true; | ||
288 | } | ||
289 | |||
290 | +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
291 | + int rd, int rn, int data, | ||
292 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
293 | +{ | ||
294 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, | ||
295 | + esz == MO_16 ? FPST_A64_F16 : | ||
296 | + FPST_A64); | ||
297 | +} | ||
298 | + | ||
299 | +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, | ||
300 | + int rd, int rn, int data, | ||
301 | + gen_helper_gvec_2_ptr * const fns[3]) | ||
302 | +{ | ||
303 | + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, | ||
304 | + fns, select_ah_fpst(s, esz)); | ||
305 | +} | ||
306 | + | ||
307 | static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { | ||
308 | gen_helper_gvec_vcvt_sh, | ||
309 | gen_helper_gvec_vcvt_sf, | ||
310 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { | ||
311 | gen_helper_gvec_frecpe_s, | ||
312 | gen_helper_gvec_frecpe_d, | ||
313 | }; | ||
314 | -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
315 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
316 | |||
317 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
318 | gen_helper_gvec_frsqrte_h, | ||
319 | gen_helper_gvec_frsqrte_s, | ||
320 | gen_helper_gvec_frsqrte_d, | ||
321 | }; | ||
322 | -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
323 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
324 | |||
325 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
326 | { | ||
327 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
328 | index XXXXXXX..XXXXXXX 100644 | ||
329 | --- a/target/arm/tcg/translate-sve.c | ||
330 | +++ b/target/arm/tcg/translate-sve.c | ||
331 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
332 | return true; | ||
333 | } | ||
334 | |||
335 | -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
336 | - arg_rr_esz *a, int data) | ||
337 | +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, | ||
338 | + arg_rr_esz *a, int data) | ||
339 | { | ||
340 | return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, | ||
341 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
342 | + select_ah_fpst(s, a->esz)); | ||
343 | } | ||
344 | |||
345 | /* Invoke an out-of-line helper on 3 Zregs. */ | ||
346 | @@ -XXX,XX +XXX,XX @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
347 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
348 | } | ||
349 | |||
350 | +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, | ||
351 | + arg_rrr_esz *a, int data) | ||
352 | +{ | ||
353 | + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, | ||
354 | + select_ah_fpst(s, a->esz)); | ||
355 | +} | ||
356 | + | ||
357 | /* Invoke an out-of-line helper on 4 Zregs. */ | ||
358 | static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, | ||
359 | int rd, int rn, int rm, int ra, int data) | ||
360 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
361 | NULL, gen_helper_gvec_frecpe_h, | ||
362 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
363 | }; | ||
364 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) | ||
365 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
366 | |||
367 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
368 | NULL, gen_helper_gvec_frsqrte_h, | ||
369 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
370 | }; | ||
371 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
372 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
373 | |||
374 | /* | ||
375 | *** SVE Floating Point Compare with Zero Group | ||
376 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
377 | }; \ | ||
378 | TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) | ||
379 | |||
380 | +#define DO_FP3_AH(NAME, name) \ | ||
381 | + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ | ||
382 | + NULL, gen_helper_gvec_##name##_h, \ | ||
383 | + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
384 | + }; \ | ||
385 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
386 | + | ||
387 | DO_FP3(FADD_zzz, fadd) | ||
388 | DO_FP3(FSUB_zzz, fsub) | ||
389 | DO_FP3(FMUL_zzz, fmul) | ||
390 | -DO_FP3(FRECPS, recps) | ||
391 | -DO_FP3(FRSQRTS, rsqrts) | ||
392 | +DO_FP3_AH(FRECPS, recps) | ||
393 | +DO_FP3_AH(FRSQRTS, rsqrts) | ||
394 | |||
395 | #undef DO_FP3 | ||
396 | |||
397 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { | ||
398 | gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, | ||
399 | }; | ||
400 | TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], | ||
401 | - a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
402 | + a, 0, select_ah_fpst(s, a->esz)) | ||
403 | |||
404 | static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { | ||
405 | NULL, gen_helper_sve_fsqrt_h, | ||
46 | -- | 406 | -- |
47 | 2.25.1 | 407 | 2.34.1 |
48 | |||
49 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFCVT, BFCVTN, BFCVTN2 | ||
3 | * SVE BFCVT, BFCVTNT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 27 +++++++++++++++++++++------ | ||
11 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
12 | 2 files changed, 25 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/translate-a64.c | ||
17 | +++ b/target/arm/tcg/translate-a64.c | ||
18 | @@ -XXX,XX +XXX,XX @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
19 | static const FPScalar1 f_scalar_bfcvt = { | ||
20 | .gen_s = gen_helper_bfcvt, | ||
21 | }; | ||
22 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) | ||
23 | +TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
24 | |||
25 | static const FPScalar1 f_scalar_frint32 = { | ||
26 | NULL, | ||
27 | @@ -XXX,XX +XXX,XX @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) | ||
28 | tcg_gen_extu_i32_i64(d, tmp); | ||
29 | } | ||
30 | |||
31 | -static ArithOneOp * const f_vector_bfcvtn[] = { | ||
32 | - NULL, | ||
33 | - gen_bfcvtn_hs, | ||
34 | - NULL, | ||
35 | +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) | ||
36 | +{ | ||
37 | + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); | ||
38 | + TCGv_i32 tmp = tcg_temp_new_i32(); | ||
39 | + gen_helper_bfcvt_pair(tmp, n, fpst); | ||
40 | + tcg_gen_extu_i32_i64(d, tmp); | ||
41 | +} | ||
42 | + | ||
43 | +static ArithOneOp * const f_vector_bfcvtn[2][3] = { | ||
44 | + { | ||
45 | + NULL, | ||
46 | + gen_bfcvtn_hs, | ||
47 | + NULL, | ||
48 | + }, { | ||
49 | + NULL, | ||
50 | + gen_bfcvtn_ah_hs, | ||
51 | + NULL, | ||
52 | + } | ||
53 | }; | ||
54 | -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) | ||
55 | +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, | ||
56 | + f_vector_bfcvtn[s->fpcr_ah]) | ||
57 | |||
58 | static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) | ||
59 | { | ||
60 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/translate-sve.c | ||
63 | +++ b/target/arm/tcg/translate-sve.c | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
65 | gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) | ||
66 | |||
67 | TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
68 | - gen_helper_sve_bfcvt, a, 0, FPST_A64) | ||
69 | + gen_helper_sve_bfcvt, a, 0, | ||
70 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
71 | |||
72 | TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, | ||
73 | gen_helper_sve_fcvt_dh, a, 0, FPST_A64) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
75 | gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) | ||
76 | |||
77 | TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, | ||
78 | - gen_helper_sve_bfcvtnt, a, 0, FPST_A64) | ||
79 | + gen_helper_sve_bfcvtnt, a, 0, | ||
80 | + s->fpcr_ah ? FPST_AH : FPST_A64) | ||
81 | |||
82 | TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
83 | gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) | ||
84 | -- | ||
85 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When FPCR.AH is 1, use FPST_AH for: | ||
2 | * AdvSIMD BFMLALB, BFMLALT | ||
3 | * SVE BFMLALB, BFMLALT, BFMLSLB, BFMLSLT | ||
1 | 4 | ||
5 | so that they get the required behaviour changes. | ||
6 | |||
7 | We do this by making gen_gvec_op4_fpst() take an ARMFPStatusFlavour | ||
8 | rather than a bool is_fp16; existing callsites now select | ||
9 | FPST_A64_F16 vs FPST_A64 themselves rather than passing in | ||
10 | the boolean. | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | target/arm/tcg/translate-a64.c | 20 +++++++++++++------- | ||
16 | target/arm/tcg/translate-sve.c | 6 ++++-- | ||
17 | 2 files changed, 17 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/translate-a64.c | ||
22 | +++ b/target/arm/tcg/translate-a64.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, | ||
24 | * an out-of-line helper. | ||
25 | */ | ||
26 | static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
27 | - int rm, int ra, bool is_fp16, int data, | ||
28 | + int rm, int ra, ARMFPStatusFlavour fpsttype, | ||
29 | + int data, | ||
30 | gen_helper_gvec_4_ptr *fn) | ||
31 | { | ||
32 | - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); | ||
33 | + TCGv_ptr fpst = fpstatus_ptr(fpsttype); | ||
34 | tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), | ||
35 | vec_full_reg_offset(s, rn), | ||
36 | vec_full_reg_offset(s, rm), | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) | ||
38 | } | ||
39 | if (fp_access_check(s)) { | ||
40 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
41 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, | ||
42 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
43 | + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, | ||
44 | gen_helper_gvec_bfmlal); | ||
45 | } | ||
46 | return true; | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
48 | } | ||
49 | |||
50 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
51 | - a->esz == MO_16, a->rot, fn[a->esz]); | ||
52 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
53 | + a->rot, fn[a->esz]); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
58 | } | ||
59 | |||
60 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
61 | - esz == MO_16, (a->idx << 1) | neg, | ||
62 | + esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
63 | + (a->idx << 1) | neg, | ||
64 | fns[esz - 1]); | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) | ||
68 | } | ||
69 | if (fp_access_check(s)) { | ||
70 | /* Q bit selects BFMLALB vs BFMLALT. */ | ||
71 | - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, | ||
72 | + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, | ||
73 | + s->fpcr_ah ? FPST_AH : FPST_A64, | ||
74 | (a->idx << 1) | a->q, | ||
75 | gen_helper_gvec_bfmlal_idx); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
78 | } | ||
79 | if (fp_access_check(s)) { | ||
80 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
81 | - a->esz == MO_16, (a->idx << 2) | a->rot, fn); | ||
82 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
83 | + (a->idx << 2) | a->rot, fn); | ||
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/translate-sve.c | ||
90 | +++ b/target/arm/tcg/translate-sve.c | ||
91 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, | ||
92 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
93 | { | ||
94 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, | ||
95 | - a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); | ||
96 | + a->rd, a->rn, a->rm, a->ra, sel, | ||
97 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
98 | } | ||
99 | |||
100 | TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) | ||
101 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | ||
102 | { | ||
103 | return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, | ||
104 | a->rd, a->rn, a->rm, a->ra, | ||
105 | - (a->index << 1) | sel, FPST_A64); | ||
106 | + (a->index << 1) | sel, | ||
107 | + s->fpcr_ah ? FPST_AH : FPST_A64); | ||
108 | } | ||
109 | |||
110 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so | ||
2 | that instead of zeroing the high elements of a vector register when | ||
3 | we write the output of a scalar operation to it, we merge in | ||
4 | those elements from one of the source registers. Since this affects | ||
5 | the generated code, we need to put FPCR.NEP into the TBFLAGS. | ||
1 | 6 | ||
7 | FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64 | ||
8 | is not implemented or not enabled; we can implement this logic in | ||
9 | rebuild_hflags_a64(). | ||
10 | |||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/tcg/translate.h | 2 ++ | ||
16 | target/arm/tcg/hflags.c | 9 +++++++++ | ||
17 | target/arm/tcg/translate-a64.c | 1 + | ||
18 | 4 files changed, 13 insertions(+) | ||
19 | |||
20 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/cpu.h | ||
23 | +++ b/target/arm/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) | ||
25 | /* Set if FEAT_NV2 RAM accesses are big-endian */ | ||
26 | FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) | ||
27 | FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ | ||
28 | +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ | ||
29 | |||
30 | /* | ||
31 | * Helpers for using the above. Note that only the A64 accessors use | ||
32 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/arm/tcg/translate.h | ||
35 | +++ b/target/arm/tcg/translate.h | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
37 | bool nv2_mem_be; | ||
38 | /* True if FPCR.AH is 1 (alternate floating point handling) */ | ||
39 | bool fpcr_ah; | ||
40 | + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ | ||
41 | + bool fpcr_nep; | ||
42 | /* | ||
43 | * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. | ||
44 | * < 0, set by the current instruction. | ||
45 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/hflags.c | ||
48 | +++ b/target/arm/tcg/hflags.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | ||
50 | if (env->vfp.fpcr & FPCR_AH) { | ||
51 | DP_TBFLAG_A64(flags, AH, 1); | ||
52 | } | ||
53 | + if (env->vfp.fpcr & FPCR_NEP) { | ||
54 | + /* | ||
55 | + * In streaming-SVE without FA64, NEP behaves as if zero; | ||
56 | + * compare pseudocode IsMerging() | ||
57 | + */ | ||
58 | + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { | ||
59 | + DP_TBFLAG_A64(flags, NEP, 1); | ||
60 | + } | ||
61 | + } | ||
62 | |||
63 | return rebuild_hflags_common(env, fp_el, mmu_idx, flags); | ||
64 | } | ||
65 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/arm/tcg/translate-a64.c | ||
68 | +++ b/target/arm/tcg/translate-a64.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
70 | dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); | ||
71 | dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); | ||
72 | dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); | ||
73 | + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); | ||
74 | dc->vec_len = 0; | ||
75 | dc->vec_stride = 0; | ||
76 | dc->cp_regs = arm_cpu->cp_regs; | ||
77 | -- | ||
78 | 2.34.1 | diff view generated by jsdifflib |
1 | Implement the ITS MOVI command. This command specifies a (physical) LPI | 1 | For FEAT_AFP's FPCR.NEP bit, we need to programmatically change the |
---|---|---|---|
2 | by DeviceID and EventID and provides a new ICID for it. The ITS must | 2 | behaviour of the writeback of the result for most SIMD scalar |
3 | find the interrupt translation table entry for the LPI, which will | 3 | operations, so that instead of zeroing the upper part of the result |
4 | tell it the old ICID. It then moves the pending state of the LPI from | 4 | register it merges the upper elements from one of the input |
5 | the old redistributor to the new one and updates the ICID field in | 5 | registers. |
6 | the translation table entry. | 6 | |
7 | 7 | Provide new functions write_fp_*reg_merging() which can be used | |
8 | This is another GICv3 ITS command that we forgot to implement. Linux | 8 | instead of the existing write_fp_*reg() functions when we want this |
9 | does use this one, but only if the guest powers off one of its CPUs. | 9 | "merge the result with one of the input registers if FPCR.NEP is |
10 | enabled" handling, and use them in do_fp3_scalar_with_fpsttype(). | ||
11 | |||
12 | Note that (as documented in the description of the FPCR.NEP bit) | ||
13 | which input register to use as the merge source varies by | ||
14 | instruction: for these 2-input scalar operations, the comparison | ||
15 | instructions take from Rm, not Rn. | ||
16 | |||
17 | We'll extend this to also provide the merging behaviour for | ||
18 | the remaining scalar insns in subsequent commits. | ||
10 | 19 | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
13 | Message-id: 20220122182444.724087-15-peter.maydell@linaro.org | ||
14 | --- | 22 | --- |
15 | hw/intc/gicv3_internal.h | 16 ++++ | 23 | target/arm/tcg/translate-a64.c | 117 +++++++++++++++++++++++++-------- |
16 | hw/intc/arm_gicv3_its.c | 146 +++++++++++++++++++++++++++++++++++++ | 24 | 1 file changed, 91 insertions(+), 26 deletions(-) |
17 | hw/intc/arm_gicv3_redist.c | 53 ++++++++++++++ | 25 | |
18 | 3 files changed, 215 insertions(+) | 26 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
19 | |||
20 | diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/intc/gicv3_internal.h | 28 | --- a/target/arm/tcg/translate-a64.c |
23 | +++ b/hw/intc/gicv3_internal.h | 29 | +++ b/target/arm/tcg/translate-a64.c |
24 | @@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1) | 30 | @@ -XXX,XX +XXX,XX @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) |
25 | #define CMD_MASK 0xff | 31 | write_fp_dreg(s, reg, tmp); |
26 | 32 | } | |
27 | /* ITS Commands */ | 33 | |
28 | +#define GITS_CMD_MOVI 0x01 | 34 | +/* |
29 | #define GITS_CMD_INT 0x03 | 35 | + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: |
30 | #define GITS_CMD_CLEAR 0x04 | 36 | + * - if FPCR.NEP == 0, clear the high elements of reg |
31 | #define GITS_CMD_SYNC 0x05 | 37 | + * - if FPCR.NEP == 1, set the high elements of reg from mergereg |
32 | @@ -XXX,XX +XXX,XX @@ FIELD(MAPC, RDBASE, 16, 32) | 38 | + * (i.e. merge the result with those high elements) |
33 | FIELD(MOVALL_2, RDBASE1, 16, 36) | 39 | + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). |
34 | FIELD(MOVALL_3, RDBASE2, 16, 36) | ||
35 | |||
36 | +/* MOVI command fields */ | ||
37 | +FIELD(MOVI_0, DEVICEID, 32, 32) | ||
38 | +FIELD(MOVI_1, EVENTID, 0, 32) | ||
39 | +FIELD(MOVI_2, ICID, 0, 16) | ||
40 | + | ||
41 | /* | ||
42 | * 12 bytes Interrupt translation Table Entry size | ||
43 | * as per Table 5.3 in GICv3 spec | ||
44 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_update_lpi(GICv3CPUState *cs); | ||
45 | * an incoming migration has loaded new state. | ||
46 | */ | ||
47 | void gicv3_redist_update_lpi_only(GICv3CPUState *cs); | ||
48 | +/** | ||
49 | + * gicv3_redist_mov_lpi: | ||
50 | + * @src: source redistributor | ||
51 | + * @dest: destination redistributor | ||
52 | + * @irq: LPI to update | ||
53 | + * | ||
54 | + * Move the pending state of the specified LPI from @src to @dest, | ||
55 | + * as required by the ITS MOVI command. | ||
56 | + */ | 40 | + */ |
57 | +void gicv3_redist_mov_lpi(GICv3CPUState *src, GICv3CPUState *dest, int irq); | 41 | +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, |
58 | /** | 42 | + TCGv_i64 v) |
59 | * gicv3_redist_movall_lpis: | ||
60 | * @src: source redistributor | ||
61 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/hw/intc/arm_gicv3_its.c | ||
64 | +++ b/hw/intc/arm_gicv3_its.c | ||
65 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_movall(GICv3ITSState *s, uint64_t value, | ||
66 | return CMD_CONTINUE; | ||
67 | } | ||
68 | |||
69 | +static ItsCmdResult process_movi(GICv3ITSState *s, uint64_t value, | ||
70 | + uint32_t offset) | ||
71 | +{ | 43 | +{ |
72 | + AddressSpace *as = &s->gicv3->dma_as; | 44 | + if (!s->fpcr_nep) { |
73 | + MemTxResult res = MEMTX_OK; | 45 | + write_fp_dreg(s, reg, v); |
74 | + uint32_t devid, eventid, intid; | ||
75 | + uint16_t old_icid, new_icid; | ||
76 | + uint64_t old_cte, new_cte; | ||
77 | + uint64_t old_rdbase, new_rdbase; | ||
78 | + uint64_t dte; | ||
79 | + bool dte_valid, ite_valid, cte_valid; | ||
80 | + uint64_t num_eventids; | ||
81 | + IteEntry ite = {}; | ||
82 | + | ||
83 | + devid = FIELD_EX64(value, MOVI_0, DEVICEID); | ||
84 | + | ||
85 | + offset += NUM_BYTES_IN_DW; | ||
86 | + value = address_space_ldq_le(as, s->cq.base_addr + offset, | ||
87 | + MEMTXATTRS_UNSPECIFIED, &res); | ||
88 | + if (res != MEMTX_OK) { | ||
89 | + return CMD_STALL; | ||
90 | + } | ||
91 | + eventid = FIELD_EX64(value, MOVI_1, EVENTID); | ||
92 | + | ||
93 | + offset += NUM_BYTES_IN_DW; | ||
94 | + value = address_space_ldq_le(as, s->cq.base_addr + offset, | ||
95 | + MEMTXATTRS_UNSPECIFIED, &res); | ||
96 | + if (res != MEMTX_OK) { | ||
97 | + return CMD_STALL; | ||
98 | + } | ||
99 | + new_icid = FIELD_EX64(value, MOVI_2, ICID); | ||
100 | + | ||
101 | + if (devid >= s->dt.num_entries) { | ||
102 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
103 | + "%s: invalid command attributes: devid %d>=%d", | ||
104 | + __func__, devid, s->dt.num_entries); | ||
105 | + return CMD_CONTINUE; | ||
106 | + } | ||
107 | + dte = get_dte(s, devid, &res); | ||
108 | + if (res != MEMTX_OK) { | ||
109 | + return CMD_STALL; | ||
110 | + } | ||
111 | + | ||
112 | + dte_valid = FIELD_EX64(dte, DTE, VALID); | ||
113 | + if (!dte_valid) { | ||
114 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
115 | + "%s: invalid command attributes: " | ||
116 | + "invalid dte: %"PRIx64" for %d\n", | ||
117 | + __func__, dte, devid); | ||
118 | + return CMD_CONTINUE; | ||
119 | + } | ||
120 | + | ||
121 | + num_eventids = 1ULL << (FIELD_EX64(dte, DTE, SIZE) + 1); | ||
122 | + if (eventid >= num_eventids) { | ||
123 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
124 | + "%s: invalid command attributes: eventid %d >= %" | ||
125 | + PRId64 "\n", | ||
126 | + __func__, eventid, num_eventids); | ||
127 | + return CMD_CONTINUE; | ||
128 | + } | ||
129 | + | ||
130 | + ite_valid = get_ite(s, eventid, dte, &old_icid, &intid, &res); | ||
131 | + if (res != MEMTX_OK) { | ||
132 | + return CMD_STALL; | ||
133 | + } | ||
134 | + | ||
135 | + if (!ite_valid) { | ||
136 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
137 | + "%s: invalid command attributes: invalid ITE\n", | ||
138 | + __func__); | ||
139 | + return CMD_CONTINUE; | ||
140 | + } | ||
141 | + | ||
142 | + if (old_icid >= s->ct.num_entries) { | ||
143 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
144 | + "%s: invalid ICID 0x%x in ITE (table corrupted?)\n", | ||
145 | + __func__, old_icid); | ||
146 | + return CMD_CONTINUE; | ||
147 | + } | ||
148 | + | ||
149 | + if (new_icid >= s->ct.num_entries) { | ||
150 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
151 | + "%s: invalid command attributes: ICID 0x%x\n", | ||
152 | + __func__, new_icid); | ||
153 | + return CMD_CONTINUE; | ||
154 | + } | ||
155 | + | ||
156 | + cte_valid = get_cte(s, old_icid, &old_cte, &res); | ||
157 | + if (res != MEMTX_OK) { | ||
158 | + return CMD_STALL; | ||
159 | + } | ||
160 | + if (!cte_valid) { | ||
161 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
162 | + "%s: invalid command attributes: " | ||
163 | + "invalid cte: %"PRIx64"\n", | ||
164 | + __func__, old_cte); | ||
165 | + return CMD_CONTINUE; | ||
166 | + } | ||
167 | + | ||
168 | + cte_valid = get_cte(s, new_icid, &new_cte, &res); | ||
169 | + if (res != MEMTX_OK) { | ||
170 | + return CMD_STALL; | ||
171 | + } | ||
172 | + if (!cte_valid) { | ||
173 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
174 | + "%s: invalid command attributes: " | ||
175 | + "invalid cte: %"PRIx64"\n", | ||
176 | + __func__, new_cte); | ||
177 | + return CMD_CONTINUE; | ||
178 | + } | ||
179 | + | ||
180 | + old_rdbase = FIELD_EX64(old_cte, CTE, RDBASE); | ||
181 | + if (old_rdbase >= s->gicv3->num_cpu) { | ||
182 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
183 | + "%s: CTE has invalid rdbase 0x%"PRIx64"\n", | ||
184 | + __func__, old_rdbase); | ||
185 | + return CMD_CONTINUE; | ||
186 | + } | ||
187 | + | ||
188 | + new_rdbase = FIELD_EX64(new_cte, CTE, RDBASE); | ||
189 | + if (new_rdbase >= s->gicv3->num_cpu) { | ||
190 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
191 | + "%s: CTE has invalid rdbase 0x%"PRIx64"\n", | ||
192 | + __func__, new_rdbase); | ||
193 | + return CMD_CONTINUE; | ||
194 | + } | ||
195 | + | ||
196 | + if (old_rdbase != new_rdbase) { | ||
197 | + /* Move the LPI from the old redistributor to the new one */ | ||
198 | + gicv3_redist_mov_lpi(&s->gicv3->cpu[old_rdbase], | ||
199 | + &s->gicv3->cpu[new_rdbase], | ||
200 | + intid); | ||
201 | + } | ||
202 | + | ||
203 | + /* Update the ICID field in the interrupt translation table entry */ | ||
204 | + ite.itel = FIELD_DP64(ite.itel, ITE_L, VALID, 1); | ||
205 | + ite.itel = FIELD_DP64(ite.itel, ITE_L, INTTYPE, ITE_INTTYPE_PHYSICAL); | ||
206 | + ite.itel = FIELD_DP64(ite.itel, ITE_L, INTID, intid); | ||
207 | + ite.itel = FIELD_DP64(ite.itel, ITE_L, DOORBELL, INTID_SPURIOUS); | ||
208 | + ite.iteh = FIELD_DP32(ite.iteh, ITE_H, ICID, new_icid); | ||
209 | + return update_ite(s, eventid, dte, ite) ? CMD_CONTINUE : CMD_STALL; | ||
210 | +} | ||
211 | + | ||
212 | /* | ||
213 | * Current implementation blocks until all | ||
214 | * commands are processed | ||
215 | @@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s) | ||
216 | gicv3_redist_update_lpi(&s->gicv3->cpu[i]); | ||
217 | } | ||
218 | break; | ||
219 | + case GITS_CMD_MOVI: | ||
220 | + result = process_movi(s, data, cq_offset); | ||
221 | + break; | ||
222 | case GITS_CMD_MOVALL: | ||
223 | result = process_movall(s, data, cq_offset); | ||
224 | break; | ||
225 | diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c | ||
226 | index XXXXXXX..XXXXXXX 100644 | ||
227 | --- a/hw/intc/arm_gicv3_redist.c | ||
228 | +++ b/hw/intc/arm_gicv3_redist.c | ||
229 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) | ||
230 | gicv3_redist_lpi_pending(cs, irq, level); | ||
231 | } | ||
232 | |||
233 | +void gicv3_redist_mov_lpi(GICv3CPUState *src, GICv3CPUState *dest, int irq) | ||
234 | +{ | ||
235 | + /* | ||
236 | + * Move the specified LPI's pending state from the source redistributor | ||
237 | + * to the destination. | ||
238 | + * | ||
239 | + * If LPIs are disabled on dest this is CONSTRAINED UNPREDICTABLE: | ||
240 | + * we choose to NOP. If LPIs are disabled on source there's nothing | ||
241 | + * to be transferred anyway. | ||
242 | + */ | ||
243 | + AddressSpace *as = &src->gic->dma_as; | ||
244 | + uint64_t idbits; | ||
245 | + uint32_t pendt_size; | ||
246 | + uint64_t src_baddr; | ||
247 | + uint8_t src_pend; | ||
248 | + | ||
249 | + if (!(src->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || | ||
250 | + !(dest->gicr_ctlr & GICR_CTLR_ENABLE_LPIS)) { | ||
251 | + return; | 46 | + return; |
252 | + } | 47 | + } |
253 | + | 48 | + |
254 | + idbits = MIN(FIELD_EX64(src->gicr_propbaser, GICR_PROPBASER, IDBITS), | 49 | + /* |
255 | + GICD_TYPER_IDBITS); | 50 | + * Move from mergereg to reg; this sets the high elements and |
256 | + idbits = MIN(FIELD_EX64(dest->gicr_propbaser, GICR_PROPBASER, IDBITS), | 51 | + * clears the bits above 128 as a side effect. |
257 | + idbits); | 52 | + */ |
258 | + | 53 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
259 | + pendt_size = 1ULL << (idbits + 1); | 54 | + vec_full_reg_offset(s, mergereg), |
260 | + if ((irq / 8) >= pendt_size) { | 55 | + 16, vec_full_reg_size(s)); |
56 | + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); | ||
57 | +} | ||
58 | + | ||
59 | +/* | ||
60 | + * Write a single-prec result, clearing the higher elements of the | ||
61 | + * destination register if FPCR.NEP is 0; otherwise merge them from mergereg. | ||
62 | + */ | ||
63 | +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, | ||
64 | + TCGv_i32 v) | ||
65 | +{ | ||
66 | + if (!s->fpcr_nep) { | ||
67 | + write_fp_sreg(s, reg, v); | ||
261 | + return; | 68 | + return; |
262 | + } | 69 | + } |
263 | + | 70 | + |
264 | + src_baddr = src->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; | 71 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
265 | + | 72 | + vec_full_reg_offset(s, mergereg), |
266 | + address_space_read(as, src_baddr + (irq / 8), | 73 | + 16, vec_full_reg_size(s)); |
267 | + MEMTXATTRS_UNSPECIFIED, &src_pend, sizeof(src_pend)); | 74 | + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); |
268 | + if (!extract32(src_pend, irq % 8, 1)) { | 75 | +} |
269 | + /* Not pending on source, nothing to do */ | 76 | + |
77 | +/* | ||
78 | + * Write a half-prec result, clearing the higher elements of the | ||
79 | + * destination register if FPCR.NEP is 0; otherwise merge them from mergereg. | ||
80 | + * The caller must ensure that the top 16 bits of v are zero. | ||
81 | + */ | ||
82 | +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, | ||
83 | + TCGv_i32 v) | ||
84 | +{ | ||
85 | + if (!s->fpcr_nep) { | ||
86 | + write_fp_sreg(s, reg, v); | ||
270 | + return; | 87 | + return; |
271 | + } | 88 | + } |
272 | + src_pend &= ~(1 << (irq % 8)); | 89 | + |
273 | + address_space_write(as, src_baddr + (irq / 8), | 90 | + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), |
274 | + MEMTXATTRS_UNSPECIFIED, &src_pend, sizeof(src_pend)); | 91 | + vec_full_reg_offset(s, mergereg), |
275 | + if (irq == src->hpplpi.irq) { | 92 | + 16, vec_full_reg_size(s)); |
276 | + /* | 93 | + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); |
277 | + * We just made this LPI not-pending so only need to update | ||
278 | + * if it was previously the highest priority pending LPI | ||
279 | + */ | ||
280 | + gicv3_redist_update_lpi(src); | ||
281 | + } | ||
282 | + /* Mark it pending on the destination */ | ||
283 | + gicv3_redist_lpi_pending(dest, irq, 1); | ||
284 | +} | 94 | +} |
285 | + | 95 | + |
286 | void gicv3_redist_movall_lpis(GICv3CPUState *src, GICv3CPUState *dest) | 96 | /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ |
287 | { | 97 | static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, |
288 | /* | 98 | GVecGen2Fn *gvec_fn, int vece) |
99 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar { | ||
100 | } FPScalar; | ||
101 | |||
102 | static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
103 | - const FPScalar *f, | ||
104 | + const FPScalar *f, int mergereg, | ||
105 | ARMFPStatusFlavour fpsttype) | ||
106 | { | ||
107 | switch (a->esz) { | ||
108 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
109 | TCGv_i64 t0 = read_fp_dreg(s, a->rn); | ||
110 | TCGv_i64 t1 = read_fp_dreg(s, a->rm); | ||
111 | f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
112 | - write_fp_dreg(s, a->rd, t0); | ||
113 | + write_fp_dreg_merging(s, a->rd, mergereg, t0); | ||
114 | } | ||
115 | break; | ||
116 | case MO_32: | ||
117 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
118 | TCGv_i32 t0 = read_fp_sreg(s, a->rn); | ||
119 | TCGv_i32 t1 = read_fp_sreg(s, a->rm); | ||
120 | f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
121 | - write_fp_sreg(s, a->rd, t0); | ||
122 | + write_fp_sreg_merging(s, a->rd, mergereg, t0); | ||
123 | } | ||
124 | break; | ||
125 | case MO_16: | ||
126 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
127 | TCGv_i32 t0 = read_fp_hreg(s, a->rn); | ||
128 | TCGv_i32 t1 = read_fp_hreg(s, a->rm); | ||
129 | f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); | ||
130 | - write_fp_sreg(s, a->rd, t0); | ||
131 | + write_fp_hreg_merging(s, a->rd, mergereg, t0); | ||
132 | } | ||
133 | break; | ||
134 | default: | ||
135 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, | ||
136 | return true; | ||
137 | } | ||
138 | |||
139 | -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
140 | +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
141 | + int mergereg) | ||
142 | { | ||
143 | - return do_fp3_scalar_with_fpsttype(s, a, f, | ||
144 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
145 | a->esz == MO_16 ? | ||
146 | FPST_A64_F16 : FPST_A64); | ||
147 | } | ||
148 | |||
149 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f) | ||
150 | +static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
151 | + int mergereg) | ||
152 | { | ||
153 | - return do_fp3_scalar_with_fpsttype(s, a, f, select_ah_fpst(s, a->esz)); | ||
154 | + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
155 | + select_ah_fpst(s, a->esz)); | ||
156 | } | ||
157 | |||
158 | static const FPScalar f_scalar_fadd = { | ||
159 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fadd = { | ||
160 | gen_helper_vfp_adds, | ||
161 | gen_helper_vfp_addd, | ||
162 | }; | ||
163 | -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) | ||
164 | +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) | ||
165 | |||
166 | static const FPScalar f_scalar_fsub = { | ||
167 | gen_helper_vfp_subh, | ||
168 | gen_helper_vfp_subs, | ||
169 | gen_helper_vfp_subd, | ||
170 | }; | ||
171 | -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) | ||
172 | +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) | ||
173 | |||
174 | static const FPScalar f_scalar_fdiv = { | ||
175 | gen_helper_vfp_divh, | ||
176 | gen_helper_vfp_divs, | ||
177 | gen_helper_vfp_divd, | ||
178 | }; | ||
179 | -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) | ||
180 | +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) | ||
181 | |||
182 | static const FPScalar f_scalar_fmul = { | ||
183 | gen_helper_vfp_mulh, | ||
184 | gen_helper_vfp_muls, | ||
185 | gen_helper_vfp_muld, | ||
186 | }; | ||
187 | -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) | ||
188 | +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) | ||
189 | |||
190 | static const FPScalar f_scalar_fmax = { | ||
191 | gen_helper_vfp_maxh, | ||
192 | gen_helper_vfp_maxs, | ||
193 | gen_helper_vfp_maxd, | ||
194 | }; | ||
195 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) | ||
196 | +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
197 | |||
198 | static const FPScalar f_scalar_fmin = { | ||
199 | gen_helper_vfp_minh, | ||
200 | gen_helper_vfp_mins, | ||
201 | gen_helper_vfp_mind, | ||
202 | }; | ||
203 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) | ||
204 | +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
205 | |||
206 | static const FPScalar f_scalar_fmaxnm = { | ||
207 | gen_helper_vfp_maxnumh, | ||
208 | gen_helper_vfp_maxnums, | ||
209 | gen_helper_vfp_maxnumd, | ||
210 | }; | ||
211 | -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) | ||
212 | +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) | ||
213 | |||
214 | static const FPScalar f_scalar_fminnm = { | ||
215 | gen_helper_vfp_minnumh, | ||
216 | gen_helper_vfp_minnums, | ||
217 | gen_helper_vfp_minnumd, | ||
218 | }; | ||
219 | -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) | ||
220 | +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) | ||
221 | |||
222 | static const FPScalar f_scalar_fmulx = { | ||
223 | gen_helper_advsimd_mulxh, | ||
224 | gen_helper_vfp_mulxs, | ||
225 | gen_helper_vfp_mulxd, | ||
226 | }; | ||
227 | -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) | ||
228 | +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) | ||
229 | |||
230 | static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
231 | { | ||
232 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fnmul = { | ||
233 | gen_fnmul_s, | ||
234 | gen_fnmul_d, | ||
235 | }; | ||
236 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) | ||
237 | +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
238 | |||
239 | static const FPScalar f_scalar_fcmeq = { | ||
240 | gen_helper_advsimd_ceq_f16, | ||
241 | gen_helper_neon_ceq_f32, | ||
242 | gen_helper_neon_ceq_f64, | ||
243 | }; | ||
244 | -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) | ||
245 | +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) | ||
246 | |||
247 | static const FPScalar f_scalar_fcmge = { | ||
248 | gen_helper_advsimd_cge_f16, | ||
249 | gen_helper_neon_cge_f32, | ||
250 | gen_helper_neon_cge_f64, | ||
251 | }; | ||
252 | -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) | ||
253 | +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) | ||
254 | |||
255 | static const FPScalar f_scalar_fcmgt = { | ||
256 | gen_helper_advsimd_cgt_f16, | ||
257 | gen_helper_neon_cgt_f32, | ||
258 | gen_helper_neon_cgt_f64, | ||
259 | }; | ||
260 | -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) | ||
261 | +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) | ||
262 | |||
263 | static const FPScalar f_scalar_facge = { | ||
264 | gen_helper_advsimd_acge_f16, | ||
265 | gen_helper_neon_acge_f32, | ||
266 | gen_helper_neon_acge_f64, | ||
267 | }; | ||
268 | -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) | ||
269 | +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) | ||
270 | |||
271 | static const FPScalar f_scalar_facgt = { | ||
272 | gen_helper_advsimd_acgt_f16, | ||
273 | gen_helper_neon_acgt_f32, | ||
274 | gen_helper_neon_acgt_f64, | ||
275 | }; | ||
276 | -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) | ||
277 | +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) | ||
278 | |||
279 | static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
280 | { | ||
281 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fabd = { | ||
282 | gen_fabd_s, | ||
283 | gen_fabd_d, | ||
284 | }; | ||
285 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) | ||
286 | +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) | ||
287 | |||
288 | static const FPScalar f_scalar_frecps = { | ||
289 | gen_helper_recpsf_f16, | ||
290 | gen_helper_recpsf_f32, | ||
291 | gen_helper_recpsf_f64, | ||
292 | }; | ||
293 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps) | ||
294 | +TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
295 | |||
296 | static const FPScalar f_scalar_frsqrts = { | ||
297 | gen_helper_rsqrtsf_f16, | ||
298 | gen_helper_rsqrtsf_f32, | ||
299 | gen_helper_rsqrtsf_f64, | ||
300 | }; | ||
301 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts) | ||
302 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
303 | |||
304 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
305 | const FPScalar *f, bool swap) | ||
289 | -- | 306 | -- |
290 | 2.25.1 | 307 | 2.34.1 |
291 | |||
292 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 3-input scalar operations which use | ||
2 | do_fmla_scalar_idx() and do_fmadd(), by making them call the | ||
3 | appropriate write_fp_*reg_merging() functions. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 12 ++++++------ | ||
9 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
16 | gen_vfp_negd(t1, t1); | ||
17 | } | ||
18 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
19 | - write_fp_dreg(s, a->rd, t0); | ||
20 | + write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
21 | } | ||
22 | break; | ||
23 | case MO_32: | ||
24 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
25 | gen_vfp_negs(t1, t1); | ||
26 | } | ||
27 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
28 | - write_fp_sreg(s, a->rd, t0); | ||
29 | + write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
30 | } | ||
31 | break; | ||
32 | case MO_16: | ||
33 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
34 | } | ||
35 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
36 | fpstatus_ptr(FPST_A64_F16)); | ||
37 | - write_fp_sreg(s, a->rd, t0); | ||
38 | + write_fp_hreg_merging(s, a->rd, a->rd, t0); | ||
39 | } | ||
40 | break; | ||
41 | default: | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
43 | } | ||
44 | fpst = fpstatus_ptr(FPST_A64); | ||
45 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
46 | - write_fp_dreg(s, a->rd, ta); | ||
47 | + write_fp_dreg_merging(s, a->rd, a->ra, ta); | ||
48 | } | ||
49 | break; | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
52 | } | ||
53 | fpst = fpstatus_ptr(FPST_A64); | ||
54 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
55 | - write_fp_sreg(s, a->rd, ta); | ||
56 | + write_fp_sreg_merging(s, a->rd, a->ra, ta); | ||
57 | } | ||
58 | break; | ||
59 | |||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
61 | } | ||
62 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
63 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
64 | - write_fp_sreg(s, a->rd, ta); | ||
65 | + write_fp_hreg_merging(s, a->rd, a->ra, ta); | ||
66 | } | ||
67 | break; | ||
68 | |||
69 | -- | ||
70 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Currently we implement BFCVT scalar via do_fp1_scalar(). This works | ||
2 | even though BFCVT is a narrowing operation from 32 to 16 bits, | ||
3 | because we can use write_fp_sreg() for float16. However, FPCR.NEP | ||
4 | support requires that we use write_fp_hreg_merging() for float16 | ||
5 | outputs, so we can't continue to borrow the non-narrowing | ||
6 | do_fp1_scalar() function for this. Split out trans_BFCVT_s() | ||
7 | into its own implementation that honours FPCR.NEP. | ||
1 | 8 | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
13 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/tcg/translate-a64.c | ||
18 | +++ b/target/arm/tcg/translate-a64.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frintx = { | ||
20 | }; | ||
21 | TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) | ||
22 | |||
23 | -static const FPScalar1 f_scalar_bfcvt = { | ||
24 | - .gen_s = gen_helper_bfcvt, | ||
25 | -}; | ||
26 | -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) | ||
27 | +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) | ||
28 | +{ | ||
29 | + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; | ||
30 | + TCGv_i32 t32; | ||
31 | + int check; | ||
32 | + | ||
33 | + if (!dc_isar_feature(aa64_bf16, s)) { | ||
34 | + return false; | ||
35 | + } | ||
36 | + | ||
37 | + check = fp_access_check_scalar_hsd(s, a->esz); | ||
38 | + | ||
39 | + if (check <= 0) { | ||
40 | + return check == 0; | ||
41 | + } | ||
42 | + | ||
43 | + t32 = read_fp_sreg(s, a->rn); | ||
44 | + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); | ||
45 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
46 | + return true; | ||
47 | +} | ||
48 | |||
49 | static const FPScalar1 f_scalar_frint32 = { | ||
50 | NULL, | ||
51 | -- | ||
52 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP for the 1-input scalar operations. | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 26 ++++++++++++++------------ | ||
7 | 1 file changed, 14 insertions(+), 12 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, | ||
14 | case MO_64: | ||
15 | t64 = read_fp_dreg(s, a->rn); | ||
16 | f->gen_d(t64, t64, fpst); | ||
17 | - write_fp_dreg(s, a->rd, t64); | ||
18 | + write_fp_dreg_merging(s, a->rd, a->rd, t64); | ||
19 | break; | ||
20 | case MO_32: | ||
21 | t32 = read_fp_sreg(s, a->rn); | ||
22 | f->gen_s(t32, t32, fpst); | ||
23 | - write_fp_sreg(s, a->rd, t32); | ||
24 | + write_fp_sreg_merging(s, a->rd, a->rd, t32); | ||
25 | break; | ||
26 | case MO_16: | ||
27 | t32 = read_fp_hreg(s, a->rn); | ||
28 | f->gen_h(t32, t32, fpst); | ||
29 | - write_fp_sreg(s, a->rd, t32); | ||
30 | + write_fp_hreg_merging(s, a->rd, a->rd, t32); | ||
31 | break; | ||
32 | default: | ||
33 | g_assert_not_reached(); | ||
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) | ||
35 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
36 | |||
37 | gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); | ||
38 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
39 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
40 | } | ||
41 | return true; | ||
42 | } | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) | ||
44 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
45 | |||
46 | gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); | ||
47 | - /* write_fp_sreg is OK here because top half of result is zero */ | ||
48 | - write_fp_sreg(s, a->rd, tmp); | ||
49 | + /* write_fp_hreg_merging is OK here because top half of result is zero */ | ||
50 | + write_fp_hreg_merging(s, a->rd, a->rd, tmp); | ||
51 | } | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) | ||
55 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
56 | |||
57 | gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); | ||
58 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
59 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
60 | } | ||
61 | return true; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) | ||
64 | TCGv_ptr fpst = fpstatus_ptr(FPST_A64); | ||
65 | |||
66 | gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); | ||
67 | - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ | ||
68 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
69 | + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ | ||
70 | + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) | ||
75 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
76 | |||
77 | gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
78 | - write_fp_sreg(s, a->rd, tcg_rd); | ||
79 | + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); | ||
80 | } | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) | ||
84 | TCGv_i32 tcg_ahp = get_ahp_flag(); | ||
85 | |||
86 | gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); | ||
87 | - write_fp_dreg(s, a->rd, tcg_rd); | ||
88 | + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); | ||
89 | } | ||
90 | return true; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, | ||
93 | do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), | ||
94 | a->esz, tcg_int, a->shift, a->rn, rmode); | ||
95 | |||
96 | - clear_vec(s, a->rd); | ||
97 | + if (!s->fpcr_nep) { | ||
98 | + clear_vec(s, a->rd); | ||
99 | + } | ||
100 | write_vec_element(s, tcg_int, a->rd, 0, a->esz); | ||
101 | return true; | ||
102 | } | ||
103 | -- | ||
104 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP in the operations handled by do_cvtf_scalar(). | ||
1 | 2 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/tcg/translate-a64.c | 6 +++--- | ||
7 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/arm/tcg/translate-a64.c | ||
12 | +++ b/target/arm/tcg/translate-a64.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
14 | } else { | ||
15 | gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); | ||
16 | } | ||
17 | - write_fp_dreg(s, rd, tcg_double); | ||
18 | + write_fp_dreg_merging(s, rd, rd, tcg_double); | ||
19 | break; | ||
20 | |||
21 | case MO_32: | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
23 | } else { | ||
24 | gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
25 | } | ||
26 | - write_fp_sreg(s, rd, tcg_single); | ||
27 | + write_fp_sreg_merging(s, rd, rd, tcg_single); | ||
28 | break; | ||
29 | |||
30 | case MO_16: | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, | ||
32 | } else { | ||
33 | gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); | ||
34 | } | ||
35 | - write_fp_sreg(s, rd, tcg_single); | ||
36 | + write_fp_hreg_merging(s, rd, rd, tcg_single); | ||
37 | break; | ||
38 | |||
39 | default: | ||
40 | -- | ||
41 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle FPCR.NEP merging for scalar FABS and FNEG; this requires | ||
2 | an extra parameter to do_fp1_scalar_int(), since FMOV scalar | ||
3 | does not have the merging behaviour. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++------- | ||
9 | 1 file changed, 20 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/tcg/translate-a64.c | ||
14 | +++ b/target/arm/tcg/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef struct FPScalar1Int { | ||
16 | } FPScalar1Int; | ||
17 | |||
18 | static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
19 | - const FPScalar1Int *f) | ||
20 | + const FPScalar1Int *f, | ||
21 | + bool merging) | ||
22 | { | ||
23 | switch (a->esz) { | ||
24 | case MO_64: | ||
25 | if (fp_access_check(s)) { | ||
26 | TCGv_i64 t = read_fp_dreg(s, a->rn); | ||
27 | f->gen_d(t, t); | ||
28 | - write_fp_dreg(s, a->rd, t); | ||
29 | + if (merging) { | ||
30 | + write_fp_dreg_merging(s, a->rd, a->rd, t); | ||
31 | + } else { | ||
32 | + write_fp_dreg(s, a->rd, t); | ||
33 | + } | ||
34 | } | ||
35 | break; | ||
36 | case MO_32: | ||
37 | if (fp_access_check(s)) { | ||
38 | TCGv_i32 t = read_fp_sreg(s, a->rn); | ||
39 | f->gen_s(t, t); | ||
40 | - write_fp_sreg(s, a->rd, t); | ||
41 | + if (merging) { | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, t); | ||
43 | + } else { | ||
44 | + write_fp_sreg(s, a->rd, t); | ||
45 | + } | ||
46 | } | ||
47 | break; | ||
48 | case MO_16: | ||
49 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
50 | if (fp_access_check(s)) { | ||
51 | TCGv_i32 t = read_fp_hreg(s, a->rn); | ||
52 | f->gen_h(t, t); | ||
53 | - write_fp_sreg(s, a->rd, t); | ||
54 | + if (merging) { | ||
55 | + write_fp_hreg_merging(s, a->rd, a->rd, t); | ||
56 | + } else { | ||
57 | + write_fp_sreg(s, a->rd, t); | ||
58 | + } | ||
59 | } | ||
60 | break; | ||
61 | default: | ||
62 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fmov = { | ||
63 | tcg_gen_mov_i32, | ||
64 | tcg_gen_mov_i64, | ||
65 | }; | ||
66 | -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) | ||
67 | +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) | ||
68 | |||
69 | static const FPScalar1Int f_scalar_fabs = { | ||
70 | gen_vfp_absh, | ||
71 | gen_vfp_abss, | ||
72 | gen_vfp_absd, | ||
73 | }; | ||
74 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) | ||
75 | +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) | ||
76 | |||
77 | static const FPScalar1Int f_scalar_fneg = { | ||
78 | gen_vfp_negh, | ||
79 | gen_vfp_negs, | ||
80 | gen_vfp_negd, | ||
81 | }; | ||
82 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) | ||
83 | +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
84 | |||
85 | typedef struct FPScalar1 { | ||
86 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
1 | The MemoryRegionOps gicv3_its_translation_ops currently provides only | 1 | Unlike the other users of do_2misc_narrow_scalar(), FCVTXN (scalar) |
---|---|---|---|
2 | a .write_with_attrs function, because the only register in this | 2 | is always double-to-single and must honour FPCR.NEP. Implement this |
3 | region is the write-only GITS_TRANSLATER. However, if you don't | 3 | directly in a trans function rather than using |
4 | provide a read function and the guest tries reading from this memory | 4 | do_2misc_narrow_scalar(). |
5 | region, QEMU will crash because | ||
6 | memory_region_read_with_attrs_accessor() calls a NULL pointer. | ||
7 | 5 | ||
8 | Add a read function which always returns 0, to cover both bogus | 6 | We still need gen_fcvtxn_sd() and the f_scalar_fcvtxn[] array for |
9 | attempts to read GITS_TRANSLATER and also reads from the rest of the | 7 | the FCVTXN (vector) insn, so we move those down in the file to |
10 | region, which is documented to be reserved, RES0. | 8 | where they are used. |
11 | 9 | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | Message-id: 20220122182444.724087-11-peter.maydell@linaro.org | ||
16 | --- | 12 | --- |
17 | hw/intc/arm_gicv3_its.c | 13 +++++++++++++ | 13 | target/arm/tcg/translate-a64.c | 43 ++++++++++++++++++++++------------ |
18 | 1 file changed, 13 insertions(+) | 14 | 1 file changed, 28 insertions(+), 15 deletions(-) |
19 | 15 | ||
20 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
21 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/intc/arm_gicv3_its.c | 18 | --- a/target/arm/tcg/translate-a64.c |
23 | +++ b/hw/intc/arm_gicv3_its.c | 19 | +++ b/target/arm/tcg/translate-a64.c |
24 | @@ -XXX,XX +XXX,XX @@ static void extract_cmdq_params(GICv3ITSState *s) | 20 | @@ -XXX,XX +XXX,XX @@ static ArithOneOp * const f_scalar_uqxtn[] = { |
25 | } | 21 | }; |
22 | TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) | ||
23 | |||
24 | -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
25 | +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) | ||
26 | { | ||
27 | - /* | ||
28 | - * 64 bit to 32 bit float conversion | ||
29 | - * with von Neumann rounding (round to odd) | ||
30 | - */ | ||
31 | - TCGv_i32 tmp = tcg_temp_new_i32(); | ||
32 | - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); | ||
33 | - tcg_gen_extu_i32_i64(d, tmp); | ||
34 | + if (fp_access_check(s)) { | ||
35 | + /* | ||
36 | + * 64 bit to 32 bit float conversion | ||
37 | + * with von Neumann rounding (round to odd) | ||
38 | + */ | ||
39 | + TCGv_i64 src = read_fp_dreg(s, a->rn); | ||
40 | + TCGv_i32 dst = tcg_temp_new_i32(); | ||
41 | + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); | ||
42 | + write_fp_sreg_merging(s, a->rd, a->rd, dst); | ||
43 | + } | ||
44 | + return true; | ||
26 | } | 45 | } |
27 | 46 | ||
28 | +static MemTxResult gicv3_its_translation_read(void *opaque, hwaddr offset, | 47 | -static ArithOneOp * const f_scalar_fcvtxn[] = { |
29 | + uint64_t *data, unsigned size, | 48 | - NULL, |
30 | + MemTxAttrs attrs) | 49 | - NULL, |
50 | - gen_fcvtxn_sd, | ||
51 | -}; | ||
52 | -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) | ||
53 | - | ||
54 | #undef WRAP_ENV | ||
55 | |||
56 | static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) | ||
57 | @@ -XXX,XX +XXX,XX @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) | ||
58 | tcg_gen_extu_i32_i64(d, tmp); | ||
59 | } | ||
60 | |||
61 | +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) | ||
31 | +{ | 62 | +{ |
32 | + /* | 63 | + /* |
33 | + * GITS_TRANSLATER is write-only, and all other addresses | 64 | + * 64 bit to 32 bit float conversion |
34 | + * in the interrupt translation space frame are RES0. | 65 | + * with von Neumann rounding (round to odd) |
35 | + */ | 66 | + */ |
36 | + *data = 0; | 67 | + TCGv_i32 tmp = tcg_temp_new_i32(); |
37 | + return MEMTX_OK; | 68 | + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); |
69 | + tcg_gen_extu_i32_i64(d, tmp); | ||
38 | +} | 70 | +} |
39 | + | 71 | + |
40 | static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset, | 72 | static ArithOneOp * const f_vector_fcvtn[] = { |
41 | uint64_t data, unsigned size, | 73 | NULL, |
42 | MemTxAttrs attrs) | 74 | gen_fcvtn_hs, |
43 | @@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps gicv3_its_control_ops = { | 75 | gen_fcvtn_sd, |
44 | }; | 76 | }; |
45 | 77 | +static ArithOneOp * const f_scalar_fcvtxn[] = { | |
46 | static const MemoryRegionOps gicv3_its_translation_ops = { | 78 | + NULL, |
47 | + .read_with_attrs = gicv3_its_translation_read, | 79 | + NULL, |
48 | .write_with_attrs = gicv3_its_translation_write, | 80 | + gen_fcvtxn_sd, |
49 | .valid.min_access_size = 2, | 81 | +}; |
50 | .valid.max_access_size = 4, | 82 | TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) |
83 | TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) | ||
84 | |||
51 | -- | 85 | -- |
52 | 2.25.1 | 86 | 2.34.1 |
53 | |||
54 | diff view generated by jsdifflib |
1 | The ITS specification says that when the guest writes to GITS_CBASER | 1 | do_fp3_scalar_idx() is used only for the FMUL and FMULX scalar by |
---|---|---|---|
2 | this causes GITS_CREADR to be cleared. However it does not have an | 2 | element instructions; these both need to merge the result with the Rn |
3 | equivalent clause for GITS_CWRITER. (This is because GITS_CREADR is | 3 | register when FPCR.NEP is set. |
4 | read-only, but GITS_CWRITER is writable and the guest can initialize | ||
5 | it.) Remove the code that clears GITS_CWRITER on GITS_CBASER writes. | ||
6 | 4 | ||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220122182444.724087-6-peter.maydell@linaro.org | ||
10 | --- | 7 | --- |
11 | hw/intc/arm_gicv3_its.c | 3 --- | 8 | target/arm/tcg/translate-a64.c | 6 +++--- |
12 | 1 file changed, 3 deletions(-) | 9 | 1 file changed, 3 insertions(+), 3 deletions(-) |
13 | 10 | ||
14 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 11 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
15 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/intc/arm_gicv3_its.c | 13 | --- a/target/arm/tcg/translate-a64.c |
17 | +++ b/hw/intc/arm_gicv3_its.c | 14 | +++ b/target/arm/tcg/translate-a64.c |
18 | @@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset, | 15 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
19 | if (!(s->ctlr & R_GITS_CTLR_ENABLED_MASK)) { | 16 | |
20 | s->cbaser = deposit64(s->cbaser, 0, 32, value); | 17 | read_vec_element(s, t1, a->rm, a->idx, MO_64); |
21 | s->creadr = 0; | 18 | f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
22 | - s->cwriter = s->creadr; | 19 | - write_fp_dreg(s, a->rd, t0); |
20 | + write_fp_dreg_merging(s, a->rd, a->rn, t0); | ||
23 | } | 21 | } |
24 | break; | 22 | break; |
25 | case GITS_CBASER + 4: | 23 | case MO_32: |
26 | @@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset, | 24 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
27 | if (!(s->ctlr & R_GITS_CTLR_ENABLED_MASK)) { | 25 | |
28 | s->cbaser = deposit64(s->cbaser, 32, 32, value); | 26 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); |
29 | s->creadr = 0; | 27 | f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); |
30 | - s->cwriter = s->creadr; | 28 | - write_fp_sreg(s, a->rd, t0); |
29 | + write_fp_sreg_merging(s, a->rd, a->rn, t0); | ||
31 | } | 30 | } |
32 | break; | 31 | break; |
33 | case GITS_CWRITER: | 32 | case MO_16: |
34 | @@ -XXX,XX +XXX,XX @@ static bool its_writell(GICv3ITSState *s, hwaddr offset, | 33 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) |
35 | if (!(s->ctlr & R_GITS_CTLR_ENABLED_MASK)) { | 34 | |
36 | s->cbaser = value; | 35 | read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); |
37 | s->creadr = 0; | 36 | f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); |
38 | - s->cwriter = s->creadr; | 37 | - write_fp_sreg(s, a->rd, t0); |
38 | + write_fp_hreg_merging(s, a->rd, a->rn, t0); | ||
39 | } | 39 | } |
40 | break; | 40 | break; |
41 | case GITS_CWRITER: | 41 | default: |
42 | -- | 42 | -- |
43 | 2.25.1 | 43 | 2.34.1 |
44 | |||
45 | diff view generated by jsdifflib |
1 | In our implementation, all ITSes connected to a GIC share a single | 1 | When FPCR.AH == 1, floating point FMIN and FMAX have some odd special |
---|---|---|---|
2 | AddressSpace, which we keep in the GICv3State::dma_as field and | 2 | cases: |
3 | initialized based on the GIC's 'sysmem' property. The right place | ||
4 | to set it up by calling address_space_init() is therefore in the | ||
5 | GIC's realize method, not the ITS's realize. | ||
6 | 3 | ||
7 | This fixes a theoretical bug where QEMU hangs on startup if the board | 4 | * comparing two zeroes (even of different sign) or comparing a NaN |
8 | model creates two ITSes connected to the same GIC -- we would call | 5 | with anything always returns the second argument (possibly |
9 | address_space_init() twice on the same AddressSpace*, which creates | 6 | squashed to zero) |
10 | an infinite loop in the QTAILQ that softmmu/memory.c uses to store | 7 | * denormal outputs are not squashed to zero regardless of FZ or FZ16 |
11 | its list of AddressSpaces and causes any subsequent attempt to | 8 | |
12 | iterate through that list to loop forever. There aren't any board | 9 | Implement these semantics in new helper functions and select them at |
13 | models like that in the tree at the moment, though. | 10 | translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. |
11 | (We will convert the other FMAX and FMIN insns in subsequent | ||
12 | commits.) | ||
13 | |||
14 | Note that FMINNM and FMAXNM are not affected. | ||
14 | 15 | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
17 | Message-id: 20220122182444.724087-4-peter.maydell@linaro.org | ||
18 | --- | 18 | --- |
19 | hw/intc/arm_gicv3_common.c | 5 +++++ | 19 | target/arm/tcg/helper-a64.h | 7 +++++++ |
20 | hw/intc/arm_gicv3_its.c | 3 --- | 20 | target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ |
21 | 2 files changed, 5 insertions(+), 3 deletions(-) | 21 | target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- |
22 | 3 files changed, 64 insertions(+), 2 deletions(-) | ||
22 | 23 | ||
23 | diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c | 24 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h |
24 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/hw/intc/arm_gicv3_common.c | 26 | --- a/target/arm/tcg/helper-a64.h |
26 | +++ b/hw/intc/arm_gicv3_common.c | 27 | +++ b/target/arm/tcg/helper-a64.h |
27 | @@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) | 28 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) |
28 | return; | 29 | DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) |
29 | } | 30 | DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) |
30 | 31 | ||
31 | + if (s->lpi_enable) { | 32 | +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) |
32 | + address_space_init(&s->dma_as, s->dma, | 33 | +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) |
33 | + "gicv3-its-sysmem"); | 34 | +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) |
35 | +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) | ||
36 | +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) | ||
37 | +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) | ||
38 | + | ||
39 | DEF_HELPER_2(exception_return, void, env, i64) | ||
40 | DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) | ||
41 | |||
42 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/helper-a64.c | ||
45 | +++ b/target/arm/tcg/helper-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) | ||
47 | return r; | ||
48 | } | ||
49 | |||
50 | +/* | ||
51 | + * AH=1 min/max have some odd special cases: | ||
52 | + * comparing two zeroes (regardless of sign), (NaN, anything), | ||
53 | + * or (anything, NaN) should return the second argument (possibly | ||
54 | + * squashed to zero). | ||
55 | + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. | ||
56 | + */ | ||
57 | +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ | ||
58 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
59 | + { \ | ||
60 | + bool save; \ | ||
61 | + CTYPE r; \ | ||
62 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
63 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
64 | + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ | ||
65 | + return b; \ | ||
66 | + } \ | ||
67 | + if (FLOATTYPE ## _is_any_nan(a) || \ | ||
68 | + FLOATTYPE ## _is_any_nan(b)) { \ | ||
69 | + float_raise(float_flag_invalid, fpst); \ | ||
70 | + return b; \ | ||
71 | + } \ | ||
72 | + save = get_flush_to_zero(fpst); \ | ||
73 | + set_flush_to_zero(false, fpst); \ | ||
74 | + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ | ||
75 | + set_flush_to_zero(save, fpst); \ | ||
76 | + return r; \ | ||
34 | + } | 77 | + } |
35 | + | 78 | + |
36 | s->cpu = g_new0(GICv3CPUState, s->num_cpu); | 79 | +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) |
37 | 80 | +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) | |
38 | for (i = 0; i < s->num_cpu; i++) { | 81 | +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) |
39 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 82 | +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) |
83 | +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) | ||
84 | +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) | ||
85 | + | ||
86 | /* 64-bit versions of the CRC helpers. Note that although the operation | ||
87 | * (and the prototypes of crc32c() and crc32()) mean that only the bottom | ||
88 | * 32 bits of the accumulator and result are used, we pass and return | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | 90 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/hw/intc/arm_gicv3_its.c | 91 | --- a/target/arm/tcg/translate-a64.c |
42 | +++ b/hw/intc/arm_gicv3_its.c | 92 | +++ b/target/arm/tcg/translate-a64.c |
43 | @@ -XXX,XX +XXX,XX @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp) | 93 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, |
44 | 94 | select_ah_fpst(s, a->esz)); | |
45 | gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops); | 95 | } |
46 | 96 | ||
47 | - address_space_init(&s->gicv3->dma_as, s->gicv3->dma, | 97 | +/* Some insns need to call different helpers when FPCR.AH == 1 */ |
48 | - "gicv3-its-sysmem"); | 98 | +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, |
49 | - | 99 | + const FPScalar *fnormal, |
50 | /* set the ITS default features supported */ | 100 | + const FPScalar *fah, |
51 | s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, 1); | 101 | + int mergereg) |
52 | s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE, | 102 | +{ |
103 | + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); | ||
104 | +} | ||
105 | + | ||
106 | static const FPScalar f_scalar_fadd = { | ||
107 | gen_helper_vfp_addh, | ||
108 | gen_helper_vfp_adds, | ||
109 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_fmax = { | ||
110 | gen_helper_vfp_maxs, | ||
111 | gen_helper_vfp_maxd, | ||
112 | }; | ||
113 | -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn) | ||
114 | +static const FPScalar f_scalar_fmax_ah = { | ||
115 | + gen_helper_vfp_ah_maxh, | ||
116 | + gen_helper_vfp_ah_maxs, | ||
117 | + gen_helper_vfp_ah_maxd, | ||
118 | +}; | ||
119 | +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) | ||
120 | |||
121 | static const FPScalar f_scalar_fmin = { | ||
122 | gen_helper_vfp_minh, | ||
123 | gen_helper_vfp_mins, | ||
124 | gen_helper_vfp_mind, | ||
125 | }; | ||
126 | -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn) | ||
127 | +static const FPScalar f_scalar_fmin_ah = { | ||
128 | + gen_helper_vfp_ah_minh, | ||
129 | + gen_helper_vfp_ah_mins, | ||
130 | + gen_helper_vfp_ah_mind, | ||
131 | +}; | ||
132 | +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fmaxnm = { | ||
135 | gen_helper_vfp_maxnumh, | ||
53 | -- | 136 | -- |
54 | 2.25.1 | 137 | 2.34.1 |
55 | |||
56 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | Implement the FPCR.AH == 1 semantics for vector FMIN/FMAX, by |
---|---|---|---|
2 | creating new _ah_ versions of the gvec helpers which invoke the | ||
3 | scalar fmin_ah and fmax_ah helpers on each element. | ||
2 | 4 | ||
3 | Connect the OSPI flash memory controller model (including the source and | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | destination DMA). | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 +++++++++++++++++++-- | ||
10 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
11 | 3 files changed, 41 insertions(+), 2 deletions(-) | ||
5 | 12 | ||
6 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 13 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Message-id: 20220121161141.14389-8-francisco.iglesias@xilinx.com | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | include/hw/arm/xlnx-versal.h | 20 ++++++++ | ||
12 | hw/arm/xlnx-versal.c | 93 ++++++++++++++++++++++++++++++++++++ | ||
13 | 2 files changed, 113 insertions(+) | ||
14 | |||
15 | diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/hw/arm/xlnx-versal.h | 15 | --- a/target/arm/tcg/helper-sve.h |
18 | +++ b/include/hw/arm/xlnx-versal.h | 16 | +++ b/target/arm/tcg/helper-sve.h |
19 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, |
20 | #include "hw/misc/xlnx-versal-xramc.h" | 18 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, |
21 | #include "hw/nvram/xlnx-bbram.h" | 19 | void, ptr, ptr, ptr, fpst, i32) |
22 | #include "hw/nvram/xlnx-versal-efuse.h" | 20 | |
23 | +#include "hw/ssi/xlnx-versal-ospi.h" | 21 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, |
24 | +#include "hw/dma/xlnx_csu_dma.h" | 22 | + void, ptr, ptr, ptr, fpst, i32) |
25 | #include "hw/misc/xlnx-versal-pmc-iou-slcr.h" | 23 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, |
26 | 24 | + void, ptr, ptr, ptr, fpst, i32) | |
27 | #define TYPE_XLNX_VERSAL "xlnx-versal" | 25 | +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, |
28 | @@ -XXX,XX +XXX,XX @@ struct Versal { | 26 | + void, ptr, ptr, ptr, fpst, i32) |
29 | struct { | ||
30 | SDHCIState sd[XLNX_VERSAL_NR_SDS]; | ||
31 | XlnxVersalPmcIouSlcr slcr; | ||
32 | + | 27 | + |
33 | + struct { | 28 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, |
34 | + XlnxVersalOspi ospi; | 29 | + void, ptr, ptr, ptr, fpst, i32) |
35 | + XlnxCSUDMA dma_src; | 30 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, |
36 | + XlnxCSUDMA dma_dst; | 31 | + void, ptr, ptr, ptr, fpst, i32) |
37 | + MemoryRegion linear_mr; | 32 | +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, |
38 | + qemu_or_irq irq_orgate; | 33 | + void, ptr, ptr, ptr, fpst, i32) |
39 | + } ospi; | ||
40 | } iou; | ||
41 | |||
42 | XlnxZynqMPRTC rtc; | ||
43 | @@ -XXX,XX +XXX,XX @@ struct Versal { | ||
44 | #define VERSAL_ADMA_IRQ_0 60 | ||
45 | #define VERSAL_XRAM_IRQ_0 79 | ||
46 | #define VERSAL_PMC_APB_IRQ 121 | ||
47 | +#define VERSAL_OSPI_IRQ 124 | ||
48 | #define VERSAL_SD0_IRQ_0 126 | ||
49 | #define VERSAL_EFUSE_IRQ 139 | ||
50 | #define VERSAL_RTC_ALARM_IRQ 142 | ||
51 | @@ -XXX,XX +XXX,XX @@ struct Versal { | ||
52 | #define MM_PMC_PMC_IOU_SLCR 0xf1060000 | ||
53 | #define MM_PMC_PMC_IOU_SLCR_SIZE 0x10000 | ||
54 | |||
55 | +#define MM_PMC_OSPI 0xf1010000 | ||
56 | +#define MM_PMC_OSPI_SIZE 0x10000 | ||
57 | + | 34 | + |
58 | +#define MM_PMC_OSPI_DAC 0xc0000000 | 35 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, |
59 | +#define MM_PMC_OSPI_DAC_SIZE 0x20000000 | 36 | i64, ptr, ptr, fpst, i32) |
60 | + | 37 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, |
61 | +#define MM_PMC_OSPI_DMA_DST 0xf1011800 | 38 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
62 | +#define MM_PMC_OSPI_DMA_SRC 0xf1011000 | ||
63 | + | ||
64 | #define MM_PMC_SD0 0xf1040000U | ||
65 | #define MM_PMC_SD0_SIZE 0x10000 | ||
66 | #define MM_PMC_BBRAM_CTRL 0xf11f0000 | ||
67 | diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
69 | --- a/hw/arm/xlnx-versal.c | 40 | --- a/target/arm/tcg/translate-a64.c |
70 | +++ b/hw/arm/xlnx-versal.c | 41 | +++ b/target/arm/tcg/translate-a64.c |
71 | @@ -XXX,XX +XXX,XX @@ | 42 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, |
72 | #define GEM_REVISION 0x40070106 | 43 | FPST_A64_F16 : FPST_A64); |
73 | |||
74 | #define VERSAL_NUM_PMC_APB_IRQS 3 | ||
75 | +#define NUM_OSPI_IRQ_LINES 3 | ||
76 | |||
77 | static void versal_create_apu_cpus(Versal *s) | ||
78 | { | ||
79 | @@ -XXX,XX +XXX,XX @@ static void versal_create_pmc_iou_slcr(Versal *s, qemu_irq *pic) | ||
80 | qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 2)); | ||
81 | } | 44 | } |
82 | 45 | ||
83 | +static void versal_create_ospi(Versal *s, qemu_irq *pic) | 46 | +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, |
47 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
48 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
84 | +{ | 49 | +{ |
85 | + SysBusDevice *sbd; | 50 | + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); |
86 | + MemoryRegion *mr_dac; | ||
87 | + qemu_irq ospi_mux_sel; | ||
88 | + DeviceState *orgate; | ||
89 | + | ||
90 | + memory_region_init(&s->pmc.iou.ospi.linear_mr, OBJECT(s), | ||
91 | + "versal-ospi-linear-mr" , MM_PMC_OSPI_DAC_SIZE); | ||
92 | + | ||
93 | + object_initialize_child(OBJECT(s), "versal-ospi", &s->pmc.iou.ospi.ospi, | ||
94 | + TYPE_XILINX_VERSAL_OSPI); | ||
95 | + | ||
96 | + mr_dac = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi), 1); | ||
97 | + memory_region_add_subregion(&s->pmc.iou.ospi.linear_mr, 0x0, mr_dac); | ||
98 | + | ||
99 | + /* Create the OSPI destination DMA */ | ||
100 | + object_initialize_child(OBJECT(s), "versal-ospi-dma-dst", | ||
101 | + &s->pmc.iou.ospi.dma_dst, | ||
102 | + TYPE_XLNX_CSU_DMA); | ||
103 | + | ||
104 | + object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_dst), | ||
105 | + "dma", OBJECT(get_system_memory()), | ||
106 | + &error_abort); | ||
107 | + | ||
108 | + sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_dst); | ||
109 | + sysbus_realize(sbd, &error_fatal); | ||
110 | + | ||
111 | + memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DMA_DST, | ||
112 | + sysbus_mmio_get_region(sbd, 0)); | ||
113 | + | ||
114 | + /* Create the OSPI source DMA */ | ||
115 | + object_initialize_child(OBJECT(s), "versal-ospi-dma-src", | ||
116 | + &s->pmc.iou.ospi.dma_src, | ||
117 | + TYPE_XLNX_CSU_DMA); | ||
118 | + | ||
119 | + object_property_set_bool(OBJECT(&s->pmc.iou.ospi.dma_src), "is-dst", | ||
120 | + false, &error_abort); | ||
121 | + | ||
122 | + object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_src), | ||
123 | + "dma", OBJECT(mr_dac), &error_abort); | ||
124 | + | ||
125 | + object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_src), | ||
126 | + "stream-connected-dma", | ||
127 | + OBJECT(&s->pmc.iou.ospi.dma_dst), | ||
128 | + &error_abort); | ||
129 | + | ||
130 | + sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_src); | ||
131 | + sysbus_realize(sbd, &error_fatal); | ||
132 | + | ||
133 | + memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DMA_SRC, | ||
134 | + sysbus_mmio_get_region(sbd, 0)); | ||
135 | + | ||
136 | + /* Realize the OSPI */ | ||
137 | + object_property_set_link(OBJECT(&s->pmc.iou.ospi.ospi), "dma-src", | ||
138 | + OBJECT(&s->pmc.iou.ospi.dma_src), &error_abort); | ||
139 | + | ||
140 | + sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi); | ||
141 | + sysbus_realize(sbd, &error_fatal); | ||
142 | + | ||
143 | + memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI, | ||
144 | + sysbus_mmio_get_region(sbd, 0)); | ||
145 | + | ||
146 | + memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DAC, | ||
147 | + &s->pmc.iou.ospi.linear_mr); | ||
148 | + | ||
149 | + /* ospi_mux_sel */ | ||
150 | + ospi_mux_sel = qdev_get_gpio_in_named(DEVICE(&s->pmc.iou.ospi.ospi), | ||
151 | + "ospi-mux-sel", 0); | ||
152 | + qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "ospi-mux-sel", 0, | ||
153 | + ospi_mux_sel); | ||
154 | + | ||
155 | + /* OSPI irq */ | ||
156 | + object_initialize_child(OBJECT(s), "ospi-irq-orgate", | ||
157 | + &s->pmc.iou.ospi.irq_orgate, TYPE_OR_IRQ); | ||
158 | + object_property_set_int(OBJECT(&s->pmc.iou.ospi.irq_orgate), | ||
159 | + "num-lines", NUM_OSPI_IRQ_LINES, &error_fatal); | ||
160 | + | ||
161 | + orgate = DEVICE(&s->pmc.iou.ospi.irq_orgate); | ||
162 | + qdev_realize(orgate, NULL, &error_fatal); | ||
163 | + | ||
164 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi), 0, | ||
165 | + qdev_get_gpio_in(orgate, 0)); | ||
166 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_src), 0, | ||
167 | + qdev_get_gpio_in(orgate, 1)); | ||
168 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_dst), 0, | ||
169 | + qdev_get_gpio_in(orgate, 2)); | ||
170 | + | ||
171 | + qdev_connect_gpio_out(orgate, 0, pic[VERSAL_OSPI_IRQ]); | ||
172 | +} | 51 | +} |
173 | + | 52 | + |
174 | /* This takes the board allocated linear DDR memory and creates aliases | 53 | static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, |
175 | * for each split DDR range/aperture on the Versal address map. | 54 | gen_helper_gvec_3_ptr * const f[3]) |
176 | */ | 55 | { |
177 | @@ -XXX,XX +XXX,XX @@ static void versal_realize(DeviceState *dev, Error **errp) | 56 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { |
178 | versal_create_bbram(s, pic); | 57 | gen_helper_gvec_fmax_s, |
179 | versal_create_efuse(s, pic); | 58 | gen_helper_gvec_fmax_d, |
180 | versal_create_pmc_iou_slcr(s, pic); | 59 | }; |
181 | + versal_create_ospi(s, pic); | 60 | -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) |
182 | versal_map_ddr(s); | 61 | +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { |
183 | versal_unimp(s); | 62 | + gen_helper_gvec_ah_fmax_h, |
63 | + gen_helper_gvec_ah_fmax_s, | ||
64 | + gen_helper_gvec_ah_fmax_d, | ||
65 | +}; | ||
66 | +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) | ||
67 | |||
68 | static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { | ||
69 | gen_helper_gvec_fmin_h, | ||
70 | gen_helper_gvec_fmin_s, | ||
71 | gen_helper_gvec_fmin_d, | ||
72 | }; | ||
73 | -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) | ||
74 | +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { | ||
75 | + gen_helper_gvec_ah_fmin_h, | ||
76 | + gen_helper_gvec_ah_fmin_s, | ||
77 | + gen_helper_gvec_ah_fmin_d, | ||
78 | +}; | ||
79 | +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) | ||
80 | |||
81 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { | ||
82 | gen_helper_gvec_fmaxnum_h, | ||
83 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/arm/tcg/vec_helper.c | ||
86 | +++ b/target/arm/tcg/vec_helper.c | ||
87 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
88 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
89 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
90 | |||
91 | +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
92 | +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
93 | +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
94 | + | ||
95 | +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) | ||
96 | +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) | ||
97 | +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) | ||
98 | + | ||
99 | #endif | ||
100 | #undef DO_3OP | ||
184 | 101 | ||
185 | -- | 102 | -- |
186 | 2.25.1 | 103 | 2.34.1 |
187 | |||
188 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for FMAXV and FMINV. These are the | ||
2 | "recursively reduce all lanes of a vector to a scalar result" insns; | ||
3 | we just need to use the _ah_ helper for the reduction step when | ||
4 | FPCR.AH == 1. | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/translate-a64.c | 28 ++++++++++++++++++---------- | ||
10 | 1 file changed, 18 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/translate-a64.c | ||
15 | +++ b/target/arm/tcg/translate-a64.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, | ||
17 | } | ||
18 | |||
19 | static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, | ||
20 | - NeonGenTwoSingleOpFn *fn) | ||
21 | + NeonGenTwoSingleOpFn *fnormal, | ||
22 | + NeonGenTwoSingleOpFn *fah) | ||
23 | { | ||
24 | if (fp_access_check(s)) { | ||
25 | MemOp esz = a->esz; | ||
26 | int elts = (a->q ? 16 : 8) >> esz; | ||
27 | TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
28 | - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); | ||
29 | + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, | ||
30 | + s->fpcr_ah ? fah : fnormal); | ||
31 | write_fp_sreg(s, a->rd, res); | ||
32 | } | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) | ||
37 | -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) | ||
38 | -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) | ||
39 | -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) | ||
40 | +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, | ||
41 | + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) | ||
42 | +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, | ||
43 | + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) | ||
44 | +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, | ||
45 | + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) | ||
46 | +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, | ||
47 | + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) | ||
48 | |||
49 | -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) | ||
50 | -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) | ||
51 | -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) | ||
52 | -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) | ||
53 | +TRANS(FMAXNMV_s, do_fp_reduction, a, | ||
54 | + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) | ||
55 | +TRANS(FMINNMV_s, do_fp_reduction, a, | ||
56 | + gen_helper_vfp_minnums, gen_helper_vfp_minnums) | ||
57 | +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) | ||
58 | +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) | ||
59 | |||
60 | /* | ||
61 | * Floating-point Immediate | ||
62 | -- | ||
63 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the pairwise floating | ||
2 | point minimum/maximum insns FMINP and FMAXP. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- | ||
9 | target/arm/tcg/vec_helper.c | 10 ++++++++++ | ||
10 | 3 files changed, 45 insertions(+), 4 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, | ||
35 | i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/translate-a64.c | ||
40 | +++ b/target/arm/tcg/translate-a64.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { | ||
42 | gen_helper_gvec_fmaxp_s, | ||
43 | gen_helper_gvec_fmaxp_d, | ||
44 | }; | ||
45 | -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) | ||
46 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { | ||
47 | + gen_helper_gvec_ah_fmaxp_h, | ||
48 | + gen_helper_gvec_ah_fmaxp_s, | ||
49 | + gen_helper_gvec_ah_fmaxp_d, | ||
50 | +}; | ||
51 | +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) | ||
52 | |||
53 | static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { | ||
54 | gen_helper_gvec_fminp_h, | ||
55 | gen_helper_gvec_fminp_s, | ||
56 | gen_helper_gvec_fminp_d, | ||
57 | }; | ||
58 | -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) | ||
59 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { | ||
60 | + gen_helper_gvec_ah_fminp_h, | ||
61 | + gen_helper_gvec_ah_fminp_s, | ||
62 | + gen_helper_gvec_ah_fminp_d, | ||
63 | +}; | ||
64 | +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) | ||
65 | |||
66 | static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { | ||
67 | gen_helper_gvec_fmaxnump_h, | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, | ||
73 | + const FPScalar *fnormal, | ||
74 | + const FPScalar *fah) | ||
75 | +{ | ||
76 | + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); | ||
77 | +} | ||
78 | + | ||
79 | TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) | ||
80 | -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) | ||
81 | -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) | ||
82 | +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) | ||
83 | +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) | ||
84 | TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) | ||
85 | TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) | ||
86 | |||
87 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/arm/tcg/vec_helper.c | ||
90 | +++ b/target/arm/tcg/vec_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) | ||
92 | DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) | ||
93 | DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) | ||
94 | |||
95 | +#ifdef TARGET_AARCH64 | ||
96 | +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) | ||
97 | +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) | ||
98 | +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) | ||
99 | + | ||
100 | +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) | ||
101 | +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) | ||
102 | +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) | ||
103 | +#endif | ||
104 | + | ||
105 | #undef DO_3OP_PAIR | ||
106 | |||
107 | #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ | ||
108 | -- | ||
109 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAXV and FMINV | ||
2 | vector-reduction-to-scalar max/min operations. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 +++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 43 +++++++++++++++++++++------------- | ||
9 | target/arm/tcg/translate-sve.c | 16 +++++++++++-- | ||
10 | 3 files changed, 55 insertions(+), 18 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, | ||
18 | i64, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, | ||
21 | + i64, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, | ||
23 | + i64, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, | ||
25 | + i64, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, | ||
28 | + i64, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, | ||
30 | + i64, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, | ||
32 | + i64, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, | ||
35 | i64, i64, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ | ||
42 | uintptr_t half = n / 2; \ | ||
43 | TYPE lo = NAME##_reduce(data, status, half); \ | ||
44 | TYPE hi = NAME##_reduce(data + half, status, half); \ | ||
45 | - return TYPE##_##FUNC(lo, hi, status); \ | ||
46 | + return FUNC(lo, hi, status); \ | ||
47 | } \ | ||
48 | } \ | ||
49 | uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
50 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ | ||
51 | return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ | ||
52 | } | ||
53 | |||
54 | -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) | ||
55 | -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) | ||
56 | -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) | ||
57 | +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) | ||
58 | +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) | ||
59 | +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) | ||
60 | |||
61 | /* Identity is floatN_default_nan, without the function call. */ | ||
62 | -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) | ||
63 | -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) | ||
64 | -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) | ||
65 | +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) | ||
66 | +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) | ||
67 | +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) | ||
68 | |||
69 | -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) | ||
70 | -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) | ||
71 | -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) | ||
72 | +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) | ||
73 | +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) | ||
74 | +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) | ||
75 | |||
76 | -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) | ||
77 | -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) | ||
78 | -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) | ||
79 | +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) | ||
80 | +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) | ||
81 | +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) | ||
82 | |||
83 | -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) | ||
84 | -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) | ||
85 | -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) | ||
86 | +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) | ||
87 | +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) | ||
88 | +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) | ||
89 | + | ||
90 | +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) | ||
91 | +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) | ||
92 | +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) | ||
93 | + | ||
94 | +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, | ||
95 | + float16_chs(float16_infinity)) | ||
96 | +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, | ||
97 | + float32_chs(float32_infinity)) | ||
98 | +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, | ||
99 | + float64_chs(float64_infinity)) | ||
100 | |||
101 | #undef DO_REDUCE | ||
102 | |||
103 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/translate-sve.c | ||
106 | +++ b/target/arm/tcg/translate-sve.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, | ||
108 | }; \ | ||
109 | TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) | ||
110 | |||
111 | +#define DO_VPZ_AH(NAME, name) \ | ||
112 | + static gen_helper_fp_reduce * const name##_fns[4] = { \ | ||
113 | + NULL, gen_helper_sve_##name##_h, \ | ||
114 | + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ | ||
115 | + }; \ | ||
116 | + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ | ||
117 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
118 | + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ | ||
119 | + }; \ | ||
120 | + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ | ||
121 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
122 | + | ||
123 | DO_VPZ(FADDV, faddv) | ||
124 | DO_VPZ(FMINNMV, fminnmv) | ||
125 | DO_VPZ(FMAXNMV, fmaxnmv) | ||
126 | -DO_VPZ(FMINV, fminv) | ||
127 | -DO_VPZ(FMAXV, fmaxv) | ||
128 | +DO_VPZ_AH(FMINV, fminv) | ||
129 | +DO_VPZ_AH(FMAXV, fmaxv) | ||
130 | |||
131 | #undef DO_VPZ | ||
132 | |||
133 | -- | ||
134 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN operations | ||
2 | that take an immediate as the second operand. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 25 +++++++++++++++++++++++-- | ||
10 | 3 files changed, 45 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, i64, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, i64, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) | ||
42 | DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) | ||
43 | DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) | ||
44 | |||
45 | +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) | ||
46 | +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) | ||
47 | +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) | ||
48 | + | ||
49 | +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) | ||
50 | +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) | ||
51 | +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) | ||
52 | + | ||
53 | /* Fully general two-operand expander, controlled by a predicate, | ||
54 | * With the extra float_status parameter. | ||
55 | */ | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, | ||
61 | TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
62 | name##_const[a->esz][a->imm], name##_fns[a->esz]) | ||
63 | |||
64 | +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ | ||
65 | + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ | ||
66 | + NULL, gen_helper_sve_##name##_h, \ | ||
67 | + gen_helper_sve_##name##_s, \ | ||
68 | + gen_helper_sve_##name##_d \ | ||
69 | + }; \ | ||
70 | + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ | ||
71 | + NULL, gen_helper_sve_ah_##name##_h, \ | ||
72 | + gen_helper_sve_ah_##name##_s, \ | ||
73 | + gen_helper_sve_ah_##name##_d \ | ||
74 | + }; \ | ||
75 | + static uint64_t const name##_const[4][2] = { \ | ||
76 | + { -1, -1 }, \ | ||
77 | + { float16_##const0, float16_##const1 }, \ | ||
78 | + { float32_##const0, float32_##const1 }, \ | ||
79 | + { float64_##const0, float64_##const1 }, \ | ||
80 | + }; \ | ||
81 | + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ | ||
82 | + name##_const[a->esz][a->imm], \ | ||
83 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) | ||
84 | + | ||
85 | DO_FP_IMM(FADD, fadds, half, one) | ||
86 | DO_FP_IMM(FSUB, fsubs, half, one) | ||
87 | DO_FP_IMM(FMUL, fmuls, half, two) | ||
88 | DO_FP_IMM(FSUBR, fsubrs, half, one) | ||
89 | DO_FP_IMM(FMAXNM, fmaxnms, zero, one) | ||
90 | DO_FP_IMM(FMINNM, fminnms, zero, one) | ||
91 | -DO_FP_IMM(FMAX, fmaxs, zero, one) | ||
92 | -DO_FP_IMM(FMIN, fmins, zero, one) | ||
93 | +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) | ||
94 | +DO_FP_AH_IMM(FMIN, fmins, zero, one) | ||
95 | |||
96 | #undef DO_FP_IMM | ||
97 | |||
98 | -- | ||
99 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Implement the FPCR.AH semantics for the SVE FMAX and FMIN | ||
2 | operations that take two vector operands. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- | ||
10 | 3 files changed, 37 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
33 | + | ||
34 | DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, | ||
35 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, | ||
37 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/arm/tcg/sve_helper.c | ||
40 | +++ b/target/arm/tcg/sve_helper.c | ||
41 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) | ||
42 | DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) | ||
43 | DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) | ||
44 | |||
45 | +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) | ||
46 | +DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) | ||
47 | +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) | ||
48 | + | ||
49 | +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) | ||
50 | +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) | ||
51 | +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) | ||
52 | + | ||
53 | DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) | ||
54 | DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) | ||
55 | DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) | ||
56 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/arm/tcg/translate-sve.c | ||
59 | +++ b/target/arm/tcg/translate-sve.c | ||
60 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
61 | }; \ | ||
62 | TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) | ||
63 | |||
64 | +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ | ||
65 | + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ | ||
66 | + NULL, gen_helper_##name##_h, \ | ||
67 | + gen_helper_##name##_s, gen_helper_##name##_d \ | ||
68 | + }; \ | ||
69 | + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ | ||
70 | + NULL, gen_helper_##ah_name##_h, \ | ||
71 | + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ | ||
72 | + }; \ | ||
73 | + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ | ||
74 | + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ | ||
75 | + name##_zpzz_fns[a->esz], a) | ||
76 | + | ||
77 | DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) | ||
78 | DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) | ||
79 | DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) | ||
80 | -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) | ||
81 | -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) | ||
82 | +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
83 | +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
84 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
85 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
86 | DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | FPCR.AH == 1 mandates that negation of a NaN value should not flip | |
2 | its sign bit. This means we can no longer use gen_vfp_neg*() | ||
3 | everywhere but must instead generate slightly more complex code when | ||
4 | FPCR.AH is set. | ||
5 | |||
6 | Make this change for the scalar FNEG and for those places in | ||
7 | translate-a64.c which were previously directly calling | ||
8 | gen_vfp_neg*(). | ||
9 | |||
10 | This change in semantics also affects any other instruction whose | ||
11 | pseudocode calls FPNeg(); in following commits we extend this | ||
12 | change to the other affected instructions. | ||
13 | |||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/translate-a64.c | 125 ++++++++++++++++++++++++++++++--- | ||
18 | 1 file changed, 114 insertions(+), 11 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, | ||
25 | is_q ? 16 : 8, vec_full_reg_size(s), data, fn); | ||
26 | } | ||
27 | |||
28 | +/* | ||
29 | + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. | ||
30 | + * These functions implement | ||
31 | + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) | ||
32 | + * which for float32 is | ||
33 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) | |
34 | + * and similarly for the other float sizes. | ||
35 | + */ | ||
36 | +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) | ||
37 | +{ | ||
38 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
39 | + | ||
40 | + gen_vfp_negh(chs_s, s); | ||
41 | + gen_vfp_absh(abs_s, s); | ||
42 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
43 | + abs_s, tcg_constant_i32(0x7c00), | ||
44 | + s, chs_s); | ||
45 | +} | ||
46 | + | ||
47 | +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) | ||
48 | +{ | ||
49 | + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); | ||
50 | + | ||
51 | + gen_vfp_negs(chs_s, s); | ||
52 | + gen_vfp_abss(abs_s, s); | ||
53 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
54 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
55 | + s, chs_s); | ||
56 | +} | ||
57 | + | ||
58 | +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) | ||
59 | +{ | ||
60 | + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); | ||
61 | + | ||
62 | + gen_vfp_negd(chs_s, s); | ||
63 | + gen_vfp_absd(abs_s, s); | ||
64 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, | ||
65 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), | ||
66 | + s, chs_s); | ||
67 | +} | ||
68 | + | ||
69 | +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
70 | +{ | ||
71 | + if (dc->fpcr_ah) { | ||
72 | + gen_vfp_ah_negh(d, s); | ||
73 | + } else { | ||
74 | + gen_vfp_negh(d, s); | ||
75 | + } | ||
76 | +} | ||
77 | + | ||
78 | +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) | ||
79 | +{ | ||
80 | + if (dc->fpcr_ah) { | ||
81 | + gen_vfp_ah_negs(d, s); | ||
82 | + } else { | ||
83 | + gen_vfp_negs(d, s); | ||
84 | + } | ||
85 | +} | ||
86 | + | ||
87 | +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) | ||
88 | +{ | ||
89 | + if (dc->fpcr_ah) { | ||
90 | + gen_vfp_ah_negd(d, s); | ||
91 | + } else { | ||
92 | + gen_vfp_negd(d, s); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | /* Set ZF and NF based on a 64 bit result. This is alas fiddlier | ||
97 | * than the 32 bit equivalent. | ||
98 | */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
100 | gen_vfp_negd(d, d); | ||
101 | } | ||
102 | |||
103 | +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
104 | +{ | ||
105 | + gen_helper_vfp_mulh(d, n, m, s); | ||
106 | + gen_vfp_ah_negh(d, d); | ||
107 | +} | ||
108 | + | ||
109 | +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
110 | +{ | ||
111 | + gen_helper_vfp_muls(d, n, m, s); | ||
112 | + gen_vfp_ah_negs(d, d); | ||
113 | +} | ||
114 | + | ||
115 | +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
116 | +{ | ||
117 | + gen_helper_vfp_muld(d, n, m, s); | ||
118 | + gen_vfp_ah_negd(d, d); | ||
119 | +} | ||
120 | + | ||
121 | static const FPScalar f_scalar_fnmul = { | ||
122 | gen_fnmul_h, | ||
123 | gen_fnmul_s, | ||
124 | gen_fnmul_d, | ||
125 | }; | ||
126 | -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn) | ||
127 | +static const FPScalar f_scalar_ah_fnmul = { | ||
128 | + gen_fnmul_ah_h, | ||
129 | + gen_fnmul_ah_s, | ||
130 | + gen_fnmul_ah_d, | ||
131 | +}; | ||
132 | +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) | ||
133 | |||
134 | static const FPScalar f_scalar_fcmeq = { | ||
135 | gen_helper_advsimd_ceq_f16, | ||
136 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
137 | |||
138 | read_vec_element(s, t2, a->rm, a->idx, MO_64); | ||
139 | if (neg) { | ||
140 | - gen_vfp_negd(t1, t1); | ||
141 | + gen_vfp_maybe_ah_negd(s, t1, t1); | ||
142 | } | ||
143 | gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
144 | write_fp_dreg_merging(s, a->rd, a->rd, t0); | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
146 | |||
147 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); | ||
148 | if (neg) { | ||
149 | - gen_vfp_negs(t1, t1); | ||
150 | + gen_vfp_maybe_ah_negs(s, t1, t1); | ||
151 | } | ||
152 | gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); | ||
153 | write_fp_sreg_merging(s, a->rd, a->rd, t0); | ||
154 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) | ||
155 | |||
156 | read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); | ||
157 | if (neg) { | ||
158 | - gen_vfp_negh(t1, t1); | ||
159 | + gen_vfp_maybe_ah_negh(s, t1, t1); | ||
160 | } | ||
161 | gen_helper_advsimd_muladdh(t0, t1, t2, t0, | ||
162 | fpstatus_ptr(FPST_A64_F16)); | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
164 | TCGv_i64 ta = read_fp_dreg(s, a->ra); | ||
165 | |||
166 | if (neg_a) { | ||
167 | - gen_vfp_negd(ta, ta); | ||
168 | + gen_vfp_maybe_ah_negd(s, ta, ta); | ||
169 | } | ||
170 | if (neg_n) { | ||
171 | - gen_vfp_negd(tn, tn); | ||
172 | + gen_vfp_maybe_ah_negd(s, tn, tn); | ||
173 | } | ||
174 | fpst = fpstatus_ptr(FPST_A64); | ||
175 | gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
177 | TCGv_i32 ta = read_fp_sreg(s, a->ra); | ||
178 | |||
179 | if (neg_a) { | ||
180 | - gen_vfp_negs(ta, ta); | ||
181 | + gen_vfp_maybe_ah_negs(s, ta, ta); | ||
182 | } | ||
183 | if (neg_n) { | ||
184 | - gen_vfp_negs(tn, tn); | ||
185 | + gen_vfp_maybe_ah_negs(s, tn, tn); | ||
186 | } | ||
187 | fpst = fpstatus_ptr(FPST_A64); | ||
188 | gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); | ||
189 | @@ -XXX,XX +XXX,XX @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) | ||
190 | TCGv_i32 ta = read_fp_hreg(s, a->ra); | ||
191 | |||
192 | if (neg_a) { | ||
193 | - gen_vfp_negh(ta, ta); | ||
194 | + gen_vfp_maybe_ah_negh(s, ta, ta); | ||
195 | } | ||
196 | if (neg_n) { | ||
197 | - gen_vfp_negh(tn, tn); | ||
198 | + gen_vfp_maybe_ah_negh(s, tn, tn); | ||
199 | } | ||
200 | fpst = fpstatus_ptr(FPST_A64_F16); | ||
201 | gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, | ||
203 | return true; | ||
204 | } | ||
205 | |||
206 | +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, | ||
207 | + const FPScalar1Int *fnormal, | ||
208 | + const FPScalar1Int *fah) | ||
209 | +{ | ||
210 | + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); | ||
211 | +} | ||
212 | + | ||
213 | static const FPScalar1Int f_scalar_fmov = { | ||
214 | tcg_gen_mov_i32, | ||
215 | tcg_gen_mov_i32, | ||
216 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fneg = { | ||
217 | gen_vfp_negs, | ||
218 | gen_vfp_negd, | ||
219 | }; | ||
220 | -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true) | ||
221 | +static const FPScalar1Int f_scalar_ah_fneg = { | ||
222 | + gen_vfp_ah_negh, | ||
223 | + gen_vfp_ah_negs, | ||
224 | + gen_vfp_ah_negd, | ||
225 | +}; | ||
226 | +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) | ||
227 | |||
228 | typedef struct FPScalar1 { | ||
229 | void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); | ||
230 | -- | ||
231 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | FPCR.AH == 1 mandates that taking the absolute value of a NaN should |
---|---|---|---|
2 | not change its sign bit. This means we can no longer use | ||
3 | gen_vfp_abs*() everywhere but must instead generate slightly more | ||
4 | complex code when FPCR.AH is set. | ||
2 | 5 | ||
3 | Add a model of Versal's PMC SLCR (system-level control registers). | 6 | Implement these semantics for scalar FABS and FABD. This change also |
7 | affects all other instructions whose pseudocode calls FPAbs(); we | |
8 | will extend the change to those instructions in following commits. | ||
4 | 9 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | ||
6 | Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Luc Michel <luc@lmichel.fr> | ||
9 | Message-id: 20220121161141.14389-2-francisco.iglesias@xilinx.com | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | 12 | --- |
12 | include/hw/misc/xlnx-versal-pmc-iou-slcr.h | 78 ++ | 13 | target/arm/tcg/translate-a64.c | 69 +++++++++++++++++++++++++++++++++- |
13 | hw/misc/xlnx-versal-pmc-iou-slcr.c | 1446 ++++++++++++++++++++ | 14 | 1 file changed, 67 insertions(+), 2 deletions(-) |
14 | hw/misc/meson.build | 5 +- | ||
15 | 3 files changed, 1528 insertions(+), 1 deletion(-) | ||
16 | create mode 100644 include/hw/misc/xlnx-versal-pmc-iou-slcr.h | ||
17 | create mode 100644 hw/misc/xlnx-versal-pmc-iou-slcr.c | ||
18 | 15 | ||
19 | diff --git a/include/hw/misc/xlnx-versal-pmc-iou-slcr.h b/include/hw/misc/xlnx-versal-pmc-iou-slcr.h | 16 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
20 | new file mode 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
21 | index XXXXXXX..XXXXXXX | 18 | --- a/target/arm/tcg/translate-a64.c |
22 | --- /dev/null | 19 | +++ b/target/arm/tcg/translate-a64.c |
23 | +++ b/include/hw/misc/xlnx-versal-pmc-iou-slcr.h | 20 | @@ -XXX,XX +XXX,XX @@ static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) |
24 | @@ -XXX,XX +XXX,XX @@ | 21 | s, chs_s); |
22 | } | ||
23 | |||
25 | +/* | 24 | +/* |
26 | + * Header file for the Xilinx Versal's PMC IOU SLCR | 25 | + * These functions implement |
27 | + * | 26 | + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) |
28 | + * Copyright (C) 2021 Xilinx Inc | 27 | + * which for float32 is |
29 | + * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com> | 28 | + * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) |
30 | + * | 29 | + * and similarly for the other float sizes. |
31 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
32 | + * of this software and associated documentation files (the "Software"), to deal | ||
33 | + * in the Software without restriction, including without limitation the rights | ||
34 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
35 | + * copies of the Software, and to permit persons to whom the Software is | ||
36 | + * furnished to do so, subject to the following conditions: | ||
37 | + * | ||
38 | + * The above copyright notice and this permission notice shall be included in | ||
39 | + * all copies or substantial portions of the Software. | ||
40 | + * | ||
41 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
42 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
43 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
44 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
45 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
46 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
47 | + * THE SOFTWARE. | ||
48 | + */ | 30 | + */ |
31 | +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) | ||
32 | +{ | ||
33 | + TCGv_i32 abs_s = tcg_temp_new_i32(); | ||
49 | + | 34 | + |
50 | +/* | 35 | + gen_vfp_absh(abs_s, s); |
51 | + * This is a model of Xilinx Versal's PMC I/O Peripheral Control and Status | 36 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, |
52 | + * module documented in Versal's Technical Reference manual [1] and the Versal | 37 | + abs_s, tcg_constant_i32(0x7c00), |
53 | + * ACAP Register reference [2]. | 38 | + s, abs_s); |
54 | + * | ||
55 | + * References: | ||
56 | + * | ||
57 | + * [1] Versal ACAP Technical Reference Manual, | ||
58 | + * https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf | ||
59 | + * | ||
60 | + * [2] Versal ACAP Register Reference, | ||
61 | + * https://www.xilinx.com/html_docs/registers/am012/am012-versal-register-reference.html#mod___pmc_iop_slcr.html | ||
62 | + * | ||
63 | + * QEMU interface: | ||
64 | + * + sysbus MMIO region 0: MemoryRegion for the device's registers | ||
65 | + * + sysbus IRQ 0: PMC (AXI and APB) parity error interrupt detected by the PMC | ||
66 | + * I/O peripherals. | ||
67 | + * + sysbus IRQ 1: Device interrupt. | ||
68 | + * + Named GPIO output "sd-emmc-sel[0]": Enables 0: SD mode or 1: eMMC mode on | ||
69 | + * SD/eMMC controller 0. | ||
70 | + * + Named GPIO output "sd-emmc-sel[1]": Enables 0: SD mode or 1: eMMC mode on | ||
71 | + * SD/eMMC controller 1. | ||
72 | + * + Named GPIO output "qspi-ospi-mux-sel": Selects 0: QSPI linear region or 1: | ||
73 | + * OSPI linear region. | ||
74 | + * + Named GPIO output "ospi-mux-sel": Selects 0: OSPI Indirect access mode or | ||
75 | + * 1: OSPI direct access mode. | ||
76 | + */ | ||
77 | + | ||
78 | +#ifndef XILINX_VERSAL_PMC_IOU_SLCR_H | ||
79 | +#define XILINX_VERSAL_PMC_IOU_SLCR_H | ||
80 | + | ||
81 | +#include "hw/register.h" | ||
82 | + | ||
83 | +#define TYPE_XILINX_VERSAL_PMC_IOU_SLCR "xlnx.versal-pmc-iou-slcr" | ||
84 | + | ||
85 | +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalPmcIouSlcr, XILINX_VERSAL_PMC_IOU_SLCR) | ||
86 | + | ||
87 | +#define XILINX_VERSAL_PMC_IOU_SLCR_R_MAX (0x828 / 4 + 1) | ||
88 | + | ||
89 | +struct XlnxVersalPmcIouSlcr { | ||
90 | + SysBusDevice parent_obj; | ||
91 | + MemoryRegion iomem; | ||
92 | + qemu_irq irq_parity_imr; | ||
93 | + qemu_irq irq_imr; | ||
94 | + qemu_irq sd_emmc_sel[2]; | ||
95 | + qemu_irq qspi_ospi_mux_sel; | ||
96 | + qemu_irq ospi_mux_sel; | ||
97 | + | ||
98 | + uint32_t regs[XILINX_VERSAL_PMC_IOU_SLCR_R_MAX]; | ||
99 | + RegisterInfo regs_info[XILINX_VERSAL_PMC_IOU_SLCR_R_MAX]; | ||
100 | +}; | ||
101 | + | ||
102 | +#endif /* XILINX_VERSAL_PMC_IOU_SLCR_H */ | ||
103 | diff --git a/hw/misc/xlnx-versal-pmc-iou-slcr.c b/hw/misc/xlnx-versal-pmc-iou-slcr.c | ||
104 | new file mode 100644 | ||
105 | index XXXXXXX..XXXXXXX | ||
106 | --- /dev/null | ||
107 | +++ b/hw/misc/xlnx-versal-pmc-iou-slcr.c | ||
108 | @@ -XXX,XX +XXX,XX @@ | ||
109 | +/* | ||
110 | + * QEMU model of Versal's PMC IOU SLCR (system level control registers) | ||
111 | + * | ||
112 | + * Copyright (c) 2021 Xilinx Inc. | ||
113 | + * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com> | ||
114 | + * | ||
115 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
116 | + * of this software and associated documentation files (the "Software"), to deal | ||
117 | + * in the Software without restriction, including without limitation the rights | ||
118 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
119 | + * copies of the Software, and to permit persons to whom the Software is | ||
120 | + * furnished to do so, subject to the following conditions: | ||
121 | + * | ||
122 | + * The above copyright notice and this permission notice shall be included in | ||
123 | + * all copies or substantial portions of the Software. | ||
124 | + * | ||
125 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
126 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
127 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
128 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
129 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
130 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
131 | + * THE SOFTWARE. | ||
132 | + */ | ||
133 | + | ||
134 | +#include "qemu/osdep.h" | ||
135 | +#include "hw/sysbus.h" | ||
136 | +#include "hw/register.h" | ||
137 | +#include "hw/irq.h" | ||
138 | +#include "qemu/bitops.h" | ||
139 | +#include "qemu/log.h" | ||
140 | +#include "migration/vmstate.h" | ||
141 | +#include "hw/qdev-properties.h" | ||
142 | +#include "hw/misc/xlnx-versal-pmc-iou-slcr.h" | ||
143 | + | ||
144 | +#ifndef XILINX_VERSAL_PMC_IOU_SLCR_ERR_DEBUG | ||
145 | +#define XILINX_VERSAL_PMC_IOU_SLCR_ERR_DEBUG 0 | ||
146 | +#endif | ||
147 | + | ||
148 | +REG32(MIO_PIN_0, 0x0) | ||
149 | + FIELD(MIO_PIN_0, L3_SEL, 7, 3) | ||
150 | + FIELD(MIO_PIN_0, L2_SEL, 5, 2) | ||
151 | + FIELD(MIO_PIN_0, L1_SEL, 3, 2) | ||
152 | + FIELD(MIO_PIN_0, L0_SEL, 1, 2) | ||
153 | +REG32(MIO_PIN_1, 0x4) | ||
154 | + FIELD(MIO_PIN_1, L3_SEL, 7, 3) | ||
155 | + FIELD(MIO_PIN_1, L2_SEL, 5, 2) | ||
156 | + FIELD(MIO_PIN_1, L1_SEL, 3, 2) | ||
157 | + FIELD(MIO_PIN_1, L0_SEL, 1, 2) | ||
158 | +REG32(MIO_PIN_2, 0x8) | ||
159 | + FIELD(MIO_PIN_2, L3_SEL, 7, 3) | ||
160 | + FIELD(MIO_PIN_2, L2_SEL, 5, 2) | ||
161 | + FIELD(MIO_PIN_2, L1_SEL, 3, 2) | ||
162 | + FIELD(MIO_PIN_2, L0_SEL, 1, 2) | ||
163 | +REG32(MIO_PIN_3, 0xc) | ||
164 | + FIELD(MIO_PIN_3, L3_SEL, 7, 3) | ||
165 | + FIELD(MIO_PIN_3, L2_SEL, 5, 2) | ||
166 | + FIELD(MIO_PIN_3, L1_SEL, 3, 2) | ||
167 | + FIELD(MIO_PIN_3, L0_SEL, 1, 2) | ||
168 | +REG32(MIO_PIN_4, 0x10) | ||
169 | + FIELD(MIO_PIN_4, L3_SEL, 7, 3) | ||
170 | + FIELD(MIO_PIN_4, L2_SEL, 5, 2) | ||
171 | + FIELD(MIO_PIN_4, L1_SEL, 3, 2) | ||
172 | + FIELD(MIO_PIN_4, L0_SEL, 1, 2) | ||
173 | +REG32(MIO_PIN_5, 0x14) | ||
174 | + FIELD(MIO_PIN_5, L3_SEL, 7, 3) | ||
175 | + FIELD(MIO_PIN_5, L2_SEL, 5, 2) | ||
176 | + FIELD(MIO_PIN_5, L1_SEL, 3, 2) | ||
177 | + FIELD(MIO_PIN_5, L0_SEL, 1, 2) | ||
178 | +REG32(MIO_PIN_6, 0x18) | ||
179 | + FIELD(MIO_PIN_6, L3_SEL, 7, 3) | ||
180 | + FIELD(MIO_PIN_6, L2_SEL, 5, 2) | ||
181 | + FIELD(MIO_PIN_6, L1_SEL, 3, 2) | ||
182 | + FIELD(MIO_PIN_6, L0_SEL, 1, 2) | ||
183 | +REG32(MIO_PIN_7, 0x1c) | ||
184 | + FIELD(MIO_PIN_7, L3_SEL, 7, 3) | ||
185 | + FIELD(MIO_PIN_7, L2_SEL, 5, 2) | ||
186 | + FIELD(MIO_PIN_7, L1_SEL, 3, 2) | ||
187 | + FIELD(MIO_PIN_7, L0_SEL, 1, 2) | ||
188 | +REG32(MIO_PIN_8, 0x20) | ||
189 | + FIELD(MIO_PIN_8, L3_SEL, 7, 3) | ||
190 | + FIELD(MIO_PIN_8, L2_SEL, 5, 2) | ||
191 | + FIELD(MIO_PIN_8, L1_SEL, 3, 2) | ||
192 | + FIELD(MIO_PIN_8, L0_SEL, 1, 2) | ||
193 | +REG32(MIO_PIN_9, 0x24) | ||
194 | + FIELD(MIO_PIN_9, L3_SEL, 7, 3) | ||
195 | + FIELD(MIO_PIN_9, L2_SEL, 5, 2) | ||
196 | + FIELD(MIO_PIN_9, L1_SEL, 3, 2) | ||
197 | + FIELD(MIO_PIN_9, L0_SEL, 1, 2) | ||
198 | +REG32(MIO_PIN_10, 0x28) | ||
199 | + FIELD(MIO_PIN_10, L3_SEL, 7, 3) | ||
200 | + FIELD(MIO_PIN_10, L2_SEL, 5, 2) | ||
201 | + FIELD(MIO_PIN_10, L1_SEL, 3, 2) | ||
202 | + FIELD(MIO_PIN_10, L0_SEL, 1, 2) | ||
203 | +REG32(MIO_PIN_11, 0x2c) | ||
204 | + FIELD(MIO_PIN_11, L3_SEL, 7, 3) | ||
205 | + FIELD(MIO_PIN_11, L2_SEL, 5, 2) | ||
206 | + FIELD(MIO_PIN_11, L1_SEL, 3, 2) | ||
207 | + FIELD(MIO_PIN_11, L0_SEL, 1, 2) | ||
208 | +REG32(MIO_PIN_12, 0x30) | ||
209 | + FIELD(MIO_PIN_12, L3_SEL, 7, 3) | ||
210 | + FIELD(MIO_PIN_12, L2_SEL, 5, 2) | ||
211 | + FIELD(MIO_PIN_12, L1_SEL, 3, 2) | ||
212 | + FIELD(MIO_PIN_12, L0_SEL, 1, 2) | ||
213 | +REG32(MIO_PIN_13, 0x34) | ||
214 | + FIELD(MIO_PIN_13, L3_SEL, 7, 3) | ||
215 | + FIELD(MIO_PIN_13, L2_SEL, 5, 2) | ||
216 | + FIELD(MIO_PIN_13, L1_SEL, 3, 2) | ||
217 | + FIELD(MIO_PIN_13, L0_SEL, 1, 2) | ||
218 | +REG32(MIO_PIN_14, 0x38) | ||
219 | + FIELD(MIO_PIN_14, L3_SEL, 7, 3) | ||
220 | + FIELD(MIO_PIN_14, L2_SEL, 5, 2) | ||
221 | + FIELD(MIO_PIN_14, L1_SEL, 3, 2) | ||
222 | + FIELD(MIO_PIN_14, L0_SEL, 1, 2) | ||
223 | +REG32(MIO_PIN_15, 0x3c) | ||
224 | + FIELD(MIO_PIN_15, L3_SEL, 7, 3) | ||
225 | + FIELD(MIO_PIN_15, L2_SEL, 5, 2) | ||
226 | + FIELD(MIO_PIN_15, L1_SEL, 3, 2) | ||
227 | + FIELD(MIO_PIN_15, L0_SEL, 1, 2) | ||
228 | +REG32(MIO_PIN_16, 0x40) | ||
229 | + FIELD(MIO_PIN_16, L3_SEL, 7, 3) | ||
230 | + FIELD(MIO_PIN_16, L2_SEL, 5, 2) | ||
231 | + FIELD(MIO_PIN_16, L1_SEL, 3, 2) | ||
232 | + FIELD(MIO_PIN_16, L0_SEL, 1, 2) | ||
233 | +REG32(MIO_PIN_17, 0x44) | ||
234 | + FIELD(MIO_PIN_17, L3_SEL, 7, 3) | ||
235 | + FIELD(MIO_PIN_17, L2_SEL, 5, 2) | ||
236 | + FIELD(MIO_PIN_17, L1_SEL, 3, 2) | ||
237 | + FIELD(MIO_PIN_17, L0_SEL, 1, 2) | ||
238 | +REG32(MIO_PIN_18, 0x48) | ||
239 | + FIELD(MIO_PIN_18, L3_SEL, 7, 3) | ||
240 | + FIELD(MIO_PIN_18, L2_SEL, 5, 2) | ||
241 | + FIELD(MIO_PIN_18, L1_SEL, 3, 2) | ||
242 | + FIELD(MIO_PIN_18, L0_SEL, 1, 2) | ||
243 | +REG32(MIO_PIN_19, 0x4c) | ||
244 | + FIELD(MIO_PIN_19, L3_SEL, 7, 3) | ||
245 | + FIELD(MIO_PIN_19, L2_SEL, 5, 2) | ||
246 | + FIELD(MIO_PIN_19, L1_SEL, 3, 2) | ||
247 | + FIELD(MIO_PIN_19, L0_SEL, 1, 2) | ||
248 | +REG32(MIO_PIN_20, 0x50) | ||
249 | + FIELD(MIO_PIN_20, L3_SEL, 7, 3) | ||
250 | + FIELD(MIO_PIN_20, L2_SEL, 5, 2) | ||
251 | + FIELD(MIO_PIN_20, L1_SEL, 3, 2) | ||
252 | + FIELD(MIO_PIN_20, L0_SEL, 1, 2) | ||
253 | +REG32(MIO_PIN_21, 0x54) | ||
254 | + FIELD(MIO_PIN_21, L3_SEL, 7, 3) | ||
255 | + FIELD(MIO_PIN_21, L2_SEL, 5, 2) | ||
256 | + FIELD(MIO_PIN_21, L1_SEL, 3, 2) | ||
257 | + FIELD(MIO_PIN_21, L0_SEL, 1, 2) | ||
258 | +REG32(MIO_PIN_22, 0x58) | ||
259 | + FIELD(MIO_PIN_22, L3_SEL, 7, 3) | ||
260 | + FIELD(MIO_PIN_22, L2_SEL, 5, 2) | ||
261 | + FIELD(MIO_PIN_22, L1_SEL, 3, 2) | ||
262 | + FIELD(MIO_PIN_22, L0_SEL, 1, 2) | ||
263 | +REG32(MIO_PIN_23, 0x5c) | ||
264 | + FIELD(MIO_PIN_23, L3_SEL, 7, 3) | ||
265 | + FIELD(MIO_PIN_23, L2_SEL, 5, 2) | ||
266 | + FIELD(MIO_PIN_23, L1_SEL, 3, 2) | ||
267 | + FIELD(MIO_PIN_23, L0_SEL, 1, 2) | ||
268 | +REG32(MIO_PIN_24, 0x60) | ||
269 | + FIELD(MIO_PIN_24, L3_SEL, 7, 3) | ||
270 | + FIELD(MIO_PIN_24, L2_SEL, 5, 2) | ||
271 | + FIELD(MIO_PIN_24, L1_SEL, 3, 2) | ||
272 | + FIELD(MIO_PIN_24, L0_SEL, 1, 2) | ||
273 | +REG32(MIO_PIN_25, 0x64) | ||
274 | + FIELD(MIO_PIN_25, L3_SEL, 7, 3) | ||
275 | + FIELD(MIO_PIN_25, L2_SEL, 5, 2) | ||
276 | + FIELD(MIO_PIN_25, L1_SEL, 3, 2) | ||
277 | + FIELD(MIO_PIN_25, L0_SEL, 1, 2) | ||
278 | +REG32(MIO_PIN_26, 0x68) | ||
279 | + FIELD(MIO_PIN_26, L3_SEL, 7, 3) | ||
280 | + FIELD(MIO_PIN_26, L2_SEL, 5, 2) | ||
281 | + FIELD(MIO_PIN_26, L1_SEL, 3, 2) | ||
282 | + FIELD(MIO_PIN_26, L0_SEL, 1, 2) | ||
283 | +REG32(MIO_PIN_27, 0x6c) | ||
284 | + FIELD(MIO_PIN_27, L3_SEL, 7, 3) | ||
285 | + FIELD(MIO_PIN_27, L2_SEL, 5, 2) | ||
286 | + FIELD(MIO_PIN_27, L1_SEL, 3, 2) | ||
287 | + FIELD(MIO_PIN_27, L0_SEL, 1, 2) | ||
288 | +REG32(MIO_PIN_28, 0x70) | ||
289 | + FIELD(MIO_PIN_28, L3_SEL, 7, 3) | ||
290 | + FIELD(MIO_PIN_28, L2_SEL, 5, 2) | ||
291 | + FIELD(MIO_PIN_28, L1_SEL, 3, 2) | ||
292 | + FIELD(MIO_PIN_28, L0_SEL, 1, 2) | ||
293 | +REG32(MIO_PIN_29, 0x74) | ||
294 | + FIELD(MIO_PIN_29, L3_SEL, 7, 3) | ||
295 | + FIELD(MIO_PIN_29, L2_SEL, 5, 2) | ||
296 | + FIELD(MIO_PIN_29, L1_SEL, 3, 2) | ||
297 | + FIELD(MIO_PIN_29, L0_SEL, 1, 2) | ||
298 | +REG32(MIO_PIN_30, 0x78) | ||
299 | + FIELD(MIO_PIN_30, L3_SEL, 7, 3) | ||
300 | + FIELD(MIO_PIN_30, L2_SEL, 5, 2) | ||
301 | + FIELD(MIO_PIN_30, L1_SEL, 3, 2) | ||
302 | + FIELD(MIO_PIN_30, L0_SEL, 1, 2) | ||
303 | +REG32(MIO_PIN_31, 0x7c) | ||
304 | + FIELD(MIO_PIN_31, L3_SEL, 7, 3) | ||
305 | + FIELD(MIO_PIN_31, L2_SEL, 5, 2) | ||
306 | + FIELD(MIO_PIN_31, L1_SEL, 3, 2) | ||
307 | + FIELD(MIO_PIN_31, L0_SEL, 1, 2) | ||
308 | +REG32(MIO_PIN_32, 0x80) | ||
309 | + FIELD(MIO_PIN_32, L3_SEL, 7, 3) | ||
310 | + FIELD(MIO_PIN_32, L2_SEL, 5, 2) | ||
311 | + FIELD(MIO_PIN_32, L1_SEL, 3, 2) | ||
312 | + FIELD(MIO_PIN_32, L0_SEL, 1, 2) | ||
313 | +REG32(MIO_PIN_33, 0x84) | ||
314 | + FIELD(MIO_PIN_33, L3_SEL, 7, 3) | ||
315 | + FIELD(MIO_PIN_33, L2_SEL, 5, 2) | ||
316 | + FIELD(MIO_PIN_33, L1_SEL, 3, 2) | ||
317 | + FIELD(MIO_PIN_33, L0_SEL, 1, 2) | ||
318 | +REG32(MIO_PIN_34, 0x88) | ||
319 | + FIELD(MIO_PIN_34, L3_SEL, 7, 3) | ||
320 | + FIELD(MIO_PIN_34, L2_SEL, 5, 2) | ||
321 | + FIELD(MIO_PIN_34, L1_SEL, 3, 2) | ||
322 | + FIELD(MIO_PIN_34, L0_SEL, 1, 2) | ||
323 | +REG32(MIO_PIN_35, 0x8c) | ||
324 | + FIELD(MIO_PIN_35, L3_SEL, 7, 3) | ||
325 | + FIELD(MIO_PIN_35, L2_SEL, 5, 2) | ||
326 | + FIELD(MIO_PIN_35, L1_SEL, 3, 2) | ||
327 | + FIELD(MIO_PIN_35, L0_SEL, 1, 2) | ||
328 | +REG32(MIO_PIN_36, 0x90) | ||
329 | + FIELD(MIO_PIN_36, L3_SEL, 7, 3) | ||
330 | + FIELD(MIO_PIN_36, L2_SEL, 5, 2) | ||
331 | + FIELD(MIO_PIN_36, L1_SEL, 3, 2) | ||
332 | + FIELD(MIO_PIN_36, L0_SEL, 1, 2) | ||
333 | +REG32(MIO_PIN_37, 0x94) | ||
334 | + FIELD(MIO_PIN_37, L3_SEL, 7, 3) | ||
335 | + FIELD(MIO_PIN_37, L2_SEL, 5, 2) | ||
336 | + FIELD(MIO_PIN_37, L1_SEL, 3, 2) | ||
337 | + FIELD(MIO_PIN_37, L0_SEL, 1, 2) | ||
338 | +REG32(MIO_PIN_38, 0x98) | ||
339 | + FIELD(MIO_PIN_38, L3_SEL, 7, 3) | ||
340 | + FIELD(MIO_PIN_38, L2_SEL, 5, 2) | ||
341 | + FIELD(MIO_PIN_38, L1_SEL, 3, 2) | ||
342 | + FIELD(MIO_PIN_38, L0_SEL, 1, 2) | ||
343 | +REG32(MIO_PIN_39, 0x9c) | ||
344 | + FIELD(MIO_PIN_39, L3_SEL, 7, 3) | ||
345 | + FIELD(MIO_PIN_39, L2_SEL, 5, 2) | ||
346 | + FIELD(MIO_PIN_39, L1_SEL, 3, 2) | ||
347 | + FIELD(MIO_PIN_39, L0_SEL, 1, 2) | ||
348 | +REG32(MIO_PIN_40, 0xa0) | ||
349 | + FIELD(MIO_PIN_40, L3_SEL, 7, 3) | ||
350 | + FIELD(MIO_PIN_40, L2_SEL, 5, 2) | ||
351 | + FIELD(MIO_PIN_40, L1_SEL, 3, 2) | ||
352 | + FIELD(MIO_PIN_40, L0_SEL, 1, 2) | ||
353 | +REG32(MIO_PIN_41, 0xa4) | ||
354 | + FIELD(MIO_PIN_41, L3_SEL, 7, 3) | ||
355 | + FIELD(MIO_PIN_41, L2_SEL, 5, 2) | ||
356 | + FIELD(MIO_PIN_41, L1_SEL, 3, 2) | ||
357 | + FIELD(MIO_PIN_41, L0_SEL, 1, 2) | ||
358 | +REG32(MIO_PIN_42, 0xa8) | ||
359 | + FIELD(MIO_PIN_42, L3_SEL, 7, 3) | ||
360 | + FIELD(MIO_PIN_42, L2_SEL, 5, 2) | ||
361 | + FIELD(MIO_PIN_42, L1_SEL, 3, 2) | ||
362 | + FIELD(MIO_PIN_42, L0_SEL, 1, 2) | ||
363 | +REG32(MIO_PIN_43, 0xac) | ||
364 | + FIELD(MIO_PIN_43, L3_SEL, 7, 3) | ||
365 | + FIELD(MIO_PIN_43, L2_SEL, 5, 2) | ||
366 | + FIELD(MIO_PIN_43, L1_SEL, 3, 2) | ||
367 | + FIELD(MIO_PIN_43, L0_SEL, 1, 2) | ||
368 | +REG32(MIO_PIN_44, 0xb0) | ||
369 | + FIELD(MIO_PIN_44, L3_SEL, 7, 3) | ||
370 | + FIELD(MIO_PIN_44, L2_SEL, 5, 2) | ||
371 | + FIELD(MIO_PIN_44, L1_SEL, 3, 2) | ||
372 | + FIELD(MIO_PIN_44, L0_SEL, 1, 2) | ||
373 | +REG32(MIO_PIN_45, 0xb4) | ||
374 | + FIELD(MIO_PIN_45, L3_SEL, 7, 3) | ||
375 | + FIELD(MIO_PIN_45, L2_SEL, 5, 2) | ||
376 | + FIELD(MIO_PIN_45, L1_SEL, 3, 2) | ||
377 | + FIELD(MIO_PIN_45, L0_SEL, 1, 2) | ||
378 | +REG32(MIO_PIN_46, 0xb8) | ||
379 | + FIELD(MIO_PIN_46, L3_SEL, 7, 3) | ||
380 | + FIELD(MIO_PIN_46, L2_SEL, 5, 2) | ||
381 | + FIELD(MIO_PIN_46, L1_SEL, 3, 2) | ||
382 | + FIELD(MIO_PIN_46, L0_SEL, 1, 2) | ||
383 | +REG32(MIO_PIN_47, 0xbc) | ||
384 | + FIELD(MIO_PIN_47, L3_SEL, 7, 3) | ||
385 | + FIELD(MIO_PIN_47, L2_SEL, 5, 2) | ||
386 | + FIELD(MIO_PIN_47, L1_SEL, 3, 2) | ||
387 | + FIELD(MIO_PIN_47, L0_SEL, 1, 2) | ||
388 | +REG32(MIO_PIN_48, 0xc0) | ||
389 | + FIELD(MIO_PIN_48, L3_SEL, 7, 3) | ||
390 | + FIELD(MIO_PIN_48, L2_SEL, 5, 2) | ||
391 | + FIELD(MIO_PIN_48, L1_SEL, 3, 2) | ||
392 | + FIELD(MIO_PIN_48, L0_SEL, 1, 2) | ||
393 | +REG32(MIO_PIN_49, 0xc4) | ||
394 | + FIELD(MIO_PIN_49, L3_SEL, 7, 3) | ||
395 | + FIELD(MIO_PIN_49, L2_SEL, 5, 2) | ||
396 | + FIELD(MIO_PIN_49, L1_SEL, 3, 2) | ||
397 | + FIELD(MIO_PIN_49, L0_SEL, 1, 2) | ||
398 | +REG32(MIO_PIN_50, 0xc8) | ||
399 | + FIELD(MIO_PIN_50, L3_SEL, 7, 3) | ||
400 | + FIELD(MIO_PIN_50, L2_SEL, 5, 2) | ||
401 | + FIELD(MIO_PIN_50, L1_SEL, 3, 2) | ||
402 | + FIELD(MIO_PIN_50, L0_SEL, 1, 2) | ||
403 | +REG32(MIO_PIN_51, 0xcc) | ||
404 | + FIELD(MIO_PIN_51, L3_SEL, 7, 3) | ||
405 | + FIELD(MIO_PIN_51, L2_SEL, 5, 2) | ||
406 | + FIELD(MIO_PIN_51, L1_SEL, 3, 2) | ||
407 | + FIELD(MIO_PIN_51, L0_SEL, 1, 2) | ||
408 | +REG32(BNK0_EN_RX, 0x100) | ||
409 | + FIELD(BNK0_EN_RX, BNK0_EN_RX, 0, 26) | ||
410 | +REG32(BNK0_SEL_RX0, 0x104) | ||
411 | +REG32(BNK0_SEL_RX1, 0x108) | ||
412 | + FIELD(BNK0_SEL_RX1, BNK0_SEL_RX, 0, 20) | ||
413 | +REG32(BNK0_EN_RX_SCHMITT_HYST, 0x10c) | ||
414 | + FIELD(BNK0_EN_RX_SCHMITT_HYST, BNK0_EN_RX_SCHMITT_HYST, 0, 26) | ||
415 | +REG32(BNK0_EN_WK_PD, 0x110) | ||
416 | + FIELD(BNK0_EN_WK_PD, BNK0_EN_WK_PD, 0, 26) | ||
417 | +REG32(BNK0_EN_WK_PU, 0x114) | ||
418 | + FIELD(BNK0_EN_WK_PU, BNK0_EN_WK_PU, 0, 26) | ||
419 | +REG32(BNK0_SEL_DRV0, 0x118) | ||
420 | +REG32(BNK0_SEL_DRV1, 0x11c) | ||
421 | + FIELD(BNK0_SEL_DRV1, BNK0_SEL_DRV, 0, 20) | ||
422 | +REG32(BNK0_SEL_SLEW, 0x120) | ||
423 | + FIELD(BNK0_SEL_SLEW, BNK0_SEL_SLEW, 0, 26) | ||
424 | +REG32(BNK0_EN_DFT_OPT_INV, 0x124) | ||
425 | + FIELD(BNK0_EN_DFT_OPT_INV, BNK0_EN_DFT_OPT_INV, 0, 26) | ||
426 | +REG32(BNK0_EN_PAD2PAD_LOOPBACK, 0x128) | ||
427 | + FIELD(BNK0_EN_PAD2PAD_LOOPBACK, BNK0_EN_PAD2PAD_LOOPBACK, 0, 13) | ||
428 | +REG32(BNK0_RX_SPARE0, 0x12c) | ||
429 | +REG32(BNK0_RX_SPARE1, 0x130) | ||
430 | + FIELD(BNK0_RX_SPARE1, BNK0_RX_SPARE, 0, 20) | ||
431 | +REG32(BNK0_TX_SPARE0, 0x134) | ||
432 | +REG32(BNK0_TX_SPARE1, 0x138) | ||
433 | + FIELD(BNK0_TX_SPARE1, BNK0_TX_SPARE, 0, 20) | ||
434 | +REG32(BNK0_SEL_EN1P8, 0x13c) | ||
435 | + FIELD(BNK0_SEL_EN1P8, BNK0_SEL_EN1P8, 0, 1) | ||
436 | +REG32(BNK0_EN_B_POR_DETECT, 0x140) | ||
437 | + FIELD(BNK0_EN_B_POR_DETECT, BNK0_EN_B_POR_DETECT, 0, 1) | ||
438 | +REG32(BNK0_LPF_BYP_POR_DETECT, 0x144) | ||
439 | + FIELD(BNK0_LPF_BYP_POR_DETECT, BNK0_LPF_BYP_POR_DETECT, 0, 1) | ||
440 | +REG32(BNK0_EN_LATCH, 0x148) | ||
441 | + FIELD(BNK0_EN_LATCH, BNK0_EN_LATCH, 0, 1) | ||
442 | +REG32(BNK0_VBG_LPF_BYP_B, 0x14c) | ||
443 | + FIELD(BNK0_VBG_LPF_BYP_B, BNK0_VBG_LPF_BYP_B, 0, 1) | ||
444 | +REG32(BNK0_EN_AMP_B, 0x150) | ||
445 | + FIELD(BNK0_EN_AMP_B, BNK0_EN_AMP_B, 0, 2) | ||
446 | +REG32(BNK0_SPARE_BIAS, 0x154) | ||
447 | + FIELD(BNK0_SPARE_BIAS, BNK0_SPARE_BIAS, 0, 4) | ||
448 | +REG32(BNK0_DRIVER_BIAS, 0x158) | ||
449 | + FIELD(BNK0_DRIVER_BIAS, BNK0_DRIVER_BIAS, 0, 15) | ||
450 | +REG32(BNK0_VMODE, 0x15c) | ||
451 | + FIELD(BNK0_VMODE, BNK0_VMODE, 0, 1) | ||
452 | +REG32(BNK0_SEL_AUX_IO_RX, 0x160) | ||
453 | + FIELD(BNK0_SEL_AUX_IO_RX, BNK0_SEL_AUX_IO_RX, 0, 26) | ||
454 | +REG32(BNK0_EN_TX_HS_MODE, 0x164) | ||
455 | + FIELD(BNK0_EN_TX_HS_MODE, BNK0_EN_TX_HS_MODE, 0, 26) | ||
456 | +REG32(MIO_MST_TRI0, 0x200) | ||
457 | + FIELD(MIO_MST_TRI0, PIN_25_TRI, 25, 1) | ||
458 | + FIELD(MIO_MST_TRI0, PIN_24_TRI, 24, 1) | ||
459 | + FIELD(MIO_MST_TRI0, PIN_23_TRI, 23, 1) | ||
460 | + FIELD(MIO_MST_TRI0, PIN_22_TRI, 22, 1) | ||
461 | + FIELD(MIO_MST_TRI0, PIN_21_TRI, 21, 1) | ||
462 | + FIELD(MIO_MST_TRI0, PIN_20_TRI, 20, 1) | ||
463 | + FIELD(MIO_MST_TRI0, PIN_19_TRI, 19, 1) | ||
464 | + FIELD(MIO_MST_TRI0, PIN_18_TRI, 18, 1) | ||
465 | + FIELD(MIO_MST_TRI0, PIN_17_TRI, 17, 1) | ||
466 | + FIELD(MIO_MST_TRI0, PIN_16_TRI, 16, 1) | ||
467 | + FIELD(MIO_MST_TRI0, PIN_15_TRI, 15, 1) | ||
468 | + FIELD(MIO_MST_TRI0, PIN_14_TRI, 14, 1) | ||
469 | + FIELD(MIO_MST_TRI0, PIN_13_TRI, 13, 1) | ||
470 | + FIELD(MIO_MST_TRI0, PIN_12_TRI, 12, 1) | ||
471 | + FIELD(MIO_MST_TRI0, PIN_11_TRI, 11, 1) | ||
472 | + FIELD(MIO_MST_TRI0, PIN_10_TRI, 10, 1) | ||
473 | + FIELD(MIO_MST_TRI0, PIN_09_TRI, 9, 1) | ||
474 | + FIELD(MIO_MST_TRI0, PIN_08_TRI, 8, 1) | ||
475 | + FIELD(MIO_MST_TRI0, PIN_07_TRI, 7, 1) | ||
476 | + FIELD(MIO_MST_TRI0, PIN_06_TRI, 6, 1) | ||
477 | + FIELD(MIO_MST_TRI0, PIN_05_TRI, 5, 1) | ||
478 | + FIELD(MIO_MST_TRI0, PIN_04_TRI, 4, 1) | ||
479 | + FIELD(MIO_MST_TRI0, PIN_03_TRI, 3, 1) | ||
480 | + FIELD(MIO_MST_TRI0, PIN_02_TRI, 2, 1) | ||
481 | + FIELD(MIO_MST_TRI0, PIN_01_TRI, 1, 1) | ||
482 | + FIELD(MIO_MST_TRI0, PIN_00_TRI, 0, 1) | ||
483 | +REG32(MIO_MST_TRI1, 0x204) | ||
484 | + FIELD(MIO_MST_TRI1, PIN_51_TRI, 25, 1) | ||
485 | + FIELD(MIO_MST_TRI1, PIN_50_TRI, 24, 1) | ||
486 | + FIELD(MIO_MST_TRI1, PIN_49_TRI, 23, 1) | ||
487 | + FIELD(MIO_MST_TRI1, PIN_48_TRI, 22, 1) | ||
488 | + FIELD(MIO_MST_TRI1, PIN_47_TRI, 21, 1) | ||
489 | + FIELD(MIO_MST_TRI1, PIN_46_TRI, 20, 1) | ||
490 | + FIELD(MIO_MST_TRI1, PIN_45_TRI, 19, 1) | ||
491 | + FIELD(MIO_MST_TRI1, PIN_44_TRI, 18, 1) | ||
492 | + FIELD(MIO_MST_TRI1, PIN_43_TRI, 17, 1) | ||
493 | + FIELD(MIO_MST_TRI1, PIN_42_TRI, 16, 1) | ||
494 | + FIELD(MIO_MST_TRI1, PIN_41_TRI, 15, 1) | ||
495 | + FIELD(MIO_MST_TRI1, PIN_40_TRI, 14, 1) | ||
496 | + FIELD(MIO_MST_TRI1, PIN_39_TRI, 13, 1) | ||
497 | + FIELD(MIO_MST_TRI1, PIN_38_TRI, 12, 1) | ||
498 | + FIELD(MIO_MST_TRI1, PIN_37_TRI, 11, 1) | ||
499 | + FIELD(MIO_MST_TRI1, PIN_36_TRI, 10, 1) | ||
500 | + FIELD(MIO_MST_TRI1, PIN_35_TRI, 9, 1) | ||
501 | + FIELD(MIO_MST_TRI1, PIN_34_TRI, 8, 1) | ||
502 | + FIELD(MIO_MST_TRI1, PIN_33_TRI, 7, 1) | ||
503 | + FIELD(MIO_MST_TRI1, PIN_32_TRI, 6, 1) | ||
504 | + FIELD(MIO_MST_TRI1, PIN_31_TRI, 5, 1) | ||
505 | + FIELD(MIO_MST_TRI1, PIN_30_TRI, 4, 1) | ||
506 | + FIELD(MIO_MST_TRI1, PIN_29_TRI, 3, 1) | ||
507 | + FIELD(MIO_MST_TRI1, PIN_28_TRI, 2, 1) | ||
508 | + FIELD(MIO_MST_TRI1, PIN_27_TRI, 1, 1) | ||
509 | + FIELD(MIO_MST_TRI1, PIN_26_TRI, 0, 1) | ||
510 | +REG32(BNK1_EN_RX, 0x300) | ||
511 | + FIELD(BNK1_EN_RX, BNK1_EN_RX, 0, 26) | ||
512 | +REG32(BNK1_SEL_RX0, 0x304) | ||
513 | +REG32(BNK1_SEL_RX1, 0x308) | ||
514 | + FIELD(BNK1_SEL_RX1, BNK1_SEL_RX, 0, 20) | ||
515 | +REG32(BNK1_EN_RX_SCHMITT_HYST, 0x30c) | ||
516 | + FIELD(BNK1_EN_RX_SCHMITT_HYST, BNK1_EN_RX_SCHMITT_HYST, 0, 26) | ||
517 | +REG32(BNK1_EN_WK_PD, 0x310) | ||
518 | + FIELD(BNK1_EN_WK_PD, BNK1_EN_WK_PD, 0, 26) | ||
519 | +REG32(BNK1_EN_WK_PU, 0x314) | ||
520 | + FIELD(BNK1_EN_WK_PU, BNK1_EN_WK_PU, 0, 26) | ||
521 | +REG32(BNK1_SEL_DRV0, 0x318) | ||
522 | +REG32(BNK1_SEL_DRV1, 0x31c) | ||
523 | + FIELD(BNK1_SEL_DRV1, BNK1_SEL_DRV, 0, 20) | ||
524 | +REG32(BNK1_SEL_SLEW, 0x320) | ||
525 | + FIELD(BNK1_SEL_SLEW, BNK1_SEL_SLEW, 0, 26) | ||
526 | +REG32(BNK1_EN_DFT_OPT_INV, 0x324) | ||
527 | + FIELD(BNK1_EN_DFT_OPT_INV, BNK1_EN_DFT_OPT_INV, 0, 26) | ||
528 | +REG32(BNK1_EN_PAD2PAD_LOOPBACK, 0x328) | ||
529 | + FIELD(BNK1_EN_PAD2PAD_LOOPBACK, BNK1_EN_PAD2PAD_LOOPBACK, 0, 13) | ||
530 | +REG32(BNK1_RX_SPARE0, 0x32c) | ||
531 | +REG32(BNK1_RX_SPARE1, 0x330) | ||
532 | + FIELD(BNK1_RX_SPARE1, BNK1_RX_SPARE, 0, 20) | ||
533 | +REG32(BNK1_TX_SPARE0, 0x334) | ||
534 | +REG32(BNK1_TX_SPARE1, 0x338) | ||
535 | + FIELD(BNK1_TX_SPARE1, BNK1_TX_SPARE, 0, 20) | ||
536 | +REG32(BNK1_SEL_EN1P8, 0x33c) | ||
537 | + FIELD(BNK1_SEL_EN1P8, BNK1_SEL_EN1P8, 0, 1) | ||
538 | +REG32(BNK1_EN_B_POR_DETECT, 0x340) | ||
539 | + FIELD(BNK1_EN_B_POR_DETECT, BNK1_EN_B_POR_DETECT, 0, 1) | ||
540 | +REG32(BNK1_LPF_BYP_POR_DETECT, 0x344) | ||
541 | + FIELD(BNK1_LPF_BYP_POR_DETECT, BNK1_LPF_BYP_POR_DETECT, 0, 1) | ||
542 | +REG32(BNK1_EN_LATCH, 0x348) | ||
543 | + FIELD(BNK1_EN_LATCH, BNK1_EN_LATCH, 0, 1) | ||
544 | +REG32(BNK1_VBG_LPF_BYP_B, 0x34c) | ||
545 | + FIELD(BNK1_VBG_LPF_BYP_B, BNK1_VBG_LPF_BYP_B, 0, 1) | ||
546 | +REG32(BNK1_EN_AMP_B, 0x350) | ||
547 | + FIELD(BNK1_EN_AMP_B, BNK1_EN_AMP_B, 0, 2) | ||
548 | +REG32(BNK1_SPARE_BIAS, 0x354) | ||
549 | + FIELD(BNK1_SPARE_BIAS, BNK1_SPARE_BIAS, 0, 4) | ||
550 | +REG32(BNK1_DRIVER_BIAS, 0x358) | ||
551 | + FIELD(BNK1_DRIVER_BIAS, BNK1_DRIVER_BIAS, 0, 15) | ||
552 | +REG32(BNK1_VMODE, 0x35c) | ||
553 | + FIELD(BNK1_VMODE, BNK1_VMODE, 0, 1) | ||
554 | +REG32(BNK1_SEL_AUX_IO_RX, 0x360) | ||
555 | + FIELD(BNK1_SEL_AUX_IO_RX, BNK1_SEL_AUX_IO_RX, 0, 26) | ||
556 | +REG32(BNK1_EN_TX_HS_MODE, 0x364) | ||
557 | + FIELD(BNK1_EN_TX_HS_MODE, BNK1_EN_TX_HS_MODE, 0, 26) | ||
558 | +REG32(SD0_CLK_CTRL, 0x400) | ||
559 | + FIELD(SD0_CLK_CTRL, SDIO0_FBCLK_SEL, 2, 1) | ||
560 | + FIELD(SD0_CLK_CTRL, SDIO0_RX_SRC_SEL, 0, 2) | ||
561 | +REG32(SD0_CTRL_REG, 0x404) | ||
562 | + FIELD(SD0_CTRL_REG, SD0_EMMC_SEL, 0, 1) | ||
563 | +REG32(SD0_CONFIG_REG1, 0x410) | ||
564 | + FIELD(SD0_CONFIG_REG1, SD0_BASECLK, 7, 8) | ||
565 | + FIELD(SD0_CONFIG_REG1, SD0_TUNIGCOUNT, 1, 6) | ||
566 | + FIELD(SD0_CONFIG_REG1, SD0_ASYNCWKPENA, 0, 1) | ||
567 | +REG32(SD0_CONFIG_REG2, 0x414) | ||
568 | + FIELD(SD0_CONFIG_REG2, SD0_SLOTTYPE, 12, 2) | ||
569 | + FIELD(SD0_CONFIG_REG2, SD0_ASYCINTR, 11, 1) | ||
570 | + FIELD(SD0_CONFIG_REG2, SD0_64BIT, 10, 1) | ||
571 | + FIELD(SD0_CONFIG_REG2, SD0_1P8V, 9, 1) | ||
572 | + FIELD(SD0_CONFIG_REG2, SD0_3P0V, 8, 1) | ||
573 | + FIELD(SD0_CONFIG_REG2, SD0_3P3V, 7, 1) | ||
574 | + FIELD(SD0_CONFIG_REG2, SD0_SUSPRES, 6, 1) | ||
575 | + FIELD(SD0_CONFIG_REG2, SD0_SDMA, 5, 1) | ||
576 | + FIELD(SD0_CONFIG_REG2, SD0_HIGHSPEED, 4, 1) | ||
577 | + FIELD(SD0_CONFIG_REG2, SD0_ADMA2, 3, 1) | ||
578 | + FIELD(SD0_CONFIG_REG2, SD0_8BIT, 2, 1) | ||
579 | + FIELD(SD0_CONFIG_REG2, SD0_MAXBLK, 0, 2) | ||
580 | +REG32(SD0_CONFIG_REG3, 0x418) | ||
581 | + FIELD(SD0_CONFIG_REG3, SD0_TUNINGSDR50, 10, 1) | ||
582 | + FIELD(SD0_CONFIG_REG3, SD0_RETUNETMR, 6, 4) | ||
583 | + FIELD(SD0_CONFIG_REG3, SD0_DDRIVER, 5, 1) | ||
584 | + FIELD(SD0_CONFIG_REG3, SD0_CDRIVER, 4, 1) | ||
585 | + FIELD(SD0_CONFIG_REG3, SD0_ADRIVER, 3, 1) | ||
586 | + FIELD(SD0_CONFIG_REG3, SD0_DDR50, 2, 1) | ||
587 | + FIELD(SD0_CONFIG_REG3, SD0_SDR104, 1, 1) | ||
588 | + FIELD(SD0_CONFIG_REG3, SD0_SDR50, 0, 1) | ||
589 | +REG32(SD0_INITPRESET, 0x41c) | ||
590 | + FIELD(SD0_INITPRESET, SD0_INITPRESET, 0, 13) | ||
591 | +REG32(SD0_DSPPRESET, 0x420) | ||
592 | + FIELD(SD0_DSPPRESET, SD0_DSPPRESET, 0, 13) | ||
593 | +REG32(SD0_HSPDPRESET, 0x424) | ||
594 | + FIELD(SD0_HSPDPRESET, SD0_HSPDPRESET, 0, 13) | ||
595 | +REG32(SD0_SDR12PRESET, 0x428) | ||
596 | + FIELD(SD0_SDR12PRESET, SD0_SDR12PRESET, 0, 13) | ||
597 | +REG32(SD0_SDR25PRESET, 0x42c) | ||
598 | + FIELD(SD0_SDR25PRESET, SD0_SDR25PRESET, 0, 13) | ||
599 | +REG32(SD0_SDR50PRSET, 0x430) | ||
600 | + FIELD(SD0_SDR50PRSET, SD0_SDR50PRESET, 0, 13) | ||
601 | +REG32(SD0_SDR104PRST, 0x434) | ||
602 | + FIELD(SD0_SDR104PRST, SD0_SDR104PRESET, 0, 13) | ||
603 | +REG32(SD0_DDR50PRESET, 0x438) | ||
604 | + FIELD(SD0_DDR50PRESET, SD0_DDR50PRESET, 0, 13) | ||
605 | +REG32(SD0_MAXCUR1P8, 0x43c) | ||
606 | + FIELD(SD0_MAXCUR1P8, SD0_MAXCUR1P8, 0, 8) | ||
607 | +REG32(SD0_MAXCUR3P0, 0x440) | ||
608 | + FIELD(SD0_MAXCUR3P0, SD0_MAXCUR3P0, 0, 8) | ||
609 | +REG32(SD0_MAXCUR3P3, 0x444) | ||
610 | + FIELD(SD0_MAXCUR3P3, SD0_MAXCUR3P3, 0, 8) | ||
611 | +REG32(SD0_DLL_CTRL, 0x448) | ||
612 | + FIELD(SD0_DLL_CTRL, SD0_CLKSTABLE_CFG, 9, 1) | ||
613 | + FIELD(SD0_DLL_CTRL, SD0_DLL_CFG, 5, 4) | ||
614 | + FIELD(SD0_DLL_CTRL, SD0_DLL_PSDONE, 4, 1) | ||
615 | + FIELD(SD0_DLL_CTRL, SD0_DLL_OVF, 3, 1) | ||
616 | + FIELD(SD0_DLL_CTRL, SD0_DLL_RST, 2, 1) | ||
617 | + FIELD(SD0_DLL_CTRL, SD0_DLL_TESTMODE, 1, 1) | ||
618 | + FIELD(SD0_DLL_CTRL, SD0_DLL_LOCK, 0, 1) | ||
619 | +REG32(SD0_CDN_CTRL, 0x44c) | ||
620 | + FIELD(SD0_CDN_CTRL, SD0_CDN_CTRL, 0, 1) | ||
621 | +REG32(SD0_DLL_TEST, 0x450) | ||
622 | + FIELD(SD0_DLL_TEST, DLL_DIV, 16, 8) | ||
623 | + FIELD(SD0_DLL_TEST, DLL_TX_SEL, 9, 7) | ||
624 | + FIELD(SD0_DLL_TEST, DLL_RX_SEL, 0, 9) | ||
625 | +REG32(SD0_RX_TUNING_SEL, 0x454) | ||
626 | + FIELD(SD0_RX_TUNING_SEL, SD0_RX_SEL, 0, 9) | ||
627 | +REG32(SD0_DLL_DIV_MAP0, 0x458) | ||
628 | + FIELD(SD0_DLL_DIV_MAP0, DIV_3, 24, 8) | ||
629 | + FIELD(SD0_DLL_DIV_MAP0, DIV_2, 16, 8) | ||
630 | + FIELD(SD0_DLL_DIV_MAP0, DIV_1, 8, 8) | ||
631 | + FIELD(SD0_DLL_DIV_MAP0, DIV_0, 0, 8) | ||
632 | +REG32(SD0_DLL_DIV_MAP1, 0x45c) | ||
633 | + FIELD(SD0_DLL_DIV_MAP1, DIV_7, 24, 8) | ||
634 | + FIELD(SD0_DLL_DIV_MAP1, DIV_6, 16, 8) | ||
635 | + FIELD(SD0_DLL_DIV_MAP1, DIV_5, 8, 8) | ||
636 | + FIELD(SD0_DLL_DIV_MAP1, DIV_4, 0, 8) | ||
637 | +REG32(SD0_IOU_COHERENT_CTRL, 0x460) | ||
638 | + FIELD(SD0_IOU_COHERENT_CTRL, SD0_AXI_COH, 0, 4) | ||
639 | +REG32(SD0_IOU_INTERCONNECT_ROUTE, 0x464) | ||
640 | + FIELD(SD0_IOU_INTERCONNECT_ROUTE, SD0, 0, 1) | ||
641 | +REG32(SD0_IOU_RAM, 0x468) | ||
642 | + FIELD(SD0_IOU_RAM, EMASA0, 6, 1) | ||
643 | + FIELD(SD0_IOU_RAM, EMAB0, 3, 3) | ||
644 | + FIELD(SD0_IOU_RAM, EMAA0, 0, 3) | ||
645 | +REG32(SD0_IOU_INTERCONNECT_QOS, 0x46c) | ||
646 | + FIELD(SD0_IOU_INTERCONNECT_QOS, SD0_QOS, 0, 4) | ||
647 | +REG32(SD1_CLK_CTRL, 0x480) | ||
648 | + FIELD(SD1_CLK_CTRL, SDIO1_FBCLK_SEL, 1, 1) | ||
649 | + FIELD(SD1_CLK_CTRL, SDIO1_RX_SRC_SEL, 0, 1) | ||
650 | +REG32(SD1_CTRL_REG, 0x484) | ||
651 | + FIELD(SD1_CTRL_REG, SD1_EMMC_SEL, 0, 1) | ||
652 | +REG32(SD1_CONFIG_REG1, 0x490) | ||
653 | + FIELD(SD1_CONFIG_REG1, SD1_BASECLK, 7, 8) | ||
654 | + FIELD(SD1_CONFIG_REG1, SD1_TUNIGCOUNT, 1, 6) | ||
655 | + FIELD(SD1_CONFIG_REG1, SD1_ASYNCWKPENA, 0, 1) | ||
656 | +REG32(SD1_CONFIG_REG2, 0x494) | ||
657 | + FIELD(SD1_CONFIG_REG2, SD1_SLOTTYPE, 12, 2) | ||
658 | + FIELD(SD1_CONFIG_REG2, SD1_ASYCINTR, 11, 1) | ||
659 | + FIELD(SD1_CONFIG_REG2, SD1_64BIT, 10, 1) | ||
660 | + FIELD(SD1_CONFIG_REG2, SD1_1P8V, 9, 1) | ||
661 | + FIELD(SD1_CONFIG_REG2, SD1_3P0V, 8, 1) | ||
662 | + FIELD(SD1_CONFIG_REG2, SD1_3P3V, 7, 1) | ||
663 | + FIELD(SD1_CONFIG_REG2, SD1_SUSPRES, 6, 1) | ||
664 | + FIELD(SD1_CONFIG_REG2, SD1_SDMA, 5, 1) | ||
665 | + FIELD(SD1_CONFIG_REG2, SD1_HIGHSPEED, 4, 1) | ||
666 | + FIELD(SD1_CONFIG_REG2, SD1_ADMA2, 3, 1) | ||
667 | + FIELD(SD1_CONFIG_REG2, SD1_8BIT, 2, 1) | ||
668 | + FIELD(SD1_CONFIG_REG2, SD1_MAXBLK, 0, 2) | ||
669 | +REG32(SD1_CONFIG_REG3, 0x498) | ||
670 | + FIELD(SD1_CONFIG_REG3, SD1_TUNINGSDR50, 10, 1) | ||
671 | + FIELD(SD1_CONFIG_REG3, SD1_RETUNETMR, 6, 4) | ||
672 | + FIELD(SD1_CONFIG_REG3, SD1_DDRIVER, 5, 1) | ||
673 | + FIELD(SD1_CONFIG_REG3, SD1_CDRIVER, 4, 1) | ||
674 | + FIELD(SD1_CONFIG_REG3, SD1_ADRIVER, 3, 1) | ||
675 | + FIELD(SD1_CONFIG_REG3, SD1_DDR50, 2, 1) | ||
676 | + FIELD(SD1_CONFIG_REG3, SD1_SDR104, 1, 1) | ||
677 | + FIELD(SD1_CONFIG_REG3, SD1_SDR50, 0, 1) | ||
678 | +REG32(SD1_INITPRESET, 0x49c) | ||
679 | + FIELD(SD1_INITPRESET, SD1_INITPRESET, 0, 13) | ||
680 | +REG32(SD1_DSPPRESET, 0x4a0) | ||
681 | + FIELD(SD1_DSPPRESET, SD1_DSPPRESET, 0, 13) | ||
682 | +REG32(SD1_HSPDPRESET, 0x4a4) | ||
683 | + FIELD(SD1_HSPDPRESET, SD1_HSPDPRESET, 0, 13) | ||
684 | +REG32(SD1_SDR12PRESET, 0x4a8) | ||
685 | + FIELD(SD1_SDR12PRESET, SD1_SDR12PRESET, 0, 13) | ||
686 | +REG32(SD1_SDR25PRESET, 0x4ac) | ||
687 | + FIELD(SD1_SDR25PRESET, SD1_SDR25PRESET, 0, 13) | ||
688 | +REG32(SD1_SDR50PRSET, 0x4b0) | ||
689 | + FIELD(SD1_SDR50PRSET, SD1_SDR50PRESET, 0, 13) | ||
690 | +REG32(SD1_SDR104PRST, 0x4b4) | ||
691 | + FIELD(SD1_SDR104PRST, SD1_SDR104PRESET, 0, 13) | ||
692 | +REG32(SD1_DDR50PRESET, 0x4b8) | ||
693 | + FIELD(SD1_DDR50PRESET, SD1_DDR50PRESET, 0, 13) | ||
694 | +REG32(SD1_MAXCUR1P8, 0x4bc) | ||
695 | + FIELD(SD1_MAXCUR1P8, SD1_MAXCUR1P8, 0, 8) | ||
696 | +REG32(SD1_MAXCUR3P0, 0x4c0) | ||
697 | + FIELD(SD1_MAXCUR3P0, SD1_MAXCUR3P0, 0, 8) | ||
698 | +REG32(SD1_MAXCUR3P3, 0x4c4) | ||
699 | + FIELD(SD1_MAXCUR3P3, SD1_MAXCUR3P3, 0, 8) | ||
700 | +REG32(SD1_DLL_CTRL, 0x4c8) | ||
701 | + FIELD(SD1_DLL_CTRL, SD1_CLKSTABLE_CFG, 9, 1) | ||
702 | + FIELD(SD1_DLL_CTRL, SD1_DLL_CFG, 5, 4) | ||
703 | + FIELD(SD1_DLL_CTRL, SD1_DLL_PSDONE, 4, 1) | ||
704 | + FIELD(SD1_DLL_CTRL, SD1_DLL_OVF, 3, 1) | ||
705 | + FIELD(SD1_DLL_CTRL, SD1_DLL_RST, 2, 1) | ||
706 | + FIELD(SD1_DLL_CTRL, SD1_DLL_TESTMODE, 1, 1) | ||
707 | + FIELD(SD1_DLL_CTRL, SD1_DLL_LOCK, 0, 1) | ||
708 | +REG32(SD1_CDN_CTRL, 0x4cc) | ||
709 | + FIELD(SD1_CDN_CTRL, SD1_CDN_CTRL, 0, 1) | ||
710 | +REG32(SD1_DLL_TEST, 0x4d0) | ||
711 | + FIELD(SD1_DLL_TEST, DLL_DIV, 16, 8) | ||
712 | + FIELD(SD1_DLL_TEST, DLL_TX_SEL, 9, 7) | ||
713 | + FIELD(SD1_DLL_TEST, DLL_RX_SEL, 0, 9) | ||
714 | +REG32(SD1_RX_TUNING_SEL, 0x4d4) | ||
715 | + FIELD(SD1_RX_TUNING_SEL, SD1_RX_SEL, 0, 9) | ||
716 | +REG32(SD1_DLL_DIV_MAP0, 0x4d8) | ||
717 | + FIELD(SD1_DLL_DIV_MAP0, DIV_3, 24, 8) | ||
718 | + FIELD(SD1_DLL_DIV_MAP0, DIV_2, 16, 8) | ||
719 | + FIELD(SD1_DLL_DIV_MAP0, DIV_1, 8, 8) | ||
720 | + FIELD(SD1_DLL_DIV_MAP0, DIV_0, 0, 8) | ||
721 | +REG32(SD1_DLL_DIV_MAP1, 0x4dc) | ||
722 | + FIELD(SD1_DLL_DIV_MAP1, DIV_7, 24, 8) | ||
723 | + FIELD(SD1_DLL_DIV_MAP1, DIV_6, 16, 8) | ||
724 | + FIELD(SD1_DLL_DIV_MAP1, DIV_5, 8, 8) | ||
725 | + FIELD(SD1_DLL_DIV_MAP1, DIV_4, 0, 8) | ||
726 | +REG32(SD1_IOU_COHERENT_CTRL, 0x4e0) | ||
727 | + FIELD(SD1_IOU_COHERENT_CTRL, SD1_AXI_COH, 0, 4) | ||
728 | +REG32(SD1_IOU_INTERCONNECT_ROUTE, 0x4e4) | ||
729 | + FIELD(SD1_IOU_INTERCONNECT_ROUTE, SD1, 0, 1) | ||
730 | +REG32(SD1_IOU_RAM, 0x4e8) | ||
731 | + FIELD(SD1_IOU_RAM, EMASA0, 6, 1) | ||
732 | + FIELD(SD1_IOU_RAM, EMAB0, 3, 3) | ||
733 | + FIELD(SD1_IOU_RAM, EMAA0, 0, 3) | ||
734 | +REG32(SD1_IOU_INTERCONNECT_QOS, 0x4ec) | ||
735 | + FIELD(SD1_IOU_INTERCONNECT_QOS, SD1_QOS, 0, 4) | ||
736 | +REG32(OSPI_QSPI_IOU_AXI_MUX_SEL, 0x504) | ||
737 | + FIELD(OSPI_QSPI_IOU_AXI_MUX_SEL, OSPI_MUX_SEL, 1, 1) | ||
738 | + FIELD(OSPI_QSPI_IOU_AXI_MUX_SEL, QSPI_OSPI_MUX_SEL, 0, 1) | ||
739 | +REG32(QSPI_IOU_COHERENT_CTRL, 0x508) | ||
740 | + FIELD(QSPI_IOU_COHERENT_CTRL, QSPI_AXI_COH, 0, 4) | ||
741 | +REG32(QSPI_IOU_INTERCONNECT_ROUTE, 0x50c) | ||
742 | + FIELD(QSPI_IOU_INTERCONNECT_ROUTE, QSPI, 0, 1) | ||
743 | +REG32(QSPI_IOU_RAM, 0x510) | ||
744 | + FIELD(QSPI_IOU_RAM, EMASA1, 13, 1) | ||
745 | + FIELD(QSPI_IOU_RAM, EMAB1, 10, 3) | ||
746 | + FIELD(QSPI_IOU_RAM, EMAA1, 7, 3) | ||
747 | + FIELD(QSPI_IOU_RAM, EMASA0, 6, 1) | ||
748 | + FIELD(QSPI_IOU_RAM, EMAB0, 3, 3) | ||
749 | + FIELD(QSPI_IOU_RAM, EMAA0, 0, 3) | ||
750 | +REG32(QSPI_IOU_INTERCONNECT_QOS, 0x514) | ||
751 | + FIELD(QSPI_IOU_INTERCONNECT_QOS, QSPI_QOS, 0, 4) | ||
752 | +REG32(OSPI_IOU_COHERENT_CTRL, 0x530) | ||
753 | + FIELD(OSPI_IOU_COHERENT_CTRL, OSPI_AXI_COH, 0, 4) | ||
754 | +REG32(OSPI_IOU_INTERCONNECT_ROUTE, 0x534) | ||
755 | + FIELD(OSPI_IOU_INTERCONNECT_ROUTE, OSPI, 0, 1) | ||
756 | +REG32(OSPI_IOU_RAM, 0x538) | ||
757 | + FIELD(OSPI_IOU_RAM, EMAS0, 5, 1) | ||
758 | + FIELD(OSPI_IOU_RAM, EMAW0, 3, 2) | ||
759 | + FIELD(OSPI_IOU_RAM, EMA0, 0, 3) | ||
760 | +REG32(OSPI_IOU_INTERCONNECT_QOS, 0x53c) | ||
761 | + FIELD(OSPI_IOU_INTERCONNECT_QOS, OSPI_QOS, 0, 4) | ||
762 | +REG32(OSPI_REFCLK_DLY_CTRL, 0x540) | ||
763 | + FIELD(OSPI_REFCLK_DLY_CTRL, DLY1, 3, 2) | ||
764 | + FIELD(OSPI_REFCLK_DLY_CTRL, DLY0, 0, 3) | ||
765 | +REG32(CUR_PWR_ST, 0x600) | ||
766 | + FIELD(CUR_PWR_ST, U2PMU, 0, 2) | ||
767 | +REG32(CONNECT_ST, 0x604) | ||
768 | + FIELD(CONNECT_ST, U2PMU, 0, 1) | ||
769 | +REG32(PW_STATE_REQ, 0x608) | ||
770 | + FIELD(PW_STATE_REQ, BIT_1_0, 0, 2) | ||
771 | +REG32(HOST_U2_PORT_DISABLE, 0x60c) | ||
772 | + FIELD(HOST_U2_PORT_DISABLE, BIT_0, 0, 1) | ||
773 | +REG32(DBG_U2PMU, 0x610) | ||
774 | +REG32(DBG_U2PMU_EXT1, 0x614) | ||
775 | +REG32(DBG_U2PMU_EXT2, 0x618) | ||
776 | + FIELD(DBG_U2PMU_EXT2, BIT_67_64, 0, 4) | ||
777 | +REG32(PME_GEN_U2PMU, 0x61c) | ||
778 | + FIELD(PME_GEN_U2PMU, BIT_0, 0, 1) | ||
779 | +REG32(PWR_CONFIG_USB2, 0x620) | ||
780 | + FIELD(PWR_CONFIG_USB2, STRAP, 0, 30) | ||
781 | +REG32(PHY_HUB, 0x624) | ||
782 | + FIELD(PHY_HUB, VBUS_CTRL, 1, 1) | ||
783 | + FIELD(PHY_HUB, OVER_CURRENT, 0, 1) | ||
784 | +REG32(CTRL, 0x700) | ||
785 | + FIELD(CTRL, SLVERR_ENABLE, 0, 1) | ||
786 | +REG32(ISR, 0x800) | ||
787 | + FIELD(ISR, ADDR_DECODE_ERR, 0, 1) | ||
788 | +REG32(IMR, 0x804) | ||
789 | + FIELD(IMR, ADDR_DECODE_ERR, 0, 1) | ||
790 | +REG32(IER, 0x808) | ||
791 | + FIELD(IER, ADDR_DECODE_ERR, 0, 1) | ||
792 | +REG32(IDR, 0x80c) | ||
793 | + FIELD(IDR, ADDR_DECODE_ERR, 0, 1) | ||
794 | +REG32(ITR, 0x810) | ||
795 | + FIELD(ITR, ADDR_DECODE_ERR, 0, 1) | ||
796 | +REG32(PARITY_ISR, 0x814) | ||
797 | + FIELD(PARITY_ISR, PERR_AXI_SD1_IOU, 12, 1) | ||
798 | + FIELD(PARITY_ISR, PERR_AXI_SD0_IOU, 11, 1) | ||
799 | + FIELD(PARITY_ISR, PERR_AXI_QSPI_IOU, 10, 1) | ||
800 | + FIELD(PARITY_ISR, PERR_AXI_OSPI_IOU, 9, 1) | ||
801 | + FIELD(PARITY_ISR, PERR_IOU_SD1, 8, 1) | ||
802 | + FIELD(PARITY_ISR, PERR_IOU_SD0, 7, 1) | ||
803 | + FIELD(PARITY_ISR, PERR_IOU_QSPI1, 6, 1) | ||
804 | + FIELD(PARITY_ISR, PERR_IOUSLCR_SECURE_APB, 5, 1) | ||
805 | + FIELD(PARITY_ISR, PERR_IOUSLCR_APB, 4, 1) | ||
806 | + FIELD(PARITY_ISR, PERR_QSPI0_APB, 3, 1) | ||
807 | + FIELD(PARITY_ISR, PERR_OSPI_APB, 2, 1) | ||
808 | + FIELD(PARITY_ISR, PERR_I2C_APB, 1, 1) | ||
809 | + FIELD(PARITY_ISR, PERR_GPIO_APB, 0, 1) | ||
810 | +REG32(PARITY_IMR, 0x818) | ||
811 | + FIELD(PARITY_IMR, PERR_AXI_SD1_IOU, 12, 1) | ||
812 | + FIELD(PARITY_IMR, PERR_AXI_SD0_IOU, 11, 1) | ||
813 | + FIELD(PARITY_IMR, PERR_AXI_QSPI_IOU, 10, 1) | ||
814 | + FIELD(PARITY_IMR, PERR_AXI_OSPI_IOU, 9, 1) | ||
815 | + FIELD(PARITY_IMR, PERR_IOU_SD1, 8, 1) | ||
816 | + FIELD(PARITY_IMR, PERR_IOU_SD0, 7, 1) | ||
817 | + FIELD(PARITY_IMR, PERR_IOU_QSPI1, 6, 1) | ||
818 | + FIELD(PARITY_IMR, PERR_IOUSLCR_SECURE_APB, 5, 1) | ||
819 | + FIELD(PARITY_IMR, PERR_IOUSLCR_APB, 4, 1) | ||
820 | + FIELD(PARITY_IMR, PERR_QSPI0_APB, 3, 1) | ||
821 | + FIELD(PARITY_IMR, PERR_OSPI_APB, 2, 1) | ||
822 | + FIELD(PARITY_IMR, PERR_I2C_APB, 1, 1) | ||
823 | + FIELD(PARITY_IMR, PERR_GPIO_APB, 0, 1) | ||
824 | +REG32(PARITY_IER, 0x81c) | ||
825 | + FIELD(PARITY_IER, PERR_AXI_SD1_IOU, 12, 1) | ||
826 | + FIELD(PARITY_IER, PERR_AXI_SD0_IOU, 11, 1) | ||
827 | + FIELD(PARITY_IER, PERR_AXI_QSPI_IOU, 10, 1) | ||
828 | + FIELD(PARITY_IER, PERR_AXI_OSPI_IOU, 9, 1) | ||
829 | + FIELD(PARITY_IER, PERR_IOU_SD1, 8, 1) | ||
830 | + FIELD(PARITY_IER, PERR_IOU_SD0, 7, 1) | ||
831 | + FIELD(PARITY_IER, PERR_IOU_QSPI1, 6, 1) | ||
832 | + FIELD(PARITY_IER, PERR_IOUSLCR_SECURE_APB, 5, 1) | ||
833 | + FIELD(PARITY_IER, PERR_IOUSLCR_APB, 4, 1) | ||
834 | + FIELD(PARITY_IER, PERR_QSPI0_APB, 3, 1) | ||
835 | + FIELD(PARITY_IER, PERR_OSPI_APB, 2, 1) | ||
836 | + FIELD(PARITY_IER, PERR_I2C_APB, 1, 1) | ||
837 | + FIELD(PARITY_IER, PERR_GPIO_APB, 0, 1) | ||
838 | +REG32(PARITY_IDR, 0x820) | ||
839 | + FIELD(PARITY_IDR, PERR_AXI_SD1_IOU, 12, 1) | ||
840 | + FIELD(PARITY_IDR, PERR_AXI_SD0_IOU, 11, 1) | ||
841 | + FIELD(PARITY_IDR, PERR_AXI_QSPI_IOU, 10, 1) | ||
842 | + FIELD(PARITY_IDR, PERR_AXI_OSPI_IOU, 9, 1) | ||
843 | + FIELD(PARITY_IDR, PERR_IOU_SD1, 8, 1) | ||
844 | + FIELD(PARITY_IDR, PERR_IOU_SD0, 7, 1) | ||
845 | + FIELD(PARITY_IDR, PERR_IOU_QSPI1, 6, 1) | ||
846 | + FIELD(PARITY_IDR, PERR_IOUSLCR_SECURE_APB, 5, 1) | ||
847 | + FIELD(PARITY_IDR, PERR_IOUSLCR_APB, 4, 1) | ||
848 | + FIELD(PARITY_IDR, PERR_QSPI0_APB, 3, 1) | ||
849 | + FIELD(PARITY_IDR, PERR_OSPI_APB, 2, 1) | ||
850 | + FIELD(PARITY_IDR, PERR_I2C_APB, 1, 1) | ||
851 | + FIELD(PARITY_IDR, PERR_GPIO_APB, 0, 1) | ||
852 | +REG32(PARITY_ITR, 0x824) | ||
853 | + FIELD(PARITY_ITR, PERR_AXI_SD1_IOU, 12, 1) | ||
854 | + FIELD(PARITY_ITR, PERR_AXI_SD0_IOU, 11, 1) | ||
855 | + FIELD(PARITY_ITR, PERR_AXI_QSPI_IOU, 10, 1) | ||
856 | + FIELD(PARITY_ITR, PERR_AXI_OSPI_IOU, 9, 1) | ||
857 | + FIELD(PARITY_ITR, PERR_IOU_SD1, 8, 1) | ||
858 | + FIELD(PARITY_ITR, PERR_IOU_SD0, 7, 1) | ||
859 | + FIELD(PARITY_ITR, PERR_IOU_QSPI1, 6, 1) | ||
860 | + FIELD(PARITY_ITR, PERR_IOUSLCR_SECURE_APB, 5, 1) | ||
861 | + FIELD(PARITY_ITR, PERR_IOUSLCR_APB, 4, 1) | ||
862 | + FIELD(PARITY_ITR, PERR_QSPI0_APB, 3, 1) | ||
863 | + FIELD(PARITY_ITR, PERR_OSPI_APB, 2, 1) | ||
864 | + FIELD(PARITY_ITR, PERR_I2C_APB, 1, 1) | ||
865 | + FIELD(PARITY_ITR, PERR_GPIO_APB, 0, 1) | ||
866 | +REG32(WPROT0, 0x828) | ||
867 | + FIELD(WPROT0, ACTIVE, 0, 1) | ||
868 | + | ||
869 | +static void parity_imr_update_irq(XlnxVersalPmcIouSlcr *s) | ||
870 | +{ | ||
871 | + bool pending = s->regs[R_PARITY_ISR] & ~s->regs[R_PARITY_IMR]; | ||
872 | + qemu_set_irq(s->irq_parity_imr, pending); | ||
873 | +} | 39 | +} |
874 | + | 40 | + |
875 | +static void parity_isr_postw(RegisterInfo *reg, uint64_t val64) | 41 | +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) |
876 | +{ | 42 | +{ |
877 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 43 | + TCGv_i32 abs_s = tcg_temp_new_i32(); |
878 | + parity_imr_update_irq(s); | 44 | + |
45 | + gen_vfp_abss(abs_s, s); | ||
46 | + tcg_gen_movcond_i32(TCG_COND_GTU, d, | ||
47 | + abs_s, tcg_constant_i32(0x7f800000UL), | ||
48 | + s, abs_s); | ||
879 | +} | 49 | +} |
880 | + | 50 | + |
881 | +static uint64_t parity_ier_prew(RegisterInfo *reg, uint64_t val64) | 51 | +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) |
882 | +{ | 52 | +{ |
883 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 53 | + TCGv_i64 abs_s = tcg_temp_new_i64(); |
884 | + uint32_t val = val64; | ||
885 | + | 54 | + |
886 | + s->regs[R_PARITY_IMR] &= ~val; | 55 | + gen_vfp_absd(abs_s, s); |
887 | + parity_imr_update_irq(s); | 56 | + tcg_gen_movcond_i64(TCG_COND_GTU, d, |
888 | + return 0; | 57 | + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), |
58 | + s, abs_s); | ||
889 | +} | 59 | +} |
890 | + | 60 | + |
891 | +static uint64_t parity_idr_prew(RegisterInfo *reg, uint64_t val64) | 61 | static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) |
62 | { | ||
63 | if (dc->fpcr_ah) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) | ||
65 | gen_vfp_absd(d, d); | ||
66 | } | ||
67 | |||
68 | +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) | ||
892 | +{ | 69 | +{ |
893 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 70 | + gen_helper_vfp_subh(d, n, m, s); |
894 | + uint32_t val = val64; | 71 | + gen_vfp_ah_absh(d, d); |
895 | + | ||
896 | + s->regs[R_PARITY_IMR] |= val; | ||
897 | + parity_imr_update_irq(s); | ||
898 | + return 0; | ||
899 | +} | 72 | +} |
900 | + | 73 | + |
901 | +static uint64_t parity_itr_prew(RegisterInfo *reg, uint64_t val64) | 74 | +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) |
902 | +{ | 75 | +{ |
903 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 76 | + gen_helper_vfp_subs(d, n, m, s); |
904 | + uint32_t val = val64; | 77 | + gen_vfp_ah_abss(d, d); |
905 | + | ||
906 | + s->regs[R_PARITY_ISR] |= val; | ||
907 | + parity_imr_update_irq(s); | ||
908 | + return 0; | ||
909 | +} | 78 | +} |
910 | + | 79 | + |
911 | +static void imr_update_irq(XlnxVersalPmcIouSlcr *s) | 80 | +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) |
912 | +{ | 81 | +{ |
913 | + bool pending = s->regs[R_ISR] & ~s->regs[R_IMR]; | 82 | + gen_helper_vfp_subd(d, n, m, s); |
914 | + qemu_set_irq(s->irq_imr, pending); | 83 | + gen_vfp_ah_absd(d, d); |
915 | +} | 84 | +} |
916 | + | 85 | + |
917 | +static void isr_postw(RegisterInfo *reg, uint64_t val64) | 86 | static const FPScalar f_scalar_fabd = { |
918 | +{ | 87 | gen_fabd_h, |
919 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 88 | gen_fabd_s, |
920 | + imr_update_irq(s); | 89 | gen_fabd_d, |
921 | +} | 90 | }; |
922 | + | 91 | -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd, a->rn) |
923 | +static uint64_t ier_prew(RegisterInfo *reg, uint64_t val64) | 92 | +static const FPScalar f_scalar_ah_fabd = { |
924 | +{ | 93 | + gen_fabd_ah_h, |
925 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | 94 | + gen_fabd_ah_s, |
926 | + uint32_t val = val64; | 95 | + gen_fabd_ah_d, |
927 | + | ||
928 | + s->regs[R_IMR] &= ~val; | ||
929 | + imr_update_irq(s); | ||
930 | + return 0; | ||
931 | +} | ||
932 | + | ||
933 | +static uint64_t idr_prew(RegisterInfo *reg, uint64_t val64) | ||
934 | +{ | ||
935 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | ||
936 | + uint32_t val = val64; | ||
937 | + | ||
938 | + s->regs[R_IMR] |= val; | ||
939 | + imr_update_irq(s); | ||
940 | + return 0; | ||
941 | +} | ||
942 | + | ||
943 | +static uint64_t itr_prew(RegisterInfo *reg, uint64_t val64) | ||
944 | +{ | ||
945 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | ||
946 | + uint32_t val = val64; | ||
947 | + | ||
948 | + s->regs[R_ISR] |= val; | ||
949 | + imr_update_irq(s); | ||
950 | + return 0; | ||
951 | +} | ||
952 | + | ||
953 | +static uint64_t sd0_ctrl_reg_prew(RegisterInfo *reg, uint64_t val64) | ||
954 | +{ | ||
955 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | ||
956 | + uint32_t prev = ARRAY_FIELD_EX32(s->regs, SD0_CTRL_REG, SD0_EMMC_SEL); | ||
957 | + | ||
958 | + if (prev != (val64 & R_SD0_CTRL_REG_SD0_EMMC_SEL_MASK)) { | ||
959 | + qemu_set_irq(s->sd_emmc_sel[0], !!val64); | ||
960 | + } | ||
961 | + | ||
962 | + return val64; | ||
963 | +} | ||
964 | + | ||
965 | +static uint64_t sd1_ctrl_reg_prew(RegisterInfo *reg, uint64_t val64) | ||
966 | +{ | ||
967 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | ||
968 | + uint32_t prev = ARRAY_FIELD_EX32(s->regs, SD1_CTRL_REG, SD1_EMMC_SEL); | ||
969 | + | ||
970 | + if (prev != (val64 & R_SD1_CTRL_REG_SD1_EMMC_SEL_MASK)) { | ||
971 | + qemu_set_irq(s->sd_emmc_sel[1], !!val64); | ||
972 | + } | ||
973 | + | ||
974 | + return val64; | ||
975 | +} | ||
976 | + | ||
977 | +static uint64_t ospi_qspi_iou_axi_mux_sel_prew(RegisterInfo *reg, | ||
978 | + uint64_t val64) | ||
979 | +{ | ||
980 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(reg->opaque); | ||
981 | + uint32_t val32 = (uint32_t) val64; | ||
982 | + uint8_t ospi_mux_sel = FIELD_EX32(val32, OSPI_QSPI_IOU_AXI_MUX_SEL, | ||
983 | + OSPI_MUX_SEL); | ||
984 | + uint8_t qspi_ospi_mux_sel = FIELD_EX32(val32, OSPI_QSPI_IOU_AXI_MUX_SEL, | ||
985 | + QSPI_OSPI_MUX_SEL); | ||
986 | + | ||
987 | + if (ospi_mux_sel != | ||
988 | + ARRAY_FIELD_EX32(s->regs, OSPI_QSPI_IOU_AXI_MUX_SEL, OSPI_MUX_SEL)) { | ||
989 | + qemu_set_irq(s->ospi_mux_sel, !!ospi_mux_sel); | ||
990 | + } | ||
991 | + | ||
992 | + if (qspi_ospi_mux_sel != | ||
993 | + ARRAY_FIELD_EX32(s->regs, OSPI_QSPI_IOU_AXI_MUX_SEL, | ||
994 | + QSPI_OSPI_MUX_SEL)) { | ||
995 | + qemu_set_irq(s->qspi_ospi_mux_sel, !!qspi_ospi_mux_sel); | ||
996 | + } | ||
997 | + | ||
998 | + return val64; | ||
999 | +} | ||
1000 | + | ||
1001 | +static RegisterAccessInfo pmc_iou_slcr_regs_info[] = { | ||
1002 | + { .name = "MIO_PIN_0", .addr = A_MIO_PIN_0, | ||
1003 | + .rsvd = 0xfffffc01, | ||
1004 | + },{ .name = "MIO_PIN_1", .addr = A_MIO_PIN_1, | ||
1005 | + .rsvd = 0xfffffc01, | ||
1006 | + },{ .name = "MIO_PIN_2", .addr = A_MIO_PIN_2, | ||
1007 | + .rsvd = 0xfffffc01, | ||
1008 | + },{ .name = "MIO_PIN_3", .addr = A_MIO_PIN_3, | ||
1009 | + .rsvd = 0xfffffc01, | ||
1010 | + },{ .name = "MIO_PIN_4", .addr = A_MIO_PIN_4, | ||
1011 | + .rsvd = 0xfffffc01, | ||
1012 | + },{ .name = "MIO_PIN_5", .addr = A_MIO_PIN_5, | ||
1013 | + .rsvd = 0xfffffc01, | ||
1014 | + },{ .name = "MIO_PIN_6", .addr = A_MIO_PIN_6, | ||
1015 | + .rsvd = 0xfffffc01, | ||
1016 | + },{ .name = "MIO_PIN_7", .addr = A_MIO_PIN_7, | ||
1017 | + .rsvd = 0xfffffc01, | ||
1018 | + },{ .name = "MIO_PIN_8", .addr = A_MIO_PIN_8, | ||
1019 | + .rsvd = 0xfffffc01, | ||
1020 | + },{ .name = "MIO_PIN_9", .addr = A_MIO_PIN_9, | ||
1021 | + .rsvd = 0xfffffc01, | ||
1022 | + },{ .name = "MIO_PIN_10", .addr = A_MIO_PIN_10, | ||
1023 | + .rsvd = 0xfffffc01, | ||
1024 | + },{ .name = "MIO_PIN_11", .addr = A_MIO_PIN_11, | ||
1025 | + .rsvd = 0xfffffc01, | ||
1026 | + },{ .name = "MIO_PIN_12", .addr = A_MIO_PIN_12, | ||
1027 | + .rsvd = 0xfffffc01, | ||
1028 | + },{ .name = "MIO_PIN_13", .addr = A_MIO_PIN_13, | ||
1029 | + .rsvd = 0xfffffc01, | ||
1030 | + },{ .name = "MIO_PIN_14", .addr = A_MIO_PIN_14, | ||
1031 | + .rsvd = 0xfffffc01, | ||
1032 | + },{ .name = "MIO_PIN_15", .addr = A_MIO_PIN_15, | ||
1033 | + .rsvd = 0xfffffc01, | ||
1034 | + },{ .name = "MIO_PIN_16", .addr = A_MIO_PIN_16, | ||
1035 | + .rsvd = 0xfffffc01, | ||
1036 | + },{ .name = "MIO_PIN_17", .addr = A_MIO_PIN_17, | ||
1037 | + .rsvd = 0xfffffc01, | ||
1038 | + },{ .name = "MIO_PIN_18", .addr = A_MIO_PIN_18, | ||
1039 | + .rsvd = 0xfffffc01, | ||
1040 | + },{ .name = "MIO_PIN_19", .addr = A_MIO_PIN_19, | ||
1041 | + .rsvd = 0xfffffc01, | ||
1042 | + },{ .name = "MIO_PIN_20", .addr = A_MIO_PIN_20, | ||
1043 | + .rsvd = 0xfffffc01, | ||
1044 | + },{ .name = "MIO_PIN_21", .addr = A_MIO_PIN_21, | ||
1045 | + .rsvd = 0xfffffc01, | ||
1046 | + },{ .name = "MIO_PIN_22", .addr = A_MIO_PIN_22, | ||
1047 | + .rsvd = 0xfffffc01, | ||
1048 | + },{ .name = "MIO_PIN_23", .addr = A_MIO_PIN_23, | ||
1049 | + .rsvd = 0xfffffc01, | ||
1050 | + },{ .name = "MIO_PIN_24", .addr = A_MIO_PIN_24, | ||
1051 | + .rsvd = 0xfffffc01, | ||
1052 | + },{ .name = "MIO_PIN_25", .addr = A_MIO_PIN_25, | ||
1053 | + .rsvd = 0xfffffc01, | ||
1054 | + },{ .name = "MIO_PIN_26", .addr = A_MIO_PIN_26, | ||
1055 | + .rsvd = 0xfffffc01, | ||
1056 | + },{ .name = "MIO_PIN_27", .addr = A_MIO_PIN_27, | ||
1057 | + .rsvd = 0xfffffc01, | ||
1058 | + },{ .name = "MIO_PIN_28", .addr = A_MIO_PIN_28, | ||
1059 | + .rsvd = 0xfffffc01, | ||
1060 | + },{ .name = "MIO_PIN_29", .addr = A_MIO_PIN_29, | ||
1061 | + .rsvd = 0xfffffc01, | ||
1062 | + },{ .name = "MIO_PIN_30", .addr = A_MIO_PIN_30, | ||
1063 | + .rsvd = 0xfffffc01, | ||
1064 | + },{ .name = "MIO_PIN_31", .addr = A_MIO_PIN_31, | ||
1065 | + .rsvd = 0xfffffc01, | ||
1066 | + },{ .name = "MIO_PIN_32", .addr = A_MIO_PIN_32, | ||
1067 | + .rsvd = 0xfffffc01, | ||
1068 | + },{ .name = "MIO_PIN_33", .addr = A_MIO_PIN_33, | ||
1069 | + .rsvd = 0xfffffc01, | ||
1070 | + },{ .name = "MIO_PIN_34", .addr = A_MIO_PIN_34, | ||
1071 | + .rsvd = 0xfffffc01, | ||
1072 | + },{ .name = "MIO_PIN_35", .addr = A_MIO_PIN_35, | ||
1073 | + .rsvd = 0xfffffc01, | ||
1074 | + },{ .name = "MIO_PIN_36", .addr = A_MIO_PIN_36, | ||
1075 | + .rsvd = 0xfffffc01, | ||
1076 | + },{ .name = "MIO_PIN_37", .addr = A_MIO_PIN_37, | ||
1077 | + .rsvd = 0xfffffc01, | ||
1078 | + },{ .name = "MIO_PIN_38", .addr = A_MIO_PIN_38, | ||
1079 | + .rsvd = 0xfffffc01, | ||
1080 | + },{ .name = "MIO_PIN_39", .addr = A_MIO_PIN_39, | ||
1081 | + .rsvd = 0xfffffc01, | ||
1082 | + },{ .name = "MIO_PIN_40", .addr = A_MIO_PIN_40, | ||
1083 | + .rsvd = 0xfffffc01, | ||
1084 | + },{ .name = "MIO_PIN_41", .addr = A_MIO_PIN_41, | ||
1085 | + .rsvd = 0xfffffc01, | ||
1086 | + },{ .name = "MIO_PIN_42", .addr = A_MIO_PIN_42, | ||
1087 | + .rsvd = 0xfffffc01, | ||
1088 | + },{ .name = "MIO_PIN_43", .addr = A_MIO_PIN_43, | ||
1089 | + .rsvd = 0xfffffc01, | ||
1090 | + },{ .name = "MIO_PIN_44", .addr = A_MIO_PIN_44, | ||
1091 | + .rsvd = 0xfffffc01, | ||
1092 | + },{ .name = "MIO_PIN_45", .addr = A_MIO_PIN_45, | ||
1093 | + .rsvd = 0xfffffc01, | ||
1094 | + },{ .name = "MIO_PIN_46", .addr = A_MIO_PIN_46, | ||
1095 | + .rsvd = 0xfffffc01, | ||
1096 | + },{ .name = "MIO_PIN_47", .addr = A_MIO_PIN_47, | ||
1097 | + .rsvd = 0xfffffc01, | ||
1098 | + },{ .name = "MIO_PIN_48", .addr = A_MIO_PIN_48, | ||
1099 | + .rsvd = 0xfffffc01, | ||
1100 | + },{ .name = "MIO_PIN_49", .addr = A_MIO_PIN_49, | ||
1101 | + .rsvd = 0xfffffc01, | ||
1102 | + },{ .name = "MIO_PIN_50", .addr = A_MIO_PIN_50, | ||
1103 | + .rsvd = 0xfffffc01, | ||
1104 | + },{ .name = "MIO_PIN_51", .addr = A_MIO_PIN_51, | ||
1105 | + .rsvd = 0xfffffc01, | ||
1106 | + },{ .name = "BNK0_EN_RX", .addr = A_BNK0_EN_RX, | ||
1107 | + .reset = 0x3ffffff, | ||
1108 | + .rsvd = 0xfc000000, | ||
1109 | + },{ .name = "BNK0_SEL_RX0", .addr = A_BNK0_SEL_RX0, | ||
1110 | + .reset = 0xffffffff, | ||
1111 | + },{ .name = "BNK0_SEL_RX1", .addr = A_BNK0_SEL_RX1, | ||
1112 | + .reset = 0xfffff, | ||
1113 | + .rsvd = 0xfff00000, | ||
1114 | + },{ .name = "BNK0_EN_RX_SCHMITT_HYST", .addr = A_BNK0_EN_RX_SCHMITT_HYST, | ||
1115 | + .rsvd = 0xfc000000, | ||
1116 | + },{ .name = "BNK0_EN_WK_PD", .addr = A_BNK0_EN_WK_PD, | ||
1117 | + .rsvd = 0xfc000000, | ||
1118 | + },{ .name = "BNK0_EN_WK_PU", .addr = A_BNK0_EN_WK_PU, | ||
1119 | + .reset = 0x3ffffff, | ||
1120 | + .rsvd = 0xfc000000, | ||
1121 | + },{ .name = "BNK0_SEL_DRV0", .addr = A_BNK0_SEL_DRV0, | ||
1122 | + .reset = 0xffffffff, | ||
1123 | + },{ .name = "BNK0_SEL_DRV1", .addr = A_BNK0_SEL_DRV1, | ||
1124 | + .reset = 0xfffff, | ||
1125 | + .rsvd = 0xfff00000, | ||
1126 | + },{ .name = "BNK0_SEL_SLEW", .addr = A_BNK0_SEL_SLEW, | ||
1127 | + .rsvd = 0xfc000000, | ||
1128 | + },{ .name = "BNK0_EN_DFT_OPT_INV", .addr = A_BNK0_EN_DFT_OPT_INV, | ||
1129 | + .rsvd = 0xfc000000, | ||
1130 | + },{ .name = "BNK0_EN_PAD2PAD_LOOPBACK", | ||
1131 | + .addr = A_BNK0_EN_PAD2PAD_LOOPBACK, | ||
1132 | + .rsvd = 0xffffe000, | ||
1133 | + },{ .name = "BNK0_RX_SPARE0", .addr = A_BNK0_RX_SPARE0, | ||
1134 | + },{ .name = "BNK0_RX_SPARE1", .addr = A_BNK0_RX_SPARE1, | ||
1135 | + .rsvd = 0xfff00000, | ||
1136 | + },{ .name = "BNK0_TX_SPARE0", .addr = A_BNK0_TX_SPARE0, | ||
1137 | + },{ .name = "BNK0_TX_SPARE1", .addr = A_BNK0_TX_SPARE1, | ||
1138 | + .rsvd = 0xfff00000, | ||
1139 | + },{ .name = "BNK0_SEL_EN1P8", .addr = A_BNK0_SEL_EN1P8, | ||
1140 | + .rsvd = 0xfffffffe, | ||
1141 | + },{ .name = "BNK0_EN_B_POR_DETECT", .addr = A_BNK0_EN_B_POR_DETECT, | ||
1142 | + .rsvd = 0xfffffffe, | ||
1143 | + },{ .name = "BNK0_LPF_BYP_POR_DETECT", .addr = A_BNK0_LPF_BYP_POR_DETECT, | ||
1144 | + .reset = 0x1, | ||
1145 | + .rsvd = 0xfffffffe, | ||
1146 | + },{ .name = "BNK0_EN_LATCH", .addr = A_BNK0_EN_LATCH, | ||
1147 | + .rsvd = 0xfffffffe, | ||
1148 | + },{ .name = "BNK0_VBG_LPF_BYP_B", .addr = A_BNK0_VBG_LPF_BYP_B, | ||
1149 | + .reset = 0x1, | ||
1150 | + .rsvd = 0xfffffffe, | ||
1151 | + },{ .name = "BNK0_EN_AMP_B", .addr = A_BNK0_EN_AMP_B, | ||
1152 | + .rsvd = 0xfffffffc, | ||
1153 | + },{ .name = "BNK0_SPARE_BIAS", .addr = A_BNK0_SPARE_BIAS, | ||
1154 | + .rsvd = 0xfffffff0, | ||
1155 | + },{ .name = "BNK0_DRIVER_BIAS", .addr = A_BNK0_DRIVER_BIAS, | ||
1156 | + .rsvd = 0xffff8000, | ||
1157 | + },{ .name = "BNK0_VMODE", .addr = A_BNK0_VMODE, | ||
1158 | + .rsvd = 0xfffffffe, | ||
1159 | + .ro = 0x1, | ||
1160 | + },{ .name = "BNK0_SEL_AUX_IO_RX", .addr = A_BNK0_SEL_AUX_IO_RX, | ||
1161 | + .rsvd = 0xfc000000, | ||
1162 | + },{ .name = "BNK0_EN_TX_HS_MODE", .addr = A_BNK0_EN_TX_HS_MODE, | ||
1163 | + .rsvd = 0xfc000000, | ||
1164 | + },{ .name = "MIO_MST_TRI0", .addr = A_MIO_MST_TRI0, | ||
1165 | + .reset = 0x3ffffff, | ||
1166 | + .rsvd = 0xfc000000, | ||
1167 | + },{ .name = "MIO_MST_TRI1", .addr = A_MIO_MST_TRI1, | ||
1168 | + .reset = 0x3ffffff, | ||
1169 | + .rsvd = 0xfc000000, | ||
1170 | + },{ .name = "BNK1_EN_RX", .addr = A_BNK1_EN_RX, | ||
1171 | + .reset = 0x3ffffff, | ||
1172 | + .rsvd = 0xfc000000, | ||
1173 | + },{ .name = "BNK1_SEL_RX0", .addr = A_BNK1_SEL_RX0, | ||
1174 | + .reset = 0xffffffff, | ||
1175 | + },{ .name = "BNK1_SEL_RX1", .addr = A_BNK1_SEL_RX1, | ||
1176 | + .reset = 0xfffff, | ||
1177 | + .rsvd = 0xfff00000, | ||
1178 | + },{ .name = "BNK1_EN_RX_SCHMITT_HYST", .addr = A_BNK1_EN_RX_SCHMITT_HYST, | ||
1179 | + .rsvd = 0xfc000000, | ||
1180 | + },{ .name = "BNK1_EN_WK_PD", .addr = A_BNK1_EN_WK_PD, | ||
1181 | + .rsvd = 0xfc000000, | ||
1182 | + },{ .name = "BNK1_EN_WK_PU", .addr = A_BNK1_EN_WK_PU, | ||
1183 | + .reset = 0x3ffffff, | ||
1184 | + .rsvd = 0xfc000000, | ||
1185 | + },{ .name = "BNK1_SEL_DRV0", .addr = A_BNK1_SEL_DRV0, | ||
1186 | + .reset = 0xffffffff, | ||
1187 | + },{ .name = "BNK1_SEL_DRV1", .addr = A_BNK1_SEL_DRV1, | ||
1188 | + .reset = 0xfffff, | ||
1189 | + .rsvd = 0xfff00000, | ||
1190 | + },{ .name = "BNK1_SEL_SLEW", .addr = A_BNK1_SEL_SLEW, | ||
1191 | + .rsvd = 0xfc000000, | ||
1192 | + },{ .name = "BNK1_EN_DFT_OPT_INV", .addr = A_BNK1_EN_DFT_OPT_INV, | ||
1193 | + .rsvd = 0xfc000000, | ||
1194 | + },{ .name = "BNK1_EN_PAD2PAD_LOOPBACK", | ||
1195 | + .addr = A_BNK1_EN_PAD2PAD_LOOPBACK, | ||
1196 | + .rsvd = 0xffffe000, | ||
1197 | + },{ .name = "BNK1_RX_SPARE0", .addr = A_BNK1_RX_SPARE0, | ||
1198 | + },{ .name = "BNK1_RX_SPARE1", .addr = A_BNK1_RX_SPARE1, | ||
1199 | + .rsvd = 0xfff00000, | ||
1200 | + },{ .name = "BNK1_TX_SPARE0", .addr = A_BNK1_TX_SPARE0, | ||
1201 | + },{ .name = "BNK1_TX_SPARE1", .addr = A_BNK1_TX_SPARE1, | ||
1202 | + .rsvd = 0xfff00000, | ||
1203 | + },{ .name = "BNK1_SEL_EN1P8", .addr = A_BNK1_SEL_EN1P8, | ||
1204 | + .rsvd = 0xfffffffe, | ||
1205 | + },{ .name = "BNK1_EN_B_POR_DETECT", .addr = A_BNK1_EN_B_POR_DETECT, | ||
1206 | + .rsvd = 0xfffffffe, | ||
1207 | + },{ .name = "BNK1_LPF_BYP_POR_DETECT", .addr = A_BNK1_LPF_BYP_POR_DETECT, | ||
1208 | + .reset = 0x1, | ||
1209 | + .rsvd = 0xfffffffe, | ||
1210 | + },{ .name = "BNK1_EN_LATCH", .addr = A_BNK1_EN_LATCH, | ||
1211 | + .rsvd = 0xfffffffe, | ||
1212 | + },{ .name = "BNK1_VBG_LPF_BYP_B", .addr = A_BNK1_VBG_LPF_BYP_B, | ||
1213 | + .reset = 0x1, | ||
1214 | + .rsvd = 0xfffffffe, | ||
1215 | + },{ .name = "BNK1_EN_AMP_B", .addr = A_BNK1_EN_AMP_B, | ||
1216 | + .rsvd = 0xfffffffc, | ||
1217 | + },{ .name = "BNK1_SPARE_BIAS", .addr = A_BNK1_SPARE_BIAS, | ||
1218 | + .rsvd = 0xfffffff0, | ||
1219 | + },{ .name = "BNK1_DRIVER_BIAS", .addr = A_BNK1_DRIVER_BIAS, | ||
1220 | + .rsvd = 0xffff8000, | ||
1221 | + },{ .name = "BNK1_VMODE", .addr = A_BNK1_VMODE, | ||
1222 | + .rsvd = 0xfffffffe, | ||
1223 | + .ro = 0x1, | ||
1224 | + },{ .name = "BNK1_SEL_AUX_IO_RX", .addr = A_BNK1_SEL_AUX_IO_RX, | ||
1225 | + .rsvd = 0xfc000000, | ||
1226 | + },{ .name = "BNK1_EN_TX_HS_MODE", .addr = A_BNK1_EN_TX_HS_MODE, | ||
1227 | + .rsvd = 0xfc000000, | ||
1228 | + },{ .name = "SD0_CLK_CTRL", .addr = A_SD0_CLK_CTRL, | ||
1229 | + .rsvd = 0xfffffff8, | ||
1230 | + },{ .name = "SD0_CTRL_REG", .addr = A_SD0_CTRL_REG, | ||
1231 | + .rsvd = 0xfffffffe, | ||
1232 | + .pre_write = sd0_ctrl_reg_prew, | ||
1233 | + },{ .name = "SD0_CONFIG_REG1", .addr = A_SD0_CONFIG_REG1, | ||
1234 | + .reset = 0x3250, | ||
1235 | + .rsvd = 0xffff8000, | ||
1236 | + },{ .name = "SD0_CONFIG_REG2", .addr = A_SD0_CONFIG_REG2, | ||
1237 | + .reset = 0xffc, | ||
1238 | + .rsvd = 0xffffc000, | ||
1239 | + },{ .name = "SD0_CONFIG_REG3", .addr = A_SD0_CONFIG_REG3, | ||
1240 | + .reset = 0x407, | ||
1241 | + .rsvd = 0xfffff800, | ||
1242 | + },{ .name = "SD0_INITPRESET", .addr = A_SD0_INITPRESET, | ||
1243 | + .reset = 0x100, | ||
1244 | + .rsvd = 0xffffe000, | ||
1245 | + },{ .name = "SD0_DSPPRESET", .addr = A_SD0_DSPPRESET, | ||
1246 | + .reset = 0x4, | ||
1247 | + .rsvd = 0xffffe000, | ||
1248 | + },{ .name = "SD0_HSPDPRESET", .addr = A_SD0_HSPDPRESET, | ||
1249 | + .reset = 0x2, | ||
1250 | + .rsvd = 0xffffe000, | ||
1251 | + },{ .name = "SD0_SDR12PRESET", .addr = A_SD0_SDR12PRESET, | ||
1252 | + .reset = 0x4, | ||
1253 | + .rsvd = 0xffffe000, | ||
1254 | + },{ .name = "SD0_SDR25PRESET", .addr = A_SD0_SDR25PRESET, | ||
1255 | + .reset = 0x2, | ||
1256 | + .rsvd = 0xffffe000, | ||
1257 | + },{ .name = "SD0_SDR50PRSET", .addr = A_SD0_SDR50PRSET, | ||
1258 | + .reset = 0x1, | ||
1259 | + .rsvd = 0xffffe000, | ||
1260 | + },{ .name = "SD0_SDR104PRST", .addr = A_SD0_SDR104PRST, | ||
1261 | + .rsvd = 0xffffe000, | ||
1262 | + },{ .name = "SD0_DDR50PRESET", .addr = A_SD0_DDR50PRESET, | ||
1263 | + .reset = 0x2, | ||
1264 | + .rsvd = 0xffffe000, | ||
1265 | + },{ .name = "SD0_MAXCUR1P8", .addr = A_SD0_MAXCUR1P8, | ||
1266 | + .rsvd = 0xffffff00, | ||
1267 | + },{ .name = "SD0_MAXCUR3P0", .addr = A_SD0_MAXCUR3P0, | ||
1268 | + .rsvd = 0xffffff00, | ||
1269 | + },{ .name = "SD0_MAXCUR3P3", .addr = A_SD0_MAXCUR3P3, | ||
1270 | + .rsvd = 0xffffff00, | ||
1271 | + },{ .name = "SD0_DLL_CTRL", .addr = A_SD0_DLL_CTRL, | ||
1272 | + .reset = 0x1, | ||
1273 | + .rsvd = 0xfffffc00, | ||
1274 | + .ro = 0x19, | ||
1275 | + },{ .name = "SD0_CDN_CTRL", .addr = A_SD0_CDN_CTRL, | ||
1276 | + .rsvd = 0xfffffffe, | ||
1277 | + },{ .name = "SD0_DLL_TEST", .addr = A_SD0_DLL_TEST, | ||
1278 | + .rsvd = 0xff000000, | ||
1279 | + },{ .name = "SD0_RX_TUNING_SEL", .addr = A_SD0_RX_TUNING_SEL, | ||
1280 | + .rsvd = 0xfffffe00, | ||
1281 | + .ro = 0x1ff, | ||
1282 | + },{ .name = "SD0_DLL_DIV_MAP0", .addr = A_SD0_DLL_DIV_MAP0, | ||
1283 | + .reset = 0x50505050, | ||
1284 | + },{ .name = "SD0_DLL_DIV_MAP1", .addr = A_SD0_DLL_DIV_MAP1, | ||
1285 | + .reset = 0x50505050, | ||
1286 | + },{ .name = "SD0_IOU_COHERENT_CTRL", .addr = A_SD0_IOU_COHERENT_CTRL, | ||
1287 | + .rsvd = 0xfffffff0, | ||
1288 | + },{ .name = "SD0_IOU_INTERCONNECT_ROUTE", | ||
1289 | + .addr = A_SD0_IOU_INTERCONNECT_ROUTE, | ||
1290 | + .rsvd = 0xfffffffe, | ||
1291 | + },{ .name = "SD0_IOU_RAM", .addr = A_SD0_IOU_RAM, | ||
1292 | + .reset = 0x24, | ||
1293 | + .rsvd = 0xffffff80, | ||
1294 | + },{ .name = "SD0_IOU_INTERCONNECT_QOS", | ||
1295 | + .addr = A_SD0_IOU_INTERCONNECT_QOS, | ||
1296 | + .rsvd = 0xfffffff0, | ||
1297 | + },{ .name = "SD1_CLK_CTRL", .addr = A_SD1_CLK_CTRL, | ||
1298 | + .rsvd = 0xfffffffc, | ||
1299 | + },{ .name = "SD1_CTRL_REG", .addr = A_SD1_CTRL_REG, | ||
1300 | + .rsvd = 0xfffffffe, | ||
1301 | + .pre_write = sd1_ctrl_reg_prew, | ||
1302 | + },{ .name = "SD1_CONFIG_REG1", .addr = A_SD1_CONFIG_REG1, | ||
1303 | + .reset = 0x3250, | ||
1304 | + .rsvd = 0xffff8000, | ||
1305 | + },{ .name = "SD1_CONFIG_REG2", .addr = A_SD1_CONFIG_REG2, | ||
1306 | + .reset = 0xffc, | ||
1307 | + .rsvd = 0xffffc000, | ||
1308 | + },{ .name = "SD1_CONFIG_REG3", .addr = A_SD1_CONFIG_REG3, | ||
1309 | + .reset = 0x407, | ||
1310 | + .rsvd = 0xfffff800, | ||
1311 | + },{ .name = "SD1_INITPRESET", .addr = A_SD1_INITPRESET, | ||
1312 | + .reset = 0x100, | ||
1313 | + .rsvd = 0xffffe000, | ||
1314 | + },{ .name = "SD1_DSPPRESET", .addr = A_SD1_DSPPRESET, | ||
1315 | + .reset = 0x4, | ||
1316 | + .rsvd = 0xffffe000, | ||
1317 | + },{ .name = "SD1_HSPDPRESET", .addr = A_SD1_HSPDPRESET, | ||
1318 | + .reset = 0x2, | ||
1319 | + .rsvd = 0xffffe000, | ||
1320 | + },{ .name = "SD1_SDR12PRESET", .addr = A_SD1_SDR12PRESET, | ||
1321 | + .reset = 0x4, | ||
1322 | + .rsvd = 0xffffe000, | ||
1323 | + },{ .name = "SD1_SDR25PRESET", .addr = A_SD1_SDR25PRESET, | ||
1324 | + .reset = 0x2, | ||
1325 | + .rsvd = 0xffffe000, | ||
1326 | + },{ .name = "SD1_SDR50PRSET", .addr = A_SD1_SDR50PRSET, | ||
1327 | + .reset = 0x1, | ||
1328 | + .rsvd = 0xffffe000, | ||
1329 | + },{ .name = "SD1_SDR104PRST", .addr = A_SD1_SDR104PRST, | ||
1330 | + .rsvd = 0xffffe000, | ||
1331 | + },{ .name = "SD1_DDR50PRESET", .addr = A_SD1_DDR50PRESET, | ||
1332 | + .reset = 0x2, | ||
1333 | + .rsvd = 0xffffe000, | ||
1334 | + },{ .name = "SD1_MAXCUR1P8", .addr = A_SD1_MAXCUR1P8, | ||
1335 | + .rsvd = 0xffffff00, | ||
1336 | + },{ .name = "SD1_MAXCUR3P0", .addr = A_SD1_MAXCUR3P0, | ||
1337 | + .rsvd = 0xffffff00, | ||
1338 | + },{ .name = "SD1_MAXCUR3P3", .addr = A_SD1_MAXCUR3P3, | ||
1339 | + .rsvd = 0xffffff00, | ||
1340 | + },{ .name = "SD1_DLL_CTRL", .addr = A_SD1_DLL_CTRL, | ||
1341 | + .reset = 0x1, | ||
1342 | + .rsvd = 0xfffffc00, | ||
1343 | + .ro = 0x19, | ||
1344 | + },{ .name = "SD1_CDN_CTRL", .addr = A_SD1_CDN_CTRL, | ||
1345 | + .rsvd = 0xfffffffe, | ||
1346 | + },{ .name = "SD1_DLL_TEST", .addr = A_SD1_DLL_TEST, | ||
1347 | + .rsvd = 0xff000000, | ||
1348 | + },{ .name = "SD1_RX_TUNING_SEL", .addr = A_SD1_RX_TUNING_SEL, | ||
1349 | + .rsvd = 0xfffffe00, | ||
1350 | + .ro = 0x1ff, | ||
1351 | + },{ .name = "SD1_DLL_DIV_MAP0", .addr = A_SD1_DLL_DIV_MAP0, | ||
1352 | + .reset = 0x50505050, | ||
1353 | + },{ .name = "SD1_DLL_DIV_MAP1", .addr = A_SD1_DLL_DIV_MAP1, | ||
1354 | + .reset = 0x50505050, | ||
1355 | + },{ .name = "SD1_IOU_COHERENT_CTRL", .addr = A_SD1_IOU_COHERENT_CTRL, | ||
1356 | + .rsvd = 0xfffffff0, | ||
1357 | + },{ .name = "SD1_IOU_INTERCONNECT_ROUTE", | ||
1358 | + .addr = A_SD1_IOU_INTERCONNECT_ROUTE, | ||
1359 | + .rsvd = 0xfffffffe, | ||
1360 | + },{ .name = "SD1_IOU_RAM", .addr = A_SD1_IOU_RAM, | ||
1361 | + .reset = 0x24, | ||
1362 | + .rsvd = 0xffffff80, | ||
1363 | + },{ .name = "SD1_IOU_INTERCONNECT_QOS", | ||
1364 | + .addr = A_SD1_IOU_INTERCONNECT_QOS, | ||
1365 | + .rsvd = 0xfffffff0, | ||
1366 | + },{ .name = "OSPI_QSPI_IOU_AXI_MUX_SEL", | ||
1367 | + .addr = A_OSPI_QSPI_IOU_AXI_MUX_SEL, | ||
1368 | + .reset = 0x1, | ||
1369 | + .rsvd = 0xfffffffc, | ||
1370 | + .pre_write = ospi_qspi_iou_axi_mux_sel_prew, | ||
1371 | + },{ .name = "QSPI_IOU_COHERENT_CTRL", .addr = A_QSPI_IOU_COHERENT_CTRL, | ||
1372 | + .rsvd = 0xfffffff0, | ||
1373 | + },{ .name = "QSPI_IOU_INTERCONNECT_ROUTE", | ||
1374 | + .addr = A_QSPI_IOU_INTERCONNECT_ROUTE, | ||
1375 | + .rsvd = 0xfffffffe, | ||
1376 | + },{ .name = "QSPI_IOU_RAM", .addr = A_QSPI_IOU_RAM, | ||
1377 | + .reset = 0x1224, | ||
1378 | + .rsvd = 0xffffc000, | ||
1379 | + },{ .name = "QSPI_IOU_INTERCONNECT_QOS", | ||
1380 | + .addr = A_QSPI_IOU_INTERCONNECT_QOS, | ||
1381 | + .rsvd = 0xfffffff0, | ||
1382 | + },{ .name = "OSPI_IOU_COHERENT_CTRL", .addr = A_OSPI_IOU_COHERENT_CTRL, | ||
1383 | + .rsvd = 0xfffffff0, | ||
1384 | + },{ .name = "OSPI_IOU_INTERCONNECT_ROUTE", | ||
1385 | + .addr = A_OSPI_IOU_INTERCONNECT_ROUTE, | ||
1386 | + .rsvd = 0xfffffffe, | ||
1387 | + },{ .name = "OSPI_IOU_RAM", .addr = A_OSPI_IOU_RAM, | ||
1388 | + .reset = 0xa, | ||
1389 | + .rsvd = 0xffffffc0, | ||
1390 | + },{ .name = "OSPI_IOU_INTERCONNECT_QOS", | ||
1391 | + .addr = A_OSPI_IOU_INTERCONNECT_QOS, | ||
1392 | + .rsvd = 0xfffffff0, | ||
1393 | + },{ .name = "OSPI_REFCLK_DLY_CTRL", .addr = A_OSPI_REFCLK_DLY_CTRL, | ||
1394 | + .reset = 0x13, | ||
1395 | + .rsvd = 0xffffffe0, | ||
1396 | + },{ .name = "CUR_PWR_ST", .addr = A_CUR_PWR_ST, | ||
1397 | + .rsvd = 0xfffffffc, | ||
1398 | + .ro = 0x3, | ||
1399 | + },{ .name = "CONNECT_ST", .addr = A_CONNECT_ST, | ||
1400 | + .rsvd = 0xfffffffe, | ||
1401 | + .ro = 0x1, | ||
1402 | + },{ .name = "PW_STATE_REQ", .addr = A_PW_STATE_REQ, | ||
1403 | + .rsvd = 0xfffffffc, | ||
1404 | + },{ .name = "HOST_U2_PORT_DISABLE", .addr = A_HOST_U2_PORT_DISABLE, | ||
1405 | + .rsvd = 0xfffffffe, | ||
1406 | + },{ .name = "DBG_U2PMU", .addr = A_DBG_U2PMU, | ||
1407 | + .ro = 0xffffffff, | ||
1408 | + },{ .name = "DBG_U2PMU_EXT1", .addr = A_DBG_U2PMU_EXT1, | ||
1409 | + .ro = 0xffffffff, | ||
1410 | + },{ .name = "DBG_U2PMU_EXT2", .addr = A_DBG_U2PMU_EXT2, | ||
1411 | + .rsvd = 0xfffffff0, | ||
1412 | + .ro = 0xf, | ||
1413 | + },{ .name = "PME_GEN_U2PMU", .addr = A_PME_GEN_U2PMU, | ||
1414 | + .rsvd = 0xfffffffe, | ||
1415 | + .ro = 0x1, | ||
1416 | + },{ .name = "PWR_CONFIG_USB2", .addr = A_PWR_CONFIG_USB2, | ||
1417 | + .rsvd = 0xc0000000, | ||
1418 | + },{ .name = "PHY_HUB", .addr = A_PHY_HUB, | ||
1419 | + .rsvd = 0xfffffffc, | ||
1420 | + .ro = 0x2, | ||
1421 | + },{ .name = "CTRL", .addr = A_CTRL, | ||
1422 | + },{ .name = "ISR", .addr = A_ISR, | ||
1423 | + .w1c = 0x1, | ||
1424 | + .post_write = isr_postw, | ||
1425 | + },{ .name = "IMR", .addr = A_IMR, | ||
1426 | + .reset = 0x1, | ||
1427 | + .ro = 0x1, | ||
1428 | + },{ .name = "IER", .addr = A_IER, | ||
1429 | + .pre_write = ier_prew, | ||
1430 | + },{ .name = "IDR", .addr = A_IDR, | ||
1431 | + .pre_write = idr_prew, | ||
1432 | + },{ .name = "ITR", .addr = A_ITR, | ||
1433 | + .pre_write = itr_prew, | ||
1434 | + },{ .name = "PARITY_ISR", .addr = A_PARITY_ISR, | ||
1435 | + .w1c = 0x1fff, | ||
1436 | + .post_write = parity_isr_postw, | ||
1437 | + },{ .name = "PARITY_IMR", .addr = A_PARITY_IMR, | ||
1438 | + .reset = 0x1fff, | ||
1439 | + .ro = 0x1fff, | ||
1440 | + },{ .name = "PARITY_IER", .addr = A_PARITY_IER, | ||
1441 | + .pre_write = parity_ier_prew, | ||
1442 | + },{ .name = "PARITY_IDR", .addr = A_PARITY_IDR, | ||
1443 | + .pre_write = parity_idr_prew, | ||
1444 | + },{ .name = "PARITY_ITR", .addr = A_PARITY_ITR, | ||
1445 | + .pre_write = parity_itr_prew, | ||
1446 | + },{ .name = "WPROT0", .addr = A_WPROT0, | ||
1447 | + .reset = 0x1, | ||
1448 | + } | ||
1449 | +}; | 96 | +}; |
1450 | + | 97 | +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) |
1451 | +static void xlnx_versal_pmc_iou_slcr_reset_init(Object *obj, ResetType type) | 98 | |
1452 | +{ | 99 | static const FPScalar f_scalar_frecps = { |
1453 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(obj); | 100 | gen_helper_recpsf_f16, |
1454 | + unsigned int i; | 101 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1Int f_scalar_fabs = { |
1455 | + | 102 | gen_vfp_abss, |
1456 | + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { | 103 | gen_vfp_absd, |
1457 | + register_reset(&s->regs_info[i]); | 104 | }; |
1458 | + } | 105 | -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs, true) |
1459 | +} | 106 | +static const FPScalar1Int f_scalar_ah_fabs = { |
1460 | + | 107 | + gen_vfp_ah_absh, |
1461 | +static void xlnx_versal_pmc_iou_slcr_reset_hold(Object *obj) | 108 | + gen_vfp_ah_abss, |
1462 | +{ | 109 | + gen_vfp_ah_absd, |
1463 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(obj); | ||
1464 | + | ||
1465 | + parity_imr_update_irq(s); | ||
1466 | + imr_update_irq(s); | ||
1467 | + | ||
1468 | + /* | ||
1469 | + * Setup OSPI_QSPI mux | ||
1470 | + * By default axi slave interface is enabled for ospi-dma | ||
1471 | + */ | ||
1472 | + qemu_set_irq(s->ospi_mux_sel, 0); | ||
1473 | + qemu_set_irq(s->qspi_ospi_mux_sel, 1); | ||
1474 | +} | ||
1475 | + | ||
1476 | +static const MemoryRegionOps pmc_iou_slcr_ops = { | ||
1477 | + .read = register_read_memory, | ||
1478 | + .write = register_write_memory, | ||
1479 | + .endianness = DEVICE_LITTLE_ENDIAN, | ||
1480 | + .valid = { | ||
1481 | + .min_access_size = 4, | ||
1482 | + .max_access_size = 4, | ||
1483 | + }, | ||
1484 | +}; | 110 | +}; |
1485 | + | 111 | +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) |
1486 | +static void xlnx_versal_pmc_iou_slcr_realize(DeviceState *dev, Error **errp) | 112 | |
1487 | +{ | 113 | static const FPScalar1Int f_scalar_fneg = { |
1488 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(dev); | 114 | gen_vfp_negh, |
1489 | + | ||
1490 | + qdev_init_gpio_out_named(dev, s->sd_emmc_sel, "sd-emmc-sel", 2); | ||
1491 | + qdev_init_gpio_out_named(dev, &s->qspi_ospi_mux_sel, | ||
1492 | + "qspi-ospi-mux-sel", 1); | ||
1493 | + qdev_init_gpio_out_named(dev, &s->ospi_mux_sel, "ospi-mux-sel", 1); | ||
1494 | +} | ||
1495 | + | ||
1496 | +static void xlnx_versal_pmc_iou_slcr_init(Object *obj) | ||
1497 | +{ | ||
1498 | + XlnxVersalPmcIouSlcr *s = XILINX_VERSAL_PMC_IOU_SLCR(obj); | ||
1499 | + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); | ||
1500 | + RegisterInfoArray *reg_array; | ||
1501 | + | ||
1502 | + memory_region_init(&s->iomem, obj, TYPE_XILINX_VERSAL_PMC_IOU_SLCR, | ||
1503 | + XILINX_VERSAL_PMC_IOU_SLCR_R_MAX * 4); | ||
1504 | + reg_array = | ||
1505 | + register_init_block32(DEVICE(obj), pmc_iou_slcr_regs_info, | ||
1506 | + ARRAY_SIZE(pmc_iou_slcr_regs_info), | ||
1507 | + s->regs_info, s->regs, | ||
1508 | + &pmc_iou_slcr_ops, | ||
1509 | + XILINX_VERSAL_PMC_IOU_SLCR_ERR_DEBUG, | ||
1510 | + XILINX_VERSAL_PMC_IOU_SLCR_R_MAX * 4); | ||
1511 | + memory_region_add_subregion(&s->iomem, | ||
1512 | + 0x0, | ||
1513 | + &reg_array->mem); | |
1514 | + sysbus_init_mmio(sbd, &s->iomem); | ||
1515 | + sysbus_init_irq(sbd, &s->irq_parity_imr); | ||
1516 | + sysbus_init_irq(sbd, &s->irq_imr); | ||
1517 | +} | ||
1518 | + | ||
1519 | +static const VMStateDescription vmstate_pmc_iou_slcr = { | ||
1520 | + .name = TYPE_XILINX_VERSAL_PMC_IOU_SLCR, | ||
1521 | + .version_id = 1, | ||
1522 | + .minimum_version_id = 1, | ||
1523 | + .fields = (VMStateField[]) { | ||
1524 | + VMSTATE_UINT32_ARRAY(regs, XlnxVersalPmcIouSlcr, | ||
1525 | + XILINX_VERSAL_PMC_IOU_SLCR_R_MAX), | ||
1526 | + VMSTATE_END_OF_LIST(), | ||
1527 | + } | ||
1528 | +}; | ||
1529 | + | ||
1530 | +static void xlnx_versal_pmc_iou_slcr_class_init(ObjectClass *klass, void *data) | ||
1531 | +{ | ||
1532 | + DeviceClass *dc = DEVICE_CLASS(klass); | ||
1533 | + ResettableClass *rc = RESETTABLE_CLASS(klass); | ||
1534 | + | ||
1535 | + dc->realize = xlnx_versal_pmc_iou_slcr_realize; | ||
1536 | + dc->vmsd = &vmstate_pmc_iou_slcr; | ||
1537 | + rc->phases.enter = xlnx_versal_pmc_iou_slcr_reset_init; | ||
1538 | + rc->phases.hold = xlnx_versal_pmc_iou_slcr_reset_hold; | ||
1539 | +} | ||
1540 | + | ||
1541 | +static const TypeInfo xlnx_versal_pmc_iou_slcr_info = { | ||
1542 | + .name = TYPE_XILINX_VERSAL_PMC_IOU_SLCR, | ||
1543 | + .parent = TYPE_SYS_BUS_DEVICE, | ||
1544 | + .instance_size = sizeof(XlnxVersalPmcIouSlcr), | ||
1545 | + .class_init = xlnx_versal_pmc_iou_slcr_class_init, | ||
1546 | + .instance_init = xlnx_versal_pmc_iou_slcr_init, | ||
1547 | +}; | ||
1548 | + | ||
1549 | +static void xlnx_versal_pmc_iou_slcr_register_types(void) | ||
1550 | +{ | ||
1551 | + type_register_static(&xlnx_versal_pmc_iou_slcr_info); | ||
1552 | +} | ||
1553 | + | ||
1554 | +type_init(xlnx_versal_pmc_iou_slcr_register_types) | ||
1555 | diff --git a/hw/misc/meson.build b/hw/misc/meson.build | ||
1556 | index XXXXXXX..XXXXXXX 100644 | ||
1557 | --- a/hw/misc/meson.build | ||
1558 | +++ b/hw/misc/meson.build | ||
1559 | @@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files( | ||
1560 | )) | ||
1561 | softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c')) | ||
1562 | softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c')) | ||
1563 | -softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-xramc.c')) | ||
1564 | +softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files( | ||
1565 | + 'xlnx-versal-xramc.c', | ||
1566 | + 'xlnx-versal-pmc-iou-slcr.c', | ||
1567 | +)) | ||
1568 | softmmu_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c')) | ||
1569 | softmmu_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c')) | ||
1570 | softmmu_ss.add(when: 'CONFIG_STM32F4XX_EXTI', if_true: files('stm32f4xx_exti.c')) | ||
1571 | -- | 115 | -- |
1572 | 2.25.1 | 116 | 2.34.1 |
1573 | |||
1574 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Split the handling of vector FABD so that it calls a different set | ||
2 | of helpers when FPCR.AH is 1, which implement the "no negation of | ||
3 | the sign of a NaN" semantics. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper.h | 4 ++++ | ||
9 | target/arm/tcg/translate-a64.c | 7 ++++++- | ||
10 | target/arm/tcg/vec_helper.c | 23 +++++++++++++++++++++++ | ||
11 | 3 files changed, 33 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/helper.h | ||
16 | +++ b/target/arm/helper.h | ||
17 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
18 | DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
19 | DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
20 | |||
21 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
23 | +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
24 | + | ||
25 | DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
26 | DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
27 | DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { | ||
33 | gen_helper_gvec_fabd_s, | ||
34 | gen_helper_gvec_fabd_d, | ||
35 | }; | ||
36 | -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) | ||
37 | +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { | ||
38 | + gen_helper_gvec_ah_fabd_h, | ||
39 | + gen_helper_gvec_ah_fabd_s, | ||
40 | + gen_helper_gvec_ah_fabd_d, | ||
41 | +}; | ||
42 | +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) | ||
43 | |||
44 | static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
45 | gen_helper_gvec_recps_h, | ||
46 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/vec_helper.c | ||
49 | +++ b/target/arm/tcg/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) | ||
51 | return float64_abs(float64_sub(op1, op2, stat)); | ||
52 | } | ||
53 | |||
54 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
55 | +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) | ||
56 | +{ | ||
57 | + float16 r = float16_sub(op1, op2, stat); | ||
58 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
59 | +} | ||
60 | + | ||
61 | +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) | ||
62 | +{ | ||
63 | + float32 r = float32_sub(op1, op2, stat); | ||
64 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
65 | +} | ||
66 | + | ||
67 | +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) | ||
68 | +{ | ||
69 | + float64 r = float64_sub(op1, op2, stat); | ||
70 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
71 | +} | ||
72 | + | ||
73 | /* | ||
74 | * Reciprocal step. These are the AArch32 version which uses a | ||
75 | * non-fused multiply-and-subtract. | ||
76 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_fabd_h, float16_abd, float16) | ||
77 | DO_3OP(gvec_fabd_s, float32_abd, float32) | ||
78 | DO_3OP(gvec_fabd_d, float64_abd, float64) | ||
79 | |||
80 | +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) | ||
81 | +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) | ||
82 | +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) | ||
83 | + | ||
84 | DO_3OP(gvec_fceq_h, float16_ceq, float16) | ||
85 | DO_3OP(gvec_fceq_s, float32_ceq, float32) | ||
86 | DO_3OP(gvec_fceq_d, float64_ceq, float64) | ||
87 | -- | ||
88 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FNEG honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
32 | DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) | ||
33 | DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) | ||
34 | |||
35 | +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
36 | +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
37 | +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) | ||
40 | +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) | ||
41 | +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) | ||
42 | + | ||
43 | #define DO_NOT(N) (~N) | ||
44 | |||
45 | DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
51 | NULL, gen_helper_sve_fneg_h, | ||
52 | gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fneg_h, | ||
57 | + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const sxtb_fns[4] = { | ||
63 | NULL, gen_helper_sve_sxtb_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make SVE FABS honour the FPCR.AH "don't negate the sign of a NaN" | ||
2 | semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 4 ++++ | ||
8 | target/arm/tcg/sve_helper.c | 8 ++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
10 | 3 files changed, 18 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
17 | DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
18 | DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
21 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | + | ||
24 | DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
25 | DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
26 | DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
27 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/sve_helper.c | ||
30 | +++ b/target/arm/tcg/sve_helper.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) | ||
32 | DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) | ||
33 | DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) | ||
34 | |||
35 | +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
36 | +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
37 | +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) | ||
38 | + | ||
39 | +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) | ||
40 | +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) | ||
41 | +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) | ||
42 | + | ||
43 | #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) | ||
44 | |||
45 | DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) | ||
46 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/tcg/translate-sve.c | ||
49 | +++ b/target/arm/tcg/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const fabs_fns[4] = { | ||
51 | NULL, gen_helper_sve_fabs_h, | ||
52 | gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, | ||
53 | }; | ||
54 | -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) | ||
55 | +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { | ||
56 | + NULL, gen_helper_sve_ah_fabs_h, | ||
57 | + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, | ||
58 | +}; | ||
59 | +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, | ||
60 | + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) | ||
61 | |||
62 | static gen_helper_gvec_3 * const fneg_fns[4] = { | ||
63 | NULL, gen_helper_sve_fneg_h, | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Make the SVE FABD insn honour the FPCR.AH "don't negate the sign | ||
2 | of a NaN" semantics. | ||
1 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/arm/tcg/helper-sve.h | 7 +++++++ | ||
8 | target/arm/tcg/sve_helper.c | 22 ++++++++++++++++++++++ | ||
9 | target/arm/tcg/translate-sve.c | 2 +- | ||
10 | 3 files changed, 30 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/arm/tcg/helper-sve.h | ||
15 | +++ b/target/arm/tcg/helper-sve.h | ||
16 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, | ||
17 | DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, | ||
18 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
19 | |||
20 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, | ||
21 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
26 | + | ||
27 | DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, | ||
28 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, | ||
30 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/tcg/sve_helper.c | ||
33 | +++ b/target/arm/tcg/sve_helper.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) | ||
35 | return float64_abs(float64_sub(a, b, s)); | ||
36 | } | ||
37 | |||
38 | +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ | ||
39 | +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) | ||
40 | +{ | ||
41 | + float16 r = float16_sub(op1, op2, stat); | ||
42 | + return float16_is_any_nan(r) ? r : float16_abs(r); | ||
43 | +} | ||
44 | + | ||
45 | +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) | ||
46 | +{ | ||
47 | + float32 r = float32_sub(op1, op2, stat); | ||
48 | + return float32_is_any_nan(r) ? r : float32_abs(r); | ||
49 | +} | ||
50 | + | ||
51 | +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) | ||
52 | +{ | ||
53 | + float64 r = float64_sub(op1, op2, stat); | ||
54 | + return float64_is_any_nan(r) ? r : float64_abs(r); | ||
55 | +} | ||
56 | + | ||
57 | DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) | ||
58 | DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) | ||
59 | DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) | ||
60 | +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) | ||
61 | +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) | ||
62 | +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) | ||
63 | |||
64 | static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) | ||
65 | { | ||
66 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/tcg/translate-sve.c | ||
69 | +++ b/target/arm/tcg/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) | ||
71 | DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) | ||
72 | DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) | ||
73 | DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) | ||
74 | -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) | ||
75 | +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) | ||
76 | DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) | ||
77 | DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) | ||
78 | DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) | ||
79 | -- | ||
80 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this in the same way we did for | ||
3 | the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field | ||
4 | passed to the helper and using that to decide whether to negate the | ||
5 | values. | ||
1 | 6 | ||
7 | The construction of neg_imag and neg_real was done to make it easy | ||
8 | to apply both in parallel with two simple logical operations. This | ||
9 | changed with FPCR.AH, which is more complex than that. Switch to | ||
10 | an approach that follows the pseudocode more closely, by extracting | ||
11 | the 'rot=1' parameter from the SIMD data field and changing the | ||
12 | sign of the appropriate input value. | ||
13 | |||
14 | Note that there was a naming issue with neg_imag and neg_real. | ||
15 | They were named backward, with neg_imag being non-zero for rot=1, | ||
16 | and vice versa. This was combined with reversed usage within the | ||
17 | loop, so that the negation in the end turned out correct. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | --- | ||
22 | target/arm/tcg/vec_internal.h | 17 ++++++++++++++ | ||
23 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++---------- | ||
24 | target/arm/tcg/translate-sve.c | 2 +- | ||
25 | 3 files changed, 48 insertions(+), 13 deletions(-) | ||
26 | |||
27 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/tcg/vec_internal.h | ||
30 | +++ b/target/arm/tcg/vec_internal.h | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | #ifndef TARGET_ARM_VEC_INTERNAL_H | ||
33 | #define TARGET_ARM_VEC_INTERNAL_H | ||
34 | |||
35 | +#include "fpu/softfloat.h" | ||
36 | + | ||
37 | /* | ||
38 | * Note that vector data is stored in host-endian 64-bit chunks, | ||
39 | * so addressing units smaller than that needs a host-endian fixup. | ||
40 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
41 | */ | ||
42 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
43 | |||
44 | +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
45 | +{ | ||
46 | + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
47 | +} | ||
48 | + | ||
49 | +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) | ||
50 | +{ | ||
51 | + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); | ||
52 | +} | ||
53 | + | ||
54 | +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) | ||
55 | +{ | ||
56 | + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); | ||
57 | +} | ||
58 | + | ||
59 | #endif /* TARGET_ARM_VEC_INTERNAL_H */ | ||
60 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/arm/tcg/sve_helper.c | ||
63 | +++ b/target/arm/tcg/sve_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
65 | { | ||
66 | intptr_t j, i = simd_oprsz(desc); | ||
67 | uint64_t *g = vg; | ||
68 | - float16 neg_imag = float16_set_sign(0, simd_data(desc)); | ||
69 | - float16 neg_real = float16_chs(neg_imag); | ||
70 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
72 | |||
73 | do { | ||
74 | uint64_t pg = g[(i - 1) >> 6]; | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, | ||
76 | i -= 2 * sizeof(float16); | ||
77 | |||
78 | e0 = *(float16 *)(vn + H1_2(i)); | ||
79 | - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; | ||
80 | + e1 = *(float16 *)(vm + H1_2(j)); | ||
81 | e2 = *(float16 *)(vn + H1_2(j)); | ||
82 | - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; | ||
83 | + e3 = *(float16 *)(vm + H1_2(i)); | ||
84 | + | ||
85 | + if (rot) { | ||
86 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
87 | + } else { | ||
88 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
89 | + } | ||
90 | |||
91 | if (likely((pg >> (i & 63)) & 1)) { | ||
92 | *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); | ||
93 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
94 | { | ||
95 | intptr_t j, i = simd_oprsz(desc); | ||
96 | uint64_t *g = vg; | ||
97 | - float32 neg_imag = float32_set_sign(0, simd_data(desc)); | ||
98 | - float32 neg_real = float32_chs(neg_imag); | ||
99 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
100 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
101 | |||
102 | do { | ||
103 | uint64_t pg = g[(i - 1) >> 6]; | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, | ||
105 | i -= 2 * sizeof(float32); | ||
106 | |||
107 | e0 = *(float32 *)(vn + H1_2(i)); | ||
108 | - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; | ||
109 | + e1 = *(float32 *)(vm + H1_2(j)); | ||
110 | e2 = *(float32 *)(vn + H1_2(j)); | ||
111 | - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; | ||
112 | + e3 = *(float32 *)(vm + H1_2(i)); | ||
113 | + | ||
114 | + if (rot) { | ||
115 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
116 | + } else { | ||
117 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
118 | + } | ||
119 | |||
120 | if (likely((pg >> (i & 63)) & 1)) { | ||
121 | *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
123 | { | ||
124 | intptr_t j, i = simd_oprsz(desc); | ||
125 | uint64_t *g = vg; | ||
126 | - float64 neg_imag = float64_set_sign(0, simd_data(desc)); | ||
127 | - float64 neg_real = float64_chs(neg_imag); | ||
128 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
129 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
130 | |||
131 | do { | ||
132 | uint64_t pg = g[(i - 1) >> 6]; | ||
133 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, | ||
134 | i -= 2 * sizeof(float64); | ||
135 | |||
136 | e0 = *(float64 *)(vn + H1_2(i)); | ||
137 | - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; | ||
138 | + e1 = *(float64 *)(vm + H1_2(j)); | ||
139 | e2 = *(float64 *)(vn + H1_2(j)); | ||
140 | - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; | ||
141 | + e3 = *(float64 *)(vm + H1_2(i)); | ||
142 | + | ||
143 | + if (rot) { | ||
144 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
145 | + } else { | ||
146 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
147 | + } | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); | ||
151 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/arm/tcg/translate-sve.c | ||
154 | +++ b/target/arm/tcg/translate-sve.c | ||
155 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { | ||
156 | gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, | ||
157 | }; | ||
158 | TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
159 | - a->rd, a->rn, a->rm, a->pg, a->rot, | ||
160 | + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
161 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
162 | |||
163 | #define DO_FMLA(NAME, name) \ | ||
164 | -- | ||
165 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The negation steps in FCADD must honour FPCR.AH's "don't change the | ||
2 | sign of a NaN" semantics. Implement this by encoding FPCR.AH into | ||
3 | the SIMD data field passed to the helper and using that to decide | ||
4 | whether to negate the values. | ||
1 | 5 | ||
6 | The construction of neg_imag and neg_real was done to make it easy | ||
7 | to apply both in parallel with two simple logical operations. This | ||
8 | changed with FPCR.AH, which is more complex than that. Switch to | ||
9 | an approach closer to the pseudocode, where we extract the rot | ||
10 | parameter from the SIMD data word and negate the appropriate | ||
11 | input value. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | target/arm/tcg/translate-a64.c | 10 +++++-- | ||
17 | target/arm/tcg/vec_helper.c | 54 +++++++++++++++++++--------------- | ||
18 | 2 files changed, 38 insertions(+), 26 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/translate-a64.c | ||
23 | +++ b/target/arm/tcg/translate-a64.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { | ||
25 | gen_helper_gvec_fcadds, | ||
26 | gen_helper_gvec_fcaddd, | ||
27 | }; | ||
28 | -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) | ||
29 | -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) | ||
30 | +/* | ||
31 | + * Encode FPCR.AH into the data so the helper knows whether the | ||
32 | + * negations it does should avoid flipping the sign bit on a NaN | ||
33 | + */ | ||
34 | +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), | ||
35 | + f_vector_fcadd) | ||
36 | +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), | ||
37 | + f_vector_fcadd) | ||
38 | |||
39 | static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
40 | { | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, | ||
46 | float16 *d = vd; | ||
47 | float16 *n = vn; | ||
48 | float16 *m = vm; | ||
49 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
50 | - uint32_t neg_imag = neg_real ^ 1; | ||
51 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
52 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
53 | uintptr_t i; | ||
54 | |||
55 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
56 | - neg_real <<= 15; | ||
57 | - neg_imag <<= 15; | ||
58 | - | ||
59 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
60 | float16 e0 = n[H2(i)]; | ||
61 | - float16 e1 = m[H2(i + 1)] ^ neg_imag; | ||
62 | + float16 e1 = m[H2(i + 1)]; | ||
63 | float16 e2 = n[H2(i + 1)]; | ||
64 | - float16 e3 = m[H2(i)] ^ neg_real; | ||
65 | + float16 e3 = m[H2(i)]; | ||
66 | + | ||
67 | + if (rot) { | ||
68 | + e3 = float16_maybe_ah_chs(e3, fpcr_ah); | ||
69 | + } else { | ||
70 | + e1 = float16_maybe_ah_chs(e1, fpcr_ah); | ||
71 | + } | ||
72 | |||
73 | d[H2(i)] = float16_add(e0, e1, fpst); | ||
74 | d[H2(i + 1)] = float16_add(e2, e3, fpst); | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, | ||
76 | float32 *d = vd; | ||
77 | float32 *n = vn; | ||
78 | float32 *m = vm; | ||
79 | - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
80 | - uint32_t neg_imag = neg_real ^ 1; | ||
81 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | uintptr_t i; | ||
84 | |||
85 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
86 | - neg_real <<= 31; | ||
87 | - neg_imag <<= 31; | ||
88 | - | ||
89 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
90 | float32 e0 = n[H4(i)]; | ||
91 | - float32 e1 = m[H4(i + 1)] ^ neg_imag; | ||
92 | + float32 e1 = m[H4(i + 1)]; | ||
93 | float32 e2 = n[H4(i + 1)]; | ||
94 | - float32 e3 = m[H4(i)] ^ neg_real; | ||
95 | + float32 e3 = m[H4(i)]; | ||
96 | + | ||
97 | + if (rot) { | ||
98 | + e3 = float32_maybe_ah_chs(e3, fpcr_ah); | ||
99 | + } else { | ||
100 | + e1 = float32_maybe_ah_chs(e1, fpcr_ah); | ||
101 | + } | ||
102 | |||
103 | d[H4(i)] = float32_add(e0, e1, fpst); | ||
104 | d[H4(i + 1)] = float32_add(e2, e3, fpst); | ||
105 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, | ||
106 | float64 *d = vd; | ||
107 | float64 *n = vn; | ||
108 | float64 *m = vm; | ||
109 | - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); | ||
110 | - uint64_t neg_imag = neg_real ^ 1; | ||
111 | + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
112 | + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); | ||
113 | uintptr_t i; | ||
114 | |||
115 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
116 | - neg_real <<= 63; | ||
117 | - neg_imag <<= 63; | ||
118 | - | ||
119 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
120 | float64 e0 = n[i]; | ||
121 | - float64 e1 = m[i + 1] ^ neg_imag; | ||
122 | + float64 e1 = m[i + 1]; | ||
123 | float64 e2 = n[i + 1]; | ||
124 | - float64 e3 = m[i] ^ neg_real; | ||
125 | + float64 e3 = m[i]; | ||
126 | + | ||
127 | + if (rot) { | ||
128 | + e3 = float64_maybe_ah_chs(e3, fpcr_ah); | ||
129 | + } else { | ||
130 | + e1 = float64_maybe_ah_chs(e1, fpcr_ah); | ||
131 | + } | ||
132 | |||
133 | d[i] = float64_add(e0, e1, fpst); | ||
134 | d[i + 1] = float64_add(e2, e3, fpst); | ||
135 | -- | ||
136 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH semantics that we do not change the sign of an | ||
2 | input NaN in the FRECPS and FRSQRTS scalar insns, by providing | ||
3 | new helper functions that do the CHS part of the operation | ||
4 | differently. | ||
1 | 5 | ||
6 | Since the extra helper functions would be very repetitive if written | ||
7 | out longhand, we condense them and the existing non-AH helpers into | ||
8 | being emitted via macros. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/helper-a64.h | 6 ++ | ||
14 | target/arm/tcg/vec_internal.h | 18 ++++++ | ||
15 | target/arm/tcg/helper-a64.c | 115 ++++++++++++--------------------- | ||
16 | target/arm/tcg/translate-a64.c | 25 +++++-- | ||
17 | 4 files changed, 83 insertions(+), 81 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/tcg/helper-a64.h | ||
22 | +++ b/target/arm/tcg/helper-a64.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) | ||
24 | DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
25 | DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
26 | DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
27 | +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
28 | +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
29 | +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
30 | DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
31 | DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
32 | DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
33 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) | ||
34 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) | ||
35 | +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) | ||
36 | DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) | ||
37 | DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | ||
38 | DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) | ||
39 | diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/vec_internal.h | ||
42 | +++ b/target/arm/tcg/vec_internal.h | ||
43 | @@ -XXX,XX +XXX,XX @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, | ||
44 | */ | ||
45 | bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); | ||
46 | |||
47 | +/* | ||
48 | + * Negate as for FPCR.AH=1 -- do not negate NaNs. | ||
49 | + */ | ||
50 | +static inline float16 float16_ah_chs(float16 a) | ||
51 | +{ | ||
52 | + return float16_is_any_nan(a) ? a : float16_chs(a); | ||
53 | +} | ||
54 | + | ||
55 | +static inline float32 float32_ah_chs(float32 a) | ||
56 | +{ | ||
57 | + return float32_is_any_nan(a) ? a : float32_chs(a); | ||
58 | +} | ||
59 | + | ||
60 | +static inline float64 float64_ah_chs(float64 a) | ||
61 | +{ | ||
62 | + return float64_is_any_nan(a) ? a : float64_chs(a); | ||
63 | +} | ||
64 | + | ||
65 | static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) | ||
66 | { | ||
67 | return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); | ||
68 | diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/arm/tcg/helper-a64.c | ||
71 | +++ b/target/arm/tcg/helper-a64.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #ifdef CONFIG_USER_ONLY | ||
74 | #include "user/page-protection.h" | ||
75 | #endif | ||
76 | +#include "vec_internal.h" | ||
77 | |||
78 | /* C2.4.7 Multiply and divide */ | ||
79 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ | ||
80 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) | ||
81 | return -float64_lt(b, a, fpst); | ||
82 | } | ||
83 | |||
84 | -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
85 | +/* | ||
86 | + * Reciprocal step and sqrt step. Note that unlike the A32/T32 | ||
87 | * versions, these do a fully fused multiply-add or | ||
88 | * multiply-add-and-halve. | ||
89 | + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. | ||
90 | */ | ||
91 | - | ||
92 | -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
93 | -{ | ||
94 | - a = float16_squash_input_denormal(a, fpst); | ||
95 | - b = float16_squash_input_denormal(b, fpst); | ||
96 | - | ||
97 | - a = float16_chs(a); | ||
98 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
99 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
100 | - return float16_two; | ||
101 | +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
102 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
103 | + { \ | ||
104 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
105 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
106 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
107 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
108 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
109 | + return FLOATTYPE ## _two; \ | ||
110 | + } \ | ||
111 | + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ | ||
112 | } | ||
113 | - return float16_muladd(a, b, float16_two, 0, fpst); | ||
114 | -} | ||
115 | |||
116 | -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) | ||
117 | -{ | ||
118 | - a = float32_squash_input_denormal(a, fpst); | ||
119 | - b = float32_squash_input_denormal(b, fpst); | ||
120 | +DO_RECPS(recpsf_f16, uint32_t, float16, chs) | ||
121 | +DO_RECPS(recpsf_f32, float32, float32, chs) | ||
122 | +DO_RECPS(recpsf_f64, float64, float64, chs) | ||
123 | +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) | ||
124 | +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) | ||
125 | +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) | ||
126 | |||
127 | - a = float32_chs(a); | ||
128 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
129 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
130 | - return float32_two; | ||
131 | - } | ||
132 | - return float32_muladd(a, b, float32_two, 0, fpst); | ||
133 | -} | ||
134 | +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ | ||
135 | + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ | ||
136 | + { \ | ||
137 | + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ | ||
138 | + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ | ||
139 | + a = FLOATTYPE ## _ ## CHSFN(a); \ | ||
140 | + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ | ||
141 | + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ | ||
142 | + return FLOATTYPE ## _one_point_five; \ | ||
143 | + } \ | ||
144 | + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ | ||
145 | + -1, 0, fpst); \ | ||
146 | + } \ | ||
147 | |||
148 | -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) | ||
149 | -{ | ||
150 | - a = float64_squash_input_denormal(a, fpst); | ||
151 | - b = float64_squash_input_denormal(b, fpst); | ||
152 | - | ||
153 | - a = float64_chs(a); | ||
154 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
155 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
156 | - return float64_two; | ||
157 | - } | ||
158 | - return float64_muladd(a, b, float64_two, 0, fpst); | ||
159 | -} | ||
160 | - | ||
161 | -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) | ||
162 | -{ | ||
163 | - a = float16_squash_input_denormal(a, fpst); | ||
164 | - b = float16_squash_input_denormal(b, fpst); | ||
165 | - | ||
166 | - a = float16_chs(a); | ||
167 | - if ((float16_is_infinity(a) && float16_is_zero(b)) || | ||
168 | - (float16_is_infinity(b) && float16_is_zero(a))) { | ||
169 | - return float16_one_point_five; | ||
170 | - } | ||
171 | - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); | ||
172 | -} | ||
173 | - | ||
174 | -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) | ||
175 | -{ | ||
176 | - a = float32_squash_input_denormal(a, fpst); | ||
177 | - b = float32_squash_input_denormal(b, fpst); | ||
178 | - | ||
179 | - a = float32_chs(a); | ||
180 | - if ((float32_is_infinity(a) && float32_is_zero(b)) || | ||
181 | - (float32_is_infinity(b) && float32_is_zero(a))) { | ||
182 | - return float32_one_point_five; | ||
183 | - } | ||
184 | - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); | ||
185 | -} | ||
186 | - | ||
187 | -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) | ||
188 | -{ | ||
189 | - a = float64_squash_input_denormal(a, fpst); | ||
190 | - b = float64_squash_input_denormal(b, fpst); | ||
191 | - | ||
192 | - a = float64_chs(a); | ||
193 | - if ((float64_is_infinity(a) && float64_is_zero(b)) || | ||
194 | - (float64_is_infinity(b) && float64_is_zero(a))) { | ||
195 | - return float64_one_point_five; | ||
196 | - } | ||
197 | - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); | ||
198 | -} | ||
199 | +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) | ||
200 | +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) | ||
201 | +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) | ||
202 | +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) | ||
203 | +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) | ||
204 | +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) | ||
205 | |||
206 | /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ | ||
207 | uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) | ||
208 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/target/arm/tcg/translate-a64.c | ||
211 | +++ b/target/arm/tcg/translate-a64.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
213 | FPST_A64_F16 : FPST_A64); | ||
214 | } | ||
215 | |||
216 | -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, | ||
217 | - int mergereg) | ||
218 | +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, | ||
219 | + const FPScalar *fnormal, const FPScalar *fah, | ||
220 | + int mergereg) | ||
221 | { | ||
222 | - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, | ||
223 | - select_ah_fpst(s, a->esz)); | ||
224 | + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, | ||
225 | + mergereg, select_ah_fpst(s, a->esz)); | ||
226 | } | ||
227 | |||
228 | /* Some insns need to call different helpers when FPCR.AH == 1 */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static const FPScalar f_scalar_frecps = { | ||
230 | gen_helper_recpsf_f32, | ||
231 | gen_helper_recpsf_f64, | ||
232 | }; | ||
233 | -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) | ||
234 | +static const FPScalar f_scalar_ah_frecps = { | ||
235 | + gen_helper_recpsf_ah_f16, | ||
236 | + gen_helper_recpsf_ah_f32, | ||
237 | + gen_helper_recpsf_ah_f64, | ||
238 | +}; | ||
239 | +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, | ||
240 | + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) | ||
241 | |||
242 | static const FPScalar f_scalar_frsqrts = { | ||
243 | gen_helper_rsqrtsf_f16, | ||
244 | gen_helper_rsqrtsf_f32, | ||
245 | gen_helper_rsqrtsf_f64, | ||
246 | }; | ||
247 | -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) | ||
248 | +static const FPScalar f_scalar_ah_frsqrts = { | ||
249 | + gen_helper_rsqrtsf_ah_f16, | ||
250 | + gen_helper_rsqrtsf_ah_f32, | ||
251 | + gen_helper_rsqrtsf_ah_f64, | ||
252 | +}; | ||
253 | +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, | ||
254 | + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) | ||
255 | |||
256 | static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, | ||
257 | const FPScalar *f, bool swap) | ||
258 | -- | ||
259 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics | ||
2 | in the vector versions of FRECPS and FRSQRTS, by implementing | ||
3 | new vector wrappers that call the _ah_ scalar helpers. | ||
1 | 4 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/arm/tcg/helper-sve.h | 14 ++++++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 21 ++++++++++++++++----- | ||
10 | target/arm/tcg/translate-sve.c | 7 ++++++- | ||
11 | target/arm/tcg/vec_helper.c | 8 ++++++++ | ||
12 | 4 files changed, 44 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/tcg/helper-sve.h | ||
17 | +++ b/target/arm/tcg/helper-sve.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, | ||
19 | DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, | ||
20 | void, ptr, ptr, ptr, fpst, i32) | ||
21 | |||
22 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, fpst, i32) | ||
24 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, | ||
25 | + void, ptr, ptr, ptr, fpst, i32) | ||
26 | +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, | ||
27 | + void, ptr, ptr, ptr, fpst, i32) | ||
28 | + | ||
29 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, fpst, i32) | ||
31 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, | ||
32 | + void, ptr, ptr, ptr, fpst, i32) | ||
33 | +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, | ||
34 | + void, ptr, ptr, ptr, fpst, i32) | ||
35 | + | ||
36 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, | ||
37 | void, ptr, ptr, ptr, fpst, i32) | ||
38 | DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/tcg/translate-a64.c | ||
42 | +++ b/target/arm/tcg/translate-a64.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
44 | return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); | ||
45 | } | ||
46 | |||
47 | -static bool do_fp3_vector_ah(DisasContext *s, arg_qrrr_e *a, int data, | ||
48 | - gen_helper_gvec_3_ptr * const f[3]) | ||
49 | +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, | ||
50 | + gen_helper_gvec_3_ptr * const fnormal[3], | ||
51 | + gen_helper_gvec_3_ptr * const fah[3]) | ||
52 | { | ||
53 | - return do_fp3_vector_with_fpsttype(s, a, data, f, | ||
54 | + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, | ||
55 | select_ah_fpst(s, a->esz)); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { | ||
59 | gen_helper_gvec_recps_s, | ||
60 | gen_helper_gvec_recps_d, | ||
61 | }; | ||
62 | -TRANS(FRECPS_v, do_fp3_vector_ah, a, 0, f_vector_frecps) | ||
63 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { | ||
64 | + gen_helper_gvec_ah_recps_h, | ||
65 | + gen_helper_gvec_ah_recps_s, | ||
66 | + gen_helper_gvec_ah_recps_d, | ||
67 | +}; | ||
68 | +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) | ||
69 | |||
70 | static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { | ||
71 | gen_helper_gvec_rsqrts_h, | ||
72 | gen_helper_gvec_rsqrts_s, | ||
73 | gen_helper_gvec_rsqrts_d, | ||
74 | }; | ||
75 | -TRANS(FRSQRTS_v, do_fp3_vector_ah, a, 0, f_vector_frsqrts) | ||
76 | +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { | ||
77 | + gen_helper_gvec_ah_rsqrts_h, | ||
78 | + gen_helper_gvec_ah_rsqrts_s, | ||
79 | + gen_helper_gvec_ah_rsqrts_d, | ||
80 | +}; | ||
81 | +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) | ||
82 | |||
83 | static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { | ||
84 | gen_helper_gvec_faddp_h, | ||
85 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/arm/tcg/translate-sve.c | ||
88 | +++ b/target/arm/tcg/translate-sve.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
90 | NULL, gen_helper_gvec_##name##_h, \ | ||
91 | gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ | ||
92 | }; \ | ||
93 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, name##_fns[a->esz], a, 0) | ||
94 | + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ | ||
95 | + NULL, gen_helper_gvec_ah_##name##_h, \ | ||
96 | + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ | ||
97 | + }; \ | ||
98 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ | ||
99 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) | ||
100 | |||
101 | DO_FP3(FADD_zzz, fadd) | ||
102 | DO_FP3(FSUB_zzz, fsub) | ||
103 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/tcg/vec_helper.c | ||
106 | +++ b/target/arm/tcg/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) | ||
108 | DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) | ||
109 | DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) | ||
110 | |||
111 | +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) | ||
112 | +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) | ||
113 | +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) | ||
114 | + | ||
115 | +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) | ||
116 | +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) | ||
117 | +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) | ||
118 | + | ||
119 | DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) | ||
120 | DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) | ||
121 | DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) | ||
122 | -- | ||
123 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS | ||
2 | (indexed). We do this by creating 6 new helpers, which allow us to | ||
3 | do the negation either by XOR (for AH=0) or by muladd flags | ||
4 | (for AH=1). | ||
1 | 5 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | [PMM: Mostly from RTH's patch; error in index order into fns[][] | ||
8 | fixed] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 14 ++++++++++++++ | ||
12 | target/arm/tcg/translate-a64.c | 17 +++++++++++------ | ||
13 | target/arm/tcg/translate-sve.c | 31 +++++++++++++++++-------------- | ||
14 | target/arm/tcg/vec_helper.c | 24 +++++++++++++++--------- | ||
15 | 4 files changed, 57 insertions(+), 29 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/helper.h | ||
20 | +++ b/target/arm/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, | ||
22 | DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, | ||
23 | void, ptr, ptr, ptr, ptr, fpst, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, | ||
26 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
27 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, | ||
28 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
29 | +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, | ||
30 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
31 | + | ||
32 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, | ||
33 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
34 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
36 | +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, fpst, i32) | ||
38 | + | ||
39 | DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, | ||
40 | void, ptr, ptr, ptr, ptr, i32) | ||
41 | DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/translate-a64.c | ||
45 | +++ b/target/arm/tcg/translate-a64.c | ||
46 | @@ -XXX,XX +XXX,XX @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) | ||
47 | |||
48 | static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
49 | { | ||
50 | - static gen_helper_gvec_4_ptr * const fns[3] = { | ||
51 | - gen_helper_gvec_fmla_idx_h, | ||
52 | - gen_helper_gvec_fmla_idx_s, | ||
53 | - gen_helper_gvec_fmla_idx_d, | ||
54 | + static gen_helper_gvec_4_ptr * const fns[3][3] = { | ||
55 | + { gen_helper_gvec_fmla_idx_h, | ||
56 | + gen_helper_gvec_fmla_idx_s, | ||
57 | + gen_helper_gvec_fmla_idx_d }, | ||
58 | + { gen_helper_gvec_fmls_idx_h, | ||
59 | + gen_helper_gvec_fmls_idx_s, | ||
60 | + gen_helper_gvec_fmls_idx_d }, | ||
61 | + { gen_helper_gvec_ah_fmls_idx_h, | ||
62 | + gen_helper_gvec_ah_fmls_idx_s, | ||
63 | + gen_helper_gvec_ah_fmls_idx_d }, | ||
64 | }; | ||
65 | MemOp esz = a->esz; | ||
66 | int check = fp_access_check_vector_hsd(s, a->q, esz); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) | ||
68 | |||
69 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
70 | esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
71 | - (a->idx << 1) | neg, | ||
72 | - fns[esz - 1]); | ||
73 | + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); | ||
74 | return true; | ||
75 | } | ||
76 | |||
77 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/arm/tcg/translate-sve.c | ||
80 | +++ b/target/arm/tcg/translate-sve.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) | ||
82 | *** SVE Floating Point Multiply-Add Indexed Group | ||
83 | */ | ||
84 | |||
85 | -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) | ||
86 | -{ | ||
87 | - static gen_helper_gvec_4_ptr * const fns[4] = { | ||
88 | - NULL, | ||
89 | - gen_helper_gvec_fmla_idx_h, | ||
90 | - gen_helper_gvec_fmla_idx_s, | ||
91 | - gen_helper_gvec_fmla_idx_d, | ||
92 | - }; | ||
93 | - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, | ||
94 | - (a->index << 1) | sub, | ||
95 | - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); | ||
96 | -} | ||
97 | +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { | ||
98 | + NULL, gen_helper_gvec_fmla_idx_h, | ||
99 | + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d | ||
100 | +}; | ||
101 | +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
102 | + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, | ||
103 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
104 | |||
105 | -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) | ||
106 | -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) | ||
107 | +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { | ||
108 | + { NULL, NULL }, | ||
109 | + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, | ||
110 | + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, | ||
111 | + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, | ||
112 | +}; | ||
113 | +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, | ||
114 | + fmls_idx_fns[a->esz][s->fpcr_ah], | ||
115 | + a->rd, a->rn, a->rm, a->ra, a->index, | ||
116 | + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
117 | |||
118 | /* | ||
119 | *** SVE Floating Point Multiply Indexed Group | ||
120 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/arm/tcg/vec_helper.c | ||
123 | +++ b/target/arm/tcg/vec_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) | ||
125 | |||
126 | #undef DO_FMUL_IDX | ||
127 | |||
128 | -#define DO_FMLA_IDX(NAME, TYPE, H) \ | ||
129 | +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ | ||
130 | void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ | ||
131 | float_status *stat, uint32_t desc) \ | ||
132 | { \ | ||
133 | intptr_t i, j, oprsz = simd_oprsz(desc); \ | ||
134 | intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ | ||
135 | - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ | ||
136 | - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ | ||
137 | + intptr_t idx = simd_data(desc); \ | ||
138 | TYPE *d = vd, *n = vn, *m = vm, *a = va; \ | ||
139 | - op1_neg <<= (8 * sizeof(TYPE) - 1); \ | ||
140 | for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ | ||
141 | TYPE mm = m[H(i + idx)]; \ | ||
142 | for (j = 0; j < segment; j++) { \ | ||
143 | - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ | ||
144 | - mm, a[i + j], 0, stat); \ | ||
145 | + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ | ||
146 | + a[i + j], NEGF, stat); \ | ||
147 | } \ | ||
148 | } \ | ||
149 | clear_tail(d, oprsz, simd_maxsz(desc)); \ | ||
150 | } | ||
151 | |||
152 | -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) | ||
153 | -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) | ||
154 | -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) | ||
155 | +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) | ||
156 | +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) | ||
157 | +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) | ||
158 | + | ||
159 | +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) | ||
160 | +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) | ||
161 | +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) | ||
162 | + | ||
163 | +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) | ||
164 | +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) | ||
165 | +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) | ||
166 | |||
167 | #undef DO_FMLA_IDX | ||
168 | |||
169 | -- | ||
170 | 2.34.1 | diff view generated by jsdifflib |
1 | Implement the ITS MOVALL command, which takes all the pending | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics |
---|---|---|---|
2 | interrupts on a source redistributor and makes them not-pending on | 2 | in FMLS (vector), by implementing a new set of helpers for |
3 | that source redistributor and pending on a destination redistributor. | 3 | the AH=1 case. |
4 | 4 | ||
5 | This is a GICv3 ITS command which we forgot to implement. (It is | 5 | The float_muladd_negate_product flag produces the same result |
6 | not used by Linux guests.) | 6 | as negating either of the multiplication operands, assuming |
7 | neither of the operands is a NaN. But since FEAT_AFP does not | ||
8 | negate NaNs, this behaviour is exactly what we need. | ||
7 | 9 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20220122182444.724087-14-peter.maydell@linaro.org | ||
11 | --- | 12 | --- |
12 | hw/intc/gicv3_internal.h | 16 +++++++++++ | 13 | target/arm/helper.h | 4 ++++ |
13 | hw/intc/arm_gicv3_its.c | 55 ++++++++++++++++++++++++++++++++++++++ | 14 | target/arm/tcg/translate-a64.c | 7 ++++++- |
14 | hw/intc/arm_gicv3_redist.c | 54 +++++++++++++++++++++++++++++++++++++ | 15 | target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ |
15 | 3 files changed, 125 insertions(+) | 16 | 3 files changed, 32 insertions(+), 1 deletion(-) |
16 | 17 | ||
17 | diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h | 18 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
18 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/intc/gicv3_internal.h | 20 | --- a/target/arm/helper.h |
20 | +++ b/hw/intc/gicv3_internal.h | 21 | +++ b/target/arm/helper.h |
21 | @@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1) | 22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
22 | #define GITS_CMD_MAPI 0x0B | 23 | DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
23 | #define GITS_CMD_INV 0x0C | 24 | DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
24 | #define GITS_CMD_INVALL 0x0D | 25 | |
25 | +#define GITS_CMD_MOVALL 0x0E | 26 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
26 | #define GITS_CMD_DISCARD 0x0F | 27 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) |
27 | 28 | +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) | |
28 | /* MAPC command fields */ | ||
29 | @@ -XXX,XX +XXX,XX @@ FIELD(MAPC, RDBASE, 16, 32) | ||
30 | #define L2_TABLE_VALID_MASK CMD_FIELD_VALID_MASK | ||
31 | #define TABLE_ENTRY_VALID_MASK (1ULL << 0) | ||
32 | |||
33 | +/* MOVALL command fields */ | ||
34 | +FIELD(MOVALL_2, RDBASE1, 16, 36) | ||
35 | +FIELD(MOVALL_3, RDBASE2, 16, 36) | ||
36 | + | 29 | + |
37 | /* | 30 | DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, |
38 | * 12 bytes Interrupt translation Table Entry size | 31 | void, ptr, ptr, ptr, fpst, i32) |
39 | * as per Table 5.3 in GICv3 spec | 32 | DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, |
40 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_update_lpi(GICv3CPUState *cs); | 33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
41 | * an incoming migration has loaded new state. | ||
42 | */ | ||
43 | void gicv3_redist_update_lpi_only(GICv3CPUState *cs); | ||
44 | +/** | ||
45 | + * gicv3_redist_movall_lpis: | ||
46 | + * @src: source redistributor | ||
47 | + * @dest: destination redistributor | ||
48 | + * | ||
49 | + * Scan the LPI pending table for @src, and for each pending LPI there | ||
50 | + * mark it as not-pending for @src and pending for @dest, as required | ||
51 | + * by the ITS MOVALL command. | ||
52 | + */ | ||
53 | +void gicv3_redist_movall_lpis(GICv3CPUState *src, GICv3CPUState *dest); | ||
54 | + | ||
55 | void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); | ||
56 | void gicv3_init_cpuif(GICv3State *s); | ||
57 | |||
58 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
60 | --- a/hw/intc/arm_gicv3_its.c | 35 | --- a/target/arm/tcg/translate-a64.c |
61 | +++ b/hw/intc/arm_gicv3_its.c | 36 | +++ b/target/arm/tcg/translate-a64.c |
62 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_mapd(GICv3ITSState *s, uint64_t value, | 37 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { |
63 | return update_dte(s, devid, valid, size, itt_addr) ? CMD_CONTINUE : CMD_STALL; | 38 | gen_helper_gvec_vfms_s, |
39 | gen_helper_gvec_vfms_d, | ||
40 | }; | ||
41 | -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) | ||
42 | +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { | ||
43 | + gen_helper_gvec_ah_vfms_h, | ||
44 | + gen_helper_gvec_ah_vfms_s, | ||
45 | + gen_helper_gvec_ah_vfms_d, | ||
46 | +}; | ||
47 | +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) | ||
48 | |||
49 | static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { | ||
50 | gen_helper_gvec_fceq_h, | ||
51 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/vec_helper.c | ||
54 | +++ b/target/arm/tcg/vec_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, | ||
56 | return float64_muladd(float64_chs(op1), op2, dest, 0, stat); | ||
64 | } | 57 | } |
65 | 58 | ||
66 | +static ItsCmdResult process_movall(GICv3ITSState *s, uint64_t value, | 59 | +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, |
67 | + uint32_t offset) | 60 | + float_status *stat) |
68 | +{ | 61 | +{ |
69 | + AddressSpace *as = &s->gicv3->dma_as; | 62 | + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
70 | + MemTxResult res = MEMTX_OK; | ||
71 | + uint64_t rd1, rd2; | ||
72 | + | ||
73 | + /* No fields in dwords 0 or 1 */ | ||
74 | + offset += NUM_BYTES_IN_DW; | ||
75 | + offset += NUM_BYTES_IN_DW; | ||
76 | + value = address_space_ldq_le(as, s->cq.base_addr + offset, | ||
77 | + MEMTXATTRS_UNSPECIFIED, &res); | ||
78 | + if (res != MEMTX_OK) { | ||
79 | + return CMD_STALL; | ||
80 | + } | ||
81 | + | ||
82 | + rd1 = FIELD_EX64(value, MOVALL_2, RDBASE1); | ||
83 | + if (rd1 >= s->gicv3->num_cpu) { | ||
84 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
85 | + "%s: RDBASE1 %" PRId64 | ||
86 | + " out of range (must be less than %d)\n", | ||
87 | + __func__, rd1, s->gicv3->num_cpu); | ||
88 | + return CMD_CONTINUE; | ||
89 | + } | ||
90 | + | ||
91 | + offset += NUM_BYTES_IN_DW; | ||
92 | + value = address_space_ldq_le(as, s->cq.base_addr + offset, | ||
93 | + MEMTXATTRS_UNSPECIFIED, &res); | ||
94 | + if (res != MEMTX_OK) { | ||
95 | + return CMD_STALL; | ||
96 | + } | ||
97 | + | ||
98 | + rd2 = FIELD_EX64(value, MOVALL_3, RDBASE2); | ||
99 | + if (rd2 >= s->gicv3->num_cpu) { | ||
100 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
101 | + "%s: RDBASE2 %" PRId64 | ||
102 | + " out of range (must be less than %d)\n", | ||
103 | + __func__, rd2, s->gicv3->num_cpu); | ||
104 | + return CMD_CONTINUE; | ||
105 | + } | ||
106 | + | ||
107 | + if (rd1 == rd2) { | ||
108 | + /* Move to same target must succeed as a no-op */ | ||
109 | + return CMD_CONTINUE; | ||
110 | + } | ||
111 | + | ||
112 | + /* Move all pending LPIs from redistributor 1 to redistributor 2 */ | ||
113 | + gicv3_redist_movall_lpis(&s->gicv3->cpu[rd1], &s->gicv3->cpu[rd2]); | ||
114 | + | ||
115 | + return CMD_CONTINUE; | ||
116 | +} | 63 | +} |
117 | + | 64 | + |
118 | /* | 65 | +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, |
119 | * Current implementation blocks until all | 66 | + float_status *stat) |
120 | * commands are processed | ||
121 | @@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s) | ||
122 | gicv3_redist_update_lpi(&s->gicv3->cpu[i]); | ||
123 | } | ||
124 | break; | ||
125 | + case GITS_CMD_MOVALL: | ||
126 | + result = process_movall(s, data, cq_offset); | ||
127 | + break; | ||
128 | default: | ||
129 | break; | ||
130 | } | ||
131 | diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c | ||
132 | index XXXXXXX..XXXXXXX 100644 | ||
133 | --- a/hw/intc/arm_gicv3_redist.c | ||
134 | +++ b/hw/intc/arm_gicv3_redist.c | ||
135 | @@ -XXX,XX +XXX,XX @@ void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) | ||
136 | gicv3_redist_lpi_pending(cs, irq, level); | ||
137 | } | ||
138 | |||
139 | +void gicv3_redist_movall_lpis(GICv3CPUState *src, GICv3CPUState *dest) | ||
140 | +{ | 67 | +{ |
141 | + /* | 68 | + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); |
142 | + * We must move all pending LPIs from the source redistributor | ||
143 | + * to the destination. That is, for every pending LPI X on | ||
144 | + * src, we must set it not-pending on src and pending on dest. | ||
145 | + * LPIs that are already pending on dest are not cleared. | ||
146 | + * | ||
147 | + * If LPIs are disabled on dest this is CONSTRAINED UNPREDICTABLE: | ||
148 | + * we choose to NOP. If LPIs are disabled on source there's nothing | ||
149 | + * to be transferred anyway. | ||
150 | + */ | ||
151 | + AddressSpace *as = &src->gic->dma_as; | ||
152 | + uint64_t idbits; | ||
153 | + uint32_t pendt_size; | ||
154 | + uint64_t src_baddr, dest_baddr; | ||
155 | + int i; | ||
156 | + | ||
157 | + if (!(src->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || | ||
158 | + !(dest->gicr_ctlr & GICR_CTLR_ENABLE_LPIS)) { | ||
159 | + return; | ||
160 | + } | ||
161 | + | ||
162 | + idbits = MIN(FIELD_EX64(src->gicr_propbaser, GICR_PROPBASER, IDBITS), | ||
163 | + GICD_TYPER_IDBITS); | ||
164 | + idbits = MIN(FIELD_EX64(dest->gicr_propbaser, GICR_PROPBASER, IDBITS), | ||
165 | + idbits); | ||
166 | + | ||
167 | + pendt_size = 1ULL << (idbits + 1); | ||
168 | + src_baddr = src->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; | ||
169 | + dest_baddr = dest->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; | ||
170 | + | ||
171 | + for (i = GICV3_LPI_INTID_START / 8; i < pendt_size / 8; i++) { | ||
172 | + uint8_t src_pend, dest_pend; | ||
173 | + | ||
174 | + address_space_read(as, src_baddr + i, MEMTXATTRS_UNSPECIFIED, | ||
175 | + &src_pend, sizeof(src_pend)); | ||
176 | + if (!src_pend) { | ||
177 | + continue; | ||
178 | + } | ||
179 | + address_space_read(as, dest_baddr + i, MEMTXATTRS_UNSPECIFIED, | ||
180 | + &dest_pend, sizeof(dest_pend)); | ||
181 | + dest_pend |= src_pend; | ||
182 | + src_pend = 0; | ||
183 | + address_space_write(as, src_baddr + i, MEMTXATTRS_UNSPECIFIED, | ||
184 | + &src_pend, sizeof(src_pend)); | ||
185 | + address_space_write(as, dest_baddr + i, MEMTXATTRS_UNSPECIFIED, | ||
186 | + &dest_pend, sizeof(dest_pend)); | ||
187 | + } | ||
188 | + | ||
189 | + gicv3_redist_update_lpi(src); | ||
190 | + gicv3_redist_update_lpi(dest); | ||
191 | +} | 69 | +} |
192 | + | 70 | + |
193 | void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level) | 71 | +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, |
194 | { | 72 | + float_status *stat) |
195 | /* Update redistributor state for a change in an external PPI input line */ | 73 | +{ |
74 | + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); | ||
75 | +} | ||
76 | + | ||
77 | #define DO_MULADD(NAME, FUNC, TYPE) \ | ||
78 | void HELPER(NAME)(void *vd, void *vn, void *vm, \ | ||
79 | float_status *stat, uint32_t desc) \ | ||
80 | @@ -XXX,XX +XXX,XX @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) | ||
81 | DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) | ||
82 | DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) | ||
83 | |||
84 | +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) | ||
85 | +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) | ||
86 | +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) | ||
87 | + | ||
88 | /* For the indexed ops, SVE applies the index per 128-bit vector segment. | ||
89 | * For AdvSIMD, there is of course only one such vector segment. | ||
90 | */ | ||
196 | -- | 91 | -- |
197 | 2.25.1 | 92 | 2.34.1 |
198 | |||
199 | diff view generated by jsdifflib |
1 | The ITS has a bank of 8 GITS_BASER<n> registers, which allow the | 1 | Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the |
---|---|---|---|
2 | guest to specify the base address of various data tables. Each | 2 | SVE FMLS (vector) insns, by providing new helpers for the AH=1 case |
3 | register has a read-only type field indicating which table it is for | 3 | which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions |
4 | and a read-write field where the guest can write in the base address | 4 | that do the work. |
5 | (among other things). We currently allow the guest to write the | ||
6 | writeable fields for all eight registers, even if the type field is 0 | ||
7 | indicating "Unimplemented". This means the guest can provoke QEMU | ||
8 | into asserting by writing an address into one of these unimplemented | ||
9 | base registers, which bypasses the "if (!value) continue" check in | ||
10 | extract_table_params() and lets us hit the assertion that the type | ||
11 | field is one of the permitted table types. | ||
12 | 5 | ||
13 | Prevent the assertion by not allowing the guest to write to the | 6 | The float*_muladd functions have a flags argument that can |
14 | unimplemented base registers. This means their value will remain 0 | 7 | perform optional negation of various operands. We don't use |
15 | and extract_table_params() will ignore them. | 8 | that for "normal" arm fmla, because the muladd flags are not |
9 | applied when an input is a NaN. But since FEAT_AFP does not | ||
10 | negate NaNs, this behaviour is exactly what we need. | ||
11 | |||
12 | The non-AH helpers pass in a zero flags argument and control the | ||
13 | negation via the neg1 and neg3 arguments; the AH helpers always pass | ||
14 | in neg1 and neg3 as zero and control the negation via the flags | ||
15 | argument. This allows us to avoid conditional branches within the | ||
16 | inner loop. | ||
16 | 17 | ||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
19 | Message-id: 20220122182444.724087-12-peter.maydell@linaro.org | ||
20 | --- | 20 | --- |
21 | hw/intc/arm_gicv3_its.c | 8 ++++++++ | 21 | target/arm/tcg/helper-sve.h | 21 ++++++++ |
22 | 1 file changed, 8 insertions(+) | 22 | target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++------- |
23 | target/arm/tcg/translate-sve.c | 18 ++++--- | ||
24 | 3 files changed, 114 insertions(+), 24 deletions(-) | ||
23 | 25 | ||
24 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 26 | diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h |
25 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/hw/intc/arm_gicv3_its.c | 28 | --- a/target/arm/tcg/helper-sve.h |
27 | +++ b/hw/intc/arm_gicv3_its.c | 29 | +++ b/target/arm/tcg/helper-sve.h |
28 | @@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset, | 30 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, |
29 | if (!(s->ctlr & R_GITS_CTLR_ENABLED_MASK)) { | 31 | DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, |
30 | index = (offset - GITS_BASER) / 8; | 32 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
31 | 33 | ||
32 | + if (s->baser[index] == 0) { | 34 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, |
33 | + /* Unimplemented GITS_BASERn: RAZ/WI */ | 35 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
34 | + break; | 36 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, |
35 | + } | 37 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
36 | if (offset & 7) { | 38 | +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, |
37 | value <<= 32; | 39 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
38 | value &= ~GITS_BASER_RO_MASK; | 40 | + |
39 | @@ -XXX,XX +XXX,XX @@ static bool its_writell(GICv3ITSState *s, hwaddr offset, | 41 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, |
40 | */ | 42 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
41 | if (!(s->ctlr & R_GITS_CTLR_ENABLED_MASK)) { | 43 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, |
42 | index = (offset - GITS_BASER) / 8; | 44 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
43 | + if (s->baser[index] == 0) { | 45 | +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, |
44 | + /* Unimplemented GITS_BASERn: RAZ/WI */ | 46 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
45 | + break; | 47 | + |
46 | + } | 48 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, |
47 | s->baser[index] &= GITS_BASER_RO_MASK; | 49 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
48 | s->baser[index] |= (value & ~GITS_BASER_RO_MASK); | 50 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, |
49 | } | 51 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) |
52 | +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, | ||
53 | + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
54 | + | ||
55 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, | ||
56 | void, ptr, ptr, ptr, ptr, ptr, fpst, i32) | ||
57 | DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, | ||
58 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/tcg/sve_helper.c | ||
61 | +++ b/target/arm/tcg/sve_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) | ||
63 | |||
64 | static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
65 | float_status *status, uint32_t desc, | ||
66 | - uint16_t neg1, uint16_t neg3) | ||
67 | + uint16_t neg1, uint16_t neg3, int flags) | ||
68 | { | ||
69 | intptr_t i = simd_oprsz(desc); | ||
70 | uint64_t *g = vg; | ||
71 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
72 | e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; | ||
73 | e2 = *(uint16_t *)(vm + H1_2(i)); | ||
74 | e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; | ||
75 | - r = float16_muladd(e1, e2, e3, 0, status); | ||
76 | + r = float16_muladd(e1, e2, e3, flags, status); | ||
77 | *(uint16_t *)(vd + H1_2(i)) = r; | ||
78 | } | ||
79 | } while (i & 63); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, | ||
81 | void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
82 | void *vg, float_status *status, uint32_t desc) | ||
83 | { | ||
84 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
85 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
86 | } | ||
87 | |||
88 | void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
89 | void *vg, float_status *status, uint32_t desc) | ||
90 | { | ||
91 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); | ||
92 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); | ||
93 | } | ||
94 | |||
95 | void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
96 | void *vg, float_status *status, uint32_t desc) | ||
97 | { | ||
98 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); | ||
99 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); | ||
100 | } | ||
101 | |||
102 | void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
103 | void *vg, float_status *status, uint32_t desc) | ||
104 | { | ||
105 | - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); | ||
106 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); | ||
107 | +} | ||
108 | + | ||
109 | +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
110 | + void *vg, float_status *status, uint32_t desc) | ||
111 | +{ | ||
112 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
113 | + float_muladd_negate_product); | ||
114 | +} | ||
115 | + | ||
116 | +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
117 | + void *vg, float_status *status, uint32_t desc) | ||
118 | +{ | ||
119 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
120 | + float_muladd_negate_product | float_muladd_negate_c); | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
124 | + void *vg, float_status *status, uint32_t desc) | ||
125 | +{ | ||
126 | + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
127 | + float_muladd_negate_c); | ||
128 | } | ||
129 | |||
130 | static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
131 | float_status *status, uint32_t desc, | ||
132 | - uint32_t neg1, uint32_t neg3) | ||
133 | + uint32_t neg1, uint32_t neg3, int flags) | ||
134 | { | ||
135 | intptr_t i = simd_oprsz(desc); | ||
136 | uint64_t *g = vg; | ||
137 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
138 | e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; | ||
139 | e2 = *(uint32_t *)(vm + H1_4(i)); | ||
140 | e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; | ||
141 | - r = float32_muladd(e1, e2, e3, 0, status); | ||
142 | + r = float32_muladd(e1, e2, e3, flags, status); | ||
143 | *(uint32_t *)(vd + H1_4(i)) = r; | ||
144 | } | ||
145 | } while (i & 63); | ||
146 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, | ||
147 | void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
148 | void *vg, float_status *status, uint32_t desc) | ||
149 | { | ||
150 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
151 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
152 | } | ||
153 | |||
154 | void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
155 | void *vg, float_status *status, uint32_t desc) | ||
156 | { | ||
157 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); | ||
158 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); | ||
159 | } | ||
160 | |||
161 | void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
162 | void *vg, float_status *status, uint32_t desc) | ||
163 | { | ||
164 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); | ||
165 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); | ||
166 | } | ||
167 | |||
168 | void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
169 | void *vg, float_status *status, uint32_t desc) | ||
170 | { | ||
171 | - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); | ||
172 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); | ||
173 | +} | ||
174 | + | ||
175 | +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
176 | + void *vg, float_status *status, uint32_t desc) | ||
177 | +{ | ||
178 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
179 | + float_muladd_negate_product); | ||
180 | +} | ||
181 | + | ||
182 | +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
183 | + void *vg, float_status *status, uint32_t desc) | ||
184 | +{ | ||
185 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
186 | + float_muladd_negate_product | float_muladd_negate_c); | ||
187 | +} | ||
188 | + | ||
189 | +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
190 | + void *vg, float_status *status, uint32_t desc) | ||
191 | +{ | ||
192 | + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
193 | + float_muladd_negate_c); | ||
194 | } | ||
195 | |||
196 | static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
197 | float_status *status, uint32_t desc, | ||
198 | - uint64_t neg1, uint64_t neg3) | ||
199 | + uint64_t neg1, uint64_t neg3, int flags) | ||
200 | { | ||
201 | intptr_t i = simd_oprsz(desc); | ||
202 | uint64_t *g = vg; | ||
203 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
204 | e1 = *(uint64_t *)(vn + i) ^ neg1; | ||
205 | e2 = *(uint64_t *)(vm + i); | ||
206 | e3 = *(uint64_t *)(va + i) ^ neg3; | ||
207 | - r = float64_muladd(e1, e2, e3, 0, status); | ||
208 | + r = float64_muladd(e1, e2, e3, flags, status); | ||
209 | *(uint64_t *)(vd + i) = r; | ||
210 | } | ||
211 | } while (i & 63); | ||
212 | @@ -XXX,XX +XXX,XX @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, | ||
213 | void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
214 | void *vg, float_status *status, uint32_t desc) | ||
215 | { | ||
216 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); | ||
217 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); | ||
218 | } | ||
219 | |||
220 | void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
221 | void *vg, float_status *status, uint32_t desc) | ||
222 | { | ||
223 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); | ||
224 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); | ||
225 | } | ||
226 | |||
227 | void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
228 | void *vg, float_status *status, uint32_t desc) | ||
229 | { | ||
230 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); | ||
231 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); | ||
232 | } | ||
233 | |||
234 | void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
235 | void *vg, float_status *status, uint32_t desc) | ||
236 | { | ||
237 | - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); | ||
238 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); | ||
239 | +} | ||
240 | + | ||
241 | +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
242 | + void *vg, float_status *status, uint32_t desc) | ||
243 | +{ | ||
244 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
245 | + float_muladd_negate_product); | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
249 | + void *vg, float_status *status, uint32_t desc) | ||
250 | +{ | ||
251 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
252 | + float_muladd_negate_product | float_muladd_negate_c); | ||
253 | +} | ||
254 | + | ||
255 | +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
256 | + void *vg, float_status *status, uint32_t desc) | ||
257 | +{ | ||
258 | + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, | ||
259 | + float_muladd_negate_c); | ||
260 | } | ||
261 | |||
262 | /* Two operand floating-point comparison controlled by a predicate. | ||
263 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/target/arm/tcg/translate-sve.c | ||
266 | +++ b/target/arm/tcg/translate-sve.c | ||
267 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], | ||
268 | a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), | ||
269 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
270 | |||
271 | -#define DO_FMLA(NAME, name) \ | ||
272 | +#define DO_FMLA(NAME, name, ah_name) \ | ||
273 | static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ | ||
274 | NULL, gen_helper_sve_##name##_h, \ | ||
275 | gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ | ||
276 | }; \ | ||
277 | - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ | ||
278 | + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ | ||
279 | + NULL, gen_helper_sve_##ah_name##_h, \ | ||
280 | + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ | ||
281 | + }; \ | ||
282 | + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ | ||
283 | + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ | ||
284 | a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ | ||
285 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
286 | |||
287 | -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) | ||
288 | -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) | ||
289 | -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) | ||
290 | -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) | ||
291 | +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ | ||
292 | +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) | ||
293 | +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) | ||
294 | +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) | ||
295 | +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) | ||
296 | |||
297 | #undef DO_FMLA | ||
298 | |||
50 | -- | 299 | -- |
51 | 2.25.1 | 300 | 2.34.1 |
52 | |||
53 | diff view generated by jsdifflib |
1 | Currently when we fill in a TableDesc based on the value the guest | 1 | The negation step in the SVE FTSSEL insn mustn't negate a NaN when |
---|---|---|---|
2 | has written to the GITS_BASER<n> register, we calculate both: | 2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field |
3 | * num_entries : the number of entries in the table, constrained | 3 | and use that to determine whether to do the negation. |
4 | by the amount of memory the guest has given it | ||
5 | * num_ids : the number of IDs we support for this table, | ||
6 | constrained by the implementation choices and the architecture | ||
7 | (eg DeviceIDs are 16 bits, so num_ids is 1 << 16) | ||
8 | |||
9 | When validating ITS commands, however, we check only num_ids, | ||
10 | thus allowing a broken guest to specify table entries that | ||
11 | index off the end of it. This will only corrupt guest memory, | ||
12 | but the ITS is supposed to reject such commands as invalid. | ||
13 | |||
14 | Instead of calculating both num_entries and num_ids, set | ||
15 | num_entries to the minimum of the two limits, and check that. | ||
16 | 4 | ||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
19 | Message-id: 20220122182444.724087-13-peter.maydell@linaro.org | ||
20 | --- | 7 | --- |
21 | include/hw/intc/arm_gicv3_its_common.h | 1 - | 8 | target/arm/tcg/sve_helper.c | 18 +++++++++++++++--- |
22 | hw/intc/arm_gicv3_its.c | 18 +++++++++--------- | 9 | target/arm/tcg/translate-sve.c | 4 ++-- |
23 | 2 files changed, 9 insertions(+), 10 deletions(-) | 10 | 2 files changed, 17 insertions(+), 5 deletions(-) |
24 | 11 | ||
25 | diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h | 12 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
26 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/include/hw/intc/arm_gicv3_its_common.h | 14 | --- a/target/arm/tcg/sve_helper.c |
28 | +++ b/include/hw/intc/arm_gicv3_its_common.h | 15 | +++ b/target/arm/tcg/sve_helper.c |
29 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 16 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) |
30 | uint16_t entry_sz; | 17 | void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
31 | uint32_t page_sz; | 18 | { |
32 | uint32_t num_entries; | 19 | intptr_t i, opr_sz = simd_oprsz(desc) / 2; |
33 | - uint32_t num_ids; | 20 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); |
34 | uint64_t base_addr; | 21 | uint16_t *d = vd, *n = vn, *m = vm; |
35 | } TableDesc; | 22 | for (i = 0; i < opr_sz; i += 1) { |
36 | 23 | uint16_t nn = n[i]; | |
37 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) |
38 | index XXXXXXX..XXXXXXX 100644 | 25 | if (mm & 1) { |
39 | --- a/hw/intc/arm_gicv3_its.c | 26 | nn = float16_one; |
40 | +++ b/hw/intc/arm_gicv3_its.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_its_cmd(GICv3ITSState *s, uint64_t value, | ||
42 | |||
43 | eventid = (value & EVENTID_MASK); | ||
44 | |||
45 | - if (devid >= s->dt.num_ids) { | ||
46 | + if (devid >= s->dt.num_entries) { | ||
47 | qemu_log_mask(LOG_GUEST_ERROR, | ||
48 | "%s: invalid command attributes: devid %d>=%d", | ||
49 | - __func__, devid, s->dt.num_ids); | ||
50 | + __func__, devid, s->dt.num_entries); | ||
51 | return CMD_CONTINUE; | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_its_cmd(GICv3ITSState *s, uint64_t value, | ||
55 | return CMD_CONTINUE; | ||
56 | } | ||
57 | |||
58 | - if (icid >= s->ct.num_ids) { | ||
59 | + if (icid >= s->ct.num_entries) { | ||
60 | qemu_log_mask(LOG_GUEST_ERROR, | ||
61 | "%s: invalid ICID 0x%x in ITE (table corrupted?)\n", | ||
62 | __func__, icid); | ||
63 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_mapti(GICv3ITSState *s, uint64_t value, | ||
64 | |||
65 | icid = value & ICID_MASK; | ||
66 | |||
67 | - if (devid >= s->dt.num_ids) { | ||
68 | + if (devid >= s->dt.num_entries) { | ||
69 | qemu_log_mask(LOG_GUEST_ERROR, | ||
70 | "%s: invalid command attributes: devid %d>=%d", | ||
71 | - __func__, devid, s->dt.num_ids); | ||
72 | + __func__, devid, s->dt.num_entries); | ||
73 | return CMD_CONTINUE; | ||
74 | } | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_mapti(GICv3ITSState *s, uint64_t value, | ||
77 | num_eventids = 1ULL << (FIELD_EX64(dte, DTE, SIZE) + 1); | ||
78 | num_intids = 1ULL << (GICD_TYPER_IDBITS + 1); | ||
79 | |||
80 | - if ((icid >= s->ct.num_ids) | ||
81 | + if ((icid >= s->ct.num_entries) | ||
82 | || !dte_valid || (eventid >= num_eventids) || | ||
83 | (((pIntid < GICV3_LPI_INTID_START) || (pIntid >= num_intids)) && | ||
84 | (pIntid != INTID_SPURIOUS))) { | ||
85 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_mapc(GICv3ITSState *s, uint32_t offset) | ||
86 | |||
87 | valid = (value & CMD_FIELD_VALID_MASK); | ||
88 | |||
89 | - if ((icid >= s->ct.num_ids) || (rdbase >= s->gicv3->num_cpu)) { | ||
90 | + if ((icid >= s->ct.num_entries) || (rdbase >= s->gicv3->num_cpu)) { | ||
91 | qemu_log_mask(LOG_GUEST_ERROR, | ||
92 | "ITS MAPC: invalid collection table attributes " | ||
93 | "icid %d rdbase %" PRIu64 "\n", icid, rdbase); | ||
94 | @@ -XXX,XX +XXX,XX @@ static ItsCmdResult process_mapd(GICv3ITSState *s, uint64_t value, | ||
95 | |||
96 | valid = (value & CMD_FIELD_VALID_MASK); | ||
97 | |||
98 | - if ((devid >= s->dt.num_ids) || | ||
99 | + if ((devid >= s->dt.num_entries) || | ||
100 | (size > FIELD_EX64(s->typer, GITS_TYPER, IDBITS))) { | ||
101 | qemu_log_mask(LOG_GUEST_ERROR, | ||
102 | "ITS MAPD: invalid device table attributes " | ||
103 | @@ -XXX,XX +XXX,XX @@ static void extract_table_params(GICv3ITSState *s) | ||
104 | L1TABLE_ENTRY_SIZE) * | ||
105 | (page_sz / td->entry_sz)); | ||
106 | } | 27 | } |
107 | - td->num_ids = 1ULL << idbits; | 28 | - d[i] = nn ^ (mm & 2) << 14; |
108 | + td->num_entries = MIN(td->num_entries, 1ULL << idbits); | 29 | + if (mm & 2) { |
30 | + nn = float16_maybe_ah_chs(nn, fpcr_ah); | ||
31 | + } | ||
32 | + d[i] = nn; | ||
109 | } | 33 | } |
110 | } | 34 | } |
111 | 35 | ||
36 | void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
37 | { | ||
38 | intptr_t i, opr_sz = simd_oprsz(desc) / 4; | ||
39 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
40 | uint32_t *d = vd, *n = vn, *m = vm; | ||
41 | for (i = 0; i < opr_sz; i += 1) { | ||
42 | uint32_t nn = n[i]; | ||
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) | ||
44 | if (mm & 1) { | ||
45 | nn = float32_one; | ||
46 | } | ||
47 | - d[i] = nn ^ (mm & 2) << 30; | ||
48 | + if (mm & 2) { | ||
49 | + nn = float32_maybe_ah_chs(nn, fpcr_ah); | ||
50 | + } | ||
51 | + d[i] = nn; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
56 | { | ||
57 | intptr_t i, opr_sz = simd_oprsz(desc) / 8; | ||
58 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
59 | uint64_t *d = vd, *n = vn, *m = vm; | ||
60 | for (i = 0; i < opr_sz; i += 1) { | ||
61 | uint64_t nn = n[i]; | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) | ||
63 | if (mm & 1) { | ||
64 | nn = float64_one; | ||
65 | } | ||
66 | - d[i] = nn ^ (mm & 2) << 62; | ||
67 | + if (mm & 2) { | ||
68 | + nn = float64_maybe_ah_chs(nn, fpcr_ah); | ||
69 | + } | ||
70 | + d[i] = nn; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/arm/tcg/translate-sve.c | ||
77 | +++ b/target/arm/tcg/translate-sve.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
79 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
80 | }; | ||
81 | TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
82 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
83 | + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) | ||
84 | |||
85 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
86 | NULL, gen_helper_sve_ftssel_h, | ||
87 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
88 | }; | ||
89 | TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
90 | - ftssel_fns[a->esz], a, 0) | ||
91 | + ftssel_fns[a->esz], a, s->fpcr_ah) | ||
92 | |||
93 | /* | ||
94 | *** SVE Predicate Logical Operations Group | ||
112 | -- | 95 | -- |
113 | 2.25.1 | 96 | 2.34.1 |
114 | |||
115 | diff view generated by jsdifflib |
1 | The GICR_CTLR.CES bit is a read-only bit which is set to 1 to indicate | 1 | The negation step in the SVE FTMAD insn mustn't negate a NaN when |
---|---|---|---|
2 | that the GICR_CTLR.EnableLPIs bit can be written to 0 to disable | 2 | FPCR.AH is set. Pass FPCR.AH to the helper via the SIMD data field, |
3 | LPIs (as opposed to allowing LPIs to be enabled but not subsequently | 3 | so we can select the correct behaviour. |
4 | disabled). Our implementation permits this, so advertise it | 4 | |
5 | by setting CES to 1. | 5 | Because the operand is known to be negative, negating the operand |
6 | is the same as taking the absolute value. Defer this to the muladd | ||
7 | operation via flags, so that it happens after NaN detection, which | ||
8 | is correct for FPCR.AH. | ||
6 | 9 | ||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20220122182444.724087-10-peter.maydell@linaro.org | ||
10 | --- | 12 | --- |
11 | hw/intc/gicv3_internal.h | 1 + | 13 | target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++++-------- |
12 | hw/intc/arm_gicv3_common.c | 4 ++++ | 14 | target/arm/tcg/translate-sve.c | 3 ++- |
13 | 2 files changed, 5 insertions(+) | 15 | 2 files changed, 35 insertions(+), 10 deletions(-) |
14 | 16 | ||
15 | diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h | 17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/intc/gicv3_internal.h | 19 | --- a/target/arm/tcg/sve_helper.c |
18 | +++ b/hw/intc/gicv3_internal.h | 20 | +++ b/target/arm/tcg/sve_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, |
20 | #define GICR_NSACR (GICR_SGI_OFFSET + 0x0E00) | 22 | 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
21 | 23 | }; | |
22 | #define GICR_CTLR_ENABLE_LPIS (1U << 0) | 24 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); |
23 | +#define GICR_CTLR_CES (1U << 1) | 25 | - intptr_t x = simd_data(desc); |
24 | #define GICR_CTLR_RWP (1U << 3) | 26 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); |
25 | #define GICR_CTLR_DPG0 (1U << 24) | 27 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); |
26 | #define GICR_CTLR_DPG1NS (1U << 25) | 28 | float16 *d = vd, *n = vn, *m = vm; |
27 | diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c | 29 | + |
30 | for (i = 0; i < opr_sz; i++) { | ||
31 | float16 mm = m[i]; | ||
32 | intptr_t xx = x; | ||
33 | + int flags = 0; | ||
34 | + | ||
35 | if (float16_is_neg(mm)) { | ||
36 | - mm = float16_abs(mm); | ||
37 | + if (fpcr_ah) { | ||
38 | + flags = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + mm = float16_abs(mm); | ||
41 | + } | ||
42 | xx += 8; | ||
43 | } | ||
44 | - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s); | ||
45 | + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, | ||
50 | 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, | ||
51 | }; | ||
52 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); | ||
53 | - intptr_t x = simd_data(desc); | ||
54 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
55 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
56 | float32 *d = vd, *n = vn, *m = vm; | ||
57 | + | ||
58 | for (i = 0; i < opr_sz; i++) { | ||
59 | float32 mm = m[i]; | ||
60 | intptr_t xx = x; | ||
61 | + int flags = 0; | ||
62 | + | ||
63 | if (float32_is_neg(mm)) { | ||
64 | - mm = float32_abs(mm); | ||
65 | + if (fpcr_ah) { | ||
66 | + flags = float_muladd_negate_product; | ||
67 | + } else { | ||
68 | + mm = float32_abs(mm); | ||
69 | + } | ||
70 | xx += 8; | ||
71 | } | ||
72 | - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s); | ||
73 | + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, | ||
78 | 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, | ||
79 | }; | ||
80 | intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); | ||
81 | - intptr_t x = simd_data(desc); | ||
82 | + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); | ||
83 | + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); | ||
84 | float64 *d = vd, *n = vn, *m = vm; | ||
85 | + | ||
86 | for (i = 0; i < opr_sz; i++) { | ||
87 | float64 mm = m[i]; | ||
88 | intptr_t xx = x; | ||
89 | + int flags = 0; | ||
90 | + | ||
91 | if (float64_is_neg(mm)) { | ||
92 | - mm = float64_abs(mm); | ||
93 | + if (fpcr_ah) { | ||
94 | + flags = float_muladd_negate_product; | ||
95 | + } else { | ||
96 | + mm = float64_abs(mm); | ||
97 | + } | ||
98 | xx += 8; | ||
99 | } | ||
100 | - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s); | ||
101 | + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/hw/intc/arm_gicv3_common.c | 107 | --- a/target/arm/tcg/translate-sve.c |
30 | +++ b/hw/intc/arm_gicv3_common.c | 108 | +++ b/target/arm/tcg/translate-sve.c |
31 | @@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_reset(DeviceState *dev) | 109 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { |
32 | 110 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, | |
33 | cs->level = 0; | 111 | }; |
34 | cs->gicr_ctlr = 0; | 112 | TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, |
35 | + if (s->lpi_enable) { | 113 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, |
36 | + /* Our implementation supports clearing GICR_CTLR.EnableLPIs */ | 114 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, |
37 | + cs->gicr_ctlr |= GICR_CTLR_CES; | 115 | + a->imm | (s->fpcr_ah << 3), |
38 | + } | 116 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) |
39 | cs->gicr_statusr[GICV3_S] = 0; | 117 | |
40 | cs->gicr_statusr[GICV3_NS] = 0; | 118 | /* |
41 | cs->gicr_waker = GICR_WAKER_ProcessorSleep | GICR_WAKER_ChildrenAsleep; | ||
42 | -- | 119 | -- |
43 | 2.25.1 | 120 | 2.34.1 |
44 | |||
45 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in FCMLA mustn't negate a NaN when FPCR.AH | ||
4 | is set. Handle this by passing FPCR.AH to the helper via the | ||
5 | SIMD data field, and use this to select whether to do the | ||
6 | negation via XOR or via the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-26-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 40 insertions(+), 28 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) | ||
23 | |||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - a->rot, fn[a->esz]); | ||
27 | + a->rot | (s->fpcr_ah << 2), fn[a->esz]); | ||
28 | return true; | ||
29 | } | ||
30 | |||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | - uint32_t neg_real = flip ^ neg_imag; | ||
41 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
42 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
43 | + uint32_t negf_real = flip ^ negf_imag; | ||
44 | + float16 negx_imag, negx_real; | ||
45 | uintptr_t i; | ||
46 | |||
47 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
48 | - neg_real <<= 15; | ||
49 | - neg_imag <<= 15; | ||
50 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
51 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
52 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
53 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
54 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
55 | |||
56 | for (i = 0; i < opr_sz / 2; i += 2) { | ||
57 | float16 e2 = n[H2(i + flip)]; | ||
58 | - float16 e1 = m[H2(i + flip)] ^ neg_real; | ||
59 | + float16 e1 = m[H2(i + flip)] ^ negx_real; | ||
60 | float16 e4 = e2; | ||
61 | - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; | ||
62 | + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; | ||
63 | |||
64 | - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); | ||
65 | - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); | ||
66 | + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); | ||
67 | + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); | ||
68 | } | ||
69 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, | ||
72 | uintptr_t opr_sz = simd_oprsz(desc); | ||
73 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
74 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
75 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
76 | - uint32_t neg_real = flip ^ neg_imag; | ||
77 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
78 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
79 | + uint32_t negf_real = flip ^ negf_imag; | ||
80 | + float32 negx_imag, negx_real; | ||
81 | uintptr_t i; | ||
82 | |||
83 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
84 | - neg_real <<= 31; | ||
85 | - neg_imag <<= 31; | ||
86 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
87 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
88 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
89 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
90 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
91 | |||
92 | for (i = 0; i < opr_sz / 4; i += 2) { | ||
93 | float32 e2 = n[H4(i + flip)]; | ||
94 | - float32 e1 = m[H4(i + flip)] ^ neg_real; | ||
95 | + float32 e1 = m[H4(i + flip)] ^ negx_real; | ||
96 | float32 e4 = e2; | ||
97 | - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; | ||
98 | + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; | ||
99 | |||
100 | - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); | ||
101 | - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); | ||
102 | + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); | ||
103 | + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); | ||
104 | } | ||
105 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, | ||
108 | uintptr_t opr_sz = simd_oprsz(desc); | ||
109 | float64 *d = vd, *n = vn, *m = vm, *a = va; | ||
110 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
111 | - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
112 | - uint64_t neg_real = flip ^ neg_imag; | ||
113 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
114 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
115 | + uint32_t negf_real = flip ^ negf_imag; | ||
116 | + float64 negx_real, negx_imag; | ||
117 | uintptr_t i; | ||
118 | |||
119 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
120 | - neg_real <<= 63; | ||
121 | - neg_imag <<= 63; | ||
122 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
123 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
124 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
125 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
126 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
127 | |||
128 | for (i = 0; i < opr_sz / 8; i += 2) { | ||
129 | float64 e2 = n[i + flip]; | ||
130 | - float64 e1 = m[i + flip] ^ neg_real; | ||
131 | + float64 e1 = m[i + flip] ^ negx_real; | ||
132 | float64 e4 = e2; | ||
133 | - float64 e3 = m[i + 1 - flip] ^ neg_imag; | ||
134 | + float64 e3 = m[i + 1 - flip] ^ negx_imag; | ||
135 | |||
136 | - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); | ||
137 | - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); | ||
138 | + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); | ||
139 | + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); | ||
140 | } | ||
141 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
142 | } | ||
143 | -- | ||
144 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in FCMLA by index mustn't negate a NaN when | ||
4 | FPCR.AH is set. Use the same approach as vector FCMLA of | ||
5 | passing in FPCR.AH and using it to select whether to negate | ||
6 | by XOR or by the muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-27-richard.henderson@linaro.org | ||
10 | [PMM: Expanded commit message] | ||
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/tcg/translate-a64.c | 2 +- | ||
15 | target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- | ||
16 | 2 files changed, 27 insertions(+), 19 deletions(-) | ||
17 | |||
18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/arm/tcg/translate-a64.c | ||
21 | +++ b/target/arm/tcg/translate-a64.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) | ||
23 | if (fp_access_check(s)) { | ||
24 | gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, | ||
25 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, | ||
26 | - (a->idx << 2) | a->rot, fn); | ||
27 | + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); | ||
28 | } | ||
29 | return true; | ||
30 | } | ||
31 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/arm/tcg/vec_helper.c | ||
34 | +++ b/target/arm/tcg/vec_helper.c | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, | ||
36 | uintptr_t opr_sz = simd_oprsz(desc); | ||
37 | float16 *d = vd, *n = vn, *m = vm, *a = va; | ||
38 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
39 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
40 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
41 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
42 | - uint32_t neg_real = flip ^ neg_imag; | ||
43 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
44 | + uint32_t negf_real = flip ^ negf_imag; | ||
45 | intptr_t elements = opr_sz / sizeof(float16); | ||
46 | intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); | ||
47 | + float16 negx_imag, negx_real; | ||
48 | intptr_t i, j; | ||
49 | |||
50 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
51 | - neg_real <<= 15; | ||
52 | - neg_imag <<= 15; | ||
53 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
54 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
55 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
56 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
57 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
58 | |||
59 | for (i = 0; i < elements; i += eltspersegment) { | ||
60 | float16 mr = m[H2(i + 2 * index + 0)]; | ||
61 | float16 mi = m[H2(i + 2 * index + 1)]; | ||
62 | - float16 e1 = neg_real ^ (flip ? mi : mr); | ||
63 | - float16 e3 = neg_imag ^ (flip ? mr : mi); | ||
64 | + float16 e1 = negx_real ^ (flip ? mi : mr); | ||
65 | + float16 e3 = negx_imag ^ (flip ? mr : mi); | ||
66 | |||
67 | for (j = i; j < i + eltspersegment; j += 2) { | ||
68 | float16 e2 = n[H2(j + flip)]; | ||
69 | float16 e4 = e2; | ||
70 | |||
71 | - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); | ||
72 | - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); | ||
73 | + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); | ||
74 | + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); | ||
75 | } | ||
76 | } | ||
77 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, | ||
79 | uintptr_t opr_sz = simd_oprsz(desc); | ||
80 | float32 *d = vd, *n = vn, *m = vm, *a = va; | ||
81 | intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
82 | - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
83 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
84 | intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); | ||
85 | - uint32_t neg_real = flip ^ neg_imag; | ||
86 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); | ||
87 | + uint32_t negf_real = flip ^ negf_imag; | ||
88 | intptr_t elements = opr_sz / sizeof(float32); | ||
89 | intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); | ||
90 | + float32 negx_imag, negx_real; | ||
91 | intptr_t i, j; | ||
92 | |||
93 | - /* Shift boolean to the sign bit so we can xor to negate. */ | ||
94 | - neg_real <<= 31; | ||
95 | - neg_imag <<= 31; | ||
96 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
97 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
98 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
99 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
100 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
101 | |||
102 | for (i = 0; i < elements; i += eltspersegment) { | ||
103 | float32 mr = m[H4(i + 2 * index + 0)]; | ||
104 | float32 mi = m[H4(i + 2 * index + 1)]; | ||
105 | - float32 e1 = neg_real ^ (flip ? mi : mr); | ||
106 | - float32 e3 = neg_imag ^ (flip ? mr : mi); | ||
107 | + float32 e1 = negx_real ^ (flip ? mi : mr); | ||
108 | + float32 e3 = negx_imag ^ (flip ? mr : mi); | ||
109 | |||
110 | for (j = i; j < i + eltspersegment; j += 2) { | ||
111 | float32 e2 = n[H4(j + flip)]; | ||
112 | float32 e4 = e2; | ||
113 | |||
114 | - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); | ||
115 | - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); | ||
116 | + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); | ||
117 | + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); | ||
118 | } | ||
119 | } | ||
120 | clear_tail(d, opr_sz, simd_maxsz(desc)); | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | The negation step in SVE FCMLA mustn't negate a NaN when FPCR.AH is | ||
4 | set. Use the same approach as we did for A64 FCMLA of passing in | ||
5 | FPCR.AH and using it to select whether to negate by XOR or by the | ||
6 | muladd negate_product flag. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20250129013857.135256-28-richard.henderson@linaro.org | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- | ||
14 | target/arm/tcg/translate-sve.c | 2 +- | ||
15 | 2 files changed, 43 insertions(+), 28 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/arm/tcg/sve_helper.c | ||
20 | +++ b/target/arm/tcg/sve_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
22 | void *vg, float_status *status, uint32_t desc) | ||
23 | { | ||
24 | intptr_t j, i = simd_oprsz(desc); | ||
25 | - unsigned rot = simd_data(desc); | ||
26 | - bool flip = rot & 1; | ||
27 | - float16 neg_imag, neg_real; | ||
28 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
29 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
30 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
31 | + uint32_t negf_real = flip ^ negf_imag; | ||
32 | + float16 negx_imag, negx_real; | ||
33 | uint64_t *g = vg; | ||
34 | |||
35 | - neg_imag = float16_set_sign(0, (rot & 2) != 0); | ||
36 | - neg_real = float16_set_sign(0, rot == 1 || rot == 2); | ||
37 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
38 | + negx_real = (negf_real & ~fpcr_ah) << 15; | ||
39 | + negx_imag = (negf_imag & ~fpcr_ah) << 15; | ||
40 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
41 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
42 | |||
43 | do { | ||
44 | uint64_t pg = g[(i - 1) >> 6]; | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, | ||
46 | mi = *(float16 *)(vm + H1_2(j)); | ||
47 | |||
48 | e2 = (flip ? ni : nr); | ||
49 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
50 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
51 | e4 = e2; | ||
52 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
53 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
54 | |||
55 | if (likely((pg >> (i & 63)) & 1)) { | ||
56 | d = *(float16 *)(va + H1_2(i)); | ||
57 | - d = float16_muladd(e2, e1, d, 0, status); | ||
58 | + d = float16_muladd(e2, e1, d, negf_real, status); | ||
59 | *(float16 *)(vd + H1_2(i)) = d; | ||
60 | } | ||
61 | if (likely((pg >> (j & 63)) & 1)) { | ||
62 | d = *(float16 *)(va + H1_2(j)); | ||
63 | - d = float16_muladd(e4, e3, d, 0, status); | ||
64 | + d = float16_muladd(e4, e3, d, negf_imag, status); | ||
65 | *(float16 *)(vd + H1_2(j)) = d; | ||
66 | } | ||
67 | } while (i & 63); | ||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | void *vg, float_status *status, uint32_t desc) | ||
70 | { | ||
71 | intptr_t j, i = simd_oprsz(desc); | ||
72 | - unsigned rot = simd_data(desc); | ||
73 | - bool flip = rot & 1; | ||
74 | - float32 neg_imag, neg_real; | ||
75 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
76 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
77 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
78 | + uint32_t negf_real = flip ^ negf_imag; | ||
79 | + float32 negx_imag, negx_real; | ||
80 | uint64_t *g = vg; | ||
81 | |||
82 | - neg_imag = float32_set_sign(0, (rot & 2) != 0); | ||
83 | - neg_real = float32_set_sign(0, rot == 1 || rot == 2); | ||
84 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
85 | + negx_real = (negf_real & ~fpcr_ah) << 31; | ||
86 | + negx_imag = (negf_imag & ~fpcr_ah) << 31; | ||
87 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
88 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
89 | |||
90 | do { | ||
91 | uint64_t pg = g[(i - 1) >> 6]; | ||
92 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, | ||
93 | mi = *(float32 *)(vm + H1_2(j)); | ||
94 | |||
95 | e2 = (flip ? ni : nr); | ||
96 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
97 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
98 | e4 = e2; | ||
99 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
100 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
101 | |||
102 | if (likely((pg >> (i & 63)) & 1)) { | ||
103 | d = *(float32 *)(va + H1_2(i)); | ||
104 | - d = float32_muladd(e2, e1, d, 0, status); | ||
105 | + d = float32_muladd(e2, e1, d, negf_real, status); | ||
106 | *(float32 *)(vd + H1_2(i)) = d; | ||
107 | } | ||
108 | if (likely((pg >> (j & 63)) & 1)) { | ||
109 | d = *(float32 *)(va + H1_2(j)); | ||
110 | - d = float32_muladd(e4, e3, d, 0, status); | ||
111 | + d = float32_muladd(e4, e3, d, negf_imag, status); | ||
112 | *(float32 *)(vd + H1_2(j)) = d; | ||
113 | } | ||
114 | } while (i & 63); | ||
115 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
116 | void *vg, float_status *status, uint32_t desc) | ||
117 | { | ||
118 | intptr_t j, i = simd_oprsz(desc); | ||
119 | - unsigned rot = simd_data(desc); | ||
120 | - bool flip = rot & 1; | ||
121 | - float64 neg_imag, neg_real; | ||
122 | + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
123 | + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); | ||
124 | + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
125 | + uint32_t negf_real = flip ^ negf_imag; | ||
126 | + float64 negx_imag, negx_real; | ||
127 | uint64_t *g = vg; | ||
128 | |||
129 | - neg_imag = float64_set_sign(0, (rot & 2) != 0); | ||
130 | - neg_real = float64_set_sign(0, rot == 1 || rot == 2); | ||
131 | + /* With AH=0, use negx; with AH=1 use negf. */ | ||
132 | + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; | ||
133 | + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; | ||
134 | + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); | ||
135 | + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); | ||
136 | |||
137 | do { | ||
138 | uint64_t pg = g[(i - 1) >> 6]; | ||
139 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, | ||
140 | mi = *(float64 *)(vm + H1_2(j)); | ||
141 | |||
142 | e2 = (flip ? ni : nr); | ||
143 | - e1 = (flip ? mi : mr) ^ neg_real; | ||
144 | + e1 = (flip ? mi : mr) ^ negx_real; | ||
145 | e4 = e2; | ||
146 | - e3 = (flip ? mr : mi) ^ neg_imag; | ||
147 | + e3 = (flip ? mr : mi) ^ negx_imag; | ||
148 | |||
149 | if (likely((pg >> (i & 63)) & 1)) { | ||
150 | d = *(float64 *)(va + H1_2(i)); | ||
151 | - d = float64_muladd(e2, e1, d, 0, status); | ||
152 | + d = float64_muladd(e2, e1, d, negf_real, status); | ||
153 | *(float64 *)(vd + H1_2(i)) = d; | ||
154 | } | ||
155 | if (likely((pg >> (j & 63)) & 1)) { | ||
156 | d = *(float64 *)(va + H1_2(j)); | ||
157 | - d = float64_muladd(e4, e3, d, 0, status); | ||
158 | + d = float64_muladd(e4, e3, d, negf_imag, status); | ||
159 | *(float64 *)(vd + H1_2(j)) = d; | ||
160 | } | ||
161 | } while (i & 63); | ||
162 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/arm/tcg/translate-sve.c | ||
165 | +++ b/target/arm/tcg/translate-sve.c | ||
166 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { | ||
167 | gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, | ||
168 | }; | ||
169 | TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], | ||
170 | - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, | ||
171 | + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), | ||
172 | a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) | ||
173 | |||
174 | static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { | ||
175 | -- | ||
176 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add a model of Xilinx Versal's OSPI flash memory controller. | 3 | Handle FPCR.AH's requirement to not negate the sign of a NaN |
4 | in FMLSL by element and vector, using the usual trick of | ||
5 | negating by XOR when AH=0 and by muladd flags when AH=1. | ||
4 | 6 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 7 | Since we have the CPUARMState* in the helper anyway, we can |
6 | Reviewed-by: Luc Michel <luc@lmichel.fr> | 8 | look directly at env->vfp.fpcr and don't need to pass in the |
7 | Message-id: 20220121161141.14389-7-francisco.iglesias@xilinx.com | 9 | FPCR.AH value via the SIMD data word. |
8 | [PMM: fixed indent] | 10 | |
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-31-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 16 | --- |
11 | include/hw/ssi/xlnx-versal-ospi.h | 111 ++ | 17 | target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++------------- |
12 | hw/ssi/xlnx-versal-ospi.c | 1853 +++++++++++++++++++++++++++++ | 18 | 1 file changed, 46 insertions(+), 25 deletions(-) |
13 | hw/ssi/meson.build | 1 + | ||
14 | 3 files changed, 1965 insertions(+) | ||
15 | create mode 100644 include/hw/ssi/xlnx-versal-ospi.h | ||
16 | create mode 100644 hw/ssi/xlnx-versal-ospi.c | ||
17 | 19 | ||
18 | diff --git a/include/hw/ssi/xlnx-versal-ospi.h b/include/hw/ssi/xlnx-versal-ospi.h | 20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
19 | new file mode 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
20 | index XXXXXXX..XXXXXXX | 22 | --- a/target/arm/tcg/vec_helper.c |
21 | --- /dev/null | 23 | +++ b/target/arm/tcg/vec_helper.c |
22 | +++ b/include/hw/ssi/xlnx-versal-ospi.h | 24 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
23 | @@ -XXX,XX +XXX,XX @@ | 25 | */ |
24 | +/* | 26 | |
25 | + * Header file for the Xilinx Versal's OSPI controller | 27 | static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
26 | + * | 28 | - uint32_t desc, bool fz16) |
27 | + * Copyright (C) 2021 Xilinx Inc | 29 | + uint64_t negx, int negf, uint32_t desc, bool fz16) |
28 | + * Written by Francisco Iglesias <francisco.iglesias@xilinx.com> | 30 | { |
29 | + * | 31 | intptr_t i, oprsz = simd_oprsz(desc); |
30 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | 32 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
31 | + * of this software and associated documentation files (the "Software"), to deal | 33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
32 | + * in the Software without restriction, including without limitation the rights | 34 | int is_q = oprsz == 16; |
33 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 35 | uint64_t n_4, m_4; |
34 | + * copies of the Software, and to permit persons to whom the Software is | 36 | |
35 | + * furnished to do so, subject to the following conditions: | 37 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ |
36 | + * | 38 | - n_4 = load4_f16(vn, is_q, is_2); |
37 | + * The above copyright notice and this permission notice shall be included in | 39 | + /* |
38 | + * all copies or substantial portions of the Software. | 40 | + * Pre-load all of the f16 data, avoiding overlap issues. |
39 | + * | 41 | + * Negate all inputs for AH=0 FMLSL at once. |
40 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 42 | + */ |
41 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 43 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; |
42 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | 44 | m_4 = load4_f16(vm, is_q, is_2); |
43 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 45 | |
44 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 46 | - /* Negate all inputs for FMLSL at once. */ |
45 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 47 | - if (is_s) { |
46 | + * THE SOFTWARE. | 48 | - n_4 ^= 0x8000800080008000ull; |
47 | + */ | 49 | - } |
50 | - | ||
51 | for (i = 0; i < oprsz / 4; i++) { | ||
52 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
53 | float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); | ||
54 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
55 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
56 | } | ||
57 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
58 | } | ||
59 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, | ||
60 | void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
61 | CPUARMState *env, uint32_t desc) | ||
62 | { | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
64 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
65 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
48 | + | 66 | + |
49 | +/* | 67 | + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
50 | + * This is a model of Xilinx Versal's Octal SPI flash memory controller | 68 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
51 | + * documented in Versal's Technical Reference manual [1] and the Versal ACAP | 69 | } |
52 | + * Register reference [2]. | 70 | |
53 | + * | 71 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
54 | + * References: | 72 | CPUARMState *env, uint32_t desc) |
55 | + * | 73 | { |
56 | + * [1] Versal ACAP Technical Reference Manual, | 74 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc, |
57 | + * https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf | 75 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
58 | + * | 76 | + uint64_t negx = 0; |
59 | + * [2] Versal ACAP Register Reference, | 77 | + int negf = 0; |
60 | + * https://www.xilinx.com/html_docs/registers/am012/am012-versal-register-reference.html#mod___ospi.html | ||
61 | + * | ||
62 | + * | ||
63 | + * QEMU interface: | ||
64 | + * + sysbus MMIO region 0: MemoryRegion for the device's registers | ||
65 | + * + sysbus MMIO region 1: MemoryRegion for flash memory linear address space | ||
66 | + * (data transfer). | ||
67 | + * + sysbus IRQ 0: Device interrupt. | ||
68 | + * + Named GPIO input "ospi-mux-sel": 0: enables indirect access mode | ||
69 | + * and 1: enables direct access mode. | ||
70 | + * + Property "dac-with-indac": Allow both direct accesses and indirect | ||
71 | + * accesses simultaneously. | ||
72 | + * + Property "indac-write-disabled": Disable indirect access writes. | ||
73 | + */ | ||
74 | + | 78 | + |
75 | +#ifndef XILINX_VERSAL_OSPI_H | 79 | + if (is_s) { |
76 | +#define XILINX_VERSAL_OSPI_H | 80 | + if (env->vfp.fpcr & FPCR_AH) { |
77 | + | 81 | + negf = float_muladd_negate_product; |
78 | +#include "hw/register.h" | 82 | + } else { |
79 | +#include "hw/ssi/ssi.h" | 83 | + negx = 0x8000800080008000ull; |
80 | +#include "qemu/fifo8.h" | ||
81 | +#include "hw/dma/xlnx_csu_dma.h" | ||
82 | + | ||
83 | +#define TYPE_XILINX_VERSAL_OSPI "xlnx.versal-ospi" | ||
84 | + | ||
85 | +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalOspi, XILINX_VERSAL_OSPI) | ||
86 | + | ||
87 | +#define XILINX_VERSAL_OSPI_R_MAX (0xfc / 4 + 1) | ||
88 | + | ||
89 | +/* | ||
90 | + * Indirect operations | ||
91 | + */ | ||
92 | +typedef struct IndOp { | ||
93 | + uint32_t flash_addr; | ||
94 | + uint32_t num_bytes; | ||
95 | + uint32_t done_bytes; | ||
96 | + bool completed; | ||
97 | +} IndOp; | ||
98 | + | ||
99 | +struct XlnxVersalOspi { | ||
100 | + SysBusDevice parent_obj; | ||
101 | + | ||
102 | + MemoryRegion iomem; | ||
103 | + MemoryRegion iomem_dac; | ||
104 | + | ||
105 | + uint8_t num_cs; | ||
106 | + qemu_irq *cs_lines; | ||
107 | + | ||
108 | + SSIBus *spi; | ||
109 | + | ||
110 | + Fifo8 rx_fifo; | ||
111 | + Fifo8 tx_fifo; | ||
112 | + | ||
113 | + Fifo8 rx_sram; | ||
114 | + Fifo8 tx_sram; | ||
115 | + | ||
116 | + qemu_irq irq; | ||
117 | + | ||
118 | + XlnxCSUDMA *dma_src; | ||
119 | + bool ind_write_disabled; | ||
120 | + bool dac_with_indac; | ||
121 | + bool dac_enable; | ||
122 | + bool src_dma_inprog; | ||
123 | + | ||
124 | + IndOp rd_ind_op[2]; | ||
125 | + IndOp wr_ind_op[2]; | ||
126 | + | ||
127 | + uint32_t regs[XILINX_VERSAL_OSPI_R_MAX]; | ||
128 | + RegisterInfo regs_info[XILINX_VERSAL_OSPI_R_MAX]; | ||
129 | + | ||
130 | + /* Maximum inferred membank size is 512 bytes */ | ||
131 | + uint8_t stig_membank[512]; | ||
132 | +}; | ||
133 | + | ||
134 | +#endif /* XILINX_VERSAL_OSPI_H */ | ||
135 | diff --git a/hw/ssi/xlnx-versal-ospi.c b/hw/ssi/xlnx-versal-ospi.c | ||
136 | new file mode 100644 | ||
137 | index XXXXXXX..XXXXXXX | ||
138 | --- /dev/null | ||
139 | +++ b/hw/ssi/xlnx-versal-ospi.c | ||
140 | @@ -XXX,XX +XXX,XX @@ | ||
141 | +/* | ||
142 | + * QEMU model of Xilinx Versal's OSPI controller. | ||
143 | + * | ||
144 | + * Copyright (c) 2021 Xilinx Inc. | ||
145 | + * Written by Francisco Iglesias <francisco.iglesias@xilinx.com> | ||
146 | + * | ||
147 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
148 | + * of this software and associated documentation files (the "Software"), to deal | ||
149 | + * in the Software without restriction, including without limitation the rights | ||
150 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
151 | + * copies of the Software, and to permit persons to whom the Software is | ||
152 | + * furnished to do so, subject to the following conditions: | ||
153 | + * | ||
154 | + * The above copyright notice and this permission notice shall be included in | ||
155 | + * all copies or substantial portions of the Software. | ||
156 | + * | ||
157 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
158 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
159 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
160 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
161 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
162 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
163 | + * THE SOFTWARE. | ||
164 | + */ | ||
165 | +#include "qemu/osdep.h" | ||
166 | +#include "hw/sysbus.h" | ||
167 | +#include "migration/vmstate.h" | ||
168 | +#include "hw/qdev-properties.h" | ||
169 | +#include "qemu/bitops.h" | ||
170 | +#include "qemu/log.h" | ||
171 | +#include "hw/irq.h" | ||
172 | +#include "hw/ssi/xlnx-versal-ospi.h" | ||
173 | + | ||
174 | +#ifndef XILINX_VERSAL_OSPI_ERR_DEBUG | ||
175 | +#define XILINX_VERSAL_OSPI_ERR_DEBUG 0 | ||
176 | +#endif | ||
177 | + | ||
178 | +REG32(CONFIG_REG, 0x0) | ||
179 | + FIELD(CONFIG_REG, IDLE_FLD, 31, 1) | ||
180 | + FIELD(CONFIG_REG, DUAL_BYTE_OPCODE_EN_FLD, 30, 1) | ||
181 | + FIELD(CONFIG_REG, CRC_ENABLE_FLD, 29, 1) | ||
182 | + FIELD(CONFIG_REG, CONFIG_RESV2_FLD, 26, 3) | ||
183 | + FIELD(CONFIG_REG, PIPELINE_PHY_FLD, 25, 1) | ||
184 | + FIELD(CONFIG_REG, ENABLE_DTR_PROTOCOL_FLD, 24, 1) | ||
185 | + FIELD(CONFIG_REG, ENABLE_AHB_DECODER_FLD, 23, 1) | ||
186 | + FIELD(CONFIG_REG, MSTR_BAUD_DIV_FLD, 19, 4) | ||
187 | + FIELD(CONFIG_REG, ENTER_XIP_MODE_IMM_FLD, 18, 1) | ||
188 | + FIELD(CONFIG_REG, ENTER_XIP_MODE_FLD, 17, 1) | ||
189 | + FIELD(CONFIG_REG, ENB_AHB_ADDR_REMAP_FLD, 16, 1) | ||
190 | + FIELD(CONFIG_REG, ENB_DMA_IF_FLD, 15, 1) | ||
191 | + FIELD(CONFIG_REG, WR_PROT_FLASH_FLD, 14, 1) | ||
192 | + FIELD(CONFIG_REG, PERIPH_CS_LINES_FLD, 10, 4) | ||
193 | + FIELD(CONFIG_REG, PERIPH_SEL_DEC_FLD, 9, 1) | ||
194 | + FIELD(CONFIG_REG, ENB_LEGACY_IP_MODE_FLD, 8, 1) | ||
195 | + FIELD(CONFIG_REG, ENB_DIR_ACC_CTLR_FLD, 7, 1) | ||
196 | + FIELD(CONFIG_REG, RESET_CFG_FLD, 6, 1) | ||
197 | + FIELD(CONFIG_REG, RESET_PIN_FLD, 5, 1) | ||
198 | + FIELD(CONFIG_REG, HOLD_PIN_FLD, 4, 1) | ||
199 | + FIELD(CONFIG_REG, PHY_MODE_ENABLE_FLD, 3, 1) | ||
200 | + FIELD(CONFIG_REG, SEL_CLK_PHASE_FLD, 2, 1) | ||
201 | + FIELD(CONFIG_REG, SEL_CLK_POL_FLD, 1, 1) | ||
202 | + FIELD(CONFIG_REG, ENB_SPI_FLD, 0, 1) | ||
203 | +REG32(DEV_INSTR_RD_CONFIG_REG, 0x4) | ||
204 | + FIELD(DEV_INSTR_RD_CONFIG_REG, RD_INSTR_RESV5_FLD, 29, 3) | ||
205 | + FIELD(DEV_INSTR_RD_CONFIG_REG, DUMMY_RD_CLK_CYCLES_FLD, 24, 5) | ||
206 | + FIELD(DEV_INSTR_RD_CONFIG_REG, RD_INSTR_RESV4_FLD, 21, 3) | ||
207 | + FIELD(DEV_INSTR_RD_CONFIG_REG, MODE_BIT_ENABLE_FLD, 20, 1) | ||
208 | + FIELD(DEV_INSTR_RD_CONFIG_REG, RD_INSTR_RESV3_FLD, 18, 2) | ||
209 | + FIELD(DEV_INSTR_RD_CONFIG_REG, DATA_XFER_TYPE_EXT_MODE_FLD, 16, 2) | ||
210 | + FIELD(DEV_INSTR_RD_CONFIG_REG, RD_INSTR_RESV2_FLD, 14, 2) | ||
211 | + FIELD(DEV_INSTR_RD_CONFIG_REG, ADDR_XFER_TYPE_STD_MODE_FLD, 12, 2) | ||
212 | + FIELD(DEV_INSTR_RD_CONFIG_REG, PRED_DIS_FLD, 11, 1) | ||
213 | + FIELD(DEV_INSTR_RD_CONFIG_REG, DDR_EN_FLD, 10, 1) | ||
214 | + FIELD(DEV_INSTR_RD_CONFIG_REG, INSTR_TYPE_FLD, 8, 2) | ||
215 | + FIELD(DEV_INSTR_RD_CONFIG_REG, RD_OPCODE_NON_XIP_FLD, 0, 8) | ||
216 | +REG32(DEV_INSTR_WR_CONFIG_REG, 0x8) | ||
217 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WR_INSTR_RESV4_FLD, 29, 3) | ||
218 | + FIELD(DEV_INSTR_WR_CONFIG_REG, DUMMY_WR_CLK_CYCLES_FLD, 24, 5) | ||
219 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WR_INSTR_RESV3_FLD, 18, 6) | ||
220 | + FIELD(DEV_INSTR_WR_CONFIG_REG, DATA_XFER_TYPE_EXT_MODE_FLD, 16, 2) | ||
221 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WR_INSTR_RESV2_FLD, 14, 2) | ||
222 | + FIELD(DEV_INSTR_WR_CONFIG_REG, ADDR_XFER_TYPE_STD_MODE_FLD, 12, 2) | ||
223 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WR_INSTR_RESV1_FLD, 9, 3) | ||
224 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WEL_DIS_FLD, 8, 1) | ||
225 | + FIELD(DEV_INSTR_WR_CONFIG_REG, WR_OPCODE_FLD, 0, 8) | ||
226 | +REG32(DEV_DELAY_REG, 0xc) | ||
227 | + FIELD(DEV_DELAY_REG, D_NSS_FLD, 24, 8) | ||
228 | + FIELD(DEV_DELAY_REG, D_BTWN_FLD, 16, 8) | ||
229 | + FIELD(DEV_DELAY_REG, D_AFTER_FLD, 8, 8) | ||
230 | + FIELD(DEV_DELAY_REG, D_INIT_FLD, 0, 8) | ||
231 | +REG32(RD_DATA_CAPTURE_REG, 0x10) | ||
232 | + FIELD(RD_DATA_CAPTURE_REG, RD_DATA_RESV3_FLD, 20, 12) | ||
233 | + FIELD(RD_DATA_CAPTURE_REG, DDR_READ_DELAY_FLD, 16, 4) | ||
234 | + FIELD(RD_DATA_CAPTURE_REG, RD_DATA_RESV2_FLD, 9, 7) | ||
235 | + FIELD(RD_DATA_CAPTURE_REG, DQS_ENABLE_FLD, 8, 1) | ||
236 | + FIELD(RD_DATA_CAPTURE_REG, RD_DATA_RESV1_FLD, 6, 2) | ||
237 | + FIELD(RD_DATA_CAPTURE_REG, SAMPLE_EDGE_SEL_FLD, 5, 1) | ||
238 | + FIELD(RD_DATA_CAPTURE_REG, DELAY_FLD, 1, 4) | ||
239 | + FIELD(RD_DATA_CAPTURE_REG, BYPASS_FLD, 0, 1) | ||
240 | +REG32(DEV_SIZE_CONFIG_REG, 0x14) | ||
241 | + FIELD(DEV_SIZE_CONFIG_REG, DEV_SIZE_RESV_FLD, 29, 3) | ||
242 | + FIELD(DEV_SIZE_CONFIG_REG, MEM_SIZE_ON_CS3_FLD, 27, 2) | ||
243 | + FIELD(DEV_SIZE_CONFIG_REG, MEM_SIZE_ON_CS2_FLD, 25, 2) | ||
244 | + FIELD(DEV_SIZE_CONFIG_REG, MEM_SIZE_ON_CS1_FLD, 23, 2) | ||
245 | + FIELD(DEV_SIZE_CONFIG_REG, MEM_SIZE_ON_CS0_FLD, 21, 2) | ||
246 | + FIELD(DEV_SIZE_CONFIG_REG, BYTES_PER_SUBSECTOR_FLD, 16, 5) | ||
247 | + FIELD(DEV_SIZE_CONFIG_REG, BYTES_PER_DEVICE_PAGE_FLD, 4, 12) | ||
248 | + FIELD(DEV_SIZE_CONFIG_REG, NUM_ADDR_BYTES_FLD, 0, 4) | ||
249 | +REG32(SRAM_PARTITION_CFG_REG, 0x18) | ||
250 | + FIELD(SRAM_PARTITION_CFG_REG, SRAM_PARTITION_RESV_FLD, 8, 24) | ||
251 | + FIELD(SRAM_PARTITION_CFG_REG, ADDR_FLD, 0, 8) | ||
252 | +REG32(IND_AHB_ADDR_TRIGGER_REG, 0x1c) | ||
253 | +REG32(DMA_PERIPH_CONFIG_REG, 0x20) | ||
254 | + FIELD(DMA_PERIPH_CONFIG_REG, DMA_PERIPH_RESV2_FLD, 12, 20) | ||
255 | + FIELD(DMA_PERIPH_CONFIG_REG, NUM_BURST_REQ_BYTES_FLD, 8, 4) | ||
256 | + FIELD(DMA_PERIPH_CONFIG_REG, DMA_PERIPH_RESV1_FLD, 4, 4) | ||
257 | + FIELD(DMA_PERIPH_CONFIG_REG, NUM_SINGLE_REQ_BYTES_FLD, 0, 4) | ||
258 | +REG32(REMAP_ADDR_REG, 0x24) | ||
259 | +REG32(MODE_BIT_CONFIG_REG, 0x28) | ||
260 | + FIELD(MODE_BIT_CONFIG_REG, RX_CRC_DATA_LOW_FLD, 24, 8) | ||
261 | + FIELD(MODE_BIT_CONFIG_REG, RX_CRC_DATA_UP_FLD, 16, 8) | ||
262 | + FIELD(MODE_BIT_CONFIG_REG, CRC_OUT_ENABLE_FLD, 15, 1) | ||
263 | + FIELD(MODE_BIT_CONFIG_REG, MODE_BIT_RESV1_FLD, 11, 4) | ||
264 | + FIELD(MODE_BIT_CONFIG_REG, CHUNK_SIZE_FLD, 8, 3) | ||
265 | + FIELD(MODE_BIT_CONFIG_REG, MODE_FLD, 0, 8) | ||
266 | +REG32(SRAM_FILL_REG, 0x2c) | ||
267 | + FIELD(SRAM_FILL_REG, SRAM_FILL_INDAC_WRITE_FLD, 16, 16) | ||
268 | + FIELD(SRAM_FILL_REG, SRAM_FILL_INDAC_READ_FLD, 0, 16) | ||
269 | +REG32(TX_THRESH_REG, 0x30) | ||
270 | + FIELD(TX_THRESH_REG, TX_THRESH_RESV_FLD, 5, 27) | ||
271 | + FIELD(TX_THRESH_REG, LEVEL_FLD, 0, 5) | ||
272 | +REG32(RX_THRESH_REG, 0x34) | ||
273 | + FIELD(RX_THRESH_REG, RX_THRESH_RESV_FLD, 5, 27) | ||
274 | + FIELD(RX_THRESH_REG, LEVEL_FLD, 0, 5) | ||
275 | +REG32(WRITE_COMPLETION_CTRL_REG, 0x38) | ||
276 | + FIELD(WRITE_COMPLETION_CTRL_REG, POLL_REP_DELAY_FLD, 24, 8) | ||
277 | + FIELD(WRITE_COMPLETION_CTRL_REG, POLL_COUNT_FLD, 16, 8) | ||
278 | + FIELD(WRITE_COMPLETION_CTRL_REG, ENABLE_POLLING_EXP_FLD, 15, 1) | ||
279 | + FIELD(WRITE_COMPLETION_CTRL_REG, DISABLE_POLLING_FLD, 14, 1) | ||
280 | + FIELD(WRITE_COMPLETION_CTRL_REG, POLLING_POLARITY_FLD, 13, 1) | ||
281 | + FIELD(WRITE_COMPLETION_CTRL_REG, WR_COMP_CTRL_RESV1_FLD, 12, 1) | ||
282 | + FIELD(WRITE_COMPLETION_CTRL_REG, POLLING_ADDR_EN_FLD, 11, 1) | ||
283 | + FIELD(WRITE_COMPLETION_CTRL_REG, POLLING_BIT_INDEX_FLD, 8, 3) | ||
284 | + FIELD(WRITE_COMPLETION_CTRL_REG, OPCODE_FLD, 0, 8) | ||
285 | +REG32(NO_OF_POLLS_BEF_EXP_REG, 0x3c) | ||
286 | +REG32(IRQ_STATUS_REG, 0x40) | ||
287 | + FIELD(IRQ_STATUS_REG, IRQ_STAT_RESV_FLD, 20, 12) | ||
288 | + FIELD(IRQ_STATUS_REG, ECC_FAIL_FLD, 19, 1) | ||
289 | + FIELD(IRQ_STATUS_REG, TX_CRC_CHUNK_BRK_FLD, 18, 1) | ||
290 | + FIELD(IRQ_STATUS_REG, RX_CRC_DATA_VAL_FLD, 17, 1) | ||
291 | + FIELD(IRQ_STATUS_REG, RX_CRC_DATA_ERR_FLD, 16, 1) | ||
292 | + FIELD(IRQ_STATUS_REG, IRQ_STAT_RESV1_FLD, 15, 1) | ||
293 | + FIELD(IRQ_STATUS_REG, STIG_REQ_INT_FLD, 14, 1) | ||
294 | + FIELD(IRQ_STATUS_REG, POLL_EXP_INT_FLD, 13, 1) | ||
295 | + FIELD(IRQ_STATUS_REG, INDRD_SRAM_FULL_FLD, 12, 1) | ||
296 | + FIELD(IRQ_STATUS_REG, RX_FIFO_FULL_FLD, 11, 1) | ||
297 | + FIELD(IRQ_STATUS_REG, RX_FIFO_NOT_EMPTY_FLD, 10, 1) | ||
298 | + FIELD(IRQ_STATUS_REG, TX_FIFO_FULL_FLD, 9, 1) | ||
299 | + FIELD(IRQ_STATUS_REG, TX_FIFO_NOT_FULL_FLD, 8, 1) | ||
300 | + FIELD(IRQ_STATUS_REG, RECV_OVERFLOW_FLD, 7, 1) | ||
301 | + FIELD(IRQ_STATUS_REG, INDIRECT_XFER_LEVEL_BREACH_FLD, 6, 1) | ||
302 | + FIELD(IRQ_STATUS_REG, ILLEGAL_ACCESS_DET_FLD, 5, 1) | ||
303 | + FIELD(IRQ_STATUS_REG, PROT_WR_ATTEMPT_FLD, 4, 1) | ||
304 | + FIELD(IRQ_STATUS_REG, INDIRECT_TRANSFER_REJECT_FLD, 3, 1) | ||
305 | + FIELD(IRQ_STATUS_REG, INDIRECT_OP_DONE_FLD, 2, 1) | ||
306 | + FIELD(IRQ_STATUS_REG, UNDERFLOW_DET_FLD, 1, 1) | ||
307 | + FIELD(IRQ_STATUS_REG, MODE_M_FAIL_FLD, 0, 1) | ||
308 | +REG32(IRQ_MASK_REG, 0x44) | ||
309 | + FIELD(IRQ_MASK_REG, IRQ_MASK_RESV_FLD, 20, 12) | ||
310 | + FIELD(IRQ_MASK_REG, ECC_FAIL_MASK_FLD, 19, 1) | ||
311 | + FIELD(IRQ_MASK_REG, TX_CRC_CHUNK_BRK_MASK_FLD, 18, 1) | ||
312 | + FIELD(IRQ_MASK_REG, RX_CRC_DATA_VAL_MASK_FLD, 17, 1) | ||
313 | + FIELD(IRQ_MASK_REG, RX_CRC_DATA_ERR_MASK_FLD, 16, 1) | ||
314 | + FIELD(IRQ_MASK_REG, IRQ_MASK_RESV1_FLD, 15, 1) | ||
315 | + FIELD(IRQ_MASK_REG, STIG_REQ_MASK_FLD, 14, 1) | ||
316 | + FIELD(IRQ_MASK_REG, POLL_EXP_INT_MASK_FLD, 13, 1) | ||
317 | + FIELD(IRQ_MASK_REG, INDRD_SRAM_FULL_MASK_FLD, 12, 1) | ||
318 | + FIELD(IRQ_MASK_REG, RX_FIFO_FULL_MASK_FLD, 11, 1) | ||
319 | + FIELD(IRQ_MASK_REG, RX_FIFO_NOT_EMPTY_MASK_FLD, 10, 1) | ||
320 | + FIELD(IRQ_MASK_REG, TX_FIFO_FULL_MASK_FLD, 9, 1) | ||
321 | + FIELD(IRQ_MASK_REG, TX_FIFO_NOT_FULL_MASK_FLD, 8, 1) | ||
322 | + FIELD(IRQ_MASK_REG, RECV_OVERFLOW_MASK_FLD, 7, 1) | ||
323 | + FIELD(IRQ_MASK_REG, INDIRECT_XFER_LEVEL_BREACH_MASK_FLD, 6, 1) | ||
324 | + FIELD(IRQ_MASK_REG, ILLEGAL_ACCESS_DET_MASK_FLD, 5, 1) | ||
325 | + FIELD(IRQ_MASK_REG, PROT_WR_ATTEMPT_MASK_FLD, 4, 1) | ||
326 | + FIELD(IRQ_MASK_REG, INDIRECT_TRANSFER_REJECT_MASK_FLD, 3, 1) | ||
327 | + FIELD(IRQ_MASK_REG, INDIRECT_OP_DONE_MASK_FLD, 2, 1) | ||
328 | + FIELD(IRQ_MASK_REG, UNDERFLOW_DET_MASK_FLD, 1, 1) | ||
329 | + FIELD(IRQ_MASK_REG, MODE_M_FAIL_MASK_FLD, 0, 1) | ||
330 | +REG32(LOWER_WR_PROT_REG, 0x50) | ||
331 | +REG32(UPPER_WR_PROT_REG, 0x54) | ||
332 | +REG32(WR_PROT_CTRL_REG, 0x58) | ||
333 | + FIELD(WR_PROT_CTRL_REG, WR_PROT_CTRL_RESV_FLD, 2, 30) | ||
334 | + FIELD(WR_PROT_CTRL_REG, ENB_FLD, 1, 1) | ||
335 | + FIELD(WR_PROT_CTRL_REG, INV_FLD, 0, 1) | ||
336 | +REG32(INDIRECT_READ_XFER_CTRL_REG, 0x60) | ||
337 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, INDIR_RD_XFER_RESV_FLD, 8, 24) | ||
338 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, NUM_IND_OPS_DONE_FLD, 6, 2) | ||
339 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, IND_OPS_DONE_STATUS_FLD, 5, 1) | ||
340 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, RD_QUEUED_FLD, 4, 1) | ||
341 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, SRAM_FULL_FLD, 3, 1) | ||
342 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, RD_STATUS_FLD, 2, 1) | ||
343 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, CANCEL_FLD, 1, 1) | ||
344 | + FIELD(INDIRECT_READ_XFER_CTRL_REG, START_FLD, 0, 1) | ||
345 | +REG32(INDIRECT_READ_XFER_WATERMARK_REG, 0x64) | ||
346 | +REG32(INDIRECT_READ_XFER_START_REG, 0x68) | ||
347 | +REG32(INDIRECT_READ_XFER_NUM_BYTES_REG, 0x6c) | ||
348 | +REG32(INDIRECT_WRITE_XFER_CTRL_REG, 0x70) | ||
349 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, INDIR_WR_XFER_RESV2_FLD, 8, 24) | ||
350 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, NUM_IND_OPS_DONE_FLD, 6, 2) | ||
351 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, IND_OPS_DONE_STATUS_FLD, 5, 1) | ||
352 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, WR_QUEUED_FLD, 4, 1) | ||
353 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, INDIR_WR_XFER_RESV1_FLD, 3, 1) | ||
354 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, WR_STATUS_FLD, 2, 1) | ||
355 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, CANCEL_FLD, 1, 1) | ||
356 | + FIELD(INDIRECT_WRITE_XFER_CTRL_REG, START_FLD, 0, 1) | ||
357 | +REG32(INDIRECT_WRITE_XFER_WATERMARK_REG, 0x74) | ||
358 | +REG32(INDIRECT_WRITE_XFER_START_REG, 0x78) | ||
359 | +REG32(INDIRECT_WRITE_XFER_NUM_BYTES_REG, 0x7c) | ||
360 | +REG32(INDIRECT_TRIGGER_ADDR_RANGE_REG, 0x80) | ||
361 | + FIELD(INDIRECT_TRIGGER_ADDR_RANGE_REG, IND_RANGE_RESV1_FLD, 4, 28) | ||
362 | + FIELD(INDIRECT_TRIGGER_ADDR_RANGE_REG, IND_RANGE_WIDTH_FLD, 0, 4) | ||
363 | +REG32(FLASH_COMMAND_CTRL_MEM_REG, 0x8c) | ||
364 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, FLASH_COMMAND_CTRL_MEM_RESV1_FLD, 29, 3) | ||
365 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, MEM_BANK_ADDR_FLD, 20, 9) | ||
366 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, FLASH_COMMAND_CTRL_MEM_RESV2_FLD, 19, 1) | ||
367 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, NB_OF_STIG_READ_BYTES_FLD, 16, 3) | ||
368 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, MEM_BANK_READ_DATA_FLD, 8, 8) | ||
369 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, FLASH_COMMAND_CTRL_MEM_RESV3_FLD, 2, 6) | ||
370 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, MEM_BANK_REQ_IN_PROGRESS_FLD, 1, 1) | ||
371 | + FIELD(FLASH_COMMAND_CTRL_MEM_REG, TRIGGER_MEM_BANK_REQ_FLD, 0, 1) | ||
372 | +REG32(FLASH_CMD_CTRL_REG, 0x90) | ||
373 | + FIELD(FLASH_CMD_CTRL_REG, CMD_OPCODE_FLD, 24, 8) | ||
374 | + FIELD(FLASH_CMD_CTRL_REG, ENB_READ_DATA_FLD, 23, 1) | ||
375 | + FIELD(FLASH_CMD_CTRL_REG, NUM_RD_DATA_BYTES_FLD, 20, 3) | ||
376 | + FIELD(FLASH_CMD_CTRL_REG, ENB_COMD_ADDR_FLD, 19, 1) | ||
377 | + FIELD(FLASH_CMD_CTRL_REG, ENB_MODE_BIT_FLD, 18, 1) | ||
378 | + FIELD(FLASH_CMD_CTRL_REG, NUM_ADDR_BYTES_FLD, 16, 2) | ||
379 | + FIELD(FLASH_CMD_CTRL_REG, ENB_WRITE_DATA_FLD, 15, 1) | ||
380 | + FIELD(FLASH_CMD_CTRL_REG, NUM_WR_DATA_BYTES_FLD, 12, 3) | ||
381 | + FIELD(FLASH_CMD_CTRL_REG, NUM_DUMMY_CYCLES_FLD, 7, 5) | ||
382 | + FIELD(FLASH_CMD_CTRL_REG, FLASH_CMD_CTRL_RESV1_FLD, 3, 4) | ||
383 | + FIELD(FLASH_CMD_CTRL_REG, STIG_MEM_BANK_EN_FLD, 2, 1) | ||
384 | + FIELD(FLASH_CMD_CTRL_REG, CMD_EXEC_STATUS_FLD, 1, 1) | ||
385 | + FIELD(FLASH_CMD_CTRL_REG, CMD_EXEC_FLD, 0, 1) | ||
386 | +REG32(FLASH_CMD_ADDR_REG, 0x94) | ||
387 | +REG32(FLASH_RD_DATA_LOWER_REG, 0xa0) | ||
388 | +REG32(FLASH_RD_DATA_UPPER_REG, 0xa4) | ||
389 | +REG32(FLASH_WR_DATA_LOWER_REG, 0xa8) | ||
390 | +REG32(FLASH_WR_DATA_UPPER_REG, 0xac) | ||
391 | +REG32(POLLING_FLASH_STATUS_REG, 0xb0) | ||
392 | + FIELD(POLLING_FLASH_STATUS_REG, DEVICE_STATUS_RSVD_FLD2, 21, 11) | ||
393 | + FIELD(POLLING_FLASH_STATUS_REG, DEVICE_STATUS_NB_DUMMY, 16, 5) | ||
394 | + FIELD(POLLING_FLASH_STATUS_REG, DEVICE_STATUS_RSVD_FLD1, 9, 7) | ||
395 | + FIELD(POLLING_FLASH_STATUS_REG, DEVICE_STATUS_VALID_FLD, 8, 1) | ||
396 | + FIELD(POLLING_FLASH_STATUS_REG, DEVICE_STATUS_FLD, 0, 8) | ||
397 | +REG32(PHY_CONFIGURATION_REG, 0xb4) | ||
398 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RESYNC_FLD, 31, 1) | ||
399 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RESET_FLD, 30, 1) | ||
400 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RX_DLL_BYPASS_FLD, 29, 1) | ||
401 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RESV2_FLD, 23, 6) | ||
402 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_TX_DLL_DELAY_FLD, 16, 7) | ||
403 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RESV1_FLD, 7, 9) | ||
404 | + FIELD(PHY_CONFIGURATION_REG, PHY_CONFIG_RX_DLL_DELAY_FLD, 0, 7) | ||
405 | +REG32(PHY_MASTER_CONTROL_REG, 0xb8) | ||
406 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_CONTROL_RESV3_FLD, 25, 7) | ||
407 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_LOCK_MODE_FLD, 24, 1) | ||
408 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_BYPASS_MODE_FLD, 23, 1) | ||
409 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_PHASE_DETECT_SELECTOR_FLD, 20, 3) | ||
410 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_CONTROL_RESV2_FLD, 19, 1) | ||
411 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_NB_INDICATIONS_FLD, 16, 3) | ||
412 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_CONTROL_RESV1_FLD, 7, 9) | ||
413 | + FIELD(PHY_MASTER_CONTROL_REG, PHY_MASTER_INITIAL_DELAY_FLD, 0, 7) | ||
414 | +REG32(DLL_OBSERVABLE_LOWER_REG, 0xbc) | ||
415 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
416 | + DLL_OBSERVABLE_LOWER_DLL_LOCK_INC_FLD, 24, 8) | ||
417 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
418 | + DLL_OBSERVABLE_LOWER_DLL_LOCK_DEC_FLD, 16, 8) | ||
419 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
420 | + DLL_OBSERVABLE_LOWER_LOOPBACK_LOCK_FLD, 15, 1) | ||
421 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
422 | + DLL_OBSERVABLE_LOWER_LOCK_VALUE_FLD, 8, 7) | ||
423 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
424 | + DLL_OBSERVABLE_LOWER_UNLOCK_COUNTER_FLD, 3, 5) | ||
425 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
426 | + DLL_OBSERVABLE_LOWER_LOCK_MODE_FLD, 1, 2) | ||
427 | + FIELD(DLL_OBSERVABLE_LOWER_REG, | ||
428 | + DLL_OBSERVABLE_LOWER_DLL_LOCK_FLD, 0, 1) | ||
429 | +REG32(DLL_OBSERVABLE_UPPER_REG, 0xc0) | ||
430 | + FIELD(DLL_OBSERVABLE_UPPER_REG, | ||
431 | + DLL_OBSERVABLE_UPPER_RESV2_FLD, 23, 9) | ||
432 | + FIELD(DLL_OBSERVABLE_UPPER_REG, | ||
433 | + DLL_OBSERVABLE_UPPER_TX_DECODER_OUTPUT_FLD, 16, 7) | ||
434 | + FIELD(DLL_OBSERVABLE_UPPER_REG, | ||
435 | + DLL_OBSERVABLE_UPPER_RESV1_FLD, 7, 9) | ||
436 | + FIELD(DLL_OBSERVABLE_UPPER_REG, | ||
437 | + DLL_OBSERVABLE__UPPER_RX_DECODER_OUTPUT_FLD, 0, 7) | ||
438 | +REG32(OPCODE_EXT_LOWER_REG, 0xe0) | ||
439 | + FIELD(OPCODE_EXT_LOWER_REG, EXT_READ_OPCODE_FLD, 24, 8) | ||
440 | + FIELD(OPCODE_EXT_LOWER_REG, EXT_WRITE_OPCODE_FLD, 16, 8) | ||
441 | + FIELD(OPCODE_EXT_LOWER_REG, EXT_POLL_OPCODE_FLD, 8, 8) | ||
442 | + FIELD(OPCODE_EXT_LOWER_REG, EXT_STIG_OPCODE_FLD, 0, 8) | ||
443 | +REG32(OPCODE_EXT_UPPER_REG, 0xe4) | ||
444 | + FIELD(OPCODE_EXT_UPPER_REG, WEL_OPCODE_FLD, 24, 8) | ||
445 | + FIELD(OPCODE_EXT_UPPER_REG, EXT_WEL_OPCODE_FLD, 16, 8) | ||
446 | + FIELD(OPCODE_EXT_UPPER_REG, OPCODE_EXT_UPPER_RESV1_FLD, 0, 16) | ||
447 | +REG32(MODULE_ID_REG, 0xfc) | ||
448 | + FIELD(MODULE_ID_REG, FIX_PATCH_FLD, 24, 8) | ||
449 | + FIELD(MODULE_ID_REG, MODULE_ID_FLD, 8, 16) | ||
450 | + FIELD(MODULE_ID_REG, MODULE_ID_RESV_FLD, 2, 6) | ||
451 | + FIELD(MODULE_ID_REG, CONF_FLD, 0, 2) | ||
452 | + | ||
453 | +#define RXFF_SZ 1024 | ||
454 | +#define TXFF_SZ 1024 | ||
455 | + | ||
456 | +#define MAX_RX_DEC_OUT 8 | ||
457 | + | ||
458 | +#define SZ_512MBIT (512 * 1024 * 1024) | ||
459 | +#define SZ_1GBIT (1024 * 1024 * 1024) | ||
460 | +#define SZ_2GBIT (2ULL * SZ_1GBIT) | ||
461 | +#define SZ_4GBIT (4ULL * SZ_1GBIT) | ||
462 | + | ||
463 | +#define IS_IND_DMA_START(op) (op->done_bytes == 0) | ||
464 | +/* | ||
465 | + * Bit field size of R_INDIRECT_WRITE_XFER_CTRL_REG_NUM_IND_OPS_DONE_FLD | ||
466 | + * is 2 bits, which can record max of 3 indac operations. | ||
467 | + */ | ||
468 | +#define IND_OPS_DONE_MAX 3 | ||
469 | + | ||
470 | +typedef enum { | ||
471 | + WREN = 0x6, | ||
472 | +} FlashCMD; | ||
473 | + | ||
474 | +static unsigned int ospi_stig_addr_len(XlnxVersalOspi *s) | ||
475 | +{ | ||
476 | + /* Num address bytes is NUM_ADDR_BYTES_FLD + 1 */ | ||
477 | + return ARRAY_FIELD_EX32(s->regs, | ||
478 | + FLASH_CMD_CTRL_REG, NUM_ADDR_BYTES_FLD) + 1; | ||
479 | +} | ||
480 | + | ||
481 | +static unsigned int ospi_stig_wr_data_len(XlnxVersalOspi *s) | ||
482 | +{ | ||
483 | + /* Num write data bytes is NUM_WR_DATA_BYTES_FLD + 1 */ | ||
484 | + return ARRAY_FIELD_EX32(s->regs, | ||
485 | + FLASH_CMD_CTRL_REG, NUM_WR_DATA_BYTES_FLD) + 1; | ||
486 | +} | ||
487 | + | ||
488 | +static unsigned int ospi_stig_rd_data_len(XlnxVersalOspi *s) | ||
489 | +{ | ||
490 | + /* Num read data bytes is NUM_RD_DATA_BYTES_FLD + 1 */ | ||
491 | + return ARRAY_FIELD_EX32(s->regs, | ||
492 | + FLASH_CMD_CTRL_REG, NUM_RD_DATA_BYTES_FLD) + 1; | ||
493 | +} | ||
494 | + | ||
495 | +/* | ||
496 | + * Status bits in R_IRQ_STATUS_REG are set when the event occurs and the | ||
497 | + * interrupt is enabled in the mask register ([1] Section 2.3.17) | ||
498 | + */ | ||
499 | +static void set_irq(XlnxVersalOspi *s, uint32_t set_mask) | ||
500 | +{ | ||
501 | + s->regs[R_IRQ_STATUS_REG] |= s->regs[R_IRQ_MASK_REG] & set_mask; | ||
502 | +} | ||
503 | + | ||
504 | +static void ospi_update_irq_line(XlnxVersalOspi *s) | ||
505 | +{ | ||
506 | + qemu_set_irq(s->irq, !!(s->regs[R_IRQ_STATUS_REG] & | ||
507 | + s->regs[R_IRQ_MASK_REG])); | ||
508 | +} | ||
509 | + | ||
510 | +static uint8_t ospi_get_wr_opcode(XlnxVersalOspi *s) | ||
511 | +{ | ||
512 | + return ARRAY_FIELD_EX32(s->regs, | ||
513 | + DEV_INSTR_WR_CONFIG_REG, WR_OPCODE_FLD); | ||
514 | +} | ||
515 | + | ||
516 | +static uint8_t ospi_get_rd_opcode(XlnxVersalOspi *s) | ||
517 | +{ | ||
518 | + return ARRAY_FIELD_EX32(s->regs, | ||
519 | + DEV_INSTR_RD_CONFIG_REG, RD_OPCODE_NON_XIP_FLD); | ||
520 | +} | ||
521 | + | ||
522 | +static uint32_t ospi_get_num_addr_bytes(XlnxVersalOspi *s) | ||
523 | +{ | ||
524 | + /* Num address bytes is NUM_ADDR_BYTES_FLD + 1 */ | ||
525 | + return ARRAY_FIELD_EX32(s->regs, | ||
526 | + DEV_SIZE_CONFIG_REG, NUM_ADDR_BYTES_FLD) + 1; | ||
527 | +} | ||
528 | + | ||
529 | +static void ospi_stig_membank_req(XlnxVersalOspi *s) | ||
530 | +{ | ||
531 | + int idx = ARRAY_FIELD_EX32(s->regs, | ||
532 | + FLASH_COMMAND_CTRL_MEM_REG, MEM_BANK_ADDR_FLD); | ||
533 | + | ||
534 | + ARRAY_FIELD_DP32(s->regs, FLASH_COMMAND_CTRL_MEM_REG, | ||
535 | + MEM_BANK_READ_DATA_FLD, s->stig_membank[idx]); | ||
536 | +} | ||
537 | + | ||
538 | +static int ospi_stig_membank_rd_bytes(XlnxVersalOspi *s) | ||
539 | +{ | ||
540 | + int rd_data_fld = ARRAY_FIELD_EX32(s->regs, FLASH_COMMAND_CTRL_MEM_REG, | ||
541 | + NB_OF_STIG_READ_BYTES_FLD); | ||
542 | + static const int sizes[6] = { 16, 32, 64, 128, 256, 512 }; | ||
543 | + return (rd_data_fld < 6) ? sizes[rd_data_fld] : 0; | ||
544 | +} | ||
545 | + | ||
546 | +static uint32_t ospi_get_page_sz(XlnxVersalOspi *s) | ||
547 | +{ | ||
548 | + return ARRAY_FIELD_EX32(s->regs, | ||
549 | + DEV_SIZE_CONFIG_REG, BYTES_PER_DEVICE_PAGE_FLD); | ||
550 | +} | ||
551 | + | ||
552 | +static bool ospi_ind_rd_watermark_enabled(XlnxVersalOspi *s) | ||
553 | +{ | ||
554 | + return s->regs[R_INDIRECT_READ_XFER_WATERMARK_REG]; | ||
555 | +} | ||
556 | + | ||
557 | +static void ind_op_advance(IndOp *op, unsigned int len) | ||
558 | +{ | ||
559 | + op->done_bytes += len; | ||
560 | + assert(op->done_bytes <= op->num_bytes); | ||
561 | + if (op->done_bytes == op->num_bytes) { | ||
562 | + op->completed = true; | ||
563 | + } | ||
564 | +} | ||
565 | + | ||
566 | +static uint32_t ind_op_next_byte(IndOp *op) | ||
567 | +{ | ||
568 | + return op->flash_addr + op->done_bytes; | ||
569 | +} | ||
570 | + | ||
571 | +static uint32_t ind_op_end_byte(IndOp *op) | ||
572 | +{ | ||
573 | + return op->flash_addr + op->num_bytes; | ||
574 | +} | ||
575 | + | ||
576 | +static void ospi_ind_op_next(IndOp *op) | ||
577 | +{ | ||
578 | + op[0] = op[1]; | ||
579 | + op[1].completed = true; | ||
580 | +} | ||
581 | + | ||
582 | +static void ind_op_setup(IndOp *op, uint32_t flash_addr, uint32_t num_bytes) | ||
583 | +{ | ||
584 | + if (num_bytes & 0x3) { | ||
585 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
586 | + "OSPI indirect op num bytes not word aligned\n"); | ||
587 | + } | ||
588 | + op->flash_addr = flash_addr; | ||
589 | + op->num_bytes = num_bytes; | ||
590 | + op->done_bytes = 0; | ||
591 | + op->completed = false; | ||
592 | +} | ||
593 | + | ||
594 | +static bool ospi_ind_op_completed(IndOp *op) | ||
595 | +{ | ||
596 | + return op->completed; | ||
597 | +} | ||
598 | + | ||
599 | +static bool ospi_ind_op_all_completed(XlnxVersalOspi *s) | ||
600 | +{ | ||
601 | + return s->rd_ind_op[0].completed && s->wr_ind_op[0].completed; | ||
602 | +} | ||
603 | + | ||
604 | +static void ospi_ind_op_cancel(IndOp *op) | ||
605 | +{ | ||
606 | + op[0].completed = true; | ||
607 | + op[1].completed = true; | ||
608 | +} | ||
609 | + | ||
610 | +static bool ospi_ind_op_add(IndOp *op, Fifo8 *fifo, | ||
611 | + uint32_t flash_addr, uint32_t num_bytes) | ||
612 | +{ | ||
613 | + /* Check if first indirect op has been completed */ | ||
614 | + if (op->completed) { | ||
615 | + fifo8_reset(fifo); | ||
616 | + ind_op_setup(op, flash_addr, num_bytes); | ||
617 | + return false; | ||
618 | + } | ||
619 | + | ||
620 | + /* Check if second indirect op has been completed */ | ||
621 | + op++; | ||
622 | + if (op->completed) { | ||
623 | + ind_op_setup(op, flash_addr, num_bytes); | ||
624 | + return false; | ||
625 | + } | ||
626 | + return true; | ||
627 | +} | ||
628 | + | ||
629 | +static void ospi_ind_op_queue_up_rd(XlnxVersalOspi *s) | ||
630 | +{ | ||
631 | + uint32_t num_bytes = s->regs[R_INDIRECT_READ_XFER_NUM_BYTES_REG]; | ||
632 | + uint32_t flash_addr = s->regs[R_INDIRECT_READ_XFER_START_REG]; | ||
633 | + bool failed; | ||
634 | + | ||
635 | + failed = ospi_ind_op_add(s->rd_ind_op, &s->rx_sram, flash_addr, num_bytes); | ||
636 | + /* If two already queued set rd reject interrupt */ | ||
637 | + if (failed) { | ||
638 | + set_irq(s, R_IRQ_STATUS_REG_INDIRECT_TRANSFER_REJECT_FLD_MASK); | ||
639 | + } | ||
640 | +} | ||
641 | + | ||
642 | +static void ospi_ind_op_queue_up_wr(XlnxVersalOspi *s) | ||
643 | +{ | ||
644 | + uint32_t num_bytes = s->regs[R_INDIRECT_WRITE_XFER_NUM_BYTES_REG]; | ||
645 | + uint32_t flash_addr = s->regs[R_INDIRECT_WRITE_XFER_START_REG]; | ||
646 | + bool failed; | ||
647 | + | ||
648 | + failed = ospi_ind_op_add(s->wr_ind_op, &s->tx_sram, flash_addr, num_bytes); | ||
649 | + /* If two are already queued, set the indirect transfer reject interrupt */ | ||
650 | + if (failed) { | ||
651 | + set_irq(s, R_IRQ_STATUS_REG_INDIRECT_TRANSFER_REJECT_FLD_MASK); | ||
652 | + } | ||
653 | +} | ||
654 | + | ||
655 | +static uint64_t flash_sz(XlnxVersalOspi *s, unsigned int cs) | ||
656 | +{ | ||
657 | + /* Flash sizes in bytes */ | ||
658 | + static const uint64_t sizes[4] = { SZ_512MBIT / 8, SZ_1GBIT / 8, | ||
659 | + SZ_2GBIT / 8, SZ_4GBIT / 8 }; | ||
660 | + uint32_t v = s->regs[R_DEV_SIZE_CONFIG_REG]; | ||
661 | + | ||
662 | + v >>= cs * R_DEV_SIZE_CONFIG_REG_MEM_SIZE_ON_CS0_FLD_LENGTH; | ||
663 | + return sizes[FIELD_EX32(v, DEV_SIZE_CONFIG_REG, MEM_SIZE_ON_CS0_FLD)]; | ||
664 | +} | ||
665 | + | ||
666 | +static unsigned int ospi_get_block_sz(XlnxVersalOspi *s) | ||
667 | +{ | ||
668 | + unsigned int block_fld = ARRAY_FIELD_EX32(s->regs, | ||
669 | + DEV_SIZE_CONFIG_REG, | ||
670 | + BYTES_PER_SUBSECTOR_FLD); | ||
671 | + return 1 << block_fld; | ||
672 | +} | ||
673 | + | ||
674 | +static unsigned int flash_blocks(XlnxVersalOspi *s, unsigned int cs) | ||
675 | +{ | ||
676 | + unsigned int b_sz = ospi_get_block_sz(s); | ||
677 | + unsigned int f_sz = flash_sz(s, cs); | ||
678 | + | ||
679 | + return f_sz / b_sz; | ||
680 | +} | ||
681 | + | ||
682 | +static int ospi_ahb_decoder_cs(XlnxVersalOspi *s, hwaddr addr) | ||
683 | +{ | ||
684 | + uint64_t end_addr = 0; | ||
685 | + int cs; | ||
686 | + | ||
687 | + for (cs = 0; cs < s->num_cs; cs++) { | ||
688 | + end_addr += flash_sz(s, cs); | ||
689 | + if (addr < end_addr) { | ||
690 | + break; | ||
691 | + } | 84 | + } |
692 | + } | 85 | + } |
86 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
87 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
91 | } | ||
92 | |||
93 | static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
94 | - uint32_t desc, bool fz16) | ||
95 | + uint64_t negx, int negf, uint32_t desc, bool fz16) | ||
96 | { | ||
97 | intptr_t i, oprsz = simd_oprsz(desc); | ||
98 | - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
99 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
100 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); | ||
101 | int is_q = oprsz == 16; | ||
102 | uint64_t n_4; | ||
103 | float32 m_1; | ||
104 | |||
105 | - /* Pre-load all of the f16 data, avoiding overlap issues. */ | ||
106 | - n_4 = load4_f16(vn, is_q, is_2); | ||
107 | - | ||
108 | - /* Negate all inputs for FMLSL at once. */ | ||
109 | - if (is_s) { | ||
110 | - n_4 ^= 0x8000800080008000ull; | ||
111 | - } | ||
112 | - | ||
113 | + /* | ||
114 | + * Pre-load all of the f16 data, avoiding overlap issues. | ||
115 | + * Negate all inputs for AH=0 FMLSL at once. | ||
116 | + */ | ||
117 | + n_4 = load4_f16(vn, is_q, is_2) ^ negx; | ||
118 | m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); | ||
119 | |||
120 | for (i = 0; i < oprsz / 4; i++) { | ||
121 | float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); | ||
122 | - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); | ||
123 | + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); | ||
124 | } | ||
125 | clear_tail(d, oprsz, simd_maxsz(desc)); | ||
126 | } | ||
127 | @@ -XXX,XX +XXX,XX @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, | ||
128 | void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
129 | CPUARMState *env, uint32_t desc) | ||
130 | { | ||
131 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, | ||
132 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
133 | + uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
693 | + | 134 | + |
694 | + if (cs == s->num_cs) { | 135 | + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, |
695 | + /* Address is out of range */ | 136 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
696 | + qemu_log_mask(LOG_GUEST_ERROR, | 137 | } |
697 | + "OSPI flash address does not fit in configuration\n"); | 138 | |
698 | + return -1; | 139 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
699 | + } | 140 | CPUARMState *env, uint32_t desc) |
700 | + return cs; | 141 | { |
701 | +} | 142 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc, |
143 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
144 | + uint64_t negx = 0; | ||
145 | + int negf = 0; | ||
702 | + | 146 | + |
703 | +static void ospi_ahb_decoder_enable_cs(XlnxVersalOspi *s, hwaddr addr) | 147 | + if (is_s) { |
704 | +{ | 148 | + if (env->vfp.fpcr & FPCR_AH) { |
705 | + int cs = ospi_ahb_decoder_cs(s, addr); | 149 | + negf = float_muladd_negate_product; |
706 | + | 150 | + } else { |
707 | + if (cs >= 0) { | 151 | + negx = 0x8000800080008000ull; |
708 | + for (int i = 0; i < s->num_cs; i++) { | ||
709 | + qemu_set_irq(s->cs_lines[i], cs != i); | ||
710 | + } | 152 | + } |
711 | + } | 153 | + } |
712 | +} | 154 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
713 | + | 155 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
714 | +static unsigned int single_cs(XlnxVersalOspi *s) | 156 | } |
715 | +{ | 157 | |
716 | + unsigned int field = ARRAY_FIELD_EX32(s->regs, | ||
717 | + CONFIG_REG, PERIPH_CS_LINES_FLD); | ||
718 | + | ||
719 | + /* | ||
720 | + * Below one liner is a trick that finds the rightmost zero and makes sure | ||
721 | + * all other bits are turned to 1. It is a variant of the 'Isolate the | ||
722 | + * rightmost 0-bit' trick found below at the time of writing: | ||
723 | + * | ||
724 | + * https://emre.me/computer-science/bit-manipulation-tricks/ | ||
725 | + * | ||
726 | + * 4'bXXX0 -> 4'b1110 | ||
727 | + * 4'bXX01 -> 4'b1101 | ||
728 | + * 4'bX011 -> 4'b1011 | ||
729 | + * 4'b0111 -> 4'b0111 | ||
730 | + * 4'b1111 -> 4'b1111 | ||
731 | + */ | ||
732 | + return (field | ~(field + 1)) & 0xf; | ||
733 | +} | ||
734 | + | ||
735 | +static void ospi_update_cs_lines(XlnxVersalOspi *s) | ||
736 | +{ | ||
737 | + unsigned int all_cs; | ||
738 | + int i; | ||
739 | + | ||
740 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, PERIPH_SEL_DEC_FLD)) { | ||
741 | + all_cs = ARRAY_FIELD_EX32(s->regs, CONFIG_REG, PERIPH_CS_LINES_FLD); | ||
742 | + } else { | ||
743 | + all_cs = single_cs(s); | ||
744 | + } | ||
745 | + | ||
746 | + for (i = 0; i < s->num_cs; i++) { | ||
747 | + bool cs = (all_cs >> i) & 1; | ||
748 | + | ||
749 | + qemu_set_irq(s->cs_lines[i], cs); | ||
750 | + } | ||
751 | +} | ||
752 | + | ||
753 | +static void ospi_dac_cs(XlnxVersalOspi *s, hwaddr addr) | ||
754 | +{ | ||
755 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENABLE_AHB_DECODER_FLD)) { | ||
756 | + ospi_ahb_decoder_enable_cs(s, addr); | ||
757 | + } else { | ||
758 | + ospi_update_cs_lines(s); | ||
759 | + } | ||
760 | +} | ||
761 | + | ||
762 | +static void ospi_disable_cs(XlnxVersalOspi *s) | ||
763 | +{ | ||
764 | + int i; | ||
765 | + | ||
766 | + for (i = 0; i < s->num_cs; i++) { | ||
767 | + qemu_set_irq(s->cs_lines[i], 1); | ||
768 | + } | ||
769 | +} | ||
770 | + | ||
771 | +static void ospi_flush_txfifo(XlnxVersalOspi *s) | ||
772 | +{ | ||
773 | + while (!fifo8_is_empty(&s->tx_fifo)) { | ||
774 | + uint32_t tx_rx = fifo8_pop(&s->tx_fifo); | ||
775 | + | ||
776 | + tx_rx = ssi_transfer(s->spi, tx_rx); | ||
777 | + fifo8_push(&s->rx_fifo, tx_rx); | ||
778 | + } | ||
779 | +} | ||
780 | + | ||
781 | +static void ospi_tx_fifo_push_address_raw(XlnxVersalOspi *s, | ||
782 | + uint32_t flash_addr, | ||
783 | + unsigned int addr_bytes) | ||
784 | +{ | ||
785 | + /* Push write address */ | ||
786 | + if (addr_bytes == 4) { | ||
787 | + fifo8_push(&s->tx_fifo, flash_addr >> 24); | ||
788 | + } | ||
789 | + if (addr_bytes >= 3) { | ||
790 | + fifo8_push(&s->tx_fifo, flash_addr >> 16); | ||
791 | + } | ||
792 | + if (addr_bytes >= 2) { | ||
793 | + fifo8_push(&s->tx_fifo, flash_addr >> 8); | ||
794 | + } | ||
795 | + fifo8_push(&s->tx_fifo, flash_addr); | ||
796 | +} | ||
797 | + | ||
798 | +static void ospi_tx_fifo_push_address(XlnxVersalOspi *s, uint32_t flash_addr) | ||
799 | +{ | ||
800 | + /* Push write address */ | ||
801 | + int addr_bytes = ospi_get_num_addr_bytes(s); | ||
802 | + | ||
803 | + ospi_tx_fifo_push_address_raw(s, flash_addr, addr_bytes); | ||
804 | +} | ||
805 | + | ||
806 | +static void ospi_tx_fifo_push_stig_addr(XlnxVersalOspi *s) | ||
807 | +{ | ||
808 | + uint32_t flash_addr = s->regs[R_FLASH_CMD_ADDR_REG]; | ||
809 | + unsigned int addr_bytes = ospi_stig_addr_len(s); | ||
810 | + | ||
811 | + ospi_tx_fifo_push_address_raw(s, flash_addr, addr_bytes); | ||
812 | +} | ||
813 | + | ||
814 | +static void ospi_tx_fifo_push_rd_op_addr(XlnxVersalOspi *s, uint32_t flash_addr) | ||
815 | +{ | ||
816 | + uint8_t inst_code = ospi_get_rd_opcode(s); | ||
817 | + | ||
818 | + fifo8_reset(&s->tx_fifo); | ||
819 | + | ||
820 | + /* Push read opcode */ | ||
821 | + fifo8_push(&s->tx_fifo, inst_code); | ||
822 | + | ||
823 | + /* Push read address */ | ||
824 | + ospi_tx_fifo_push_address(s, flash_addr); | ||
825 | +} | ||
826 | + | ||
827 | +static void ospi_tx_fifo_push_stig_wr_data(XlnxVersalOspi *s) | ||
828 | +{ | ||
829 | + uint64_t data = s->regs[R_FLASH_WR_DATA_LOWER_REG]; | ||
830 | + int wr_data_len = ospi_stig_wr_data_len(s); | ||
831 | + int i; | ||
832 | + | ||
833 | + data |= (uint64_t) s->regs[R_FLASH_WR_DATA_UPPER_REG] << 32; | ||
834 | + for (i = 0; i < wr_data_len; i++) { | ||
835 | + int shift = i * 8; | ||
836 | + fifo8_push(&s->tx_fifo, data >> shift); | ||
837 | + } | ||
838 | +} | ||
839 | + | ||
840 | +static void ospi_tx_fifo_push_stig_rd_data(XlnxVersalOspi *s) | ||
841 | +{ | ||
842 | + int rd_data_len; | ||
843 | + int i; | ||
844 | + | ||
845 | + if (ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, STIG_MEM_BANK_EN_FLD)) { | ||
846 | + rd_data_len = ospi_stig_membank_rd_bytes(s); | ||
847 | + } else { | ||
848 | + rd_data_len = ospi_stig_rd_data_len(s); | ||
849 | + } | ||
850 | + | ||
851 | + /* transmit second part (data) */ | ||
852 | + for (i = 0; i < rd_data_len; ++i) { | ||
853 | + fifo8_push(&s->tx_fifo, 0); | ||
854 | + } | ||
855 | +} | ||
856 | + | ||
857 | +static void ospi_rx_fifo_pop_stig_rd_data(XlnxVersalOspi *s) | ||
858 | +{ | ||
859 | + int size = ospi_stig_rd_data_len(s); | ||
860 | + uint8_t bytes[8] = {}; | ||
861 | + int i; | ||
862 | + | ||
863 | + size = MIN(fifo8_num_used(&s->rx_fifo), size); | ||
864 | + | ||
865 | + assert(size <= 8); | ||
866 | + | ||
867 | + for (i = 0; i < size; i++) { | ||
868 | + bytes[i] = fifo8_pop(&s->rx_fifo); | ||
869 | + } | ||
870 | + | ||
871 | + s->regs[R_FLASH_RD_DATA_LOWER_REG] = ldl_le_p(bytes); | ||
872 | + s->regs[R_FLASH_RD_DATA_UPPER_REG] = ldl_le_p(bytes + 4); | ||
873 | +} | ||
874 | + | ||
875 | +static void ospi_ind_read(XlnxVersalOspi *s, uint32_t flash_addr, uint32_t len) | ||
876 | +{ | ||
877 | + int i; | ||
878 | + | ||
879 | + /* Create first section of read cmd */ | ||
880 | + ospi_tx_fifo_push_rd_op_addr(s, flash_addr); | ||
881 | + | ||
882 | + /* transmit first part */ | ||
883 | + ospi_update_cs_lines(s); | ||
884 | + ospi_flush_txfifo(s); | ||
885 | + | ||
886 | + fifo8_reset(&s->rx_fifo); | ||
887 | + | ||
888 | + /* transmit second part (data) */ | ||
889 | + for (i = 0; i < len; ++i) { | ||
890 | + fifo8_push(&s->tx_fifo, 0); | ||
891 | + } | ||
892 | + ospi_flush_txfifo(s); | ||
893 | + | ||
894 | + for (i = 0; i < len; ++i) { | ||
895 | + fifo8_push(&s->rx_sram, fifo8_pop(&s->rx_fifo)); | ||
896 | + } | ||
897 | + | ||
898 | + /* done */ | ||
899 | + ospi_disable_cs(s); | ||
900 | +} | ||
901 | + | ||
902 | +static unsigned int ospi_dma_burst_size(XlnxVersalOspi *s) | ||
903 | +{ | ||
904 | + return 1 << ARRAY_FIELD_EX32(s->regs, | ||
905 | + DMA_PERIPH_CONFIG_REG, | ||
906 | + NUM_BURST_REQ_BYTES_FLD); | ||
907 | +} | ||
908 | + | ||
909 | +static unsigned int ospi_dma_single_size(XlnxVersalOspi *s) | ||
910 | +{ | ||
911 | + return 1 << ARRAY_FIELD_EX32(s->regs, | ||
912 | + DMA_PERIPH_CONFIG_REG, | ||
913 | + NUM_SINGLE_REQ_BYTES_FLD); | ||
914 | +} | ||
915 | + | ||
916 | +static void ind_rd_inc_num_done(XlnxVersalOspi *s) | ||
917 | +{ | ||
918 | + unsigned int done = ARRAY_FIELD_EX32(s->regs, | ||
919 | + INDIRECT_READ_XFER_CTRL_REG, | ||
920 | + NUM_IND_OPS_DONE_FLD); | ||
921 | + if (done < IND_OPS_DONE_MAX) { | ||
922 | + done++; | ||
923 | + } | ||
924 | + done &= 0x3; | ||
925 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_READ_XFER_CTRL_REG, | ||
926 | + NUM_IND_OPS_DONE_FLD, done); | ||
927 | +} | ||
928 | + | ||
929 | +static void ospi_ind_rd_completed(XlnxVersalOspi *s) | ||
930 | +{ | ||
931 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_READ_XFER_CTRL_REG, | ||
932 | + IND_OPS_DONE_STATUS_FLD, 1); | ||
933 | + | ||
934 | + ind_rd_inc_num_done(s); | ||
935 | + ospi_ind_op_next(s->rd_ind_op); | ||
936 | + if (ospi_ind_op_all_completed(s)) { | ||
937 | + set_irq(s, R_IRQ_STATUS_REG_INDIRECT_OP_DONE_FLD_MASK); | ||
938 | + } | ||
939 | +} | ||
940 | + | ||
941 | +static void ospi_dma_read(XlnxVersalOspi *s) | ||
942 | +{ | ||
943 | + IndOp *op = s->rd_ind_op; | ||
944 | + uint32_t dma_len = op->num_bytes; | ||
945 | + uint32_t burst_sz = ospi_dma_burst_size(s); | ||
946 | + uint32_t single_sz = ospi_dma_single_size(s); | ||
947 | + uint32_t ind_trig_range; | ||
948 | + uint32_t remainder; | ||
949 | + XlnxCSUDMAClass *xcdc = XLNX_CSU_DMA_GET_CLASS(s->dma_src); | ||
950 | + | ||
951 | + ind_trig_range = (1 << ARRAY_FIELD_EX32(s->regs, | ||
952 | + INDIRECT_TRIGGER_ADDR_RANGE_REG, | ||
953 | + IND_RANGE_WIDTH_FLD)); | ||
954 | + remainder = dma_len % burst_sz; | ||
955 | + remainder = remainder % single_sz; | ||
956 | + if (burst_sz > ind_trig_range || single_sz > ind_trig_range || | ||
957 | + remainder != 0) { | ||
958 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
959 | + "OSPI DMA burst size / single size config error\n"); | ||
960 | + } | ||
961 | + | ||
962 | + s->src_dma_inprog = true; | ||
963 | + if (xcdc->read(s->dma_src, 0, dma_len) != MEMTX_OK) { | ||
964 | + qemu_log_mask(LOG_GUEST_ERROR, "OSPI DMA configuration error\n"); | ||
965 | + } | ||
966 | + s->src_dma_inprog = false; | ||
967 | +} | ||
968 | + | ||
969 | +static void ospi_do_ind_read(XlnxVersalOspi *s) | ||
970 | +{ | ||
971 | + IndOp *op = s->rd_ind_op; | ||
972 | + uint32_t next_b; | ||
973 | + uint32_t end_b; | ||
974 | + uint32_t len; | ||
975 | + bool start_dma = IS_IND_DMA_START(op) && !s->src_dma_inprog; | ||
976 | + | ||
977 | + /* Continue to read flash until we run out of space in sram */ | ||
978 | + while (!ospi_ind_op_completed(op) && | ||
979 | + !fifo8_is_full(&s->rx_sram)) { | ||
980 | + /* Read requested number of bytes, max bytes limited to size of sram */ | ||
981 | + next_b = ind_op_next_byte(op); | ||
982 | + end_b = next_b + fifo8_num_free(&s->rx_sram); | ||
983 | + end_b = MIN(end_b, ind_op_end_byte(op)); | ||
984 | + | ||
985 | + len = end_b - next_b; | ||
986 | + ospi_ind_read(s, next_b, len); | ||
987 | + ind_op_advance(op, len); | ||
988 | + | ||
989 | + if (ospi_ind_rd_watermark_enabled(s)) { | ||
990 | + ARRAY_FIELD_DP32(s->regs, IRQ_STATUS_REG, | ||
991 | + INDIRECT_XFER_LEVEL_BREACH_FLD, 1); | ||
992 | + set_irq(s, | ||
993 | + R_IRQ_STATUS_REG_INDIRECT_XFER_LEVEL_BREACH_FLD_MASK); | ||
994 | + } | ||
995 | + | ||
996 | + if (!s->src_dma_inprog && | ||
997 | + ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_DMA_IF_FLD)) { | ||
998 | + ospi_dma_read(s); | ||
999 | + } | ||
1000 | + } | ||
1001 | + | ||
1002 | + /* Set sram full */ | ||
1003 | + if (fifo8_num_used(&s->rx_sram) == RXFF_SZ) { | ||
1004 | + ARRAY_FIELD_DP32(s->regs, | ||
1005 | + INDIRECT_READ_XFER_CTRL_REG, SRAM_FULL_FLD, 1); | ||
1006 | + set_irq(s, R_IRQ_STATUS_REG_INDRD_SRAM_FULL_FLD_MASK); | ||
1007 | + } | ||
1008 | + | ||
1009 | + /* Signal completion if done, unless inside recursion via ospi_dma_read */ | ||
1010 | + if (!ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_DMA_IF_FLD) || start_dma) { | ||
1011 | + if (ospi_ind_op_completed(op)) { | ||
1012 | + ospi_ind_rd_completed(s); | ||
1013 | + } | ||
1014 | + } | ||
1015 | +} | ||
1016 | + | ||
1017 | +/* Transmit write enable instruction */ | ||
1018 | +static void ospi_transmit_wel(XlnxVersalOspi *s, bool ahb_decoder_cs, | ||
1019 | + hwaddr addr) | ||
1020 | +{ | ||
1021 | + fifo8_reset(&s->tx_fifo); | ||
1022 | + fifo8_push(&s->tx_fifo, WREN); | ||
1023 | + | ||
1024 | + if (ahb_decoder_cs) { | ||
1025 | + ospi_ahb_decoder_enable_cs(s, addr); | ||
1026 | + } else { | ||
1027 | + ospi_update_cs_lines(s); | ||
1028 | + } | ||
1029 | + | ||
1030 | + ospi_flush_txfifo(s); | ||
1031 | + ospi_disable_cs(s); | ||
1032 | + | ||
1033 | + fifo8_reset(&s->rx_fifo); | ||
1034 | +} | ||
1035 | + | ||
1036 | +static void ospi_ind_write(XlnxVersalOspi *s, uint32_t flash_addr, uint32_t len) | ||
1037 | +{ | ||
1038 | + bool ahb_decoder_cs = false; | ||
1039 | + uint8_t inst_code; | ||
1040 | + int i; | ||
1041 | + | ||
1042 | + assert(fifo8_num_used(&s->tx_sram) >= len); | ||
1043 | + | ||
1044 | + if (!ARRAY_FIELD_EX32(s->regs, DEV_INSTR_WR_CONFIG_REG, WEL_DIS_FLD)) { | ||
1045 | + ospi_transmit_wel(s, ahb_decoder_cs, 0); | ||
1046 | + } | ||
1047 | + | ||
1048 | + /* reset fifos */ | ||
1049 | + fifo8_reset(&s->tx_fifo); | ||
1050 | + fifo8_reset(&s->rx_fifo); | ||
1051 | + | ||
1052 | + /* Push write opcode */ | ||
1053 | + inst_code = ospi_get_wr_opcode(s); | ||
1054 | + fifo8_push(&s->tx_fifo, inst_code); | ||
1055 | + | ||
1056 | + /* Push write address */ | ||
1057 | + ospi_tx_fifo_push_address(s, flash_addr); | ||
1058 | + | ||
1059 | + /* data */ | ||
1060 | + for (i = 0; i < len; i++) { | ||
1061 | + fifo8_push(&s->tx_fifo, fifo8_pop(&s->tx_sram)); | ||
1062 | + } | ||
1063 | + | ||
1064 | + /* transmit */ | ||
1065 | + ospi_update_cs_lines(s); | ||
1066 | + ospi_flush_txfifo(s); | ||
1067 | + | ||
1068 | + /* done */ | ||
1069 | + ospi_disable_cs(s); | ||
1070 | + fifo8_reset(&s->rx_fifo); | ||
1071 | +} | ||
1072 | + | ||
1073 | +static void ind_wr_inc_num_done(XlnxVersalOspi *s) | ||
1074 | +{ | ||
1075 | + unsigned int done = ARRAY_FIELD_EX32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1076 | + NUM_IND_OPS_DONE_FLD); | ||
1077 | + if (done < IND_OPS_DONE_MAX) { | ||
1078 | + done++; | ||
1079 | + } | ||
1080 | + done &= 0x3; | ||
1081 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1082 | + NUM_IND_OPS_DONE_FLD, done); | ||
1083 | +} | ||
1084 | + | ||
1085 | +static void ospi_ind_wr_completed(XlnxVersalOspi *s) | ||
1086 | +{ | ||
1087 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1088 | + IND_OPS_DONE_STATUS_FLD, 1); | ||
1089 | + ind_wr_inc_num_done(s); | ||
1090 | + ospi_ind_op_next(s->wr_ind_op); | ||
1091 | + /* Set indirect op done interrupt if enabled */ | ||
1092 | + if (ospi_ind_op_all_completed(s)) { | ||
1093 | + set_irq(s, R_IRQ_STATUS_REG_INDIRECT_OP_DONE_FLD_MASK); | ||
1094 | + } | ||
1095 | +} | ||
1096 | + | ||
1097 | +static void ospi_do_indirect_write(XlnxVersalOspi *s) | ||
1098 | +{ | ||
1099 | + uint32_t write_watermark = s->regs[R_INDIRECT_WRITE_XFER_WATERMARK_REG]; | ||
1100 | + uint32_t pagesz = ospi_get_page_sz(s); | ||
1101 | + uint32_t page_mask = ~(pagesz - 1); | ||
1102 | + IndOp *op = s->wr_ind_op; | ||
1103 | + uint32_t next_b; | ||
1104 | + uint32_t end_b; | ||
1105 | + uint32_t len; | ||
1106 | + | ||
1107 | + /* Write out tx_fifo in maximum page sz chunks */ | ||
1108 | + while (!ospi_ind_op_completed(op) && fifo8_num_used(&s->tx_sram) > 0) { | ||
1109 | + next_b = ind_op_next_byte(op); | ||
1110 | + end_b = next_b + MIN(fifo8_num_used(&s->tx_sram), pagesz); | ||
1111 | + | ||
1112 | + /* Don't cross page boundary */ | ||
1113 | + if ((end_b & page_mask) > next_b) { | ||
1114 | + end_b &= page_mask; | ||
1115 | + } | ||
1116 | + | ||
1117 | + len = end_b - next_b; | ||
1118 | + len = MIN(len, op->num_bytes - op->done_bytes); | ||
1119 | + ospi_ind_write(s, next_b, len); | ||
1120 | + ind_op_advance(op, len); | ||
1121 | + } | ||
1122 | + | ||
1123 | + /* | ||
1124 | + * Always set indirect transfer level breached interrupt if enabled | ||
1125 | + * (write watermark > 0) since the tx_sram always will be emptied | ||
1126 | + */ | ||
1127 | + if (write_watermark > 0) { | ||
1128 | + set_irq(s, R_IRQ_STATUS_REG_INDIRECT_XFER_LEVEL_BREACH_FLD_MASK); | ||
1129 | + } | ||
1130 | + | ||
1131 | + /* Signal completions if done */ | ||
1132 | + if (ospi_ind_op_completed(op)) { | ||
1133 | + ospi_ind_wr_completed(s); | ||
1134 | + } | ||
1135 | +} | ||
1136 | + | ||
1137 | +static void ospi_stig_fill_membank(XlnxVersalOspi *s) | ||
1138 | +{ | ||
1139 | + int num_rd_bytes = ospi_stig_membank_rd_bytes(s); | ||
1140 | + int idx = num_rd_bytes - 8; /* first of last 8 */ | ||
1141 | + int i; | ||
1142 | + | ||
1143 | + for (i = 0; i < num_rd_bytes; i++) { | ||
1144 | + s->stig_membank[i] = fifo8_pop(&s->rx_fifo); | ||
1145 | + } | ||
1146 | + | ||
1147 | + g_assert((idx + 4) < ARRAY_SIZE(s->stig_membank)); | ||
1148 | + | ||
1149 | + /* Fill in lower and upper regs */ | ||
1150 | + s->regs[R_FLASH_RD_DATA_LOWER_REG] = ldl_le_p(&s->stig_membank[idx]); | ||
1151 | + s->regs[R_FLASH_RD_DATA_UPPER_REG] = ldl_le_p(&s->stig_membank[idx + 4]); | ||
1152 | +} | ||
1153 | + | ||
1154 | +static void ospi_stig_cmd_exec(XlnxVersalOspi *s) | ||
1155 | +{ | ||
1156 | + uint8_t inst_code; | ||
1157 | + | ||
1158 | + /* Reset fifos */ | ||
1159 | + fifo8_reset(&s->tx_fifo); | ||
1160 | + fifo8_reset(&s->rx_fifo); | ||
1161 | + | ||
1162 | + /* Push write opcode */ | ||
1163 | + inst_code = ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, CMD_OPCODE_FLD); | ||
1164 | + fifo8_push(&s->tx_fifo, inst_code); | ||
1165 | + | ||
1166 | + /* Push address if enabled */ | ||
1167 | + if (ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, ENB_COMD_ADDR_FLD)) { | ||
1168 | + ospi_tx_fifo_push_stig_addr(s); | ||
1169 | + } | ||
1170 | + | ||
1171 | + /* Enable cs */ | ||
1172 | + ospi_update_cs_lines(s); | ||
1173 | + | ||
1174 | + /* Data */ | ||
1175 | + if (ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, ENB_WRITE_DATA_FLD)) { | ||
1176 | + ospi_tx_fifo_push_stig_wr_data(s); | ||
1177 | + } else if (ARRAY_FIELD_EX32(s->regs, | ||
1178 | + FLASH_CMD_CTRL_REG, ENB_READ_DATA_FLD)) { | ||
1179 | + /* transmit first part */ | ||
1180 | + ospi_flush_txfifo(s); | ||
1181 | + fifo8_reset(&s->rx_fifo); | ||
1182 | + ospi_tx_fifo_push_stig_rd_data(s); | ||
1183 | + } | ||
1184 | + | ||
1185 | + /* Transmit */ | ||
1186 | + ospi_flush_txfifo(s); | ||
1187 | + ospi_disable_cs(s); | ||
1188 | + | ||
1189 | + if (ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, ENB_READ_DATA_FLD)) { | ||
1190 | + if (ARRAY_FIELD_EX32(s->regs, | ||
1191 | + FLASH_CMD_CTRL_REG, STIG_MEM_BANK_EN_FLD)) { | ||
1192 | + ospi_stig_fill_membank(s); | ||
1193 | + } else { | ||
1194 | + ospi_rx_fifo_pop_stig_rd_data(s); | ||
1195 | + } | ||
1196 | + } | ||
1197 | +} | ||
1198 | + | ||
1199 | +static uint32_t ospi_block_address(XlnxVersalOspi *s, unsigned int block) | ||
1200 | +{ | ||
1201 | + unsigned int block_sz = ospi_get_block_sz(s); | ||
1202 | + unsigned int cs = 0; | ||
1203 | + uint32_t addr = 0; | ||
1204 | + | ||
1205 | + while (cs < s->num_cs && block >= flash_blocks(s, cs)) { | ||
1206 | + block -= flash_blocks(s, 0); | ||
1207 | + addr += flash_sz(s, cs); | ||
1208 | + } | ||
1209 | + addr += block * block_sz; | ||
1210 | + return addr; | ||
1211 | +} | ||
1212 | + | ||
1213 | +static uint32_t ospi_get_wr_prot_addr_low(XlnxVersalOspi *s) | ||
1214 | +{ | ||
1215 | + unsigned int block = s->regs[R_LOWER_WR_PROT_REG]; | ||
1216 | + | ||
1217 | + return ospi_block_address(s, block); | ||
1218 | +} | ||
1219 | + | ||
1220 | +static uint32_t ospi_get_wr_prot_addr_upper(XlnxVersalOspi *s) | ||
1221 | +{ | ||
1222 | + unsigned int block = s->regs[R_UPPER_WR_PROT_REG]; | ||
1223 | + | ||
1224 | + /* Get address of first block out of defined range */ | ||
1225 | + return ospi_block_address(s, block + 1); | ||
1226 | +} | ||
1227 | + | ||
1228 | +static bool ospi_is_write_protected(XlnxVersalOspi *s, hwaddr addr) | ||
1229 | +{ | ||
1230 | + uint32_t wr_prot_addr_upper = ospi_get_wr_prot_addr_upper(s); | ||
1231 | + uint32_t wr_prot_addr_low = ospi_get_wr_prot_addr_low(s); | ||
1232 | + bool in_range = false; | ||
1233 | + | ||
1234 | + if (addr >= wr_prot_addr_low && addr < wr_prot_addr_upper) { | ||
1235 | + in_range = true; | ||
1236 | + } | ||
1237 | + | ||
1238 | + if (ARRAY_FIELD_EX32(s->regs, WR_PROT_CTRL_REG, INV_FLD)) { | ||
1239 | + in_range = !in_range; | ||
1240 | + } | ||
1241 | + return in_range; | ||
1242 | +} | ||
1243 | + | ||
1244 | +static uint64_t ospi_rx_sram_read(XlnxVersalOspi *s, unsigned int size) | ||
1245 | +{ | ||
1246 | + uint8_t bytes[8] = {}; | ||
1247 | + int i; | ||
1248 | + | ||
1249 | + if (size < 4 && fifo8_num_used(&s->rx_sram) >= 4) { | ||
1250 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
1251 | + "OSPI only last read of internal " | ||
1252 | + "sram is allowed to be < 32 bits\n"); | ||
1253 | + } | ||
1254 | + | ||
1255 | + size = MIN(fifo8_num_used(&s->rx_sram), size); | ||
1256 | + | ||
1257 | + assert(size <= 8); | ||
1258 | + | ||
1259 | + for (i = 0; i < size; i++) { | ||
1260 | + bytes[i] = fifo8_pop(&s->rx_sram); | ||
1261 | + } | ||
1262 | + | ||
1263 | + return ldq_le_p(bytes); | ||
1264 | +} | ||
1265 | + | ||
1266 | +static void ospi_tx_sram_write(XlnxVersalOspi *s, uint64_t value, | ||
1267 | + unsigned int size) | ||
1268 | +{ | ||
1269 | + int i; | ||
1270 | + for (i = 0; i < size && !fifo8_is_full(&s->tx_sram); i++) { | ||
1271 | + fifo8_push(&s->tx_sram, value >> 8 * i); | ||
1272 | + } | ||
1273 | +} | ||
1274 | + | ||
1275 | +static uint64_t ospi_do_dac_read(void *opaque, hwaddr addr, unsigned int size) | ||
1276 | +{ | ||
1277 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1278 | + uint8_t bytes[8] = {}; | ||
1279 | + int i; | ||
1280 | + | ||
1281 | + /* Create first section of read cmd */ | ||
1282 | + ospi_tx_fifo_push_rd_op_addr(s, (uint32_t) addr); | ||
1283 | + | ||
1284 | + /* Enable cs and transmit first part */ | ||
1285 | + ospi_dac_cs(s, addr); | ||
1286 | + ospi_flush_txfifo(s); | ||
1287 | + | ||
1288 | + fifo8_reset(&s->rx_fifo); | ||
1289 | + | ||
1290 | + /* transmit second part (data) */ | ||
1291 | + for (i = 0; i < size; ++i) { | ||
1292 | + fifo8_push(&s->tx_fifo, 0); | ||
1293 | + } | ||
1294 | + ospi_flush_txfifo(s); | ||
1295 | + | ||
1296 | + /* fill in result */ | ||
1297 | + size = MIN(fifo8_num_used(&s->rx_fifo), size); | ||
1298 | + | ||
1299 | + assert(size <= 8); | ||
1300 | + | ||
1301 | + for (i = 0; i < size; i++) { | ||
1302 | + bytes[i] = fifo8_pop(&s->rx_fifo); | ||
1303 | + } | ||
1304 | + | ||
1305 | + /* done */ | ||
1306 | + ospi_disable_cs(s); | ||
1307 | + | ||
1308 | + return ldq_le_p(bytes); | ||
1309 | +} | ||
1310 | + | ||
1311 | +static void ospi_do_dac_write(void *opaque, | ||
1312 | + hwaddr addr, | ||
1313 | + uint64_t value, | ||
1314 | + unsigned int size) | ||
1315 | +{ | ||
1316 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1317 | + bool ahb_decoder_cs = ARRAY_FIELD_EX32(s->regs, CONFIG_REG, | ||
1318 | + ENABLE_AHB_DECODER_FLD); | ||
1319 | + uint8_t inst_code; | ||
1320 | + unsigned int i; | ||
1321 | + | ||
1322 | + if (!ARRAY_FIELD_EX32(s->regs, DEV_INSTR_WR_CONFIG_REG, WEL_DIS_FLD)) { | ||
1323 | + ospi_transmit_wel(s, ahb_decoder_cs, addr); | ||
1324 | + } | ||
1325 | + | ||
1326 | + /* reset fifos */ | ||
1327 | + fifo8_reset(&s->tx_fifo); | ||
1328 | + fifo8_reset(&s->rx_fifo); | ||
1329 | + | ||
1330 | + /* Push write opcode */ | ||
1331 | + inst_code = ospi_get_wr_opcode(s); | ||
1332 | + fifo8_push(&s->tx_fifo, inst_code); | ||
1333 | + | ||
1334 | + /* Push write address */ | ||
1335 | + ospi_tx_fifo_push_address(s, addr); | ||
1336 | + | ||
1337 | + /* data */ | ||
1338 | + for (i = 0; i < size; i++) { | ||
1339 | + fifo8_push(&s->tx_fifo, value >> 8 * i); | ||
1340 | + } | ||
1341 | + | ||
1342 | + /* Enable cs and transmit */ | ||
1343 | + ospi_dac_cs(s, addr); | ||
1344 | + ospi_flush_txfifo(s); | ||
1345 | + ospi_disable_cs(s); | ||
1346 | + | ||
1347 | + fifo8_reset(&s->rx_fifo); | ||
1348 | +} | ||
1349 | + | ||
1350 | +static void flash_cmd_ctrl_mem_reg_post_write(RegisterInfo *reg, | ||
1351 | + uint64_t val) | ||
1352 | +{ | ||
1353 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1354 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_SPI_FLD)) { | ||
1355 | + if (ARRAY_FIELD_EX32(s->regs, | ||
1356 | + FLASH_COMMAND_CTRL_MEM_REG, | ||
1357 | + TRIGGER_MEM_BANK_REQ_FLD)) { | ||
1358 | + ospi_stig_membank_req(s); | ||
1359 | + ARRAY_FIELD_DP32(s->regs, FLASH_COMMAND_CTRL_MEM_REG, | ||
1360 | + TRIGGER_MEM_BANK_REQ_FLD, 0); | ||
1361 | + } | ||
1362 | + } | ||
1363 | +} | ||
1364 | + | ||
1365 | +static void flash_cmd_ctrl_reg_post_write(RegisterInfo *reg, uint64_t val) | ||
1366 | +{ | ||
1367 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1368 | + | ||
1369 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_SPI_FLD) && | ||
1370 | + ARRAY_FIELD_EX32(s->regs, FLASH_CMD_CTRL_REG, CMD_EXEC_FLD)) { | ||
1371 | + ospi_stig_cmd_exec(s); | ||
1372 | + set_irq(s, R_IRQ_STATUS_REG_STIG_REQ_INT_FLD_MASK); | ||
1373 | + ARRAY_FIELD_DP32(s->regs, FLASH_CMD_CTRL_REG, CMD_EXEC_FLD, 0); | ||
1374 | + } | ||
1375 | +} | ||
1376 | + | ||
1377 | +static uint64_t ind_wr_dec_num_done(XlnxVersalOspi *s, uint64_t val) | ||
1378 | +{ | ||
1379 | + unsigned int done = ARRAY_FIELD_EX32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1380 | + NUM_IND_OPS_DONE_FLD); | ||
1381 | + done--; | ||
1382 | + done &= 0x3; | ||
1383 | + val = FIELD_DP32(val, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1384 | + NUM_IND_OPS_DONE_FLD, done); | ||
1385 | + return val; | ||
1386 | +} | ||
1387 | + | ||
1388 | +static bool ind_wr_clearing_op_done(XlnxVersalOspi *s, uint64_t new_val) | ||
1389 | +{ | ||
1390 | + bool set_in_reg = ARRAY_FIELD_EX32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1391 | + IND_OPS_DONE_STATUS_FLD); | ||
1392 | + bool set_in_new_val = FIELD_EX32(new_val, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1393 | + IND_OPS_DONE_STATUS_FLD); | ||
1394 | + /* return true if clearing bit */ | ||
1395 | + return set_in_reg && !set_in_new_val; | ||
1396 | +} | ||
1397 | + | ||
1398 | +static uint64_t ind_wr_xfer_ctrl_reg_pre_write(RegisterInfo *reg, | ||
1399 | + uint64_t val) | ||
1400 | +{ | ||
1401 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1402 | + | ||
1403 | + if (ind_wr_clearing_op_done(s, val)) { | ||
1404 | + val = ind_wr_dec_num_done(s, val); | ||
1405 | + } | ||
1406 | + return val; | ||
1407 | +} | ||
1408 | + | ||
1409 | +static void ind_wr_xfer_ctrl_reg_post_write(RegisterInfo *reg, uint64_t val) | ||
1410 | +{ | ||
1411 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1412 | + | ||
1413 | + if (s->ind_write_disabled) { | ||
1414 | + return; | ||
1415 | + } | ||
1416 | + | ||
1417 | + if (ARRAY_FIELD_EX32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, START_FLD)) { | ||
1418 | + ospi_ind_op_queue_up_wr(s); | ||
1419 | + ospi_do_indirect_write(s); | ||
1420 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, START_FLD, 0); | ||
1421 | + } | ||
1422 | + | ||
1423 | + if (ARRAY_FIELD_EX32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, CANCEL_FLD)) { | ||
1424 | + ospi_ind_op_cancel(s->wr_ind_op); | ||
1425 | + fifo8_reset(&s->tx_sram); | ||
1426 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_WRITE_XFER_CTRL_REG, CANCEL_FLD, 0); | ||
1427 | + } | ||
1428 | +} | ||
1429 | + | ||
1430 | +static uint64_t ind_wr_xfer_ctrl_reg_post_read(RegisterInfo *reg, | ||
1431 | + uint64_t val) | ||
1432 | +{ | ||
1433 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1434 | + IndOp *op = s->wr_ind_op; | ||
1435 | + | ||
1436 | + /* Check if ind ops is ongoing */ | ||
1437 | + if (!ospi_ind_op_completed(&op[0])) { | ||
1438 | + /* Check if two ind ops are queued */ | ||
1439 | + if (!ospi_ind_op_completed(&op[1])) { | ||
1440 | + val = FIELD_DP32(val, INDIRECT_WRITE_XFER_CTRL_REG, | ||
1441 | + WR_QUEUED_FLD, 1); | ||
1442 | + } | ||
1443 | + val = FIELD_DP32(val, INDIRECT_WRITE_XFER_CTRL_REG, WR_STATUS_FLD, 1); | ||
1444 | + } | ||
1445 | + return val; | ||
1446 | +} | ||
1447 | + | ||
1448 | +static uint64_t ind_rd_dec_num_done(XlnxVersalOspi *s, uint64_t val) | ||
1449 | +{ | ||
1450 | + unsigned int done = ARRAY_FIELD_EX32(s->regs, INDIRECT_READ_XFER_CTRL_REG, | ||
1451 | + NUM_IND_OPS_DONE_FLD); | ||
1452 | + done--; | ||
1453 | + done &= 0x3; | ||
1454 | + val = FIELD_DP32(val, INDIRECT_READ_XFER_CTRL_REG, | ||
1455 | + NUM_IND_OPS_DONE_FLD, done); | ||
1456 | + return val; | ||
1457 | +} | ||
1458 | + | ||
1459 | +static uint64_t ind_rd_xfer_ctrl_reg_pre_write(RegisterInfo *reg, | ||
1460 | + uint64_t val) | ||
1461 | +{ | ||
1462 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1463 | + | ||
1464 | + if (FIELD_EX32(val, INDIRECT_READ_XFER_CTRL_REG, | ||
1465 | + IND_OPS_DONE_STATUS_FLD)) { | ||
1466 | + val = ind_rd_dec_num_done(s, val); | ||
1467 | + val &= ~R_INDIRECT_READ_XFER_CTRL_REG_IND_OPS_DONE_STATUS_FLD_MASK; | ||
1468 | + } | ||
1469 | + return val; | ||
1470 | +} | ||
1471 | + | ||
1472 | +static void ind_rd_xfer_ctrl_reg_post_write(RegisterInfo *reg, uint64_t val) | ||
1473 | +{ | ||
1474 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1475 | + | ||
1476 | + if (ARRAY_FIELD_EX32(s->regs, INDIRECT_READ_XFER_CTRL_REG, START_FLD)) { | ||
1477 | + ospi_ind_op_queue_up_rd(s); | ||
1478 | + ospi_do_ind_read(s); | ||
1479 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_READ_XFER_CTRL_REG, START_FLD, 0); | ||
1480 | + } | ||
1481 | + | ||
1482 | + if (ARRAY_FIELD_EX32(s->regs, INDIRECT_READ_XFER_CTRL_REG, CANCEL_FLD)) { | ||
1483 | + ospi_ind_op_cancel(s->rd_ind_op); | ||
1484 | + fifo8_reset(&s->rx_sram); | ||
1485 | + ARRAY_FIELD_DP32(s->regs, INDIRECT_READ_XFER_CTRL_REG, CANCEL_FLD, 0); | ||
1486 | + } | ||
1487 | +} | ||
1488 | + | ||
1489 | +static uint64_t ind_rd_xfer_ctrl_reg_post_read(RegisterInfo *reg, | ||
1490 | + uint64_t val) | ||
1491 | +{ | ||
1492 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1493 | + IndOp *op = s->rd_ind_op; | ||
1494 | + | ||
1495 | + /* Check if ind ops is ongoing */ | ||
1496 | + if (!ospi_ind_op_completed(&op[0])) { | ||
1497 | + /* Check if two ind ops are queued */ | ||
1498 | + if (!ospi_ind_op_completed(&op[1])) { | ||
1499 | + val = FIELD_DP32(val, INDIRECT_READ_XFER_CTRL_REG, | ||
1500 | + RD_QUEUED_FLD, 1); | ||
1501 | + } | ||
1502 | + val = FIELD_DP32(val, INDIRECT_READ_XFER_CTRL_REG, RD_STATUS_FLD, 1); | ||
1503 | + } | ||
1504 | + return val; | ||
1505 | +} | ||
1506 | + | ||
1507 | +static uint64_t sram_fill_reg_post_read(RegisterInfo *reg, uint64_t val) | ||
1508 | +{ | ||
1509 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1510 | + val = ((fifo8_num_used(&s->tx_sram) & 0xFFFF) << 16) | | ||
1511 | + (fifo8_num_used(&s->rx_sram) & 0xFFFF); | ||
1512 | + return val; | ||
1513 | +} | ||
1514 | + | ||
1515 | +static uint64_t dll_obs_upper_reg_post_read(RegisterInfo *reg, uint64_t val) | ||
1516 | +{ | ||
1517 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(reg->opaque); | ||
1518 | + uint32_t rx_dec_out; | ||
1519 | + | ||
1520 | + rx_dec_out = FIELD_EX32(val, DLL_OBSERVABLE_UPPER_REG, | ||
1521 | + DLL_OBSERVABLE__UPPER_RX_DECODER_OUTPUT_FLD); | ||
1522 | + | ||
1523 | + if (rx_dec_out < MAX_RX_DEC_OUT) { | ||
1524 | + ARRAY_FIELD_DP32(s->regs, DLL_OBSERVABLE_UPPER_REG, | ||
1525 | + DLL_OBSERVABLE__UPPER_RX_DECODER_OUTPUT_FLD, | ||
1526 | + rx_dec_out + 1); | ||
1527 | + } | ||
1528 | + | ||
1529 | + return val; | ||
1530 | +} | ||
1531 | + | ||
1532 | + | ||
1533 | +static void xlnx_versal_ospi_reset(DeviceState *dev) | ||
1534 | +{ | ||
1535 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(dev); | ||
1536 | + unsigned int i; | ||
1537 | + | ||
1538 | + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { | ||
1539 | + register_reset(&s->regs_info[i]); | ||
1540 | + } | ||
1541 | + | ||
1542 | + fifo8_reset(&s->rx_fifo); | ||
1543 | + fifo8_reset(&s->tx_fifo); | ||
1544 | + fifo8_reset(&s->rx_sram); | ||
1545 | + fifo8_reset(&s->tx_sram); | ||
1546 | + | ||
1547 | + s->rd_ind_op[0].completed = true; | ||
1548 | + s->rd_ind_op[1].completed = true; | ||
1549 | + s->wr_ind_op[0].completed = true; | ||
1550 | + s->wr_ind_op[1].completed = true; | ||
1551 | + ARRAY_FIELD_DP32(s->regs, DLL_OBSERVABLE_LOWER_REG, | ||
1552 | + DLL_OBSERVABLE_LOWER_DLL_LOCK_FLD, 1); | ||
1553 | + ARRAY_FIELD_DP32(s->regs, DLL_OBSERVABLE_LOWER_REG, | ||
1554 | + DLL_OBSERVABLE_LOWER_LOOPBACK_LOCK_FLD, 1); | ||
1555 | +} | ||
1556 | + | ||
1557 | +static RegisterAccessInfo ospi_regs_info[] = { | ||
1558 | + { .name = "CONFIG_REG", | ||
1559 | + .addr = A_CONFIG_REG, | ||
1560 | + .reset = 0x80780081, | ||
1561 | + .ro = 0x9c000000, | ||
1562 | + },{ .name = "DEV_INSTR_RD_CONFIG_REG", | ||
1563 | + .addr = A_DEV_INSTR_RD_CONFIG_REG, | ||
1564 | + .reset = 0x3, | ||
1565 | + .ro = 0xe0ecc800, | ||
1566 | + },{ .name = "DEV_INSTR_WR_CONFIG_REG", | ||
1567 | + .addr = A_DEV_INSTR_WR_CONFIG_REG, | ||
1568 | + .reset = 0x2, | ||
1569 | + .ro = 0xe0fcce00, | ||
1570 | + },{ .name = "DEV_DELAY_REG", | ||
1571 | + .addr = A_DEV_DELAY_REG, | ||
1572 | + },{ .name = "RD_DATA_CAPTURE_REG", | ||
1573 | + .addr = A_RD_DATA_CAPTURE_REG, | ||
1574 | + .reset = 0x1, | ||
1575 | + .ro = 0xfff0fec0, | ||
1576 | + },{ .name = "DEV_SIZE_CONFIG_REG", | ||
1577 | + .addr = A_DEV_SIZE_CONFIG_REG, | ||
1578 | + .reset = 0x101002, | ||
1579 | + .ro = 0xe0000000, | ||
1580 | + },{ .name = "SRAM_PARTITION_CFG_REG", | ||
1581 | + .addr = A_SRAM_PARTITION_CFG_REG, | ||
1582 | + .reset = 0x80, | ||
1583 | + .ro = 0xffffff00, | ||
1584 | + },{ .name = "IND_AHB_ADDR_TRIGGER_REG", | ||
1585 | + .addr = A_IND_AHB_ADDR_TRIGGER_REG, | ||
1586 | + },{ .name = "DMA_PERIPH_CONFIG_REG", | ||
1587 | + .addr = A_DMA_PERIPH_CONFIG_REG, | ||
1588 | + .ro = 0xfffff0f0, | ||
1589 | + },{ .name = "REMAP_ADDR_REG", | ||
1590 | + .addr = A_REMAP_ADDR_REG, | ||
1591 | + },{ .name = "MODE_BIT_CONFIG_REG", | ||
1592 | + .addr = A_MODE_BIT_CONFIG_REG, | ||
1593 | + .reset = 0x200, | ||
1594 | + .ro = 0xffff7800, | ||
1595 | + },{ .name = "SRAM_FILL_REG", | ||
1596 | + .addr = A_SRAM_FILL_REG, | ||
1597 | + .ro = 0xffffffff, | ||
1598 | + .post_read = sram_fill_reg_post_read, | ||
1599 | + },{ .name = "TX_THRESH_REG", | ||
1600 | + .addr = A_TX_THRESH_REG, | ||
1601 | + .reset = 0x1, | ||
1602 | + .ro = 0xffffffe0, | ||
1603 | + },{ .name = "RX_THRESH_REG", | ||
1604 | + .addr = A_RX_THRESH_REG, | ||
1605 | + .reset = 0x1, | ||
1606 | + .ro = 0xffffffe0, | ||
1607 | + },{ .name = "WRITE_COMPLETION_CTRL_REG", | ||
1608 | + .addr = A_WRITE_COMPLETION_CTRL_REG, | ||
1609 | + .reset = 0x10005, | ||
1610 | + .ro = 0x1800, | ||
1611 | + },{ .name = "NO_OF_POLLS_BEF_EXP_REG", | ||
1612 | + .addr = A_NO_OF_POLLS_BEF_EXP_REG, | ||
1613 | + .reset = 0xffffffff, | ||
1614 | + },{ .name = "IRQ_STATUS_REG", | ||
1615 | + .addr = A_IRQ_STATUS_REG, | ||
1616 | + .ro = 0xfff08000, | ||
1617 | + .w1c = 0xf7fff, | ||
1618 | + },{ .name = "IRQ_MASK_REG", | ||
1619 | + .addr = A_IRQ_MASK_REG, | ||
1620 | + .ro = 0xfff08000, | ||
1621 | + },{ .name = "LOWER_WR_PROT_REG", | ||
1622 | + .addr = A_LOWER_WR_PROT_REG, | ||
1623 | + },{ .name = "UPPER_WR_PROT_REG", | ||
1624 | + .addr = A_UPPER_WR_PROT_REG, | ||
1625 | + },{ .name = "WR_PROT_CTRL_REG", | ||
1626 | + .addr = A_WR_PROT_CTRL_REG, | ||
1627 | + .ro = 0xfffffffc, | ||
1628 | + },{ .name = "INDIRECT_READ_XFER_CTRL_REG", | ||
1629 | + .addr = A_INDIRECT_READ_XFER_CTRL_REG, | ||
1630 | + .ro = 0xffffffd4, | ||
1631 | + .w1c = 0x08, | ||
1632 | + .pre_write = ind_rd_xfer_ctrl_reg_pre_write, | ||
1633 | + .post_write = ind_rd_xfer_ctrl_reg_post_write, | ||
1634 | + .post_read = ind_rd_xfer_ctrl_reg_post_read, | ||
1635 | + },{ .name = "INDIRECT_READ_XFER_WATERMARK_REG", | ||
1636 | + .addr = A_INDIRECT_READ_XFER_WATERMARK_REG, | ||
1637 | + },{ .name = "INDIRECT_READ_XFER_START_REG", | ||
1638 | + .addr = A_INDIRECT_READ_XFER_START_REG, | ||
1639 | + },{ .name = "INDIRECT_READ_XFER_NUM_BYTES_REG", | ||
1640 | + .addr = A_INDIRECT_READ_XFER_NUM_BYTES_REG, | ||
1641 | + },{ .name = "INDIRECT_WRITE_XFER_CTRL_REG", | ||
1642 | + .addr = A_INDIRECT_WRITE_XFER_CTRL_REG, | ||
1643 | + .ro = 0xffffffdc, | ||
1644 | + .w1c = 0x20, | ||
1645 | + .pre_write = ind_wr_xfer_ctrl_reg_pre_write, | ||
1646 | + .post_write = ind_wr_xfer_ctrl_reg_post_write, | ||
1647 | + .post_read = ind_wr_xfer_ctrl_reg_post_read, | ||
1648 | + },{ .name = "INDIRECT_WRITE_XFER_WATERMARK_REG", | ||
1649 | + .addr = A_INDIRECT_WRITE_XFER_WATERMARK_REG, | ||
1650 | + .reset = 0xffffffff, | ||
1651 | + },{ .name = "INDIRECT_WRITE_XFER_START_REG", | ||
1652 | + .addr = A_INDIRECT_WRITE_XFER_START_REG, | ||
1653 | + },{ .name = "INDIRECT_WRITE_XFER_NUM_BYTES_REG", | ||
1654 | + .addr = A_INDIRECT_WRITE_XFER_NUM_BYTES_REG, | ||
1655 | + },{ .name = "INDIRECT_TRIGGER_ADDR_RANGE_REG", | ||
1656 | + .addr = A_INDIRECT_TRIGGER_ADDR_RANGE_REG, | ||
1657 | + .reset = 0x4, | ||
1658 | + .ro = 0xfffffff0, | ||
1659 | + },{ .name = "FLASH_COMMAND_CTRL_MEM_REG", | ||
1660 | + .addr = A_FLASH_COMMAND_CTRL_MEM_REG, | ||
1661 | + .ro = 0xe008fffe, | ||
1662 | + .post_write = flash_cmd_ctrl_mem_reg_post_write, | ||
1663 | + },{ .name = "FLASH_CMD_CTRL_REG", | ||
1664 | + .addr = A_FLASH_CMD_CTRL_REG, | ||
1665 | + .ro = 0x7a, | ||
1666 | + .post_write = flash_cmd_ctrl_reg_post_write, | ||
1667 | + },{ .name = "FLASH_CMD_ADDR_REG", | ||
1668 | + .addr = A_FLASH_CMD_ADDR_REG, | ||
1669 | + },{ .name = "FLASH_RD_DATA_LOWER_REG", | ||
1670 | + .addr = A_FLASH_RD_DATA_LOWER_REG, | ||
1671 | + .ro = 0xffffffff, | ||
1672 | + },{ .name = "FLASH_RD_DATA_UPPER_REG", | ||
1673 | + .addr = A_FLASH_RD_DATA_UPPER_REG, | ||
1674 | + .ro = 0xffffffff, | ||
1675 | + },{ .name = "FLASH_WR_DATA_LOWER_REG", | ||
1676 | + .addr = A_FLASH_WR_DATA_LOWER_REG, | ||
1677 | + },{ .name = "FLASH_WR_DATA_UPPER_REG", | ||
1678 | + .addr = A_FLASH_WR_DATA_UPPER_REG, | ||
1679 | + },{ .name = "POLLING_FLASH_STATUS_REG", | ||
1680 | + .addr = A_POLLING_FLASH_STATUS_REG, | ||
1681 | + .ro = 0xfff0ffff, | ||
1682 | + },{ .name = "PHY_CONFIGURATION_REG", | ||
1683 | + .addr = A_PHY_CONFIGURATION_REG, | ||
1684 | + .reset = 0x40000000, | ||
1685 | + .ro = 0x1f80ff80, | ||
1686 | + },{ .name = "PHY_MASTER_CONTROL_REG", | ||
1687 | + .addr = A_PHY_MASTER_CONTROL_REG, | ||
1688 | + .reset = 0x800000, | ||
1689 | + .ro = 0xfe08ff80, | ||
1690 | + },{ .name = "DLL_OBSERVABLE_LOWER_REG", | ||
1691 | + .addr = A_DLL_OBSERVABLE_LOWER_REG, | ||
1692 | + .ro = 0xffffffff, | ||
1693 | + },{ .name = "DLL_OBSERVABLE_UPPER_REG", | ||
1694 | + .addr = A_DLL_OBSERVABLE_UPPER_REG, | ||
1695 | + .ro = 0xffffffff, | ||
1696 | + .post_read = dll_obs_upper_reg_post_read, | ||
1697 | + },{ .name = "OPCODE_EXT_LOWER_REG", | ||
1698 | + .addr = A_OPCODE_EXT_LOWER_REG, | ||
1699 | + .reset = 0x13edfa00, | ||
1700 | + },{ .name = "OPCODE_EXT_UPPER_REG", | ||
1701 | + .addr = A_OPCODE_EXT_UPPER_REG, | ||
1702 | + .reset = 0x6f90000, | ||
1703 | + .ro = 0xffff, | ||
1704 | + },{ .name = "MODULE_ID_REG", | ||
1705 | + .addr = A_MODULE_ID_REG, | ||
1706 | + .reset = 0x300, | ||
1707 | + .ro = 0xffffffff, | ||
1708 | + } | ||
1709 | +}; | ||
1710 | + | ||
1711 | +/* Return dev-obj from reg-region created by register_init_block32 */ | ||
1712 | +static XlnxVersalOspi *xilinx_ospi_of_mr(void *mr_accessor) | ||
1713 | +{ | ||
1714 | + RegisterInfoArray *reg_array = mr_accessor; | ||
1715 | + Object *dev; | ||
1716 | + | ||
1717 | + dev = reg_array->mem.owner; | ||
1718 | + assert(dev); | ||
1719 | + | ||
1720 | + return XILINX_VERSAL_OSPI(dev); | ||
1721 | +} | ||
1722 | + | ||
1723 | +static void ospi_write(void *opaque, hwaddr addr, uint64_t value, | ||
1724 | + unsigned int size) | ||
1725 | +{ | ||
1726 | + XlnxVersalOspi *s = xilinx_ospi_of_mr(opaque); | ||
1727 | + | ||
1728 | + register_write_memory(opaque, addr, value, size); | ||
1729 | + ospi_update_irq_line(s); | ||
1730 | +} | ||
1731 | + | ||
1732 | +static const MemoryRegionOps ospi_ops = { | ||
1733 | + .read = register_read_memory, | ||
1734 | + .write = ospi_write, | ||
1735 | + .endianness = DEVICE_LITTLE_ENDIAN, | ||
1736 | + .valid = { | ||
1737 | + .min_access_size = 4, | ||
1738 | + .max_access_size = 4, | ||
1739 | + }, | ||
1740 | +}; | ||
1741 | + | ||
1742 | +static uint64_t ospi_indac_read(void *opaque, unsigned int size) | ||
1743 | +{ | ||
1744 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1745 | + uint64_t ret = ospi_rx_sram_read(s, size); | ||
1746 | + | ||
1747 | + if (!ospi_ind_op_completed(s->rd_ind_op)) { | ||
1748 | + ospi_do_ind_read(s); | ||
1749 | + } | ||
1750 | + return ret; | ||
1751 | +} | ||
1752 | + | ||
1753 | +static void ospi_indac_write(void *opaque, uint64_t value, unsigned int size) | ||
1754 | +{ | ||
1755 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1756 | + | ||
1757 | + g_assert(!s->ind_write_disabled); | ||
1758 | + | ||
1759 | + if (!ospi_ind_op_completed(s->wr_ind_op)) { | ||
1760 | + ospi_tx_sram_write(s, value, size); | ||
1761 | + ospi_do_indirect_write(s); | ||
1762 | + } else { | ||
1763 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
1764 | + "OSPI wr into indac area while no ongoing indac wr\n"); | ||
1765 | + } | ||
1766 | +} | ||
1767 | + | ||
1768 | +static bool is_inside_indac_range(XlnxVersalOspi *s, hwaddr addr) | ||
1769 | +{ | ||
1770 | + uint32_t range_start; | ||
1771 | + uint32_t range_end; | ||
1772 | + | ||
1773 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_DMA_IF_FLD)) { | ||
1774 | + return true; | ||
1775 | + } | ||
1776 | + | ||
1777 | + range_start = s->regs[R_IND_AHB_ADDR_TRIGGER_REG]; | ||
1778 | + range_end = range_start + | ||
1779 | + (1 << ARRAY_FIELD_EX32(s->regs, | ||
1780 | + INDIRECT_TRIGGER_ADDR_RANGE_REG, | ||
1781 | + IND_RANGE_WIDTH_FLD)); | ||
1782 | + | ||
1783 | + addr += s->regs[R_IND_AHB_ADDR_TRIGGER_REG] & 0xF0000000; | ||
1784 | + | ||
1785 | + return addr >= range_start && addr < range_end; | ||
1786 | +} | ||
1787 | + | ||
1788 | +static bool ospi_is_indac_active(XlnxVersalOspi *s) | ||
1789 | +{ | ||
1790 | + /* | ||
1791 | + * When dac and indac cannot be active at the same time, | ||
1792 | + * return true when dac is disabled. | ||
1793 | + */ | ||
1794 | + return s->dac_with_indac || !s->dac_enable; | ||
1795 | +} | ||
1796 | + | ||
1797 | +static uint64_t ospi_dac_read(void *opaque, hwaddr addr, unsigned int size) | ||
1798 | +{ | ||
1799 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1800 | + | ||
1801 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_SPI_FLD)) { | ||
1802 | + if (ospi_is_indac_active(s) && | ||
1803 | + is_inside_indac_range(s, addr)) { | ||
1804 | + return ospi_indac_read(s, size); | ||
1805 | + } | ||
1806 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_DIR_ACC_CTLR_FLD) | ||
1807 | + && s->dac_enable) { | ||
1808 | + if (ARRAY_FIELD_EX32(s->regs, | ||
1809 | + CONFIG_REG, ENB_AHB_ADDR_REMAP_FLD)) { | ||
1810 | + addr += s->regs[R_REMAP_ADDR_REG]; | ||
1811 | + } | ||
1812 | + return ospi_do_dac_read(opaque, addr, size); | ||
1813 | + } else { | ||
1814 | + qemu_log_mask(LOG_GUEST_ERROR, "OSPI AHB rd while DAC disabled\n"); | ||
1815 | + } | ||
1816 | + } else { | ||
1817 | + qemu_log_mask(LOG_GUEST_ERROR, "OSPI AHB rd while OSPI disabled\n"); | ||
1818 | + } | ||
1819 | + | ||
1820 | + return 0; | ||
1821 | +} | ||
1822 | + | ||
1823 | +static void ospi_dac_write(void *opaque, hwaddr addr, uint64_t value, | ||
1824 | + unsigned int size) | ||
1825 | +{ | ||
1826 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1827 | + | ||
1828 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_SPI_FLD)) { | ||
1829 | + if (ospi_is_indac_active(s) && | ||
1830 | + !s->ind_write_disabled && | ||
1831 | + is_inside_indac_range(s, addr)) { | ||
1832 | + return ospi_indac_write(s, value, size); | ||
1833 | + } | ||
1834 | + if (ARRAY_FIELD_EX32(s->regs, CONFIG_REG, ENB_DIR_ACC_CTLR_FLD) && | ||
1835 | + s->dac_enable) { | ||
1836 | + if (ARRAY_FIELD_EX32(s->regs, | ||
1837 | + CONFIG_REG, ENB_AHB_ADDR_REMAP_FLD)) { | ||
1838 | + addr += s->regs[R_REMAP_ADDR_REG]; | ||
1839 | + } | ||
1840 | + /* Check if addr is write protected */ | ||
1841 | + if (ARRAY_FIELD_EX32(s->regs, WR_PROT_CTRL_REG, ENB_FLD) && | ||
1842 | + ospi_is_write_protected(s, addr)) { | ||
1843 | + set_irq(s, R_IRQ_STATUS_REG_PROT_WR_ATTEMPT_FLD_MASK); | ||
1844 | + ospi_update_irq_line(s); | ||
1845 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
1846 | + "OSPI writing into write protected area\n"); | ||
1847 | + return; | ||
1848 | + } | ||
1849 | + ospi_do_dac_write(opaque, addr, value, size); | ||
1850 | + } else { | ||
1851 | + qemu_log_mask(LOG_GUEST_ERROR, "OSPI AHB wr while DAC disabled\n"); | ||
1852 | + } | ||
1853 | + } else { | ||
1854 | + qemu_log_mask(LOG_GUEST_ERROR, "OSPI AHB wr while OSPI disabled\n"); | ||
1855 | + } | ||
1856 | +} | ||
1857 | + | ||
1858 | +static const MemoryRegionOps ospi_dac_ops = { | ||
1859 | + .read = ospi_dac_read, | ||
1860 | + .write = ospi_dac_write, | ||
1861 | + .endianness = DEVICE_LITTLE_ENDIAN, | ||
1862 | + .valid = { | ||
1863 | + .min_access_size = 4, | ||
1864 | + .max_access_size = 4, | ||
1865 | + }, | ||
1866 | +}; | ||
1867 | + | ||
1868 | +static void ospi_update_dac_status(void *opaque, int n, int level) | ||
1869 | +{ | ||
1870 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); | ||
1871 | + | ||
1872 | + s->dac_enable = level; | ||
1873 | +} | ||
1874 | + | ||
1875 | +static void xlnx_versal_ospi_realize(DeviceState *dev, Error **errp) | ||
1876 | +{ | ||
1877 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(dev); | ||
1878 | + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); | ||
1879 | + | ||
1880 | + s->num_cs = 4; | ||
1881 | + s->spi = ssi_create_bus(dev, "spi0"); | ||
1882 | + s->cs_lines = g_new0(qemu_irq, s->num_cs); | ||
1883 | + for (int i = 0; i < s->num_cs; ++i) { | ||
1884 | + sysbus_init_irq(sbd, &s->cs_lines[i]); | ||
1885 | + } | ||
1886 | + | ||
1887 | + fifo8_create(&s->rx_fifo, RXFF_SZ); | ||
1888 | + fifo8_create(&s->tx_fifo, TXFF_SZ); | ||
1889 | + fifo8_create(&s->rx_sram, RXFF_SZ); | ||
1890 | + fifo8_create(&s->tx_sram, TXFF_SZ); | ||
1891 | +} | ||
1892 | + | ||
1893 | +static void xlnx_versal_ospi_init(Object *obj) | ||
1894 | +{ | ||
1895 | + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(obj); | ||
1896 | + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); | ||
1897 | + DeviceState *dev = DEVICE(obj); | ||
1898 | + RegisterInfoArray *reg_array; | ||
1899 | + | ||
1900 | + memory_region_init(&s->iomem, obj, TYPE_XILINX_VERSAL_OSPI, | ||
1901 | + XILINX_VERSAL_OSPI_R_MAX * 4); | ||
1902 | + reg_array = | ||
1903 | + register_init_block32(DEVICE(obj), ospi_regs_info, | ||
1904 | + ARRAY_SIZE(ospi_regs_info), | ||
1905 | + s->regs_info, s->regs, | ||
1906 | + &ospi_ops, | ||
1907 | + XILINX_VERSAL_OSPI_ERR_DEBUG, | ||
1908 | + XILINX_VERSAL_OSPI_R_MAX * 4); | ||
1909 | + memory_region_add_subregion(&s->iomem, 0x0, ®_array->mem); | ||
1910 | + sysbus_init_mmio(sbd, &s->iomem); | ||
1911 | + | ||
1912 | + memory_region_init_io(&s->iomem_dac, obj, &ospi_dac_ops, s, | ||
1913 | + TYPE_XILINX_VERSAL_OSPI "-dac", 0x20000000); | ||
1914 | + sysbus_init_mmio(sbd, &s->iomem_dac); | ||
1915 | + | ||
1916 | + sysbus_init_irq(sbd, &s->irq); | ||
1917 | + | ||
1918 | + object_property_add_link(obj, "dma-src", TYPE_XLNX_CSU_DMA, | ||
1919 | + (Object **)&s->dma_src, | ||
1920 | + object_property_allow_set_link, | ||
1921 | + OBJ_PROP_LINK_STRONG); | ||
1922 | + | ||
1923 | + qdev_init_gpio_in_named(dev, ospi_update_dac_status, "ospi-mux-sel", 1); | ||
1924 | +} | ||
1925 | + | ||
1926 | +static const VMStateDescription vmstate_ind_op = { | ||
1927 | + .name = "OSPIIndOp", | ||
1928 | + .version_id = 1, | ||
1929 | + .minimum_version_id = 1, | ||
1930 | + .fields = (VMStateField[]) { | ||
1931 | + VMSTATE_UINT32(flash_addr, IndOp), | ||
1932 | + VMSTATE_UINT32(num_bytes, IndOp), | ||
1933 | + VMSTATE_UINT32(done_bytes, IndOp), | ||
1934 | + VMSTATE_BOOL(completed, IndOp), | ||
1935 | + VMSTATE_END_OF_LIST() | ||
1936 | + } | ||
1937 | +}; | ||
1938 | + | ||
1939 | +static const VMStateDescription vmstate_xlnx_versal_ospi = { | ||
1940 | + .name = TYPE_XILINX_VERSAL_OSPI, | ||
1941 | + .version_id = 1, | ||
1942 | + .minimum_version_id = 1, | ||
1943 | + .minimum_version_id_old = 1, | ||
1944 | + .fields = (VMStateField[]) { | ||
1945 | + VMSTATE_FIFO8(rx_fifo, XlnxVersalOspi), | ||
1946 | + VMSTATE_FIFO8(tx_fifo, XlnxVersalOspi), | ||
1947 | + VMSTATE_FIFO8(rx_sram, XlnxVersalOspi), | ||
1948 | + VMSTATE_FIFO8(tx_sram, XlnxVersalOspi), | ||
1949 | + VMSTATE_BOOL(ind_write_disabled, XlnxVersalOspi), | ||
1950 | + VMSTATE_BOOL(dac_with_indac, XlnxVersalOspi), | ||
1951 | + VMSTATE_BOOL(dac_enable, XlnxVersalOspi), | ||
1952 | + VMSTATE_BOOL(src_dma_inprog, XlnxVersalOspi), | ||
1953 | + VMSTATE_STRUCT_ARRAY(rd_ind_op, XlnxVersalOspi, 2, 1, | ||
1954 | + vmstate_ind_op, IndOp), | ||
1955 | + VMSTATE_STRUCT_ARRAY(wr_ind_op, XlnxVersalOspi, 2, 1, | ||
1956 | + vmstate_ind_op, IndOp), | ||
1957 | + VMSTATE_UINT32_ARRAY(regs, XlnxVersalOspi, XILINX_VERSAL_OSPI_R_MAX), | ||
1958 | + VMSTATE_UINT8_ARRAY(stig_membank, XlnxVersalOspi, 512), | ||
1959 | + VMSTATE_END_OF_LIST(), | ||
1960 | + } | ||
1961 | +}; | ||
1962 | + | ||
1963 | +static Property xlnx_versal_ospi_properties[] = { | ||
1964 | + DEFINE_PROP_BOOL("dac-with-indac", XlnxVersalOspi, dac_with_indac, false), | ||
1965 | + DEFINE_PROP_BOOL("indac-write-disabled", XlnxVersalOspi, | ||
1966 | + ind_write_disabled, false), | ||
1967 | + DEFINE_PROP_END_OF_LIST(), | ||
1968 | +}; | ||
1969 | + | ||
1970 | +static void xlnx_versal_ospi_class_init(ObjectClass *klass, void *data) | ||
1971 | +{ | ||
1972 | + DeviceClass *dc = DEVICE_CLASS(klass); | ||
1973 | + | ||
1974 | + dc->reset = xlnx_versal_ospi_reset; | ||
1975 | + dc->realize = xlnx_versal_ospi_realize; | ||
1976 | + dc->vmsd = &vmstate_xlnx_versal_ospi; | ||
1977 | + device_class_set_props(dc, xlnx_versal_ospi_properties); | ||
1978 | +} | ||
1979 | + | ||
1980 | +static const TypeInfo xlnx_versal_ospi_info = { | ||
1981 | + .name = TYPE_XILINX_VERSAL_OSPI, | ||
1982 | + .parent = TYPE_SYS_BUS_DEVICE, | ||
1983 | + .instance_size = sizeof(XlnxVersalOspi), | ||
1984 | + .class_init = xlnx_versal_ospi_class_init, | ||
1985 | + .instance_init = xlnx_versal_ospi_init, | ||
1986 | +}; | ||
1987 | + | ||
1988 | +static void xlnx_versal_ospi_register_types(void) | ||
1989 | +{ | ||
1990 | + type_register_static(&xlnx_versal_ospi_info); | ||
1991 | +} | ||
1992 | + | ||
1993 | +type_init(xlnx_versal_ospi_register_types) | ||
1994 | diff --git a/hw/ssi/meson.build b/hw/ssi/meson.build | ||
1995 | index XXXXXXX..XXXXXXX 100644 | ||
1996 | --- a/hw/ssi/meson.build | ||
1997 | +++ b/hw/ssi/meson.build | ||
1998 | @@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_SSI', if_true: files('ssi.c')) | ||
1999 | softmmu_ss.add(when: 'CONFIG_STM32F2XX_SPI', if_true: files('stm32f2xx_spi.c')) | ||
2000 | softmmu_ss.add(when: 'CONFIG_XILINX_SPI', if_true: files('xilinx_spi.c')) | ||
2001 | softmmu_ss.add(when: 'CONFIG_XILINX_SPIPS', if_true: files('xilinx_spips.c')) | ||
2002 | +softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-ospi.c')) | ||
2003 | softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_spi.c')) | ||
2004 | softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_spi.c')) | ||
2005 | -- | 158 | -- |
2006 | 2.25.1 | 159 | 2.34.1 |
2007 | |||
2008 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE | ||
4 | FMLSL (indexed), using the usual trick of negating by XOR when AH=0 | ||
5 | and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-32-richard.henderson@linaro.org | ||
13 | [PMM: commit message tweaked] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
18 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
25 | CPUARMState *env, uint32_t desc) | ||
26 | { | ||
27 | intptr_t i, j, oprsz = simd_oprsz(desc); | ||
28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
31 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
32 | float_status *status = &env->vfp.fp_status_a64; | ||
33 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
34 | + int negx = 0, negf = 0; | ||
35 | + | ||
36 | + if (is_s) { | ||
37 | + if (env->vfp.fpcr & FPCR_AH) { | ||
38 | + negf = float_muladd_negate_product; | ||
39 | + } else { | ||
40 | + negx = 0x8000; | ||
41 | + } | ||
42 | + } | ||
43 | |||
44 | for (i = 0; i < oprsz; i += 16) { | ||
45 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
46 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
47 | |||
48 | for (j = 0; j < 16; j += sizeof(float32)) { | ||
49 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; | ||
50 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; | ||
51 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
52 | float32 aa = *(float32 *)(va + H1_4(i + j)); | ||
53 | |||
54 | *(float32 *)(vd + H1_4(i + j)) = | ||
55 | - float32_muladd(nn, mm, aa, 0, status); | ||
56 | + float32_muladd(nn, mm, aa, negf, status); | ||
57 | } | ||
58 | } | ||
59 | } | ||
60 | -- | ||
61 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
1 | 2 | ||
3 | Handle FPCR.AH's requirement to not negate the sign of a NaN in SVE | ||
4 | FMLSL (vectors), using the usual trick of negating by XOR when AH=0 | |
5 | and by muladd flags when AH=1. | ||
6 | |||
7 | Since we have the CPUARMState* in the helper anyway, we can | ||
8 | look directly at env->vfp.fpcr and don't need to pass in the | |
9 | FPCR.AH value via the SIMD data word. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20250129013857.135256-33-richard.henderson@linaro.org | ||
13 | [PMM: tweaked commit message] | ||
14 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | target/arm/tcg/vec_helper.c | 15 ++++++++++++--- | ||
18 | 1 file changed, 12 insertions(+), 3 deletions(-) | ||
19 | |||
20 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/tcg/vec_helper.c | ||
23 | +++ b/target/arm/tcg/vec_helper.c | ||
24 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
25 | CPUARMState *env, uint32_t desc) | ||
26 | { | ||
27 | intptr_t i, oprsz = simd_oprsz(desc); | ||
28 | - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; | ||
29 | + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
30 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
31 | float_status *status = &env->vfp.fp_status_a64; | ||
32 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
33 | + int negx = 0, negf = 0; | ||
34 | + | ||
35 | + if (is_s) { | ||
36 | + if (env->vfp.fpcr & FPCR_AH) { | ||
37 | + negf = float_muladd_negate_product; | ||
38 | + } else { | ||
39 | + negx = 0x8000; | ||
40 | + } | ||
41 | + } | ||
42 | |||
43 | for (i = 0; i < oprsz; i += sizeof(float32)) { | ||
44 | - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; | ||
45 | + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; | ||
46 | float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); | ||
47 | float32 nn = float16_to_float32_by_bits(nn_16, fz16); | ||
48 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
49 | float32 aa = *(float32 *)(va + H1_4(i)); | ||
50 | |||
51 | - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); | ||
52 | + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | -- | ||
57 | 2.34.1 | diff view generated by jsdifflib |
1 | The current ITS code clears GITS_CREADR when GITS_CTLR.ENABLED is set. | 1 | Now that we have completed the handling for FPCR.{AH,FIZ,NEP}, we |
---|---|---|---|
2 | This is not correct -- guest code can validly clear ENABLED and then | 2 | can enable FEAT_AFP for '-cpu max', and document that we support it. |
3 | set it again and expect the ITS to continue processing where it left | ||
4 | off. Remove the erroneous assignment. | ||
5 | 3 | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220122182444.724087-5-peter.maydell@linaro.org | ||
9 | --- | 6 | --- |
10 | hw/intc/arm_gicv3_its.c | 1 - | 7 | docs/system/arm/emulation.rst | 1 + |
11 | 1 file changed, 1 deletion(-) | 8 | target/arm/tcg/cpu64.c | 1 + |
9 | 2 files changed, 2 insertions(+) | ||
12 | 10 | ||
13 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
14 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/hw/intc/arm_gicv3_its.c | 13 | --- a/docs/system/arm/emulation.rst |
16 | +++ b/hw/intc/arm_gicv3_its.c | 14 | +++ b/docs/system/arm/emulation.rst |
17 | @@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset, | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
18 | s->ctlr |= R_GITS_CTLR_ENABLED_MASK; | 16 | - FEAT_AA64EL3 (Support for AArch64 at EL3) |
19 | extract_table_params(s); | 17 | - FEAT_AdvSIMD (Advanced SIMD Extension) |
20 | extract_cmdq_params(s); | 18 | - FEAT_AES (AESD and AESE instructions) |
21 | - s->creadr = 0; | 19 | +- FEAT_AFP (Alternate floating-point behavior) |
22 | process_cmdq(s); | 20 | - FEAT_Armv9_Crypto (Armv9 Cryptographic Extension) |
23 | } else { | 21 | - FEAT_ASID16 (16 bit ASID) |
24 | s->ctlr &= ~R_GITS_CTLR_ENABLED_MASK; | 22 | - FEAT_BBM at level 2 (Translation table break-before-make levels) |
23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/arm/tcg/cpu64.c | ||
26 | +++ b/target/arm/tcg/cpu64.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | ||
28 | t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ | ||
29 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ | ||
30 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ | ||
31 | + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ | ||
32 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ | ||
33 | t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ | ||
34 | cpu->isar.id_aa64mmfr1 = t; | ||
25 | -- | 35 | -- |
26 | 2.25.1 | 36 | 2.34.1 |
27 | |||
28 | diff view generated by jsdifflib |
1 | The ITS currently has no tracepoints; add a minimal set | 1 | FEAT_RPRES implements an "increased precision" variant of the single |
---|---|---|---|
2 | that allows basic monitoring of guest register accesses and | 2 | precision FRECPE and FRSQRTE instructions from an 8 bit to a 12 |
3 | reading of commands from the command queue. | 3 | bit mantissa. This applies only when FPCR.AH == 1. Note that the |
4 | halfprec and double versions of these insns retain the 8 bit | ||
5 | precision regardless. | ||
6 | |||
7 | In this commit we add all the plumbing to make these instructions | ||
8 | call a new helper function when the increased-precision is in | ||
9 | effect. In the following commit we will provide the actual change | ||
10 | in behaviour in the helpers. | ||
4 | 11 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20220122182444.724087-3-peter.maydell@linaro.org | ||
9 | --- | 14 | --- |
10 | hw/intc/arm_gicv3_its.c | 11 +++++++++++ | 15 | target/arm/cpu-features.h | 5 +++++ |
11 | hw/intc/trace-events | 8 ++++++++ | 16 | target/arm/helper.h | 4 ++++ |
12 | 2 files changed, 19 insertions(+) | 17 | target/arm/tcg/translate-a64.c | 34 ++++++++++++++++++++++++++++++---- |
18 | target/arm/tcg/translate-sve.c | 16 ++++++++++++++-- | ||
19 | target/arm/tcg/vec_helper.c | 2 ++ | ||
20 | target/arm/vfp_helper.c | 32 ++++++++++++++++++++++++++++++-- | ||
21 | 6 files changed, 85 insertions(+), 8 deletions(-) | ||
13 | 22 | ||
14 | diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c | 23 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h |
15 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/intc/arm_gicv3_its.c | 25 | --- a/target/arm/cpu-features.h |
17 | +++ b/hw/intc/arm_gicv3_its.c | 26 | +++ b/target/arm/cpu-features.h |
18 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) |
19 | 28 | return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); | |
20 | #include "qemu/osdep.h" | 29 | } |
21 | #include "qemu/log.h" | 30 | |
22 | +#include "trace.h" | 31 | +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) |
23 | #include "hw/qdev-properties.h" | 32 | +{ |
24 | #include "hw/intc/arm_gicv3_its_common.h" | 33 | + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); |
25 | #include "gicv3_internal.h" | 34 | +} |
26 | @@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s) | 35 | + |
27 | 36 | static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) | |
28 | cmd = (data & CMD_MASK); | 37 | { |
29 | 38 | /* We always set the AdvSIMD and FP fields identically. */ | |
30 | + trace_gicv3_its_process_command(rd_offset, cmd); | 39 | diff --git a/target/arm/helper.h b/target/arm/helper.h |
31 | + | 40 | index XXXXXXX..XXXXXXX 100644 |
32 | switch (cmd) { | 41 | --- a/target/arm/helper.h |
33 | case GITS_CMD_INT: | 42 | +++ b/target/arm/helper.h |
34 | result = process_its_cmd(s, data, cq_offset, INTERRUPT); | 43 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) |
35 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset, | 44 | |
36 | bool result = true; | 45 | DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
37 | uint32_t devid = 0; | 46 | DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
38 | 47 | +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) | |
39 | + trace_gicv3_its_translation_write(offset, data, size, attrs.requester_id); | 48 | DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
40 | + | 49 | DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) |
41 | switch (offset) { | 50 | DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
42 | case GITS_TRANSLATER: | 51 | +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) |
43 | if (s->ctlr & R_GITS_CTLR_ENABLED_MASK) { | 52 | DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) |
44 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, | 53 | DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) |
45 | qemu_log_mask(LOG_GUEST_ERROR, | 54 | DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) |
46 | "%s: invalid guest read at offset " TARGET_FMT_plx | 55 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
47 | "size %u\n", __func__, offset, size); | 56 | |
48 | + trace_gicv3_its_badread(offset, size); | 57 | DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
49 | /* | 58 | DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
50 | * The spec requires that reserved registers are RAZ/WI; | 59 | +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
51 | * so use false returns from leaf functions as a way to | 60 | DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
52 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, | 61 | |
53 | * the caller, or we'll cause a spurious guest data abort. | 62 | DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
54 | */ | 63 | DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
55 | *data = 0; | 64 | +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
56 | + } else { | 65 | DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
57 | + trace_gicv3_its_read(offset, *data, size); | 66 | |
58 | } | 67 | DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) |
59 | return MEMTX_OK; | 68 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
60 | } | 69 | index XXXXXXX..XXXXXXX 100644 |
61 | @@ -XXX,XX +XXX,XX @@ static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data, | 70 | --- a/target/arm/tcg/translate-a64.c |
62 | qemu_log_mask(LOG_GUEST_ERROR, | 71 | +++ b/target/arm/tcg/translate-a64.c |
63 | "%s: invalid guest write at offset " TARGET_FMT_plx | 72 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frecpe = { |
64 | "size %u\n", __func__, offset, size); | 73 | gen_helper_recpe_f32, |
65 | + trace_gicv3_its_badwrite(offset, data, size); | 74 | gen_helper_recpe_f64, |
66 | /* | 75 | }; |
67 | * The spec requires that reserved registers are RAZ/WI; | 76 | -TRANS(FRECPE_s, do_fp1_scalar_ah, a, &f_scalar_frecpe, -1) |
68 | * so use false returns from leaf functions as a way to | 77 | +static const FPScalar1 f_scalar_frecpe_rpres = { |
69 | * trigger the guest-error logging but don't return it to | 78 | + gen_helper_recpe_f16, |
70 | * the caller, or we'll cause a spurious guest data abort. | 79 | + gen_helper_recpe_rpres_f32, |
71 | */ | 80 | + gen_helper_recpe_f64, |
72 | + } else { | 81 | +}; |
73 | + trace_gicv3_its_write(offset, data, size); | 82 | +TRANS(FRECPE_s, do_fp1_scalar_ah, a, |
74 | } | 83 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? |
75 | return MEMTX_OK; | 84 | + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) |
76 | } | 85 | |
77 | diff --git a/hw/intc/trace-events b/hw/intc/trace-events | 86 | static const FPScalar1 f_scalar_frecpx = { |
78 | index XXXXXXX..XXXXXXX 100644 | 87 | gen_helper_frecpx_f16, |
79 | --- a/hw/intc/trace-events | 88 | @@ -XXX,XX +XXX,XX @@ static const FPScalar1 f_scalar_frsqrte = { |
80 | +++ b/hw/intc/trace-events | 89 | gen_helper_rsqrte_f32, |
81 | @@ -XXX,XX +XXX,XX @@ gicv3_redist_badwrite(uint32_t cpu, uint64_t offset, uint64_t data, unsigned siz | 90 | gen_helper_rsqrte_f64, |
82 | gicv3_redist_set_irq(uint32_t cpu, int irq, int level) "GICv3 redistributor 0x%x interrupt %d level changed to %d" | 91 | }; |
83 | gicv3_redist_send_sgi(uint32_t cpu, int irq) "GICv3 redistributor 0x%x pending SGI %d" | 92 | -TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, &f_scalar_frsqrte, -1) |
84 | 93 | +static const FPScalar1 f_scalar_frsqrte_rpres = { | |
85 | +# arm_gicv3_its.c | 94 | + gen_helper_rsqrte_f16, |
86 | +gicv3_its_read(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" | 95 | + gen_helper_rsqrte_rpres_f32, |
87 | +gicv3_its_badread(uint64_t offset, unsigned size) "GICv3 ITS read: offset 0x%" PRIx64 " size %u: error" | 96 | + gen_helper_rsqrte_f64, |
88 | +gicv3_its_write(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" | 97 | +}; |
89 | +gicv3_its_badwrite(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u: error" | 98 | +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, |
90 | +gicv3_its_translation_write(uint64_t offset, uint64_t data, unsigned size, uint32_t requester_id) "GICv3 ITS TRANSLATER write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u requester_id 0x%x" | 99 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? |
91 | +gicv3_its_process_command(uint32_t rd_offset, uint8_t cmd) "GICv3 ITS: processing command at offset 0x%x: 0x%x" | 100 | + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) |
92 | + | 101 | |
93 | # armv7m_nvic.c | 102 | static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) |
94 | nvic_recompute_state(int vectpending, int vectpending_prio, int exception_prio) "NVIC state recomputed: vectpending %d vectpending_prio %d exception_prio %d" | 103 | { |
95 | nvic_recompute_state_secure(int vectpending, bool vectpending_is_s_banked, int vectpending_prio, int exception_prio) "NVIC state recomputed: vectpending %d is_s_banked %d vectpending_prio %d exception_prio %d" | 104 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { |
105 | gen_helper_gvec_frecpe_s, | ||
106 | gen_helper_gvec_frecpe_d, | ||
107 | }; | ||
108 | -TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) | ||
109 | +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { | ||
110 | + gen_helper_gvec_frecpe_h, | ||
111 | + gen_helper_gvec_frecpe_rpres_s, | ||
112 | + gen_helper_gvec_frecpe_d, | ||
113 | +}; | ||
114 | +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
115 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) | ||
116 | |||
117 | static gen_helper_gvec_2_ptr * const f_frsqrte[] = { | ||
118 | gen_helper_gvec_frsqrte_h, | ||
119 | gen_helper_gvec_frsqrte_s, | ||
120 | gen_helper_gvec_frsqrte_d, | ||
121 | }; | ||
122 | -TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) | ||
123 | +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { | ||
124 | + gen_helper_gvec_frsqrte_h, | ||
125 | + gen_helper_gvec_frsqrte_rpres_s, | ||
126 | + gen_helper_gvec_frsqrte_d, | ||
127 | +}; | ||
128 | +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, | ||
129 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) | ||
130 | |||
131 | static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) | ||
132 | { | ||
133 | diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/target/arm/tcg/translate-sve.c | ||
136 | +++ b/target/arm/tcg/translate-sve.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { | ||
138 | NULL, gen_helper_gvec_frecpe_h, | ||
139 | gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, | ||
140 | }; | ||
141 | -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frecpe_fns[a->esz], a, 0) | ||
142 | +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { | ||
143 | + NULL, gen_helper_gvec_frecpe_h, | ||
144 | + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, | ||
145 | +}; | ||
146 | +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
147 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
148 | + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) | ||
149 | |||
150 | static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { | ||
151 | NULL, gen_helper_gvec_frsqrte_h, | ||
152 | gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, | ||
153 | }; | ||
154 | -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, frsqrte_fns[a->esz], a, 0) | ||
155 | +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { | ||
156 | + NULL, gen_helper_gvec_frsqrte_h, | ||
157 | + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, | ||
158 | +}; | ||
159 | +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, | ||
160 | + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? | ||
161 | + frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) | ||
162 | |||
163 | /* | ||
164 | *** SVE Floating Point Compare with Zero Group | ||
165 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/arm/tcg/vec_helper.c | ||
168 | +++ b/target/arm/tcg/vec_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ | ||
170 | |||
171 | DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) | ||
172 | DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) | ||
173 | +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) | ||
174 | DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) | ||
175 | |||
176 | DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) | ||
177 | DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) | ||
178 | +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) | ||
179 | DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) | ||
180 | |||
181 | DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) | ||
182 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/target/arm/vfp_helper.c | ||
185 | +++ b/target/arm/vfp_helper.c | ||
186 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) | ||
187 | return make_float16(f16_val); | ||
188 | } | ||
189 | |||
190 | -float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
191 | +/* | ||
192 | + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant | ||
193 | + * which is used when FPCR.AH == 1. | ||
194 | + */ | ||
195 | +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) | ||
196 | { | ||
197 | float32 f32 = float32_squash_input_denormal(input, fpst); | ||
198 | uint32_t f32_val = float32_val(f32); | ||
199 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
200 | return make_float32(f32_val); | ||
201 | } | ||
202 | |||
203 | +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) | ||
204 | +{ | ||
205 | + return do_recpe_f32(input, fpst, false); | ||
206 | +} | ||
207 | + | ||
208 | +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) | ||
209 | +{ | ||
210 | + return do_recpe_f32(input, fpst, true); | ||
211 | +} | ||
212 | + | ||
213 | float64 HELPER(recpe_f64)(float64 input, float_status *fpst) | ||
214 | { | ||
215 | float64 f64 = float64_squash_input_denormal(input, fpst); | ||
216 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
217 | return make_float16(val); | ||
218 | } | ||
219 | |||
220 | -float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
221 | +/* | ||
222 | + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant | ||
223 | + * which is used when FPCR.AH == 1. | ||
224 | + */ | ||
225 | +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
226 | { | ||
227 | float32 f32 = float32_squash_input_denormal(input, s); | ||
228 | uint32_t val = float32_val(f32); | ||
229 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
230 | return make_float32(val); | ||
231 | } | ||
232 | |||
233 | +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) | ||
234 | +{ | ||
235 | + return do_rsqrte_f32(input, s, false); | ||
236 | +} | ||
237 | + | ||
238 | +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) | ||
239 | +{ | ||
240 | + return do_rsqrte_f32(input, s, true); | ||
241 | +} | ||
242 | + | ||
243 | float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
244 | { | ||
245 | float64 f64 = float64_squash_input_denormal(input, s); | ||
96 | -- | 246 | -- |
97 | 2.25.1 | 247 | 2.34.1 |
98 | |||
99 | diff view generated by jsdifflib |
1 | softmmu/rtc.c defines two public functions: qemu_get_timedate() and | 1 | Implement the increased precision variation of FRECPE. In the |
---|---|---|---|
2 | qemu_timedate_diff(). Currently we keep the prototypes for these in | 2 | pseudocode this corresponds to the handling of the |
3 | qemu-common.h, but most files don't need them. Move them to their | 3 | "increasedprecision" boolean in the FPRecipEstimate() and |
4 | own header, a new include/sysemu/rtc.h. | 4 | RecipEstimate() functions. |
5 | |||
6 | Since the C files using these two functions did not need to include | ||
7 | qemu-common.h for any other reason, we can remove those include lines | ||
8 | when we add the include of the new rtc.h. | ||
9 | |||
10 | The license for the .h file follows that of the softmmu/rtc.c | ||
11 | where both the functions are defined. | ||
12 | 5 | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | --- | 8 | --- |
16 | include/qemu-common.h | 3 --- | 9 | target/arm/vfp_helper.c | 54 +++++++++++++++++++++++++++++++++++------ |
17 | include/sysemu/rtc.h | 58 ++++++++++++++++++++++++++++++++++++++++ | 10 | 1 file changed, 46 insertions(+), 8 deletions(-) |
18 | hw/arm/omap1.c | 2 +- | ||
19 | hw/arm/pxa2xx.c | 2 +- | ||
20 | hw/arm/strongarm.c | 2 +- | ||
21 | hw/misc/mac_via.c | 2 +- | ||
22 | hw/misc/macio/cuda.c | 2 +- | ||
23 | hw/misc/macio/pmu.c | 2 +- | ||
24 | hw/ppc/spapr_rtc.c | 2 +- | ||
25 | hw/rtc/allwinner-rtc.c | 2 +- | ||
26 | hw/rtc/aspeed_rtc.c | 2 +- | ||
27 | hw/rtc/ds1338.c | 2 +- | ||
28 | hw/rtc/exynos4210_rtc.c | 2 +- | ||
29 | hw/rtc/goldfish_rtc.c | 2 +- | ||
30 | hw/rtc/m41t80.c | 2 +- | ||
31 | hw/rtc/m48t59.c | 2 +- | ||
32 | hw/rtc/mc146818rtc.c | 2 +- | ||
33 | hw/rtc/pl031.c | 2 +- | ||
34 | hw/rtc/twl92230.c | 2 +- | ||
35 | hw/rtc/xlnx-zynqmp-rtc.c | 2 +- | ||
36 | hw/s390x/tod-tcg.c | 2 +- | ||
37 | hw/scsi/megasas.c | 2 +- | ||
38 | net/dump.c | 2 +- | ||
39 | softmmu/rtc.c | 2 +- | ||
40 | 24 files changed, 80 insertions(+), 25 deletions(-) | ||
41 | create mode 100644 include/sysemu/rtc.h | ||
42 | 11 | ||
43 | diff --git a/include/qemu-common.h b/include/qemu-common.h | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
44 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/include/qemu-common.h | 14 | --- a/target/arm/vfp_helper.c |
46 | +++ b/include/qemu-common.h | 15 | +++ b/target/arm/vfp_helper.c |
47 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
48 | int qemu_main(int argc, char **argv, char **envp); | 17 | return r; |
49 | #endif | 18 | } |
50 | 19 | ||
51 | -void qemu_get_timedate(struct tm *tm, int offset); | ||
52 | -int qemu_timedate_diff(struct tm *tm); | ||
53 | - | ||
54 | void *qemu_oom_check(void *ptr); | ||
55 | |||
56 | ssize_t qemu_write_full(int fd, const void *buf, size_t count) | ||
57 | diff --git a/include/sysemu/rtc.h b/include/sysemu/rtc.h | ||
58 | new file mode 100644 | ||
59 | index XXXXXXX..XXXXXXX | ||
60 | --- /dev/null | ||
61 | +++ b/include/sysemu/rtc.h | ||
62 | @@ -XXX,XX +XXX,XX @@ | ||
63 | +/* | 20 | +/* |
64 | + * RTC configuration and clock read | 21 | + * Increased precision version: |
65 | + * | 22 | + * input is a 13 bit fixed point number |
66 | + * Copyright (c) 2003-2021 QEMU contributors | 23 | + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. |
67 | + * | 24 | + * result range 4096 .. 8191 for a number from 1.0 to 2.0 |
68 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
69 | + * of this software and associated documentation files (the "Software"), to deal | ||
70 | + * in the Software without restriction, including without limitation the rights | ||
71 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
72 | + * copies of the Software, and to permit persons to whom the Software is | ||
73 | + * furnished to do so, subject to the following conditions: | ||
74 | + * | ||
75 | + * The above copyright notice and this permission notice shall be included in | ||
76 | + * all copies or substantial portions of the Software. | ||
77 | + * | ||
78 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
79 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
80 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
81 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
82 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
83 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
84 | + * THE SOFTWARE. | ||
85 | + */ | 25 | + */ |
26 | +static int recip_estimate_incprec(int input) | ||
27 | +{ | ||
28 | + int a, b, r; | ||
29 | + assert(2048 <= input && input < 4096); | ||
30 | + a = (input * 2) + 1; | ||
31 | + /* | ||
32 | + * The pseudocode expresses this as an operation on infinite | ||
33 | + * precision reals where it calculates 2^25 / a and then looks | ||
34 | + * at the error between that and the rounded-down-to-integer | ||
35 | + * value to see if it should instead round up. We instead | ||
36 | + * follow the same approach as the pseudocode for the 8-bit | ||
37 | + * precision version, and calculate (2 * (2^25 / a)) as an | ||
38 | + * integer so we can do the "add one and halve" to round it. | ||
39 | + * So the 1 << 26 here is correct. | ||
40 | + */ | ||
41 | + b = (1 << 26) / a; | ||
42 | + r = (b + 1) >> 1; | ||
43 | + assert(4096 <= r && r < 8192); | ||
44 | + return r; | ||
45 | +} | ||
86 | + | 46 | + |
87 | +#ifndef SYSEMU_RTC_H | 47 | /* |
88 | +#define SYSEMU_RTC_H | 48 | * Common wrapper to call recip_estimate |
89 | + | 49 | * |
90 | +/** | 50 | @@ -XXX,XX +XXX,XX @@ static int recip_estimate(int input) |
91 | + * qemu_get_timedate: Get the current RTC time | 51 | * callee. |
92 | + * @tm: struct tm to fill in with RTC time | ||
93 | + * @offset: offset in seconds to adjust the RTC time by before | ||
94 | + * converting to struct tm format. | ||
95 | + * | ||
96 | + * This function fills in @tm with the current RTC time, as adjusted | ||
97 | + * by @offset (for example, if @offset is 3600 then the returned time/date | ||
98 | + * will be one hour further ahead than the current RTC time). | ||
99 | + * | ||
100 | + * The usual use is by RTC device models, which should call this function | ||
101 | + * to find the time/date value that they should return to the guest | ||
102 | + * when it reads the RTC registers. | ||
103 | + * | ||
104 | + * The behaviour of the clock whose value this function returns will | ||
105 | + * depend on the -rtc command line option passed by the user. | ||
106 | + */ | ||
107 | +void qemu_get_timedate(struct tm *tm, int offset); | ||
108 | + | ||
109 | +/** | ||
110 | + * qemu_timedate_diff: Return difference between a struct tm and the RTC | ||
111 | + * @tm: struct tm containing the date/time to compare against | ||
112 | + * | ||
113 | + * Returns the difference in seconds between the RTC clock time | ||
114 | + * and the date/time specified in @tm. For example, if @tm specifies | ||
115 | + * a timestamp one hour further ahead than the current RTC time | ||
116 | + * then this function will return 3600. | ||
117 | + */ | ||
118 | +int qemu_timedate_diff(struct tm *tm); | ||
119 | + | ||
120 | +#endif | ||
121 | diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/hw/arm/omap1.c | ||
124 | +++ b/hw/arm/omap1.c | ||
125 | @@ -XXX,XX +XXX,XX @@ | ||
126 | #include "qemu/error-report.h" | ||
127 | #include "qemu/main-loop.h" | ||
128 | #include "qapi/error.h" | ||
129 | -#include "qemu-common.h" | ||
130 | #include "cpu.h" | ||
131 | #include "exec/address-spaces.h" | ||
132 | #include "hw/hw.h" | ||
133 | @@ -XXX,XX +XXX,XX @@ | ||
134 | #include "sysemu/qtest.h" | ||
135 | #include "sysemu/reset.h" | ||
136 | #include "sysemu/runstate.h" | ||
137 | +#include "sysemu/rtc.h" | ||
138 | #include "qemu/range.h" | ||
139 | #include "hw/sysbus.h" | ||
140 | #include "qemu/cutils.h" | ||
141 | diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/hw/arm/pxa2xx.c | ||
144 | +++ b/hw/arm/pxa2xx.c | ||
145 | @@ -XXX,XX +XXX,XX @@ | ||
146 | */ | 52 | */ |
147 | 53 | ||
148 | #include "qemu/osdep.h" | 54 | -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) |
149 | -#include "qemu-common.h" | 55 | +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, |
150 | #include "qemu/error-report.h" | 56 | + bool increasedprecision) |
151 | #include "qemu/module.h" | 57 | { |
152 | #include "qapi/error.h" | 58 | uint32_t scaled, estimate; |
153 | @@ -XXX,XX +XXX,XX @@ | 59 | uint64_t result_frac; |
154 | #include "chardev/char-fe.h" | 60 | @@ -XXX,XX +XXX,XX @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) |
155 | #include "sysemu/blockdev.h" | 61 | } |
156 | #include "sysemu/qtest.h" | 62 | } |
157 | +#include "sysemu/rtc.h" | 63 | |
158 | #include "qemu/cutils.h" | 64 | - /* scaled = UInt('1':fraction<51:44>) */ |
159 | #include "qemu/log.h" | 65 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); |
160 | #include "qom/object.h" | 66 | - estimate = recip_estimate(scaled); |
161 | diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c | 67 | + if (increasedprecision) { |
162 | index XXXXXXX..XXXXXXX 100644 | 68 | + /* scaled = UInt('1':fraction<51:41>) */ |
163 | --- a/hw/arm/strongarm.c | 69 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); |
164 | +++ b/hw/arm/strongarm.c | 70 | + estimate = recip_estimate_incprec(scaled); |
165 | @@ -XXX,XX +XXX,XX @@ | 71 | + } else { |
166 | */ | 72 | + /* scaled = UInt('1':fraction<51:44>) */ |
167 | 73 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | |
168 | #include "qemu/osdep.h" | 74 | + estimate = recip_estimate(scaled); |
169 | -#include "qemu-common.h" | 75 | + } |
170 | #include "cpu.h" | 76 | |
171 | #include "hw/irq.h" | 77 | result_exp = exp_off - *exp; |
172 | #include "hw/qdev-properties.h" | 78 | - result_frac = deposit64(0, 44, 8, estimate); |
173 | @@ -XXX,XX +XXX,XX @@ | 79 | + if (increasedprecision) { |
174 | #include "chardev/char-fe.h" | 80 | + result_frac = deposit64(0, 40, 12, estimate); |
175 | #include "chardev/char-serial.h" | 81 | + } else { |
176 | #include "sysemu/sysemu.h" | 82 | + result_frac = deposit64(0, 44, 8, estimate); |
177 | +#include "sysemu/rtc.h" | 83 | + } |
178 | #include "hw/ssi/ssi.h" | 84 | if (result_exp == 0) { |
179 | #include "qapi/error.h" | 85 | result_frac = deposit64(result_frac >> 1, 51, 1, 1); |
180 | #include "qemu/cutils.h" | 86 | } else if (result_exp == -1) { |
181 | diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c | 87 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) |
182 | index XXXXXXX..XXXXXXX 100644 | 88 | } |
183 | --- a/hw/misc/mac_via.c | 89 | |
184 | +++ b/hw/misc/mac_via.c | 90 | f64_frac = call_recip_estimate(&f16_exp, 29, |
185 | @@ -XXX,XX +XXX,XX @@ | 91 | - ((uint64_t) f16_frac) << (52 - 10)); |
186 | */ | 92 | + ((uint64_t) f16_frac) << (52 - 10), false); |
187 | 93 | ||
188 | #include "qemu/osdep.h" | 94 | /* result = sign : result_exp<4:0> : fraction<51:42> */ |
189 | -#include "qemu-common.h" | 95 | f16_val = deposit32(0, 15, 1, f16_sign); |
190 | #include "migration/vmstate.h" | 96 | @@ -XXX,XX +XXX,XX @@ static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) |
191 | #include "hw/sysbus.h" | 97 | } |
192 | #include "hw/irq.h" | 98 | |
193 | @@ -XXX,XX +XXX,XX @@ | 99 | f64_frac = call_recip_estimate(&f32_exp, 253, |
194 | #include "hw/qdev-properties.h" | 100 | - ((uint64_t) f32_frac) << (52 - 23)); |
195 | #include "hw/qdev-properties-system.h" | 101 | + ((uint64_t) f32_frac) << (52 - 23), rpres); |
196 | #include "sysemu/block-backend.h" | 102 | |
197 | +#include "sysemu/rtc.h" | 103 | /* result = sign : result_exp<7:0> : fraction<51:29> */ |
198 | #include "trace.h" | 104 | f32_val = deposit32(0, 31, 1, f32_sign); |
199 | #include "qemu/log.h" | 105 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst) |
200 | 106 | return float64_set_sign(float64_zero, float64_is_neg(f64)); | |
201 | diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c | 107 | } |
202 | index XXXXXXX..XXXXXXX 100644 | 108 | |
203 | --- a/hw/misc/macio/cuda.c | 109 | - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); |
204 | +++ b/hw/misc/macio/cuda.c | 110 | + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); |
205 | @@ -XXX,XX +XXX,XX @@ | 111 | |
206 | */ | 112 | /* result = sign : result_exp<10:0> : fraction<51:0>; */ |
207 | 113 | f64_val = deposit64(0, 63, 1, f64_sign); | |
208 | #include "qemu/osdep.h" | ||
209 | -#include "qemu-common.h" | ||
210 | #include "hw/ppc/mac.h" | ||
211 | #include "hw/qdev-properties.h" | ||
212 | #include "migration/vmstate.h" | ||
213 | @@ -XXX,XX +XXX,XX @@ | ||
214 | #include "qapi/error.h" | ||
215 | #include "qemu/timer.h" | ||
216 | #include "sysemu/runstate.h" | ||
217 | +#include "sysemu/rtc.h" | ||
218 | #include "qapi/error.h" | ||
219 | #include "qemu/cutils.h" | ||
220 | #include "qemu/log.h" | ||
221 | diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c | ||
222 | index XXXXXXX..XXXXXXX 100644 | ||
223 | --- a/hw/misc/macio/pmu.c | ||
224 | +++ b/hw/misc/macio/pmu.c | ||
225 | @@ -XXX,XX +XXX,XX @@ | ||
226 | */ | ||
227 | |||
228 | #include "qemu/osdep.h" | ||
229 | -#include "qemu-common.h" | ||
230 | #include "hw/ppc/mac.h" | ||
231 | #include "hw/qdev-properties.h" | ||
232 | #include "migration/vmstate.h" | ||
233 | @@ -XXX,XX +XXX,XX @@ | ||
234 | #include "qapi/error.h" | ||
235 | #include "qemu/timer.h" | ||
236 | #include "sysemu/runstate.h" | ||
237 | +#include "sysemu/rtc.h" | ||
238 | #include "qapi/error.h" | ||
239 | #include "qemu/cutils.h" | ||
240 | #include "qemu/log.h" | ||
241 | diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c | ||
242 | index XXXXXXX..XXXXXXX 100644 | ||
243 | --- a/hw/ppc/spapr_rtc.c | ||
244 | +++ b/hw/ppc/spapr_rtc.c | ||
245 | @@ -XXX,XX +XXX,XX @@ | ||
246 | */ | ||
247 | |||
248 | #include "qemu/osdep.h" | ||
249 | -#include "qemu-common.h" | ||
250 | #include "qemu/timer.h" | ||
251 | #include "sysemu/sysemu.h" | ||
252 | +#include "sysemu/rtc.h" | ||
253 | #include "hw/ppc/spapr.h" | ||
254 | #include "migration/vmstate.h" | ||
255 | #include "qapi/error.h" | ||
256 | diff --git a/hw/rtc/allwinner-rtc.c b/hw/rtc/allwinner-rtc.c | ||
257 | index XXXXXXX..XXXXXXX 100644 | ||
258 | --- a/hw/rtc/allwinner-rtc.c | ||
259 | +++ b/hw/rtc/allwinner-rtc.c | ||
260 | @@ -XXX,XX +XXX,XX @@ | ||
261 | #include "migration/vmstate.h" | ||
262 | #include "qemu/log.h" | ||
263 | #include "qemu/module.h" | ||
264 | -#include "qemu-common.h" | ||
265 | #include "hw/qdev-properties.h" | ||
266 | #include "hw/rtc/allwinner-rtc.h" | ||
267 | +#include "sysemu/rtc.h" | ||
268 | #include "trace.h" | ||
269 | |||
270 | /* RTC registers */ | ||
271 | diff --git a/hw/rtc/aspeed_rtc.c b/hw/rtc/aspeed_rtc.c | ||
272 | index XXXXXXX..XXXXXXX 100644 | ||
273 | --- a/hw/rtc/aspeed_rtc.c | ||
274 | +++ b/hw/rtc/aspeed_rtc.c | ||
275 | @@ -XXX,XX +XXX,XX @@ | ||
276 | */ | ||
277 | |||
278 | #include "qemu/osdep.h" | ||
279 | -#include "qemu-common.h" | ||
280 | #include "hw/rtc/aspeed_rtc.h" | ||
281 | #include "migration/vmstate.h" | ||
282 | #include "qemu/log.h" | ||
283 | #include "qemu/timer.h" | ||
284 | +#include "sysemu/rtc.h" | ||
285 | |||
286 | #include "trace.h" | ||
287 | |||
288 | diff --git a/hw/rtc/ds1338.c b/hw/rtc/ds1338.c | ||
289 | index XXXXXXX..XXXXXXX 100644 | ||
290 | --- a/hw/rtc/ds1338.c | ||
291 | +++ b/hw/rtc/ds1338.c | ||
292 | @@ -XXX,XX +XXX,XX @@ | ||
293 | */ | ||
294 | |||
295 | #include "qemu/osdep.h" | ||
296 | -#include "qemu-common.h" | ||
297 | #include "hw/i2c/i2c.h" | ||
298 | #include "migration/vmstate.h" | ||
299 | #include "qemu/bcd.h" | ||
300 | #include "qemu/module.h" | ||
301 | #include "qom/object.h" | ||
302 | +#include "sysemu/rtc.h" | ||
303 | |||
304 | /* Size of NVRAM including both the user-accessible area and the | ||
305 | * secondary register area. | ||
306 | diff --git a/hw/rtc/exynos4210_rtc.c b/hw/rtc/exynos4210_rtc.c | ||
307 | index XXXXXXX..XXXXXXX 100644 | ||
308 | --- a/hw/rtc/exynos4210_rtc.c | ||
309 | +++ b/hw/rtc/exynos4210_rtc.c | ||
310 | @@ -XXX,XX +XXX,XX @@ | ||
311 | */ | ||
312 | |||
313 | #include "qemu/osdep.h" | ||
314 | -#include "qemu-common.h" | ||
315 | #include "qemu/log.h" | ||
316 | #include "qemu/module.h" | ||
317 | #include "hw/sysbus.h" | ||
318 | @@ -XXX,XX +XXX,XX @@ | ||
319 | |||
320 | #include "hw/arm/exynos4210.h" | ||
321 | #include "qom/object.h" | ||
322 | +#include "sysemu/rtc.h" | ||
323 | |||
324 | #define DEBUG_RTC 0 | ||
325 | |||
326 | diff --git a/hw/rtc/goldfish_rtc.c b/hw/rtc/goldfish_rtc.c | ||
327 | index XXXXXXX..XXXXXXX 100644 | ||
328 | --- a/hw/rtc/goldfish_rtc.c | ||
329 | +++ b/hw/rtc/goldfish_rtc.c | ||
330 | @@ -XXX,XX +XXX,XX @@ | ||
331 | */ | ||
332 | |||
333 | #include "qemu/osdep.h" | ||
334 | -#include "qemu-common.h" | ||
335 | #include "hw/rtc/goldfish_rtc.h" | ||
336 | #include "migration/vmstate.h" | ||
337 | #include "hw/irq.h" | ||
338 | @@ -XXX,XX +XXX,XX @@ | ||
339 | #include "qemu/bitops.h" | ||
340 | #include "qemu/timer.h" | ||
341 | #include "sysemu/sysemu.h" | ||
342 | +#include "sysemu/rtc.h" | ||
343 | #include "qemu/cutils.h" | ||
344 | #include "qemu/log.h" | ||
345 | |||
346 | diff --git a/hw/rtc/m41t80.c b/hw/rtc/m41t80.c | ||
347 | index XXXXXXX..XXXXXXX 100644 | ||
348 | --- a/hw/rtc/m41t80.c | ||
349 | +++ b/hw/rtc/m41t80.c | ||
350 | @@ -XXX,XX +XXX,XX @@ | ||
351 | */ | ||
352 | |||
353 | #include "qemu/osdep.h" | ||
354 | -#include "qemu-common.h" | ||
355 | #include "qemu/log.h" | ||
356 | #include "qemu/module.h" | ||
357 | #include "qemu/timer.h" | ||
358 | #include "qemu/bcd.h" | ||
359 | #include "hw/i2c/i2c.h" | ||
360 | #include "qom/object.h" | ||
361 | +#include "sysemu/rtc.h" | ||
362 | |||
363 | #define TYPE_M41T80 "m41t80" | ||
364 | OBJECT_DECLARE_SIMPLE_TYPE(M41t80State, M41T80) | ||
365 | diff --git a/hw/rtc/m48t59.c b/hw/rtc/m48t59.c | ||
366 | index XXXXXXX..XXXXXXX 100644 | ||
367 | --- a/hw/rtc/m48t59.c | ||
368 | +++ b/hw/rtc/m48t59.c | ||
369 | @@ -XXX,XX +XXX,XX @@ | ||
370 | */ | ||
371 | |||
372 | #include "qemu/osdep.h" | ||
373 | -#include "qemu-common.h" | ||
374 | #include "hw/irq.h" | ||
375 | #include "hw/qdev-properties.h" | ||
376 | #include "hw/rtc/m48t59.h" | ||
377 | #include "qemu/timer.h" | ||
378 | #include "sysemu/runstate.h" | ||
379 | +#include "sysemu/rtc.h" | ||
380 | #include "sysemu/sysemu.h" | ||
381 | #include "hw/sysbus.h" | ||
382 | #include "qapi/error.h" | ||
383 | diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c | ||
384 | index XXXXXXX..XXXXXXX 100644 | ||
385 | --- a/hw/rtc/mc146818rtc.c | ||
386 | +++ b/hw/rtc/mc146818rtc.c | ||
387 | @@ -XXX,XX +XXX,XX @@ | ||
388 | */ | ||
389 | |||
390 | #include "qemu/osdep.h" | ||
391 | -#include "qemu-common.h" | ||
392 | #include "qemu/cutils.h" | ||
393 | #include "qemu/module.h" | ||
394 | #include "qemu/bcd.h" | ||
395 | @@ -XXX,XX +XXX,XX @@ | ||
396 | #include "sysemu/replay.h" | ||
397 | #include "sysemu/reset.h" | ||
398 | #include "sysemu/runstate.h" | ||
399 | +#include "sysemu/rtc.h" | ||
400 | #include "hw/rtc/mc146818rtc.h" | ||
401 | #include "hw/rtc/mc146818rtc_regs.h" | ||
402 | #include "migration/vmstate.h" | ||
403 | diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c | ||
404 | index XXXXXXX..XXXXXXX 100644 | ||
405 | --- a/hw/rtc/pl031.c | ||
406 | +++ b/hw/rtc/pl031.c | ||
407 | @@ -XXX,XX +XXX,XX @@ | ||
408 | */ | ||
409 | |||
410 | #include "qemu/osdep.h" | ||
411 | -#include "qemu-common.h" | ||
412 | #include "hw/rtc/pl031.h" | ||
413 | #include "migration/vmstate.h" | ||
414 | #include "hw/irq.h" | ||
415 | @@ -XXX,XX +XXX,XX @@ | ||
416 | #include "hw/sysbus.h" | ||
417 | #include "qemu/timer.h" | ||
418 | #include "sysemu/sysemu.h" | ||
419 | +#include "sysemu/rtc.h" | ||
420 | #include "qemu/cutils.h" | ||
421 | #include "qemu/log.h" | ||
422 | #include "qemu/module.h" | ||
423 | diff --git a/hw/rtc/twl92230.c b/hw/rtc/twl92230.c | ||
424 | index XXXXXXX..XXXXXXX 100644 | ||
425 | --- a/hw/rtc/twl92230.c | ||
426 | +++ b/hw/rtc/twl92230.c | ||
427 | @@ -XXX,XX +XXX,XX @@ | ||
428 | */ | ||
429 | |||
430 | #include "qemu/osdep.h" | ||
431 | -#include "qemu-common.h" | ||
432 | #include "qemu/timer.h" | ||
433 | #include "hw/i2c/i2c.h" | ||
434 | #include "hw/irq.h" | ||
435 | #include "migration/qemu-file-types.h" | ||
436 | #include "migration/vmstate.h" | ||
437 | #include "sysemu/sysemu.h" | ||
438 | +#include "sysemu/rtc.h" | ||
439 | #include "qemu/bcd.h" | ||
440 | #include "qemu/module.h" | ||
441 | #include "qom/object.h" | ||
442 | diff --git a/hw/rtc/xlnx-zynqmp-rtc.c b/hw/rtc/xlnx-zynqmp-rtc.c | ||
443 | index XXXXXXX..XXXXXXX 100644 | ||
444 | --- a/hw/rtc/xlnx-zynqmp-rtc.c | ||
445 | +++ b/hw/rtc/xlnx-zynqmp-rtc.c | ||
446 | @@ -XXX,XX +XXX,XX @@ | ||
447 | */ | ||
448 | |||
449 | #include "qemu/osdep.h" | ||
450 | -#include "qemu-common.h" | ||
451 | #include "hw/sysbus.h" | ||
452 | #include "hw/register.h" | ||
453 | #include "qemu/bitops.h" | ||
454 | @@ -XXX,XX +XXX,XX @@ | ||
455 | #include "hw/irq.h" | ||
456 | #include "qemu/cutils.h" | ||
457 | #include "sysemu/sysemu.h" | ||
458 | +#include "sysemu/rtc.h" | ||
459 | #include "trace.h" | ||
460 | #include "hw/rtc/xlnx-zynqmp-rtc.h" | ||
461 | #include "migration/vmstate.h" | ||
462 | diff --git a/hw/s390x/tod-tcg.c b/hw/s390x/tod-tcg.c | ||
463 | index XXXXXXX..XXXXXXX 100644 | ||
464 | --- a/hw/s390x/tod-tcg.c | ||
465 | +++ b/hw/s390x/tod-tcg.c | ||
466 | @@ -XXX,XX +XXX,XX @@ | ||
467 | */ | ||
468 | |||
469 | #include "qemu/osdep.h" | ||
470 | -#include "qemu-common.h" | ||
471 | #include "qapi/error.h" | ||
472 | #include "hw/s390x/tod.h" | ||
473 | #include "qemu/timer.h" | ||
474 | @@ -XXX,XX +XXX,XX @@ | ||
475 | #include "qemu/module.h" | ||
476 | #include "cpu.h" | ||
477 | #include "tcg/tcg_s390x.h" | ||
478 | +#include "sysemu/rtc.h" | ||
479 | |||
480 | static void qemu_s390_tod_get(const S390TODState *td, S390TOD *tod, | ||
481 | Error **errp) | ||
482 | diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c | ||
483 | index XXXXXXX..XXXXXXX 100644 | ||
484 | --- a/hw/scsi/megasas.c | ||
485 | +++ b/hw/scsi/megasas.c | ||
486 | @@ -XXX,XX +XXX,XX @@ | ||
487 | */ | ||
488 | |||
489 | #include "qemu/osdep.h" | ||
490 | -#include "qemu-common.h" | ||
491 | #include "hw/pci/pci.h" | ||
492 | #include "hw/qdev-properties.h" | ||
493 | #include "sysemu/dma.h" | ||
494 | #include "sysemu/block-backend.h" | ||
495 | +#include "sysemu/rtc.h" | ||
496 | #include "hw/pci/msi.h" | ||
497 | #include "hw/pci/msix.h" | ||
498 | #include "qemu/iov.h" | ||
499 | diff --git a/net/dump.c b/net/dump.c | ||
500 | index XXXXXXX..XXXXXXX 100644 | ||
501 | --- a/net/dump.c | ||
502 | +++ b/net/dump.c | ||
503 | @@ -XXX,XX +XXX,XX @@ | ||
504 | */ | ||
505 | |||
506 | #include "qemu/osdep.h" | ||
507 | -#include "qemu-common.h" | ||
508 | #include "clients.h" | ||
509 | #include "qapi/error.h" | ||
510 | #include "qemu/error-report.h" | ||
511 | @@ -XXX,XX +XXX,XX @@ | ||
512 | #include "qapi/visitor.h" | ||
513 | #include "net/filter.h" | ||
514 | #include "qom/object.h" | ||
515 | +#include "sysemu/rtc.h" | ||
516 | |||
517 | typedef struct DumpState { | ||
518 | int64_t start_ts; | ||
519 | diff --git a/softmmu/rtc.c b/softmmu/rtc.c | ||
520 | index XXXXXXX..XXXXXXX 100644 | ||
521 | --- a/softmmu/rtc.c | ||
522 | +++ b/softmmu/rtc.c | ||
523 | @@ -XXX,XX +XXX,XX @@ | ||
524 | */ | ||
525 | |||
526 | #include "qemu/osdep.h" | ||
527 | -#include "qemu-common.h" | ||
528 | #include "qemu/cutils.h" | ||
529 | #include "qapi/error.h" | ||
530 | #include "qapi/qmp/qerror.h" | ||
531 | @@ -XXX,XX +XXX,XX @@ | ||
532 | #include "qom/object.h" | ||
533 | #include "sysemu/replay.h" | ||
534 | #include "sysemu/sysemu.h" | ||
535 | +#include "sysemu/rtc.h" | ||
536 | |||
537 | static enum { | ||
538 | RTC_BASE_UTC, | ||
539 | -- | 114 | -- |
540 | 2.25.1 | 115 | 2.34.1 |
541 | |||
542 | diff view generated by jsdifflib |
1 | The GICD_CTLR distributor register has enable bits which control | 1 | Implement the increased precision variation of FRSQRTE. In the |
---|---|---|---|
2 | whether the different interrupt groups (Group 0, Non-secure Group 1 | 2 | pseudocode this corresponds to the handling of the |
3 | and Secure Group 1) are forwarded to the CPU. We get this right for | 3 | "increasedprecision" boolean in the FPRSqrtEstimate() and |
4 | traditional interrupts, but forgot to account for it when adding | 4 | RecipSqrtEstimate() functions. |
5 | LPIs. LPIs are always Group 1 NS and if the EnableGrp1NS bit is not | ||
6 | set we must not forward them to the CPU. | ||
7 | 5 | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-id: 20220122182444.724087-7-peter.maydell@linaro.org | ||
11 | --- | 8 | --- |
12 | hw/intc/arm_gicv3.c | 1 + | 9 | target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- |
13 | 1 file changed, 1 insertion(+) | 10 | 1 file changed, 64 insertions(+), 13 deletions(-) |
14 | 11 | ||
15 | diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c | 12 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/intc/arm_gicv3.c | 14 | --- a/target/arm/vfp_helper.c |
18 | +++ b/hw/intc/arm_gicv3.c | 15 | +++ b/target/arm/vfp_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) | 16 | @@ -XXX,XX +XXX,XX @@ static int do_recip_sqrt_estimate(int a) |
17 | return estimate; | ||
18 | } | ||
19 | |||
20 | +static int do_recip_sqrt_estimate_incprec(int a) | ||
21 | +{ | ||
22 | + /* | ||
23 | + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate | ||
24 | + * in terms of an infinite-precision floating point calculation of a | ||
25 | + * square root. We implement this using the same kind of pure integer | ||
26 | + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. | ||
27 | + */ | ||
28 | + int64_t b, estimate; | ||
29 | |||
30 | -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
31 | + assert(1024 <= a && a < 4096); | ||
32 | + if (a < 2048) { | ||
33 | + a = a * 2 + 1; | ||
34 | + } else { | ||
35 | + a = (a >> 1) << 1; | ||
36 | + a = (a + 1) * 2; | ||
37 | + } | ||
38 | + b = 8192; | ||
39 | + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { | ||
40 | + b += 1; | ||
41 | + } | ||
42 | + estimate = (b + 1) / 2; | ||
43 | + | ||
44 | + assert(4096 <= estimate && estimate < 8192); | ||
45 | + | ||
46 | + return estimate; | ||
47 | +} | ||
48 | + | ||
49 | +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, | ||
50 | + bool increasedprecision) | ||
51 | { | ||
52 | int estimate; | ||
53 | uint32_t scaled; | ||
54 | @@ -XXX,XX +XXX,XX @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) | ||
55 | frac = extract64(frac, 0, 51) << 1; | ||
20 | } | 56 | } |
21 | 57 | ||
22 | if ((cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) && cs->gic->lpi_enable && | 58 | - if (*exp & 1) { |
23 | + (cs->gic->gicd_ctlr & GICD_CTLR_EN_GRP1NS) && | 59 | - /* scaled = UInt('01':fraction<51:45>) */ |
24 | (cs->hpplpi.prio != 0xff)) { | 60 | - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); |
25 | if (irqbetter(cs, cs->hpplpi.irq, cs->hpplpi.prio)) { | 61 | + if (increasedprecision) { |
26 | cs->hppi.irq = cs->hpplpi.irq; | 62 | + if (*exp & 1) { |
63 | + /* scaled = UInt('01':fraction<51:42>) */ | ||
64 | + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); | ||
65 | + } else { | ||
66 | + /* scaled = UInt('1':fraction<51:41>) */ | ||
67 | + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); | ||
68 | + } | ||
69 | + estimate = do_recip_sqrt_estimate_incprec(scaled); | ||
70 | } else { | ||
71 | - /* scaled = UInt('1':fraction<51:44>) */ | ||
72 | - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
73 | + if (*exp & 1) { | ||
74 | + /* scaled = UInt('01':fraction<51:45>) */ | ||
75 | + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); | ||
76 | + } else { | ||
77 | + /* scaled = UInt('1':fraction<51:44>) */ | ||
78 | + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); | ||
79 | + } | ||
80 | + estimate = do_recip_sqrt_estimate(scaled); | ||
81 | } | ||
82 | - estimate = do_recip_sqrt_estimate(scaled); | ||
83 | |||
84 | *exp = (exp_off - *exp) / 2; | ||
85 | - return extract64(estimate, 0, 8) << 44; | ||
86 | + if (increasedprecision) { | ||
87 | + return extract64(estimate, 0, 12) << 40; | ||
88 | + } else { | ||
89 | + return extract64(estimate, 0, 8) << 44; | ||
90 | + } | ||
91 | } | ||
92 | |||
93 | uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
94 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) | ||
95 | |||
96 | f64_frac = ((uint64_t) f16_frac) << (52 - 10); | ||
97 | |||
98 | - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); | ||
99 | + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); | ||
100 | |||
101 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ | ||
102 | val = deposit32(0, 15, 1, f16_sign); | ||
103 | @@ -XXX,XX +XXX,XX @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) | ||
104 | |||
105 | f64_frac = ((uint64_t) f32_frac) << 29; | ||
106 | |||
107 | - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); | ||
108 | + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); | ||
109 | |||
110 | - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ | ||
111 | + /* | ||
112 | + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) | ||
113 | + * or for increased precision | ||
114 | + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) | ||
115 | + */ | ||
116 | val = deposit32(0, 31, 1, f32_sign); | ||
117 | val = deposit32(val, 23, 8, f32_exp); | ||
118 | - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
119 | + if (rpres) { | ||
120 | + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); | ||
121 | + } else { | ||
122 | + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); | ||
123 | + } | ||
124 | return make_float32(val); | ||
125 | } | ||
126 | |||
127 | @@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s) | ||
128 | return float64_zero; | ||
129 | } | ||
130 | |||
131 | - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); | ||
132 | + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); | ||
133 | |||
134 | /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ | ||
135 | val = deposit64(0, 61, 1, f64_sign); | ||
27 | -- | 136 | -- |
28 | 2.25.1 | 137 | 2.34.1 |
29 | |||
30 | diff view generated by jsdifflib |
1 | It's a new year; update the copyright strings for our | 1 | Now the emulation is complete, we can enable FEAT_RPRES for the 'max' |
---|---|---|---|
2 | help/version/about information and for our documentation. | 2 | CPU type. |
3 | 3 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Message-id: 20220120124713.288303-1-peter.maydell@linaro.org | ||
8 | --- | 6 | --- |
9 | docs/conf.py | 2 +- | 7 | docs/system/arm/emulation.rst | 1 + |
10 | include/qemu-common.h | 2 +- | 8 | target/arm/tcg/cpu64.c | 1 + |
11 | 2 files changed, 2 insertions(+), 2 deletions(-) | 9 | 2 files changed, 2 insertions(+) |
12 | 10 | ||
13 | diff --git a/docs/conf.py b/docs/conf.py | 11 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst |
14 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/docs/conf.py | 13 | --- a/docs/system/arm/emulation.rst |
16 | +++ b/docs/conf.py | 14 | +++ b/docs/system/arm/emulation.rst |
17 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: |
18 | 16 | - FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions) | |
19 | # General information about the project. | 17 | - FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental) |
20 | project = u'QEMU' | 18 | - FEAT_RNG (Random number generator) |
21 | -copyright = u'2021, The QEMU Project Developers' | 19 | +- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE) |
22 | +copyright = u'2022, The QEMU Project Developers' | 20 | - FEAT_S2FWB (Stage 2 forced Write-Back) |
23 | author = u'The QEMU Project Developers' | 21 | - FEAT_SB (Speculation Barrier) |
24 | 22 | - FEAT_SEL2 (Secure EL2) | |
25 | # The version info for the project you're documenting, acts as replacement for | 23 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c |
26 | diff --git a/include/qemu-common.h b/include/qemu-common.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/include/qemu-common.h | 25 | --- a/target/arm/tcg/cpu64.c |
29 | +++ b/include/qemu-common.h | 26 | +++ b/target/arm/tcg/cpu64.c |
30 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) |
31 | #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR) | 28 | cpu->isar.id_aa64isar1 = t; |
32 | 29 | ||
33 | /* Copyright string for -version arguments, About dialogs, etc */ | 30 | t = cpu->isar.id_aa64isar2; |
34 | -#define QEMU_COPYRIGHT "Copyright (c) 2003-2021 " \ | 31 | + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ |
35 | +#define QEMU_COPYRIGHT "Copyright (c) 2003-2022 " \ | 32 | t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ |
36 | "Fabrice Bellard and the QEMU Project developers" | 33 | t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ |
37 | 34 | t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ | |
38 | /* Bug reporting information for --help arguments, About dialogs, etc */ | ||
39 | -- | 35 | -- |
40 | 2.25.1 | 36 | 2.34.1 |
41 | |||
42 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Richard Henderson <richard.henderson@linaro.org> | |
2 | |||
3 | Move ARMFPStatusFlavour to cpu.h so that it can be used to index | ||
4 | the new vfp.fp_status[] array. For now, place the array in an anonymous | ||
5 | union with the existing structures. Adjust the order | ||
6 | of the existing structures to match the enum. | ||
7 | |||
8 | Simplify fpstatus_ptr() using the new array. | ||
9 | |||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-id: 20250129013857.135256-7-richard.henderson@linaro.org | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | target/arm/cpu.h | 119 +++++++++++++++++++++---------------- | ||
16 | target/arm/tcg/translate.h | 64 +------------------- | ||
17 | 2 files changed, 70 insertions(+), 113 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/arm/cpu.h | ||
22 | +++ b/target/arm/cpu.h | ||
23 | @@ -XXX,XX +XXX,XX @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; | ||
24 | |||
25 | typedef struct NVICState NVICState; | ||
26 | |||
27 | +/* | ||
28 | + * Enum for indexing vfp.fp_status[]. | ||
29 | + * | ||
30 | + * FPST_A32: is the "normal" fp status for AArch32 insns | ||
31 | + * FPST_A64: is the "normal" fp status for AArch64 insns | ||
32 | + * FPST_A32_F16: used for AArch32 half-precision calculations | ||
33 | + * FPST_A64_F16: used for AArch64 half-precision calculations | ||
34 | + * FPST_STD: the ARM "Standard FPSCR Value" | ||
35 | + * FPST_STD_F16: used for half-precision | ||
36 | + * calculations with the ARM "Standard FPSCR Value" | ||
37 | + * FPST_AH: used for the A64 insns which change behaviour | ||
38 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
39 | + * and the reciprocal and square root estimate/step insns) | ||
40 | + * FPST_AH_F16: used for the A64 insns which change behaviour | ||
41 | + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
42 | + * and the reciprocal and square root estimate/step insns); | ||
43 | + * for half-precision | ||
44 | + * | ||
45 | + * Half-precision operations are governed by a separate | ||
46 | + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
47 | + * status structure to control this. | ||
48 | + * | ||
49 | + * The "Standard FPSCR" is default-NaN, flush-to-zero, | ||
50 | + * round-to-nearest, and is used by any operations (generally | ||
51 | + * Neon) which the architecture defines as controlled by the | ||
52 | + * standard FPSCR value rather than the FPSCR. | ||
53 | + * | ||
54 | + * The "standard FPSCR but for fp16 ops" is needed because | ||
55 | + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
56 | + * using a fixed value for it. | ||
57 | + * | ||
58 | + * The ah_fp_status is needed because some insns have different | ||
59 | + * behaviour when FPCR.AH == 1: they don't update cumulative | ||
60 | + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
61 | + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
62 | + * which means we need an ah_fp_status_f16 as well. | ||
63 | + * | ||
64 | + * To avoid having to transfer exception bits around, we simply | ||
65 | + * say that the FPSCR cumulative exception flags are the logical | ||
66 | + * OR of the flags in the four fp statuses. This relies on the | ||
67 | + * only thing which needs to read the exception flags being | ||
68 | + * an explicit FPSCR read. | ||
69 | + */ | ||
70 | +typedef enum ARMFPStatusFlavour { | ||
71 | + FPST_A32, | ||
72 | + FPST_A64, | ||
73 | + FPST_A32_F16, | ||
74 | + FPST_A64_F16, | ||
75 | + FPST_AH, | ||
76 | + FPST_AH_F16, | ||
77 | + FPST_STD, | ||
78 | + FPST_STD_F16, | ||
79 | +} ARMFPStatusFlavour; | ||
80 | +#define FPST_COUNT 8 | ||
81 | + | ||
82 | typedef struct CPUArchState { | ||
83 | /* Regs for current mode. */ | ||
84 | uint32_t regs[16]; | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
86 | /* Scratch space for aa32 neon expansion. */ | ||
87 | uint32_t scratch[8]; | ||
88 | |||
89 | - /* There are a number of distinct float control structures: | ||
90 | - * | ||
91 | - * fp_status_a32: is the "normal" fp status for AArch32 insns | ||
92 | - * fp_status_a64: is the "normal" fp status for AArch64 insns | ||
93 | - * fp_status_fp16_a32: used for AArch32 half-precision calculations | ||
94 | - * fp_status_fp16_a64: used for AArch64 half-precision calculations | ||
95 | - * standard_fp_status : the ARM "Standard FPSCR Value" | ||
96 | - * standard_fp_status_fp16 : used for half-precision | ||
97 | - * calculations with the ARM "Standard FPSCR Value" | ||
98 | - * ah_fp_status: used for the A64 insns which change behaviour | ||
99 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
100 | - * and the reciprocal and square root estimate/step insns) | ||
101 | - * ah_fp_status_f16: used for the A64 insns which change behaviour | ||
102 | - * when FPCR.AH == 1 (bfloat16 conversions and multiplies, | ||
103 | - * and the reciprocal and square root estimate/step insns); | ||
104 | - * for half-precision | ||
105 | - * | ||
106 | - * Half-precision operations are governed by a separate | ||
107 | - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate | ||
108 | - * status structure to control this. | ||
109 | - * | ||
110 | - * The "Standard FPSCR", ie default-NaN, flush-to-zero, | ||
111 | - * round-to-nearest and is used by any operations (generally | ||
112 | - * Neon) which the architecture defines as controlled by the | ||
113 | - * standard FPSCR value rather than the FPSCR. | ||
114 | - * | ||
115 | - * The "standard FPSCR but for fp16 ops" is needed because | ||
116 | - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than | ||
117 | - * using a fixed value for it. | ||
118 | - * | ||
119 | - * The ah_fp_status is needed because some insns have different | ||
120 | - * behaviour when FPCR.AH == 1: they don't update cumulative | ||
121 | - * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and | ||
122 | - * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, | ||
123 | - * which means we need an ah_fp_status_f16 as well. | ||
124 | - * | ||
125 | - * To avoid having to transfer exception bits around, we simply | ||
126 | - * say that the FPSCR cumulative exception flags are the logical | ||
127 | - * OR of the flags in the four fp statuses. This relies on the | ||
128 | - * only thing which needs to read the exception flags being | ||
129 | - * an explicit FPSCR read. | ||
130 | - */ | ||
131 | - float_status fp_status_a32; | ||
132 | - float_status fp_status_a64; | ||
133 | - float_status fp_status_f16_a32; | ||
134 | - float_status fp_status_f16_a64; | ||
135 | - float_status standard_fp_status; | ||
136 | - float_status standard_fp_status_f16; | ||
137 | - float_status ah_fp_status; | ||
138 | - float_status ah_fp_status_f16; | ||
139 | + /* There are a number of distinct float control structures. */ | ||
140 | + union { | ||
141 | + float_status fp_status[FPST_COUNT]; | ||
142 | + struct { | ||
143 | + float_status fp_status_a32; | ||
144 | + float_status fp_status_a64; | ||
145 | + float_status fp_status_f16_a32; | ||
146 | + float_status fp_status_f16_a64; | ||
147 | + float_status ah_fp_status; | ||
148 | + float_status ah_fp_status_f16; | ||
149 | + float_status standard_fp_status; | ||
150 | + float_status standard_fp_status_f16; | ||
151 | + }; | ||
152 | + }; | ||
153 | |||
154 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ | ||
155 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ | ||
156 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/target/arm/tcg/translate.h | ||
159 | +++ b/target/arm/tcg/translate.h | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) | ||
161 | return (CPUARMTBFlags){ tb->flags, tb->cs_base }; | ||
162 | } | ||
163 | |||
164 | -/* | ||
165 | - * Enum for argument to fpstatus_ptr(). | ||
166 | - */ | ||
167 | -typedef enum ARMFPStatusFlavour { | ||
168 | - FPST_A32, | ||
169 | - FPST_A64, | ||
170 | - FPST_A32_F16, | ||
171 | - FPST_A64_F16, | ||
172 | - FPST_AH, | ||
173 | - FPST_AH_F16, | ||
174 | - FPST_STD, | ||
175 | - FPST_STD_F16, | ||
176 | -} ARMFPStatusFlavour; | ||
177 | - | ||
178 | /** | ||
179 | * fpstatus_ptr: return TCGv_ptr to the specified fp_status field | ||
180 | * | ||
181 | * We have multiple softfloat float_status fields in the Arm CPU state struct | ||
182 | * (see the comment in cpu.h for details). Return a TCGv_ptr which has | ||
183 | * been set up to point to the requested field in the CPU state struct. | ||
184 | - * The options are: | ||
185 | - * | ||
186 | - * FPST_A32 | ||
187 | - * for AArch32 non-FP16 operations controlled by the FPCR | ||
188 | - * FPST_A64 | ||
189 | - * for AArch64 non-FP16 operations controlled by the FPCR | ||
190 | - * FPST_A32_F16 | ||
191 | - * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
192 | - * FPST_A64_F16 | ||
193 | - * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used | ||
194 | - * FPST_AH: | ||
195 | - * for AArch64 operations which change behaviour when AH=1 (specifically, | ||
196 | - * bfloat16 conversions and multiplies, and the reciprocal and square root | ||
197 | - * estimate/step insns) | ||
198 | - * FPST_AH_F16: | ||
199 | - * ditto, but for half-precision operations | ||
200 | - * FPST_STD | ||
201 | - * for A32/T32 Neon operations using the "standard FPSCR value" | ||
202 | - * FPST_STD_F16 | ||
203 | - * as FPST_STD, but where FPCR.FZ16 is to be used | ||
204 | */ | ||
205 | static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) | ||
206 | { | ||
207 | TCGv_ptr statusptr = tcg_temp_new_ptr(); | ||
208 | - int offset; | ||
209 | + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); | ||
210 | |||
211 | - switch (flavour) { | ||
212 | - case FPST_A32: | ||
213 | - offset = offsetof(CPUARMState, vfp.fp_status_a32); | ||
214 | - break; | ||
215 | - case FPST_A64: | ||
216 | - offset = offsetof(CPUARMState, vfp.fp_status_a64); | ||
217 | - break; | ||
218 | - case FPST_A32_F16: | ||
219 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a32); | ||
220 | - break; | ||
221 | - case FPST_A64_F16: | ||
222 | - offset = offsetof(CPUARMState, vfp.fp_status_f16_a64); | ||
223 | - break; | ||
224 | - case FPST_AH: | ||
225 | - offset = offsetof(CPUARMState, vfp.ah_fp_status); | ||
226 | - break; | ||
227 | - case FPST_AH_F16: | ||
228 | - offset = offsetof(CPUARMState, vfp.ah_fp_status_f16); | ||
229 | - break; | ||
230 | - case FPST_STD: | ||
231 | - offset = offsetof(CPUARMState, vfp.standard_fp_status); | ||
232 | - break; | ||
233 | - case FPST_STD_F16: | ||
234 | - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); | ||
235 | - break; | ||
236 | - default: | ||
237 | - g_assert_not_reached(); | ||
238 | - } | ||
239 | tcg_gen_addi_ptr(statusptr, tcg_env, offset); | ||
240 | return statusptr; | ||
241 | } | ||
242 | -- | ||
243 | 2.34.1 | ||
244 | |||
245 | diff view generated by jsdifflib |
1 | From: Cédric Le Goater <clg@kaod.org> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Address should be 0x1E631000 and not 0x1E641000 as initially introduced. | 3 | Replace with fp_status[FPST_STD_F16]. |
4 | 4 | ||
5 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/838 | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Fixes: f25c0ae1079d ("aspeed/soc: Add AST2600 support") | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Suggested-by: Troy Lee <troy_lee@aspeedtech.com> | 7 | Message-id: 20250129013857.135256-8-richard.henderson@linaro.org |
8 | Signed-off-by: Cédric Le Goater <clg@kaod.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Message-id: 20220126083520.4135713-1-clg@kaod.org | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 9 | --- |
13 | hw/arm/aspeed_ast2600.c | 2 +- | 10 | target/arm/cpu.h | 1 - |
14 | 1 file changed, 1 insertion(+), 1 deletion(-) | 11 | target/arm/cpu.c | 4 ++-- |
12 | target/arm/tcg/mve_helper.c | 24 ++++++++++++------------ | ||
13 | target/arm/vfp_helper.c | 8 ++++---- | ||
14 | 4 files changed, 18 insertions(+), 19 deletions(-) | ||
15 | 15 | ||
16 | diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/arm/aspeed_ast2600.c | 18 | --- a/target/arm/cpu.h |
19 | +++ b/hw/arm/aspeed_ast2600.c | 19 | +++ b/target/arm/cpu.h |
20 | @@ -XXX,XX +XXX,XX @@ static const hwaddr aspeed_soc_ast2600_memmap[] = { | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
21 | [ASPEED_DEV_PWM] = 0x1E610000, | 21 | float_status ah_fp_status; |
22 | [ASPEED_DEV_FMC] = 0x1E620000, | 22 | float_status ah_fp_status_f16; |
23 | [ASPEED_DEV_SPI1] = 0x1E630000, | 23 | float_status standard_fp_status; |
24 | - [ASPEED_DEV_SPI2] = 0x1E641000, | 24 | - float_status standard_fp_status_f16; |
25 | + [ASPEED_DEV_SPI2] = 0x1E631000, | 25 | }; |
26 | [ASPEED_DEV_EHCI1] = 0x1E6A1000, | 26 | }; |
27 | [ASPEED_DEV_EHCI2] = 0x1E6A3000, | 27 | |
28 | [ASPEED_DEV_MII1] = 0x1E650000, | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/cpu.c | ||
31 | +++ b/target/arm/cpu.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
33 | set_flush_to_zero(1, &env->vfp.standard_fp_status); | ||
34 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | ||
35 | set_default_nan_mode(1, &env->vfp.standard_fp_status); | ||
36 | - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); | ||
37 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
44 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
45 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
46 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
48 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/tcg/mve_helper.c | ||
51 | +++ b/target/arm/tcg/mve_helper.c | ||
52 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
53 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
54 | continue; \ | ||
55 | } \ | ||
56 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
57 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
58 | &env->vfp.standard_fp_status; \ | ||
59 | if (!(mask & 1)) { \ | ||
60 | /* We need the result but without updating flags */ \ | ||
61 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
62 | r[e] = 0; \ | ||
63 | continue; \ | ||
64 | } \ | ||
65 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
66 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
67 | &env->vfp.standard_fp_status; \ | ||
68 | if (!(tm & 1)) { \ | ||
69 | /* We need the result but without updating flags */ \ | ||
70 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
71 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
72 | continue; \ | ||
73 | } \ | ||
74 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
75 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
76 | &env->vfp.standard_fp_status; \ | ||
77 | if (!(mask & 1)) { \ | ||
78 | /* We need the result but without updating flags */ \ | ||
79 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
80 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
81 | continue; \ | ||
82 | } \ | ||
83 | - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
84 | + fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
85 | &env->vfp.standard_fp_status; \ | ||
86 | fpst1 = fpst0; \ | ||
87 | if (!(mask & 1)) { \ | ||
88 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
89 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
90 | continue; \ | ||
91 | } \ | ||
92 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
93 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
94 | &env->vfp.standard_fp_status; \ | ||
95 | if (!(mask & 1)) { \ | ||
96 | /* We need the result but without updating flags */ \ | ||
97 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
98 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
99 | continue; \ | ||
100 | } \ | ||
101 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
102 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
103 | &env->vfp.standard_fp_status; \ | ||
104 | if (!(mask & 1)) { \ | ||
105 | /* We need the result but without updating flags */ \ | ||
106 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
107 | TYPE *m = vm; \ | ||
108 | TYPE ra = (TYPE)ra_in; \ | ||
109 | float_status *fpst = (ESIZE == 2) ? \ | ||
110 | - &env->vfp.standard_fp_status_f16 : \ | ||
111 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
112 | &env->vfp.standard_fp_status; \ | ||
113 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
114 | if (mask & 1) { \ | ||
115 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
116 | if ((mask & emask) == 0) { \ | ||
117 | continue; \ | ||
118 | } \ | ||
119 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
120 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
121 | &env->vfp.standard_fp_status; \ | ||
122 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
123 | /* We need the result but without updating flags */ \ | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
125 | if ((mask & emask) == 0) { \ | ||
126 | continue; \ | ||
127 | } \ | ||
128 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
129 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
130 | &env->vfp.standard_fp_status; \ | ||
131 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
132 | /* We need the result but without updating flags */ \ | ||
133 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
134 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
135 | continue; \ | ||
136 | } \ | ||
137 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
138 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
139 | &env->vfp.standard_fp_status; \ | ||
140 | if (!(mask & 1)) { \ | ||
141 | /* We need the result but without updating flags */ \ | ||
142 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
143 | float_status *fpst; \ | ||
144 | float_status scratch_fpst; \ | ||
145 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
146 | - &env->vfp.standard_fp_status_f16 : \ | ||
147 | + &env->vfp.fp_status[FPST_STD_F16] : \ | ||
148 | &env->vfp.standard_fp_status; \ | ||
149 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
150 | set_float_rounding_mode(rmode, base_fpst); \ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
152 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
153 | continue; \ | ||
154 | } \ | ||
155 | - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ | ||
156 | + fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
157 | &env->vfp.standard_fp_status; \ | ||
158 | if (!(mask & 1)) { \ | ||
159 | /* We need the result but without updating flags */ \ | ||
160 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/target/arm/vfp_helper.c | ||
163 | +++ b/target/arm/vfp_helper.c | ||
164 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
165 | /* FZ16 does not generate an input denormal exception. */ | ||
166 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
167 | & ~float_flag_input_denormal_flushed); | ||
168 | - a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) | ||
169 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
170 | & ~float_flag_input_denormal_flushed); | ||
171 | |||
172 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
173 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
174 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
175 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
176 | set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
177 | - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); | ||
178 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
179 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
180 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
181 | } | ||
182 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
183 | bool ftz_enabled = val & FPCR_FZ16; | ||
184 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
185 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
186 | - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
187 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
188 | set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
189 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
190 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
191 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); | ||
192 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
193 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
194 | } | ||
195 | if (changed & FPCR_FZ) { | ||
29 | -- | 196 | -- |
30 | 2.25.1 | 197 | 2.34.1 |
31 | 198 | ||
32 | 199 | diff view generated by jsdifflib |
1 | If you don't know it, it's hard to figure out the difference between | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | the linux-headers folder and the include/standard-headers folder. | 2 | |
3 | So let's add a short explanation to clarify the difference. | 3 | Replace with fp_status[FPST_STD]. |
4 | 4 | ||
5 | Suggested-by: Thomas Huth <thuth@redhat.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-9-richard.henderson@linaro.org | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
8 | Reviewed-by: Cornelia Huck <cohuck@redhat.com> | ||
9 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
10 | --- | 9 | --- |
11 | scripts/update-linux-headers.sh | 16 ++++++++++++++++ | 10 | target/arm/cpu.h | 1 - |
12 | 1 file changed, 16 insertions(+) | 11 | target/arm/cpu.c | 8 ++++---- |
13 | 12 | target/arm/tcg/mve_helper.c | 28 ++++++++++++++-------------- | |
14 | diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh | 13 | target/arm/tcg/vec_helper.c | 4 ++-- |
15 | index XXXXXXX..XXXXXXX 100755 | 14 | target/arm/vfp_helper.c | 4 ++-- |
16 | --- a/scripts/update-linux-headers.sh | 15 | 5 files changed, 22 insertions(+), 23 deletions(-) |
17 | +++ b/scripts/update-linux-headers.sh | 16 | |
18 | @@ -XXX,XX +XXX,XX @@ | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
19 | # | 18 | index XXXXXXX..XXXXXXX 100644 |
20 | # This work is licensed under the terms of the GNU GPL version 2. | 19 | --- a/target/arm/cpu.h |
21 | # See the COPYING file in the top-level directory. | 20 | +++ b/target/arm/cpu.h |
22 | +# | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
23 | +# The script will copy the headers into two target folders: | 22 | float_status fp_status_f16_a64; |
24 | +# | 23 | float_status ah_fp_status; |
25 | +# - linux-headers/ for files that are required for compiling for a | 24 | float_status ah_fp_status_f16; |
26 | +# Linux host. Generally we have these so we can use kernel structs | 25 | - float_status standard_fp_status; |
27 | +# and defines that are more recent than the headers that might be | 26 | }; |
28 | +# installed on the host system. Usually this script can do simple | 27 | }; |
29 | +# file copies for these headers. | 28 | |
30 | +# | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
31 | +# - include/standard-headers/ for files that are used for guest | 30 | index XXXXXXX..XXXXXXX 100644 |
32 | +# device emulation and are required on all hosts. For instance, we | 31 | --- a/target/arm/cpu.c |
33 | +# get our definitions of the virtio structures from the Linux | 32 | +++ b/target/arm/cpu.c |
34 | +# kernel headers, but we need those definitions regardless of which | 33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
35 | +# host OS we are building for. This script has to be careful to | 34 | env->sau.ctrl = 0; |
36 | +# sanitize the headers to remove any use of Linux-specifics such as | 35 | } |
37 | +# types like "__u64". This work is done in the cp_portable function. | 36 | |
38 | 37 | - set_flush_to_zero(1, &env->vfp.standard_fp_status); | |
39 | tmpdir=$(mktemp -d) | 38 | - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); |
40 | linux="$1" | 39 | - set_default_nan_mode(1, &env->vfp.standard_fp_status); |
40 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
41 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
46 | - arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
47 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
49 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
50 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
51 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/arm/tcg/mve_helper.c | ||
54 | +++ b/target/arm/tcg/mve_helper.c | ||
55 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) | ||
56 | continue; \ | ||
57 | } \ | ||
58 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
59 | - &env->vfp.standard_fp_status; \ | ||
60 | + &env->vfp.fp_status[FPST_STD]; \ | ||
61 | if (!(mask & 1)) { \ | ||
62 | /* We need the result but without updating flags */ \ | ||
63 | scratch_fpst = *fpst; \ | ||
64 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) | ||
65 | continue; \ | ||
66 | } \ | ||
67 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
68 | - &env->vfp.standard_fp_status; \ | ||
69 | + &env->vfp.fp_status[FPST_STD]; \ | ||
70 | if (!(tm & 1)) { \ | ||
71 | /* We need the result but without updating flags */ \ | ||
72 | scratch_fpst = *fpst; \ | ||
73 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
74 | continue; \ | ||
75 | } \ | ||
76 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
77 | - &env->vfp.standard_fp_status; \ | ||
78 | + &env->vfp.fp_status[FPST_STD]; \ | ||
79 | if (!(mask & 1)) { \ | ||
80 | /* We need the result but without updating flags */ \ | ||
81 | scratch_fpst = *fpst; \ | ||
82 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
83 | continue; \ | ||
84 | } \ | ||
85 | fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
86 | - &env->vfp.standard_fp_status; \ | ||
87 | + &env->vfp.fp_status[FPST_STD]; \ | ||
88 | fpst1 = fpst0; \ | ||
89 | if (!(mask & 1)) { \ | ||
90 | scratch_fpst = *fpst0; \ | ||
91 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
92 | continue; \ | ||
93 | } \ | ||
94 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.standard_fp_status; \ | ||
96 | + &env->vfp.fp_status[FPST_STD]; \ | ||
97 | if (!(mask & 1)) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
101 | continue; \ | ||
102 | } \ | ||
103 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
104 | - &env->vfp.standard_fp_status; \ | ||
105 | + &env->vfp.fp_status[FPST_STD]; \ | ||
106 | if (!(mask & 1)) { \ | ||
107 | /* We need the result but without updating flags */ \ | ||
108 | scratch_fpst = *fpst; \ | ||
109 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
110 | TYPE ra = (TYPE)ra_in; \ | ||
111 | float_status *fpst = (ESIZE == 2) ? \ | ||
112 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
113 | - &env->vfp.standard_fp_status; \ | ||
114 | + &env->vfp.fp_status[FPST_STD]; \ | ||
115 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
116 | if (mask & 1) { \ | ||
117 | TYPE v = m[H##ESIZE(e)]; \ | ||
118 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
119 | continue; \ | ||
120 | } \ | ||
121 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
122 | - &env->vfp.standard_fp_status; \ | ||
123 | + &env->vfp.fp_status[FPST_STD]; \ | ||
124 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
125 | /* We need the result but without updating flags */ \ | ||
126 | scratch_fpst = *fpst; \ | ||
127 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
128 | continue; \ | ||
129 | } \ | ||
130 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
131 | - &env->vfp.standard_fp_status; \ | ||
132 | + &env->vfp.fp_status[FPST_STD]; \ | ||
133 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
134 | /* We need the result but without updating flags */ \ | ||
135 | scratch_fpst = *fpst; \ | ||
136 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
137 | continue; \ | ||
138 | } \ | ||
139 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
140 | - &env->vfp.standard_fp_status; \ | ||
141 | + &env->vfp.fp_status[FPST_STD]; \ | ||
142 | if (!(mask & 1)) { \ | ||
143 | /* We need the result but without updating flags */ \ | ||
144 | scratch_fpst = *fpst; \ | ||
145 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
146 | float_status scratch_fpst; \ | ||
147 | float_status *base_fpst = (ESIZE == 2) ? \ | ||
148 | &env->vfp.fp_status[FPST_STD_F16] : \ | ||
149 | - &env->vfp.standard_fp_status; \ | ||
150 | + &env->vfp.fp_status[FPST_STD]; \ | ||
151 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
152 | set_float_rounding_mode(rmode, base_fpst); \ | ||
153 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
154 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) | ||
155 | unsigned e; | ||
156 | float_status *fpst; | ||
157 | float_status scratch_fpst; | ||
158 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
159 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
160 | bool old_fz = get_flush_to_zero(base_fpst); | ||
161 | set_flush_to_zero(false, base_fpst); | ||
162 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) | ||
164 | unsigned e; | ||
165 | float_status *fpst; | ||
166 | float_status scratch_fpst; | ||
167 | - float_status *base_fpst = &env->vfp.standard_fp_status; | ||
168 | + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; | ||
169 | bool old_fiz = get_flush_inputs_to_zero(base_fpst); | ||
170 | set_flush_inputs_to_zero(false, base_fpst); | ||
171 | for (e = 0; e < 16 / 4; e++, mask >>= 4) { | ||
172 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
173 | continue; \ | ||
174 | } \ | ||
175 | fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
176 | - &env->vfp.standard_fp_status; \ | ||
177 | + &env->vfp.fp_status[FPST_STD]; \ | ||
178 | if (!(mask & 1)) { \ | ||
179 | /* We need the result but without updating flags */ \ | ||
180 | scratch_fpst = *fpst; \ | ||
181 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/arm/tcg/vec_helper.c | ||
184 | +++ b/target/arm/tcg/vec_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
186 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
187 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
188 | |||
189 | - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
190 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
191 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
192 | } | ||
193 | |||
194 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, | ||
195 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
196 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
197 | |||
198 | - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, negx, 0, desc, | ||
199 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
200 | get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
201 | } | ||
202 | |||
203 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/target/arm/vfp_helper.c | ||
206 | +++ b/target/arm/vfp_helper.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
208 | uint32_t a32_flags = 0, a64_flags = 0; | ||
209 | |||
210 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
211 | - a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status); | ||
212 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
213 | /* FZ16 does not generate an input denormal exception. */ | ||
214 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
215 | & ~float_flag_input_denormal_flushed); | ||
216 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
217 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
218 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
219 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
220 | - set_float_exception_flags(0, &env->vfp.standard_fp_status); | ||
221 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
222 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
223 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
224 | set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
41 | -- | 225 | -- |
42 | 2.25.1 | 226 | 2.34.1 |
43 | 227 | ||
44 | 228 | diff view generated by jsdifflib |
1 | From: Andrew Baumann <Andrew.Baumann@microsoft.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com> | 3 | Replace with fp_status[FPST_AH_F16]. |
4 | Message-id: MW4PR21MB1940E8BB52F4053C943B1FCD9E219@MW4PR21MB1940.namprd21.prod.outlook.com | 4 | |
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-10-richard.henderson@linaro.org | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | --- | 9 | --- |
7 | MAINTAINERS | 1 - | 10 | target/arm/cpu.h | 3 +-- |
8 | 1 file changed, 1 deletion(-) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/vfp_helper.c | 10 +++++----- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
9 | 14 | ||
10 | diff --git a/MAINTAINERS b/MAINTAINERS | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
11 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/MAINTAINERS | 17 | --- a/target/arm/cpu.h |
13 | +++ b/MAINTAINERS | 18 | +++ b/target/arm/cpu.h |
14 | @@ -XXX,XX +XXX,XX @@ F: docs/system/arm/palm.rst | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
15 | 20 | * behaviour when FPCR.AH == 1: they don't update cumulative | |
16 | Raspberry Pi | 21 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
17 | M: Peter Maydell <peter.maydell@linaro.org> | 22 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
18 | -R: Andrew Baumann <Andrew.Baumann@microsoft.com> | 23 | - * which means we need an ah_fp_status_f16 as well. |
19 | R: Philippe Mathieu-Daudé <f4bug@amsat.org> | 24 | + * which means we need an FPST_AH_F16 as well. |
20 | L: qemu-arm@nongnu.org | 25 | * |
21 | S: Odd Fixes | 26 | * To avoid having to transfer exception bits around, we simply |
27 | * say that the FPSCR cumulative exception flags are the logical | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | ||
29 | float_status fp_status_f16_a32; | ||
30 | float_status fp_status_f16_a64; | ||
31 | float_status ah_fp_status; | ||
32 | - float_status ah_fp_status_f16; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
42 | set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
43 | set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16); | ||
45 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
46 | |||
47 | #ifndef CONFIG_USER_ONLY | ||
48 | if (kvm_enabled()) { | ||
49 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/vfp_helper.c | ||
52 | +++ b/target/arm/vfp_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
54 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
55 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
56 | /* | ||
57 | - * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because | ||
58 | + * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
59 | * they are used for insns that must not set the cumulative exception bits. | ||
60 | */ | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
63 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
64 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
65 | set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
66 | - set_float_exception_flags(0, &env->vfp.ah_fp_status_f16); | ||
67 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
68 | } | ||
69 | |||
70 | static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) | ||
71 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
72 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
73 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
74 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
75 | - set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
76 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
77 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
80 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16); | ||
81 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_FZ) { | ||
84 | bool ftz_enabled = val & FPCR_FZ; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
87 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
88 | set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
89 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16); | ||
90 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
91 | } | ||
92 | if (changed & FPCR_AH) { | ||
93 | bool ah_enabled = val & FPCR_AH; | ||
22 | -- | 94 | -- |
23 | 2.25.1 | 95 | 2.34.1 |
24 | 96 | ||
25 | 97 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | List myself as maintainer for the Xilinx Versal OSPI controller. | 3 | Replace with fp_status[FPST_AH]. |
4 | 4 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-11-richard.henderson@linaro.org |
8 | Message-id: 20220121161141.14389-11-francisco.iglesias@xilinx.com | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 9 | --- |
11 | MAINTAINERS | 6 ++++++ | 10 | target/arm/cpu.h | 3 +-- |
12 | 1 file changed, 6 insertions(+) | 11 | target/arm/cpu.c | 6 +++--- |
12 | target/arm/vfp_helper.c | 6 +++--- | ||
13 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
13 | 14 | ||
14 | diff --git a/MAINTAINERS b/MAINTAINERS | 15 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/MAINTAINERS | 17 | --- a/target/arm/cpu.h |
17 | +++ b/MAINTAINERS | 18 | +++ b/target/arm/cpu.h |
18 | @@ -XXX,XX +XXX,XX @@ F: hw/display/dpcd.c | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct NVICState NVICState; |
19 | F: include/hw/display/dpcd.h | 20 | * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than |
20 | F: docs/system/arm/xlnx-versal-virt.rst | 21 | * using a fixed value for it. |
21 | 22 | * | |
22 | +Xilinx Versal OSPI | 23 | - * The ah_fp_status is needed because some insns have different |
23 | +M: Francisco Iglesias <francisco.iglesias@xilinx.com> | 24 | + * FPST_AH is needed because some insns have different |
24 | +S: Maintained | 25 | * behaviour when FPCR.AH == 1: they don't update cumulative |
25 | +F: hw/ssi/xlnx-versal-ospi.c | 26 | * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and |
26 | +F: include/hw/ssi/xlnx-versal-ospi.h | 27 | * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, |
27 | + | 28 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
28 | ARM ACPI Subsystem | 29 | float_status fp_status_a64; |
29 | M: Shannon Zhao <shannon.zhaosl@gmail.com> | 30 | float_status fp_status_f16_a32; |
30 | L: qemu-arm@nongnu.org | 31 | float_status fp_status_f16_a64; |
32 | - float_status ah_fp_status; | ||
33 | }; | ||
34 | }; | ||
35 | |||
36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
42 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
43 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status); | ||
45 | - set_flush_to_zero(1, &env->vfp.ah_fp_status); | ||
46 | - set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status); | ||
47 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
48 | + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
49 | + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
50 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); | ||
51 | |||
52 | #ifndef CONFIG_USER_ONLY | ||
53 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/vfp_helper.c | ||
56 | +++ b/target/arm/vfp_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
58 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
59 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
60 | /* | ||
61 | - * We do not merge in flags from ah_fp_status or FPST_AH_F16, because | ||
62 | + * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
63 | * they are used for insns that must not set the cumulative exception bits. | ||
64 | */ | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
67 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
68 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
69 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
70 | - set_float_exception_flags(0, &env->vfp.ah_fp_status); | ||
71 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
72 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); | ||
73 | } | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
76 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
77 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
78 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
79 | - set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status); | ||
80 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
81 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
82 | } | ||
83 | if (changed & FPCR_AH) { | ||
31 | -- | 84 | -- |
32 | 2.25.1 | 85 | 2.34.1 |
33 | 86 | ||
34 | 87 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Connect Micron Xccela mt35xu01g flashes to the OSPI flash memory | 3 | Replace with fp_status[FPST_A64_F16]. |
4 | controller. | ||
5 | 4 | ||
6 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | Message-id: 20250129013857.135256-12-richard.henderson@linaro.org |
9 | Message-id: 20220121161141.14389-10-francisco.iglesias@xilinx.com | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 9 | --- |
12 | hw/arm/xlnx-versal-virt.c | 23 +++++++++++++++++++++++ | 10 | target/arm/cpu.h | 1 - |
13 | 1 file changed, 23 insertions(+) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 9 ++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 14 insertions(+), 16 deletions(-) | ||
14 | 16 | ||
15 | diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/arm/xlnx-versal-virt.c | 19 | --- a/target/arm/cpu.h |
18 | +++ b/hw/arm/xlnx-versal-virt.c | 20 | +++ b/target/arm/cpu.h |
19 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
20 | #define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt") | 22 | float_status fp_status_a32; |
21 | OBJECT_DECLARE_SIMPLE_TYPE(VersalVirt, XLNX_VERSAL_VIRT_MACHINE) | 23 | float_status fp_status_a64; |
22 | 24 | float_status fp_status_f16_a32; | |
23 | +#define XLNX_VERSAL_NUM_OSPI_FLASH 4 | 25 | - float_status fp_status_f16_a64; |
24 | + | 26 | }; |
25 | struct VersalVirt { | 27 | }; |
26 | MachineState parent_obj; | 28 | |
27 | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | |
28 | @@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine) | 30 | index XXXXXXX..XXXXXXX 100644 |
29 | exit(EXIT_FAILURE); | 31 | --- a/target/arm/cpu.c |
32 | +++ b/target/arm/cpu.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * produces default NaNs. We also need a second copy of fp_status with | ||
48 | * round-to-odd -- see above. | ||
49 | */ | ||
50 | - fpst_f16 = env->vfp.fp_status_f16_a64; | ||
51 | + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
52 | fpst_std = env->vfp.fp_status_a64; | ||
53 | set_default_nan_mode(true, &fpst_std); | ||
54 | set_default_nan_mode(true, &fpst_f16); | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
30 | } | 60 | } |
31 | } | 61 | } |
32 | + | 62 | do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, |
33 | + for (i = 0; i < XLNX_VERSAL_NUM_OSPI_FLASH; i++) { | 63 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); |
34 | + BusState *spi_bus; | 64 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
35 | + DeviceState *flash_dev; | ||
36 | + qemu_irq cs_line; | ||
37 | + DriveInfo *dinfo = drive_get(IF_MTD, 0, i); | ||
38 | + | ||
39 | + spi_bus = qdev_get_child_bus(DEVICE(&s->soc.pmc.iou.ospi), "spi0"); | ||
40 | + | ||
41 | + flash_dev = qdev_new("mt35xu01g"); | ||
42 | + if (dinfo) { | ||
43 | + qdev_prop_set_drive_err(flash_dev, "drive", | ||
44 | + blk_by_legacy_dinfo(dinfo), &error_fatal); | ||
45 | + } | ||
46 | + qdev_realize_and_unref(flash_dev, spi_bus, &error_fatal); | ||
47 | + | ||
48 | + cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0); | ||
49 | + | ||
50 | + sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.pmc.iou.ospi), | ||
51 | + i + 1, cs_line); | ||
52 | + } | ||
53 | } | 65 | } |
54 | 66 | ||
55 | static void versal_virt_machine_instance_init(Object *obj) | 67 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
70 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
71 | float_status *status = &env->vfp.fp_status_a64; | ||
72 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
73 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
74 | int negx = 0, negf = 0; | ||
75 | |||
76 | if (is_s) { | ||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | } | ||
79 | } | ||
80 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
81 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64)); | ||
82 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
83 | } | ||
84 | |||
85 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
88 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
89 | float_status *status = &env->vfp.fp_status_a64; | ||
90 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64); | ||
91 | + bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
92 | int negx = 0, negf = 0; | ||
93 | |||
94 | if (is_s) { | ||
95 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
96 | negx = 0x8000; | ||
97 | } | ||
98 | } | ||
99 | - | ||
100 | for (i = 0; i < oprsz; i += 16) { | ||
101 | float16 mm_16 = *(float16 *)(vm + i + idx); | ||
102 | float32 mm = float16_to_float32_by_bits(mm_16, fz16); | ||
103 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/arm/vfp_helper.c | ||
106 | +++ b/target/arm/vfp_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
108 | & ~float_flag_input_denormal_flushed); | ||
109 | |||
110 | a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
111 | - a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64) | ||
112 | + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
113 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
114 | /* | ||
115 | * We do not merge in flags from FPST_AH or FPST_AH_F16, because | ||
116 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
117 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
118 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
119 | set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
120 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a64); | ||
121 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
122 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
126 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
127 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
128 | set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
129 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64); | ||
130 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
131 | } | ||
132 | if (changed & FPCR_FZ16) { | ||
133 | bool ftz_enabled = val & FPCR_FZ16; | ||
134 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
135 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
136 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
137 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
139 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
140 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64); | ||
141 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
143 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
144 | } | ||
145 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
146 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
147 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
148 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
149 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64); | ||
150 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
151 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
152 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
155 | if (ah_enabled) { | ||
156 | /* Change behaviours for A64 FP operations */ | ||
157 | arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
158 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
159 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
160 | } else { | ||
161 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
162 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64); | ||
163 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
164 | } | ||
165 | } | ||
166 | /* | ||
56 | -- | 167 | -- |
57 | 2.25.1 | 168 | 2.34.1 |
58 | 169 | ||
59 | 170 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | An option on real hardware when embedding a DMA engine into a peripheral | 3 | Replace with fp_status[FPST_A32_F16]. |
4 | is to make the peripheral control the engine through a custom DMA control | ||
5 | (hardware) interface between the two. Software drivers in this scenario | ||
6 | configure and trigger DMA operations through the controlling peripheral's | ||
7 | register API (for example, writing a specific bit in a register could | ||
8 | propagate down to a transfer start signal on the DMA control interface). | ||
9 | At the same time the status, results and interrupts for the transfer might | ||
10 | still be intended to be read and caught through the DMA engine's register | ||
11 | API (and signals). | ||
12 | 4 | ||
13 | This patch adds a class 'read' method that allows read transfers to be started | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
14 | from peripherals that embed and control the Xilinx CSU DMA engine, as in | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
15 | the scenario above. | 7 | Message-id: 20250129013857.135256-13-richard.henderson@linaro.org |
16 | |||
17 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | ||
18 | Reviewed-by: Luc Michel <luc@lmichel.fr> | ||
19 | Message-id: 20220121161141.14389-6-francisco.iglesias@xilinx.com | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
21 | --- | 9 | --- |
22 | include/hw/dma/xlnx_csu_dma.h | 19 +++++++++++++++++-- | 10 | target/arm/cpu.h | 1 - |
23 | hw/dma/xlnx_csu_dma.c | 17 +++++++++++++++++ | 11 | target/arm/cpu.c | 2 +- |
24 | 2 files changed, 34 insertions(+), 2 deletions(-) | 12 | target/arm/tcg/vec_helper.c | 4 ++-- |
13 | target/arm/vfp_helper.c | 14 +++++++------- | ||
14 | 4 files changed, 10 insertions(+), 11 deletions(-) | ||
25 | 15 | ||
26 | diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h | 16 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
27 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/include/hw/dma/xlnx_csu_dma.h | 18 | --- a/target/arm/cpu.h |
29 | +++ b/include/hw/dma/xlnx_csu_dma.h | 19 | +++ b/target/arm/cpu.h |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct XlnxCSUDMA { | 20 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
31 | RegisterInfo regs_info[XLNX_CSU_DMA_R_MAX]; | 21 | struct { |
32 | } XlnxCSUDMA; | 22 | float_status fp_status_a32; |
33 | 23 | float_status fp_status_a64; | |
34 | -#define XLNX_CSU_DMA(obj) \ | 24 | - float_status fp_status_f16_a32; |
35 | - OBJECT_CHECK(XlnxCSUDMA, (obj), TYPE_XLNX_CSU_DMA) | 25 | }; |
36 | +OBJECT_DECLARE_TYPE(XlnxCSUDMA, XlnxCSUDMAClass, XLNX_CSU_DMA) | 26 | }; |
37 | + | 27 | |
38 | +struct XlnxCSUDMAClass { | 28 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
39 | + SysBusDeviceClass parent_class; | ||
40 | + | ||
41 | + /* | ||
42 | + * read: Start a read transfer on a Xilinx CSU DMA engine | ||
43 | + * | ||
44 | + * @s: the Xilinx CSU DMA engine to start the transfer on | ||
45 | + * @addr: the address to read | ||
46 | + * @len: the number of bytes to read at 'addr' | ||
47 | + * | ||
48 | + * @return a MemTxResult indicating whether the operation succeeded ('len' | ||
49 | + * bytes were read) or failed. | ||
50 | + */ | ||
51 | + MemTxResult (*read)(XlnxCSUDMA *s, hwaddr addr, uint32_t len); | ||
52 | +}; | ||
53 | |||
54 | #endif | ||
55 | diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
57 | --- a/hw/dma/xlnx_csu_dma.c | 30 | --- a/target/arm/cpu.c |
58 | +++ b/hw/dma/xlnx_csu_dma.c | 31 | +++ b/target/arm/cpu.c |
59 | @@ -XXX,XX +XXX,XX @@ static uint64_t addr_msb_pre_write(RegisterInfo *reg, uint64_t val) | 32 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) |
60 | return val & R_ADDR_MSB_ADDR_MSB_MASK; | 33 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); |
34 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
35 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
36 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32); | ||
37 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
38 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); | ||
40 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); | ||
41 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/arm/tcg/vec_helper.c | ||
44 | +++ b/target/arm/tcg/vec_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
46 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
47 | |||
48 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
49 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); | ||
50 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); | ||
61 | } | 51 | } |
62 | 52 | ||
63 | +static MemTxResult xlnx_csu_dma_class_read(XlnxCSUDMA *s, hwaddr addr, | 53 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
64 | + uint32_t len) | 54 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
65 | +{ | 55 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
66 | + RegisterInfo *reg = &s->regs_info[R_SIZE]; | 56 | |
67 | + uint64_t we = MAKE_64BIT_MASK(0, 4 * 8); | 57 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
68 | + | 58 | - get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32)); |
69 | + s->regs[R_ADDR] = addr; | 59 | + get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
70 | + s->regs[R_ADDR_MSB] = (uint64_t)addr >> 32; | ||
71 | + | ||
72 | + register_write(reg, len, we, object_get_typename(OBJECT(s)), false); | ||
73 | + | ||
74 | + return (s->regs[R_SIZE] == 0) ? MEMTX_OK : MEMTX_ERROR; | ||
75 | +} | ||
76 | + | ||
77 | static const RegisterAccessInfo *xlnx_csu_dma_regs_info[] = { | ||
78 | #define DMACH_REGINFO(NAME, snd) \ | ||
79 | (const RegisterAccessInfo []) { \ | ||
80 | @@ -XXX,XX +XXX,XX @@ static void xlnx_csu_dma_class_init(ObjectClass *klass, void *data) | ||
81 | { | ||
82 | DeviceClass *dc = DEVICE_CLASS(klass); | ||
83 | StreamSinkClass *ssc = STREAM_SINK_CLASS(klass); | ||
84 | + XlnxCSUDMAClass *xcdc = XLNX_CSU_DMA_CLASS(klass); | ||
85 | |||
86 | dc->reset = xlnx_csu_dma_reset; | ||
87 | dc->realize = xlnx_csu_dma_realize; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void xlnx_csu_dma_class_init(ObjectClass *klass, void *data) | ||
89 | |||
90 | ssc->push = xlnx_csu_dma_stream_push; | ||
91 | ssc->can_push = xlnx_csu_dma_stream_can_push; | ||
92 | + | ||
93 | + xcdc->read = xlnx_csu_dma_class_read; | ||
94 | } | 60 | } |
95 | 61 | ||
96 | static void xlnx_csu_dma_init(Object *obj) | 62 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
63 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/arm/vfp_helper.c | ||
66 | +++ b/target/arm/vfp_helper.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
68 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
69 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
70 | /* FZ16 does not generate an input denormal exception. */ | ||
71 | - a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32) | ||
72 | + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
73 | & ~float_flag_input_denormal_flushed); | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
75 | & ~float_flag_input_denormal_flushed); | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
77 | */ | ||
78 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
79 | set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
80 | - set_float_exception_flags(0, &env->vfp.fp_status_f16_a32); | ||
81 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
84 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
86 | } | ||
87 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
88 | set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
89 | - set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32); | ||
90 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
92 | } | ||
93 | if (changed & FPCR_FZ16) { | ||
94 | bool ftz_enabled = val & FPCR_FZ16; | ||
95 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
96 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
97 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
98 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
100 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32); | ||
101 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
102 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
103 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); | ||
104 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
105 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
106 | bool dnan_enabled = val & FPCR_DN; | ||
107 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
108 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
109 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32); | ||
110 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
111 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); | ||
114 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
115 | softfloat_to_vfp_compare(env, \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32) | ||
119 | +DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
120 | DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
121 | DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
122 | #undef DO_VFP_cmp | ||
97 | -- | 123 | -- |
98 | 2.25.1 | 124 | 2.34.1 |
99 | 125 | ||
100 | 126 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add the missing includes to the header so that the DMA model can be built | 3 | Replace with fp_status[FPST_A64]. |
4 | when it is reused. | |
5 | 4 | ||
6 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Reviewed-by: Luc Michel <luc@lmichel.fr> | 7 | Message-id: 20250129013857.135256-14-richard.henderson@linaro.org |
9 | Message-id: 20220121161141.14389-5-francisco.iglesias@xilinx.com | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 9 | --- |
12 | include/hw/dma/xlnx_csu_dma.h | 5 +++++ | 10 | target/arm/cpu.h | 1 - |
13 | 1 file changed, 5 insertions(+) | 11 | target/arm/cpu.c | 2 +- |
12 | target/arm/tcg/sme_helper.c | 2 +- | ||
13 | target/arm/tcg/vec_helper.c | 10 +++++----- | ||
14 | target/arm/vfp_helper.c | 16 ++++++++-------- | ||
15 | 5 files changed, 15 insertions(+), 16 deletions(-) | ||
14 | 16 | ||
15 | diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h | 17 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/hw/dma/xlnx_csu_dma.h | 19 | --- a/target/arm/cpu.h |
18 | +++ b/include/hw/dma/xlnx_csu_dma.h | 20 | +++ b/target/arm/cpu.h |
19 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
20 | #ifndef XLNX_CSU_DMA_H | 22 | float_status fp_status[FPST_COUNT]; |
21 | #define XLNX_CSU_DMA_H | 23 | struct { |
22 | 24 | float_status fp_status_a32; | |
23 | +#include "hw/sysbus.h" | 25 | - float_status fp_status_a64; |
24 | +#include "hw/register.h" | 26 | }; |
25 | +#include "hw/ptimer.h" | 27 | }; |
26 | +#include "hw/stream.h" | 28 | |
27 | + | 29 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
28 | #define TYPE_XLNX_CSU_DMA "xlnx.csu_dma" | 30 | index XXXXXXX..XXXXXXX 100644 |
29 | 31 | --- a/target/arm/cpu.c | |
30 | #define XLNX_CSU_DMA_R_MAX (0x2c / 4) | 32 | +++ b/target/arm/cpu.c |
33 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
34 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
35 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
36 | arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
37 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
38 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
39 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
40 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
41 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
42 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/tcg/sme_helper.c | ||
45 | +++ b/target/arm/tcg/sme_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
47 | * round-to-odd -- see above. | ||
48 | */ | ||
49 | fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; | ||
50 | - fpst_std = env->vfp.fp_status_a64; | ||
51 | + fpst_std = env->vfp.fp_status[FPST_A64]; | ||
52 | set_default_nan_mode(true, &fpst_std); | ||
53 | set_default_nan_mode(true, &fpst_f16); | ||
54 | fpst_odd = fpst_std; | ||
55 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/tcg/vec_helper.c | ||
58 | +++ b/target/arm/tcg/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
60 | negx = 0x8000800080008000ull; | ||
61 | } | ||
62 | } | ||
63 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
64 | + do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
65 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
66 | } | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
69 | intptr_t i, oprsz = simd_oprsz(desc); | ||
70 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
71 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
72 | - float_status *status = &env->vfp.fp_status_a64; | ||
73 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
74 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
75 | int negx = 0, negf = 0; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, | ||
78 | negx = 0x8000800080008000ull; | ||
79 | } | ||
80 | } | ||
81 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, negx, negf, desc, | ||
82 | + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
83 | get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); | ||
84 | } | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
87 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
88 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
89 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
90 | - float_status *status = &env->vfp.fp_status_a64; | ||
91 | + float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
92 | bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
93 | int negx = 0, negf = 0; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
96 | */ | ||
97 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
98 | |||
99 | - *statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32; | ||
100 | + *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
101 | set_default_nan_mode(true, statusp); | ||
102 | |||
103 | if (ebf) { | ||
104 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/arm/vfp_helper.c | ||
107 | +++ b/target/arm/vfp_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
109 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) | ||
110 | & ~float_flag_input_denormal_flushed); | ||
111 | |||
112 | - a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64); | ||
113 | + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); | ||
114 | a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) | ||
115 | & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); | ||
116 | /* | ||
117 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
118 | * be the architecturally up-to-date exception flag information first. | ||
119 | */ | ||
120 | set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
121 | - set_float_exception_flags(0, &env->vfp.fp_status_a64); | ||
122 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
123 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
124 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
125 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
127 | break; | ||
128 | } | ||
129 | set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
130 | - set_float_rounding_mode(i, &env->vfp.fp_status_a64); | ||
131 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
132 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
133 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
134 | } | ||
135 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
136 | if (changed & FPCR_FZ) { | ||
137 | bool ftz_enabled = val & FPCR_FZ; | ||
138 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
139 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64); | ||
140 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
141 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
142 | set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
145 | */ | ||
146 | bool fitz_enabled = (val & FPCR_FIZ) || | ||
147 | (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; | ||
148 | - set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64); | ||
149 | + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
150 | } | ||
151 | if (changed & FPCR_DN) { | ||
152 | bool dnan_enabled = val & FPCR_DN; | ||
153 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
154 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64); | ||
155 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
156 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
157 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
158 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
160 | |||
161 | if (ah_enabled) { | ||
162 | /* Change behaviours for A64 FP operations */ | ||
163 | - arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64); | ||
164 | + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
165 | arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
166 | } else { | ||
167 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a64); | ||
168 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
169 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); | ||
170 | } | ||
171 | } | ||
31 | -- | 172 | -- |
32 | 2.25.1 | 173 | 2.34.1 |
33 | 174 | ||
34 | 175 | diff view generated by jsdifflib |
1 | The exynos4210_uart_post_load() function assumes that it is passed | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | the Exynos4210UartState, but it has been attached to the | ||
3 | VMStateDescription for the Exynos4210UartFIFO type. The result is a | ||
4 | SIGSEGV when attempting to load VM state for any machine type | ||
5 | including this device. | ||
6 | 2 | ||
7 | Fix the bug by attaching the post-load function to the VMSD for the | 3 | Replace with fp_status[FPST_A32]. As this was the last of the |
8 | Exynos4210UartState. This is the logical place for it, because the | 4 | old structures, we can remove the anonymous union and struct. |
9 | actions it does relate to the entire UART state, not just the FIFO. | ||
10 | 5 | ||
11 | Thanks to the bug reporter @TrungNguyen1909 for the clear bug | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | description and the suggested fix. | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Message-id: 20250129013857.135256-15-richard.henderson@linaro.org | ||
9 | [PMM: tweak to account for change to is_ebf()] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/cpu.h | 7 +------ | ||
13 | target/arm/cpu.c | 2 +- | ||
14 | target/arm/tcg/vec_helper.c | 2 +- | ||
15 | target/arm/vfp_helper.c | 18 +++++++++--------- | ||
16 | 4 files changed, 12 insertions(+), 17 deletions(-) | ||
13 | 17 | ||
14 | Fixes: c9d3396d80fe7ece9b | 18 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
15 | ("hw/char/exynos4210_uart: Implement post_load function") | ||
16 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/638 | ||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | Reviewed-by: Guenter Roeck <linux@roeck-us.net> | ||
19 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
20 | Message-id: 20220120151648.433736-1-peter.maydell@linaro.org | ||
21 | --- | ||
22 | hw/char/exynos4210_uart.c | 2 +- | ||
23 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
24 | |||
25 | diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/hw/char/exynos4210_uart.c | 20 | --- a/target/arm/cpu.h |
28 | +++ b/hw/char/exynos4210_uart.c | 21 | +++ b/target/arm/cpu.h |
29 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_exynos4210_uart_fifo = { | 22 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { |
30 | .name = "exynos4210.uart.fifo", | 23 | uint32_t scratch[8]; |
31 | .version_id = 1, | 24 | |
32 | .minimum_version_id = 1, | 25 | /* There are a number of distinct float control structures. */ |
33 | - .post_load = exynos4210_uart_post_load, | 26 | - union { |
34 | .fields = (VMStateField[]) { | 27 | - float_status fp_status[FPST_COUNT]; |
35 | VMSTATE_UINT32(sp, Exynos4210UartFIFO), | 28 | - struct { |
36 | VMSTATE_UINT32(rp, Exynos4210UartFIFO), | 29 | - float_status fp_status_a32; |
37 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_exynos4210_uart = { | 30 | - }; |
38 | .name = "exynos4210.uart", | 31 | - }; |
39 | .version_id = 1, | 32 | + float_status fp_status[FPST_COUNT]; |
40 | .minimum_version_id = 1, | 33 | |
41 | + .post_load = exynos4210_uart_post_load, | 34 | uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ |
42 | .fields = (VMStateField[]) { | 35 | uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ |
43 | VMSTATE_STRUCT(rx, Exynos4210UartState, 1, | 36 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c |
44 | vmstate_exynos4210_uart_fifo, Exynos4210UartFIFO), | 37 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/target/arm/cpu.c | ||
39 | +++ b/target/arm/cpu.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
41 | set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); | ||
42 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); | ||
43 | set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); | ||
44 | - arm_set_default_fp_behaviours(&env->vfp.fp_status_a32); | ||
45 | + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); | ||
46 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); | ||
47 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); | ||
48 | arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); | ||
49 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/arm/tcg/vec_helper.c | ||
52 | +++ b/target/arm/tcg/vec_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) | ||
54 | */ | ||
55 | bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; | ||
56 | |||
57 | - *statusp = is_a64(env) ? env->vfp.fp_status[FPST_A64] : env->vfp.fp_status_a32; | ||
58 | + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; | ||
59 | set_default_nan_mode(true, statusp); | ||
60 | |||
61 | if (ebf) { | ||
62 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/arm/vfp_helper.c | ||
65 | +++ b/target/arm/vfp_helper.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env) | ||
67 | { | ||
68 | uint32_t a32_flags = 0, a64_flags = 0; | ||
69 | |||
70 | - a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32); | ||
71 | + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); | ||
72 | a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); | ||
73 | /* FZ16 does not generate an input denormal exception. */ | ||
74 | a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env) | ||
76 | * values. The caller should have arranged for env->vfp.fpsr to | ||
77 | * be the architecturally up-to-date exception flag information first. | ||
78 | */ | ||
79 | - set_float_exception_flags(0, &env->vfp.fp_status_a32); | ||
80 | + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); | ||
81 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); | ||
82 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); | ||
83 | set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); | ||
84 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
85 | i = float_round_to_zero; | ||
86 | break; | ||
87 | } | ||
88 | - set_float_rounding_mode(i, &env->vfp.fp_status_a32); | ||
89 | + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); | ||
90 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); | ||
91 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); | ||
92 | set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
94 | } | ||
95 | if (changed & FPCR_FZ) { | ||
96 | bool ftz_enabled = val & FPCR_FZ; | ||
97 | - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
98 | + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
99 | set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); | ||
100 | /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ | ||
101 | - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32); | ||
102 | + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); | ||
103 | } | ||
104 | if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { | ||
105 | /* | ||
106 | @@ -XXX,XX +XXX,XX @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) | ||
107 | } | ||
108 | if (changed & FPCR_DN) { | ||
109 | bool dnan_enabled = val & FPCR_DN; | ||
110 | - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32); | ||
111 | + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); | ||
112 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); | ||
113 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); | ||
114 | set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); | ||
115 | @@ -XXX,XX +XXX,XX @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ | ||
116 | FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ | ||
117 | } | ||
118 | DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) | ||
119 | -DO_VFP_cmp(s, float32, float32, fp_status_a32) | ||
120 | -DO_VFP_cmp(d, float64, float64, fp_status_a32) | ||
121 | +DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) | ||
122 | +DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) | ||
123 | #undef DO_VFP_cmp | ||
124 | |||
125 | /* Integer to float and float to integer conversions */ | ||
126 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) | ||
127 | |||
128 | uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) | ||
129 | { | ||
130 | - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32); | ||
131 | + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); | ||
132 | uint32_t result = pair; | ||
133 | uint32_t z = (pair >> 32) == 0; | ||
134 | |||
45 | -- | 135 | -- |
46 | 2.25.1 | 136 | 2.34.1 |
47 | 137 | ||
48 | 138 | diff view generated by jsdifflib |
1 | In commit d5093d961585f02 we added a VMStateDescription to | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | the TYPE_ARMV7M object, to handle migration of its Clocks. | ||
3 | However a cut-and-paste error meant we used the wrong struct | ||
4 | name in the VMSTATE_CLOCK() macro arguments. The result was | ||
5 | that attempting a 'savevm' might result in an assertion | ||
6 | failure. | ||
7 | 2 | ||
8 | Cc: qemu-stable@nongnu.org | 3 | Select on index instead of pointer. |
9 | Buglink: https://gitlab.com/qemu-project/qemu/-/issues/803 | 4 | No functional change. |
10 | Fixes: d5093d961585f02 | 5 | |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-id: 20250129013857.135256-16-richard.henderson@linaro.org | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | Reviewed-by: Ani Sinha <ani@anisinha.ca> | ||
13 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
14 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
15 | Message-id: 20220120151609.433555-1-peter.maydell@linaro.org | ||
16 | --- | 10 | --- |
17 | hw/arm/armv7m.c | 4 ++-- | 11 | target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------ |
18 | 1 file changed, 2 insertions(+), 2 deletions(-) | 12 | 1 file changed, 14 insertions(+), 26 deletions(-) |
19 | 13 | ||
20 | diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c | 14 | diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c |
21 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/arm/armv7m.c | 16 | --- a/target/arm/tcg/mve_helper.c |
23 | +++ b/hw/arm/armv7m.c | 17 | +++ b/target/arm/tcg/mve_helper.c |
24 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_armv7m = { | 18 | @@ -XXX,XX +XXX,XX @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) |
25 | .version_id = 1, | 19 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ |
26 | .minimum_version_id = 1, | 20 | continue; \ |
27 | .fields = (VMStateField[]) { | 21 | } \ |
28 | - VMSTATE_CLOCK(refclk, SysTickState), | 22 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ |
29 | - VMSTATE_CLOCK(cpuclk, SysTickState), | 23 | - &env->vfp.fp_status[FPST_STD]; \ |
30 | + VMSTATE_CLOCK(refclk, ARMv7MState), | 24 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ |
31 | + VMSTATE_CLOCK(cpuclk, ARMv7MState), | 25 | if (!(mask & 1)) { \ |
32 | VMSTATE_END_OF_LIST() | 26 | /* We need the result but without updating flags */ \ |
33 | } | 27 | scratch_fpst = *fpst; \ |
34 | }; | 28 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ALL(vminnma, minnuma) |
29 | r[e] = 0; \ | ||
30 | continue; \ | ||
31 | } \ | ||
32 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
33 | - &env->vfp.fp_status[FPST_STD]; \ | ||
34 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
35 | if (!(tm & 1)) { \ | ||
36 | /* We need the result but without updating flags */ \ | ||
37 | scratch_fpst = *fpst; \ | ||
38 | @@ -XXX,XX +XXX,XX @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) | ||
39 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
40 | continue; \ | ||
41 | } \ | ||
42 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
43 | - &env->vfp.fp_status[FPST_STD]; \ | ||
44 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
45 | if (!(mask & 1)) { \ | ||
46 | /* We need the result but without updating flags */ \ | ||
47 | scratch_fpst = *fpst; \ | ||
48 | @@ -XXX,XX +XXX,XX @@ DO_VFMA(vfmss, 4, float32, true) | ||
49 | if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ | ||
50 | continue; \ | ||
51 | } \ | ||
52 | - fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
53 | - &env->vfp.fp_status[FPST_STD]; \ | ||
54 | + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
55 | fpst1 = fpst0; \ | ||
56 | if (!(mask & 1)) { \ | ||
57 | scratch_fpst = *fpst0; \ | ||
58 | @@ -XXX,XX +XXX,XX @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) | ||
59 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
60 | continue; \ | ||
61 | } \ | ||
62 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
63 | - &env->vfp.fp_status[FPST_STD]; \ | ||
64 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
65 | if (!(mask & 1)) { \ | ||
66 | /* We need the result but without updating flags */ \ | ||
67 | scratch_fpst = *fpst; \ | ||
68 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) | ||
69 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
70 | continue; \ | ||
71 | } \ | ||
72 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
73 | - &env->vfp.fp_status[FPST_STD]; \ | ||
74 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
75 | if (!(mask & 1)) { \ | ||
76 | /* We need the result but without updating flags */ \ | ||
77 | scratch_fpst = *fpst; \ | ||
78 | @@ -XXX,XX +XXX,XX @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) | ||
79 | unsigned e; \ | ||
80 | TYPE *m = vm; \ | ||
81 | TYPE ra = (TYPE)ra_in; \ | ||
82 | - float_status *fpst = (ESIZE == 2) ? \ | ||
83 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
84 | - &env->vfp.fp_status[FPST_STD]; \ | ||
85 | + float_status *fpst = \ | ||
86 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
87 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
88 | if (mask & 1) { \ | ||
89 | TYPE v = m[H##ESIZE(e)]; \ | ||
90 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
91 | if ((mask & emask) == 0) { \ | ||
92 | continue; \ | ||
93 | } \ | ||
94 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
95 | - &env->vfp.fp_status[FPST_STD]; \ | ||
96 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
97 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
98 | /* We need the result but without updating flags */ \ | ||
99 | scratch_fpst = *fpst; \ | ||
100 | @@ -XXX,XX +XXX,XX @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) | ||
101 | if ((mask & emask) == 0) { \ | ||
102 | continue; \ | ||
103 | } \ | ||
104 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
105 | - &env->vfp.fp_status[FPST_STD]; \ | ||
106 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
107 | if (!(mask & (1 << (e * ESIZE)))) { \ | ||
108 | /* We need the result but without updating flags */ \ | ||
109 | scratch_fpst = *fpst; \ | ||
110 | @@ -XXX,XX +XXX,XX @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) | ||
111 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
112 | continue; \ | ||
113 | } \ | ||
114 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
115 | - &env->vfp.fp_status[FPST_STD]; \ | ||
116 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
117 | if (!(mask & 1)) { \ | ||
118 | /* We need the result but without updating flags */ \ | ||
119 | scratch_fpst = *fpst; \ | ||
120 | @@ -XXX,XX +XXX,XX @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) | ||
121 | unsigned e; \ | ||
122 | float_status *fpst; \ | ||
123 | float_status scratch_fpst; \ | ||
124 | - float_status *base_fpst = (ESIZE == 2) ? \ | ||
125 | - &env->vfp.fp_status[FPST_STD_F16] : \ | ||
126 | - &env->vfp.fp_status[FPST_STD]; \ | ||
127 | + float_status *base_fpst = \ | ||
128 | + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
129 | uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ | ||
130 | set_float_rounding_mode(rmode, base_fpst); \ | ||
131 | for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) | ||
133 | if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ | ||
134 | continue; \ | ||
135 | } \ | ||
136 | - fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \ | ||
137 | - &env->vfp.fp_status[FPST_STD]; \ | ||
138 | + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ | ||
139 | if (!(mask & 1)) { \ | ||
140 | /* We need the result but without updating flags */ \ | ||
141 | scratch_fpst = *fpst; \ | ||
35 | -- | 142 | -- |
36 | 2.25.1 | 143 | 2.34.1 |
37 | 144 | ||
38 | 145 | diff view generated by jsdifflib |
1 | The list of #defines for the ITS command packet numbers is neither | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | in alphabetical nor numeric order. Sort it into numeric order. | ||
3 | 2 | ||
3 | Pass ARMFPStatusFlavour index instead of fp_status[FOO]. | ||
4 | |||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Message-id: 20250129013857.135256-17-richard.henderson@linaro.org | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220122182444.724087-8-peter.maydell@linaro.org | ||
8 | --- | 9 | --- |
9 | hw/intc/gicv3_internal.h | 10 +++++----- | 10 | target/arm/vfp_helper.c | 10 +++++----- |
10 | 1 file changed, 5 insertions(+), 5 deletions(-) | 11 | 1 file changed, 5 insertions(+), 5 deletions(-) |
11 | 12 | ||
12 | diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h | 13 | diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c |
13 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/hw/intc/gicv3_internal.h | 15 | --- a/target/arm/vfp_helper.c |
15 | +++ b/hw/intc/gicv3_internal.h | 16 | +++ b/target/arm/vfp_helper.c |
16 | @@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1) | 17 | @@ -XXX,XX +XXX,XX @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) |
17 | #define CMD_MASK 0xff | 18 | void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
18 | 19 | { \ | |
19 | /* ITS Commands */ | 20 | softfloat_to_vfp_compare(env, \ |
20 | -#define GITS_CMD_CLEAR 0x04 | 21 | - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ |
21 | -#define GITS_CMD_DISCARD 0x0F | 22 | + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ |
22 | #define GITS_CMD_INT 0x03 | 23 | } \ |
23 | -#define GITS_CMD_MAPC 0x09 | 24 | void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ |
24 | +#define GITS_CMD_CLEAR 0x04 | 25 | { \ |
25 | +#define GITS_CMD_SYNC 0x05 | 26 | softfloat_to_vfp_compare(env, \ |
26 | #define GITS_CMD_MAPD 0x08 | 27 | - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ |
27 | -#define GITS_CMD_MAPI 0x0B | 28 | + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ |
28 | +#define GITS_CMD_MAPC 0x09 | 29 | } |
29 | #define GITS_CMD_MAPTI 0x0A | 30 | -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16]) |
30 | +#define GITS_CMD_MAPI 0x0B | 31 | -DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32]) |
31 | #define GITS_CMD_INV 0x0C | 32 | -DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32]) |
32 | #define GITS_CMD_INVALL 0x0D | 33 | +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) |
33 | -#define GITS_CMD_SYNC 0x05 | 34 | +DO_VFP_cmp(s, float32, float32, FPST_A32) |
34 | +#define GITS_CMD_DISCARD 0x0F | 35 | +DO_VFP_cmp(d, float64, float64, FPST_A32) |
35 | 36 | #undef DO_VFP_cmp | |
36 | /* MAPC command fields */ | 37 | |
37 | #define ICID_LENGTH 16 | 38 | /* Integer to float and float to integer conversions */ |
38 | -- | 39 | -- |
39 | 2.25.1 | 40 | 2.34.1 |
40 | 41 | ||
41 | 42 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Connect Versal's PMC SLCR (system-level control registers) model. | 3 | Read the bit from the source, rather than from the proxy via |
4 | get_flush_inputs_to_zero. This makes it clear that it does | ||
5 | not matter which of the float_status structures is used. | ||
4 | 6 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Luc Michel <luc@lmichel.fr> | 8 | Message-id: 20250129013857.135256-34-richard.henderson@linaro.org |
7 | Message-id: 20220121161141.14389-4-francisco.iglesias@xilinx.com | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | --- | 11 | --- |
10 | include/hw/arm/xlnx-versal.h | 5 +++ | 12 | target/arm/tcg/vec_helper.c | 12 ++++++------ |
11 | hw/arm/xlnx-versal.c | 71 +++++++++++++++++++++++++++++++++++- | 13 | 1 file changed, 6 insertions(+), 6 deletions(-) |
12 | 2 files changed, 75 insertions(+), 1 deletion(-) | ||
13 | 14 | ||
14 | diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/include/hw/arm/xlnx-versal.h | 17 | --- a/target/arm/tcg/vec_helper.c |
17 | +++ b/include/hw/arm/xlnx-versal.h | 18 | +++ b/target/arm/tcg/vec_helper.c |
18 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, |
19 | #include "hw/misc/xlnx-versal-xramc.h" | 20 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
20 | #include "hw/nvram/xlnx-bbram.h" | 21 | |
21 | #include "hw/nvram/xlnx-versal-efuse.h" | 22 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
22 | +#include "hw/misc/xlnx-versal-pmc-iou-slcr.h" | 23 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
23 | 24 | + env->vfp.fpcr & FPCR_FZ16); | |
24 | #define TYPE_XLNX_VERSAL "xlnx-versal" | ||
25 | OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) | ||
26 | @@ -XXX,XX +XXX,XX @@ struct Versal { | ||
27 | struct { | ||
28 | struct { | ||
29 | SDHCIState sd[XLNX_VERSAL_NR_SDS]; | ||
30 | + XlnxVersalPmcIouSlcr slcr; | ||
31 | } iou; | ||
32 | |||
33 | XlnxZynqMPRTC rtc; | ||
34 | @@ -XXX,XX +XXX,XX @@ struct Versal { | ||
35 | #define MM_FPD_FPD_APU 0xfd5c0000 | ||
36 | #define MM_FPD_FPD_APU_SIZE 0x100 | ||
37 | |||
38 | +#define MM_PMC_PMC_IOU_SLCR 0xf1060000 | ||
39 | +#define MM_PMC_PMC_IOU_SLCR_SIZE 0x10000 | ||
40 | + | ||
41 | #define MM_PMC_SD0 0xf1040000U | ||
42 | #define MM_PMC_SD0_SIZE 0x10000 | ||
43 | #define MM_PMC_BBRAM_CTRL 0xf11f0000 | ||
44 | diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/hw/arm/xlnx-versal.c | ||
47 | +++ b/hw/arm/xlnx-versal.c | ||
48 | @@ -XXX,XX +XXX,XX @@ | ||
49 | #include "kvm_arm.h" | ||
50 | #include "hw/misc/unimp.h" | ||
51 | #include "hw/arm/xlnx-versal.h" | ||
52 | +#include "qemu/log.h" | ||
53 | +#include "hw/sysbus.h" | ||
54 | |||
55 | #define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72") | ||
56 | #define GEM_REVISION 0x40070106 | ||
57 | |||
58 | -#define VERSAL_NUM_PMC_APB_IRQS 2 | ||
59 | +#define VERSAL_NUM_PMC_APB_IRQS 3 | ||
60 | |||
61 | static void versal_create_apu_cpus(Versal *s) | ||
62 | { | ||
63 | @@ -XXX,XX +XXX,XX @@ static void versal_create_pmc_apb_irq_orgate(Versal *s, qemu_irq *pic) | ||
64 | * models: | ||
65 | * - RTC | ||
66 | * - BBRAM | ||
67 | + * - PMC SLCR | ||
68 | */ | ||
69 | object_initialize_child(OBJECT(s), "pmc-apb-irq-orgate", | ||
70 | &s->pmc.apb_irq_orgate, TYPE_OR_IRQ); | ||
71 | @@ -XXX,XX +XXX,XX @@ static void versal_create_efuse(Versal *s, qemu_irq *pic) | ||
72 | sysbus_connect_irq(SYS_BUS_DEVICE(ctrl), 0, pic[VERSAL_EFUSE_IRQ]); | ||
73 | } | 25 | } |
74 | 26 | ||
75 | +static void versal_create_pmc_iou_slcr(Versal *s, qemu_irq *pic) | 27 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
76 | +{ | 28 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, |
77 | + SysBusDevice *sbd; | 29 | } |
78 | + | 30 | } |
79 | + object_initialize_child(OBJECT(s), "versal-pmc-iou-slcr", &s->pmc.iou.slcr, | 31 | do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
80 | + TYPE_XILINX_VERSAL_PMC_IOU_SLCR); | 32 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
81 | + | 33 | + env->vfp.fpcr & FPCR_FZ16); |
82 | + sbd = SYS_BUS_DEVICE(&s->pmc.iou.slcr); | ||
83 | + sysbus_realize(sbd, &error_fatal); | ||
84 | + | ||
85 | + memory_region_add_subregion(&s->mr_ps, MM_PMC_PMC_IOU_SLCR, | ||
86 | + sysbus_mmio_get_region(sbd, 0)); | ||
87 | + | ||
88 | + sysbus_connect_irq(sbd, 0, | ||
89 | + qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 2)); | ||
90 | +} | ||
91 | + | ||
92 | /* This takes the board allocated linear DDR memory and creates aliases | ||
93 | * for each split DDR range/aperture on the Versal address map. | ||
94 | */ | ||
95 | @@ -XXX,XX +XXX,XX @@ static void versal_unimp_area(Versal *s, const char *name, | ||
96 | memory_region_add_subregion(mr, base, mr_dev); | ||
97 | } | 34 | } |
98 | 35 | ||
99 | +static void versal_unimp_sd_emmc_sel(void *opaque, int n, int level) | 36 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
100 | +{ | 37 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, |
101 | + qemu_log_mask(LOG_UNIMP, | 38 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
102 | + "Selecting between enabling SD mode or eMMC mode on " | 39 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); |
103 | + "controller %d is not yet implemented\n", n); | 40 | float_status *status = &env->vfp.fp_status[FPST_A64]; |
104 | +} | 41 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); |
105 | + | 42 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
106 | +static void versal_unimp_qspi_ospi_mux_sel(void *opaque, int n, int level) | 43 | int negx = 0, negf = 0; |
107 | +{ | 44 | |
108 | + qemu_log_mask(LOG_UNIMP, | 45 | if (is_s) { |
109 | + "Selecting between enabling the QSPI or OSPI linear address " | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
110 | + "region is not yet implemented\n"); | 47 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
111 | +} | 48 | |
112 | + | 49 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, |
113 | +static void versal_unimp_irq_parity_imr(void *opaque, int n, int level) | 50 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16])); |
114 | +{ | 51 | + env->vfp.fpcr & FPCR_FZ16); |
115 | + qemu_log_mask(LOG_UNIMP, | ||
116 | + "PMC SLCR parity interrupt behaviour " | ||
117 | + "is not yet implemented\n"); | ||
118 | +} | ||
119 | + | ||
120 | static void versal_unimp(Versal *s) | ||
121 | { | ||
122 | + qemu_irq gpio_in; | ||
123 | + | ||
124 | versal_unimp_area(s, "psm", &s->mr_ps, | ||
125 | MM_PSM_START, MM_PSM_END - MM_PSM_START); | ||
126 | versal_unimp_area(s, "crl", &s->mr_ps, | ||
127 | @@ -XXX,XX +XXX,XX @@ static void versal_unimp(Versal *s) | ||
128 | MM_IOU_SCNTR, MM_IOU_SCNTR_SIZE); | ||
129 | versal_unimp_area(s, "iou-scntr-seucre", &s->mr_ps, | ||
130 | MM_IOU_SCNTRS, MM_IOU_SCNTRS_SIZE); | ||
131 | + | ||
132 | + qdev_init_gpio_in_named(DEVICE(s), versal_unimp_sd_emmc_sel, | ||
133 | + "sd-emmc-sel-dummy", 2); | ||
134 | + qdev_init_gpio_in_named(DEVICE(s), versal_unimp_qspi_ospi_mux_sel, | ||
135 | + "qspi-ospi-mux-sel-dummy", 1); | ||
136 | + qdev_init_gpio_in_named(DEVICE(s), versal_unimp_irq_parity_imr, | ||
137 | + "irq-parity-imr-dummy", 1); | ||
138 | + | ||
139 | + gpio_in = qdev_get_gpio_in_named(DEVICE(s), "sd-emmc-sel-dummy", 0); | ||
140 | + qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "sd-emmc-sel", 0, | ||
141 | + gpio_in); | ||
142 | + | ||
143 | + gpio_in = qdev_get_gpio_in_named(DEVICE(s), "sd-emmc-sel-dummy", 1); | ||
144 | + qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "sd-emmc-sel", 1, | ||
145 | + gpio_in); | ||
146 | + | ||
147 | + gpio_in = qdev_get_gpio_in_named(DEVICE(s), "qspi-ospi-mux-sel-dummy", 0); | ||
148 | + qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), | ||
149 | + "qspi-ospi-mux-sel", 0, | ||
150 | + gpio_in); | ||
151 | + | ||
152 | + gpio_in = qdev_get_gpio_in_named(DEVICE(s), "irq-parity-imr-dummy", 0); | ||
153 | + qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), | ||
154 | + SYSBUS_DEVICE_GPIO_IRQ, 0, | ||
155 | + gpio_in); | ||
156 | } | 52 | } |
157 | 53 | ||
158 | static void versal_realize(DeviceState *dev, Error **errp) | 54 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
159 | @@ -XXX,XX +XXX,XX @@ static void versal_realize(DeviceState *dev, Error **errp) | 55 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
160 | versal_create_xrams(s, pic); | 56 | } |
161 | versal_create_bbram(s, pic); | 57 | } |
162 | versal_create_efuse(s, pic); | 58 | do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
163 | + versal_create_pmc_iou_slcr(s, pic); | 59 | - get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16])); |
164 | versal_map_ddr(s); | 60 | + env->vfp.fpcr & FPCR_FZ16); |
165 | versal_unimp(s); | 61 | } |
166 | 62 | ||
63 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
64 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, | ||
65 | intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); | ||
66 | intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); | ||
67 | float_status *status = &env->vfp.fp_status[FPST_A64]; | ||
68 | - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]); | ||
69 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; | ||
70 | int negx = 0, negf = 0; | ||
71 | |||
72 | if (is_s) { | ||
167 | -- | 73 | -- |
168 | 2.25.1 | 74 | 2.34.1 |
169 | |||
170 | diff view generated by jsdifflib |
1 | From: Francisco Iglesias <francisco.iglesias@xilinx.com> | 1 | From: Richard Henderson <richard.henderson@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add an orgate and 'or' the interrupts from the BBRAM and RTC models. | 3 | Sink common code from the callers into do_fmlal |
4 | and do_fmlal_idx. Reorder the arguments to minimize | ||
5 | the re-sorting from the caller's arguments. | ||
4 | 6 | ||
5 | Signed-off-by: Francisco Iglesias <francisco.iglesias@xilinx.com> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-id: 20250129013857.135256-35-richard.henderson@linaro.org | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Luc Michel <luc@lmichel.fr> | ||
8 | Message-id: 20220121161141.14389-3-francisco.iglesias@xilinx.com | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 11 | --- |
11 | include/hw/arm/xlnx-versal.h | 5 +++-- | 12 | target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------ |
12 | hw/arm/xlnx-versal-virt.c | 2 +- | 13 | 1 file changed, 16 insertions(+), 12 deletions(-) |
13 | hw/arm/xlnx-versal.c | 28 ++++++++++++++++++++++++++-- | ||
14 | 3 files changed, 30 insertions(+), 5 deletions(-) | ||
15 | 14 | ||
16 | diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h | 15 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c |
17 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/hw/arm/xlnx-versal.h | 17 | --- a/target/arm/tcg/vec_helper.c |
19 | +++ b/include/hw/arm/xlnx-versal.h | 18 | +++ b/target/arm/tcg/vec_helper.c |
20 | @@ -XXX,XX +XXX,XX @@ struct Versal { | 19 | @@ -XXX,XX +XXX,XX @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) |
21 | XlnxEFuse efuse; | 20 | * as there is not yet SVE versions that might use blocking. |
22 | XlnxVersalEFuseCtrl efuse_ctrl; | 21 | */ |
23 | XlnxVersalEFuseCache efuse_cache; | 22 | |
24 | + | 23 | -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, |
25 | + qemu_or_irq apb_irq_orgate; | 24 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
26 | } pmc; | 25 | +static void do_fmlal(float32 *d, void *vn, void *vm, |
27 | 26 | + CPUARMState *env, uint32_t desc, | |
28 | struct { | 27 | + ARMFPStatusFlavour fpst_idx, |
29 | @@ -XXX,XX +XXX,XX @@ struct Versal { | 28 | + uint64_t negx, int negf) |
30 | #define VERSAL_GEM1_WAKE_IRQ_0 59 | ||
31 | #define VERSAL_ADMA_IRQ_0 60 | ||
32 | #define VERSAL_XRAM_IRQ_0 79 | ||
33 | -#define VERSAL_BBRAM_APB_IRQ_0 121 | ||
34 | -#define VERSAL_RTC_APB_ERR_IRQ 121 | ||
35 | +#define VERSAL_PMC_APB_IRQ 121 | ||
36 | #define VERSAL_SD0_IRQ_0 126 | ||
37 | #define VERSAL_EFUSE_IRQ 139 | ||
38 | #define VERSAL_RTC_ALARM_IRQ 142 | ||
39 | diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/hw/arm/xlnx-versal-virt.c | ||
42 | +++ b/hw/arm/xlnx-versal-virt.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static void fdt_add_bbram_node(VersalVirt *s) | ||
44 | qemu_fdt_add_subnode(s->fdt, name); | ||
45 | |||
46 | qemu_fdt_setprop_cells(s->fdt, name, "interrupts", | ||
47 | - GIC_FDT_IRQ_TYPE_SPI, VERSAL_BBRAM_APB_IRQ_0, | ||
48 | + GIC_FDT_IRQ_TYPE_SPI, VERSAL_PMC_APB_IRQ, | ||
49 | GIC_FDT_IRQ_FLAGS_LEVEL_HI); | ||
50 | qemu_fdt_setprop(s->fdt, name, "interrupt-names", | ||
51 | interrupt_names, sizeof(interrupt_names)); | ||
52 | diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/hw/arm/xlnx-versal.c | ||
55 | +++ b/hw/arm/xlnx-versal.c | ||
56 | @@ -XXX,XX +XXX,XX @@ | ||
57 | #define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72") | ||
58 | #define GEM_REVISION 0x40070106 | ||
59 | |||
60 | +#define VERSAL_NUM_PMC_APB_IRQS 2 | ||
61 | + | ||
62 | static void versal_create_apu_cpus(Versal *s) | ||
63 | { | 29 | { |
64 | int i; | 30 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; |
65 | @@ -XXX,XX +XXX,XX @@ static void versal_create_sds(Versal *s, qemu_irq *pic) | 31 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
32 | intptr_t i, oprsz = simd_oprsz(desc); | ||
33 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); | ||
34 | int is_q = oprsz == 16; | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, | ||
36 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); | ||
37 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; | ||
38 | |||
39 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
40 | - env->vfp.fpcr & FPCR_FZ16); | ||
41 | + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
42 | } | ||
43 | |||
44 | void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, | ||
46 | negx = 0x8000800080008000ull; | ||
47 | } | ||
48 | } | ||
49 | - do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, | ||
50 | - env->vfp.fpcr & FPCR_FZ16); | ||
51 | + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); | ||
52 | } | ||
53 | |||
54 | void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, | ||
66 | } | 56 | } |
67 | } | 57 | } |
68 | 58 | ||
69 | +static void versal_create_pmc_apb_irq_orgate(Versal *s, qemu_irq *pic) | 59 | -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, |
70 | +{ | 60 | - uint64_t negx, int negf, uint32_t desc, bool fz16) |
71 | + DeviceState *orgate; | 61 | +static void do_fmlal_idx(float32 *d, void *vn, void *vm, |
72 | + | 62 | + CPUARMState *env, uint32_t desc, |
73 | + /* | 63 | + ARMFPStatusFlavour fpst_idx, |
74 | + * The VERSAL_PMC_APB_IRQ is an 'or' of the interrupts from the following | 64 | + uint64_t negx, int negf) |
75 | + * models: | ||
76 | + * - RTC | ||
77 | + * - BBRAM | ||
78 | + */ | ||
79 | + object_initialize_child(OBJECT(s), "pmc-apb-irq-orgate", | ||
80 | + &s->pmc.apb_irq_orgate, TYPE_OR_IRQ); | ||
81 | + orgate = DEVICE(&s->pmc.apb_irq_orgate); | ||
82 | + object_property_set_int(OBJECT(orgate), | ||
83 | + "num-lines", VERSAL_NUM_PMC_APB_IRQS, &error_fatal); | ||
84 | + qdev_realize(orgate, NULL, &error_fatal); | ||
85 | + qdev_connect_gpio_out(orgate, 0, pic[VERSAL_PMC_APB_IRQ]); | ||
86 | +} | ||
87 | + | ||
88 | static void versal_create_rtc(Versal *s, qemu_irq *pic) | ||
89 | { | 65 | { |
90 | SysBusDevice *sbd; | 66 | + float_status *fpst = &env->vfp.fp_status[fpst_idx]; |
91 | @@ -XXX,XX +XXX,XX @@ static void versal_create_rtc(Versal *s, qemu_irq *pic) | 67 | + bool fz16 = env->vfp.fpcr & FPCR_FZ16; |
92 | * TODO: Connect the ALARM and SECONDS interrupts once our RTC model | 68 | intptr_t i, oprsz = simd_oprsz(desc); |
93 | * supports them. | 69 | int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); |
94 | */ | 70 | int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); |
95 | - sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]); | 71 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, |
96 | + sysbus_connect_irq(sbd, 1, | 72 | bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); |
97 | + qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 0)); | 73 | uint64_t negx = is_s ? 0x8000800080008000ull : 0; |
74 | |||
75 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc, | ||
76 | - env->vfp.fpcr & FPCR_FZ16); | ||
77 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); | ||
98 | } | 78 | } |
99 | 79 | ||
100 | static void versal_create_xrams(Versal *s, qemu_irq *pic) | 80 | void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
101 | @@ -XXX,XX +XXX,XX @@ static void versal_create_bbram(Versal *s, qemu_irq *pic) | 81 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, |
102 | sysbus_realize(sbd, &error_fatal); | 82 | negx = 0x8000800080008000ull; |
103 | memory_region_add_subregion(&s->mr_ps, MM_PMC_BBRAM_CTRL, | 83 | } |
104 | sysbus_mmio_get_region(sbd, 0)); | 84 | } |
105 | - sysbus_connect_irq(sbd, 0, pic[VERSAL_BBRAM_APB_IRQ_0]); | 85 | - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc, |
106 | + sysbus_connect_irq(sbd, 0, | 86 | - env->vfp.fpcr & FPCR_FZ16); |
107 | + qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 1)); | 87 | + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); |
108 | } | 88 | } |
109 | 89 | ||
110 | static void versal_realize_efuse_part(Versal *s, Object *dev, hwaddr base) | 90 | void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, |
111 | @@ -XXX,XX +XXX,XX @@ static void versal_realize(DeviceState *dev, Error **errp) | ||
112 | versal_create_gems(s, pic); | ||
113 | versal_create_admas(s, pic); | ||
114 | versal_create_sds(s, pic); | ||
115 | + versal_create_pmc_apb_irq_orgate(s, pic); | ||
116 | versal_create_rtc(s, pic); | ||
117 | versal_create_xrams(s, pic); | ||
118 | versal_create_bbram(s, pic); | ||
119 | -- | 91 | -- |
120 | 2.25.1 | 92 | 2.34.1 |
121 | |||
122 | diff view generated by jsdifflib |