1
I don't have anything else queued up at the moment, so this is just
1
First arm pullreq of the cycle; this is mostly my softfloat NaN
2
Richard's SME patches.
2
handling series. (Lots more in my to-review queue, but I don't
3
like pullreqs growing too close to a hundred patches at a time :-))
3
4
5
thanks
4
-- PMM
6
-- PMM
5
7
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
8
The following changes since commit 97f2796a3736ed37a1b85dc1c76a6c45b829dd17:
7
9
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
10
Open 10.0 development tree (2024-12-10 17:41:17 +0000)
9
11
10
are available in the Git repository at:
12
are available in the Git repository at:
11
13
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
14
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20241211
13
15
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
16
for you to fetch changes up to 1abe28d519239eea5cf9620bb13149423e5665f8:
15
17
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
18
MAINTAINERS: Add correct email address for Vikram Garhwal (2024-12-11 15:31:09 +0000)
17
19
18
----------------------------------------------------------------
20
----------------------------------------------------------------
19
target-arm:
21
target-arm queue:
20
* Implement SME emulation, for both system and linux-user
22
* hw/net/lan9118: Extract PHY model, reuse with imx_fec, fix bugs
23
* fpu: Make muladd NaN handling runtime-selected, not compile-time
24
* fpu: Make default NaN pattern runtime-selected, not compile-time
25
* fpu: Minor NaN-related cleanups
26
* MAINTAINERS: email address updates
21
27
22
----------------------------------------------------------------
28
----------------------------------------------------------------
23
Richard Henderson (45):
29
Bernhard Beschow (5):
24
target/arm: Handle SME in aarch64_cpu_dump_state
30
hw/net/lan9118: Extract lan9118_phy
25
target/arm: Add infrastructure for disas_sme
31
hw/net/lan9118_phy: Reuse in imx_fec and consolidate implementations
26
target/arm: Trap non-streaming usage when Streaming SVE is active
32
hw/net/lan9118_phy: Fix off-by-one error in MII_ANLPAR register
27
target/arm: Mark ADR as non-streaming
33
hw/net/lan9118_phy: Reuse MII constants
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
34
hw/net/lan9118_phy: Add missing 100 mbps full duplex advertisement
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
69
35
70
docs/system/arm/emulation.rst | 4 +
36
Leif Lindholm (1):
71
linux-user/aarch64/target_cpu.h | 5 +-
37
MAINTAINERS: update email address for Leif Lindholm
72
linux-user/aarch64/target_prctl.h | 62 +-
38
73
target/arm/cpu.h | 7 +
39
Peter Maydell (54):
74
target/arm/helper-sme.h | 126 ++++
40
fpu: handle raising Invalid for infzero in pick_nan_muladd
75
target/arm/helper-sve.h | 4 +
41
fpu: Check for default_nan_mode before calling pickNaNMulAdd
76
target/arm/helper.h | 18 +
42
softfloat: Allow runtime choice of inf * 0 + NaN result
77
target/arm/translate-a64.h | 45 ++
43
tests/fp: Explicitly set inf-zero-nan rule
78
target/arm/translate.h | 16 +
44
target/arm: Set FloatInfZeroNaNRule explicitly
79
target/arm/sme-fa64.decode | 60 ++
45
target/s390: Set FloatInfZeroNaNRule explicitly
80
target/arm/sme.decode | 88 +++
46
target/ppc: Set FloatInfZeroNaNRule explicitly
81
target/arm/sve.decode | 41 +-
47
target/mips: Set FloatInfZeroNaNRule explicitly
82
linux-user/aarch64/cpu_loop.c | 9 +
48
target/sparc: Set FloatInfZeroNaNRule explicitly
83
linux-user/aarch64/signal.c | 243 ++++++--
49
target/xtensa: Set FloatInfZeroNaNRule explicitly
84
linux-user/elfload.c | 20 +
50
target/x86: Set FloatInfZeroNaNRule explicitly
85
linux-user/syscall.c | 28 +-
51
target/loongarch: Set FloatInfZeroNaNRule explicitly
86
target/arm/cpu.c | 35 +-
52
target/hppa: Set FloatInfZeroNaNRule explicitly
87
target/arm/cpu64.c | 11 +
53
softfloat: Pass have_snan to pickNaNMulAdd
88
target/arm/helper.c | 56 +-
54
softfloat: Allow runtime choice of NaN propagation for muladd
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
55
tests/fp: Explicitly set 3-NaN propagation rule
90
target/arm/sve_helper.c | 28 +
56
target/arm: Set Float3NaNPropRule explicitly
91
target/arm/translate-a64.c | 103 +++-
57
target/loongarch: Set Float3NaNPropRule explicitly
92
target/arm/translate-sme.c | 373 ++++++++++++
58
target/ppc: Set Float3NaNPropRule explicitly
93
target/arm/translate-sve.c | 393 ++++++++++---
59
target/s390x: Set Float3NaNPropRule explicitly
94
target/arm/translate-vfp.c | 12 +
60
target/sparc: Set Float3NaNPropRule explicitly
95
target/arm/translate.c | 2 +
61
target/mips: Set Float3NaNPropRule explicitly
96
target/arm/vec_helper.c | 24 +
62
target/xtensa: Set Float3NaNPropRule explicitly
97
target/arm/meson.build | 3 +
63
target/i386: Set Float3NaNPropRule explicitly
98
28 files changed, 2821 insertions(+), 135 deletions(-)
64
target/hppa: Set Float3NaNPropRule explicitly
99
create mode 100644 target/arm/sme-fa64.decode
65
fpu: Remove use_first_nan field from float_status
100
create mode 100644 target/arm/sme.decode
66
target/m68k: Don't pass NULL float_status to floatx80_default_nan()
101
create mode 100644 target/arm/translate-sme.c
67
softfloat: Create floatx80 default NaN from parts64_default_nan
68
target/loongarch: Use normal float_status in fclass_s and fclass_d helpers
69
target/m68k: In frem helper, initialize local float_status from env->fp_status
70
target/m68k: Init local float_status from env fp_status in gdb get/set reg
71
target/sparc: Initialize local scratch float_status from env->fp_status
72
target/ppc: Use env->fp_status in helper_compute_fprf functions
73
fpu: Allow runtime choice of default NaN value
74
tests/fp: Set default NaN pattern explicitly
75
target/microblaze: Set default NaN pattern explicitly
76
target/i386: Set default NaN pattern explicitly
77
target/hppa: Set default NaN pattern explicitly
78
target/alpha: Set default NaN pattern explicitly
79
target/arm: Set default NaN pattern explicitly
80
target/loongarch: Set default NaN pattern explicitly
81
target/m68k: Set default NaN pattern explicitly
82
target/mips: Set default NaN pattern explicitly
83
target/openrisc: Set default NaN pattern explicitly
84
target/ppc: Set default NaN pattern explicitly
85
target/sh4: Set default NaN pattern explicitly
86
target/rx: Set default NaN pattern explicitly
87
target/s390x: Set default NaN pattern explicitly
88
target/sparc: Set default NaN pattern explicitly
89
target/xtensa: Set default NaN pattern explicitly
90
target/hexagon: Set default NaN pattern explicitly
91
target/riscv: Set default NaN pattern explicitly
92
target/tricore: Set default NaN pattern explicitly
93
fpu: Remove default handling for dnan_pattern
94
95
Richard Henderson (11):
96
target/arm: Copy entire float_status in is_ebf
97
softfloat: Inline pickNaNMulAdd
98
softfloat: Use goto for default nan case in pick_nan_muladd
99
softfloat: Remove which from parts_pick_nan_muladd
100
softfloat: Pad array size in pick_nan_muladd
101
softfloat: Move propagateFloatx80NaN to softfloat.c
102
softfloat: Use parts_pick_nan in propagateFloatx80NaN
103
softfloat: Inline pickNaN
104
softfloat: Share code between parts_pick_nan cases
105
softfloat: Sink frac_cmp in parts_pick_nan until needed
106
softfloat: Replace WHICH with RET in parts_pick_nan
107
108
Vikram Garhwal (1):
109
MAINTAINERS: Add correct email address for Vikram Garhwal
110
111
MAINTAINERS | 4 +-
112
include/fpu/softfloat-helpers.h | 38 +++-
113
include/fpu/softfloat-types.h | 89 +++++++-
114
include/hw/net/imx_fec.h | 9 +-
115
include/hw/net/lan9118_phy.h | 37 ++++
116
include/hw/net/mii.h | 6 +
117
target/mips/fpu_helper.h | 20 ++
118
target/sparc/helper.h | 4 +-
119
fpu/softfloat.c | 19 ++
120
hw/net/imx_fec.c | 146 ++------------
121
hw/net/lan9118.c | 137 ++-----------
122
hw/net/lan9118_phy.c | 222 ++++++++++++++++++++
123
linux-user/arm/nwfpe/fpa11.c | 5 +
124
target/alpha/cpu.c | 2 +
125
target/arm/cpu.c | 10 +
126
target/arm/tcg/vec_helper.c | 20 +-
127
target/hexagon/cpu.c | 2 +
128
target/hppa/fpu_helper.c | 12 ++
129
target/i386/tcg/fpu_helper.c | 12 ++
130
target/loongarch/tcg/fpu_helper.c | 14 +-
131
target/m68k/cpu.c | 14 +-
132
target/m68k/fpu_helper.c | 6 +-
133
target/m68k/helper.c | 6 +-
134
target/microblaze/cpu.c | 2 +
135
target/mips/msa.c | 10 +
136
target/openrisc/cpu.c | 2 +
137
target/ppc/cpu_init.c | 19 ++
138
target/ppc/fpu_helper.c | 3 +-
139
target/riscv/cpu.c | 2 +
140
target/rx/cpu.c | 2 +
141
target/s390x/cpu.c | 5 +
142
target/sh4/cpu.c | 2 +
143
target/sparc/cpu.c | 6 +
144
target/sparc/fop_helper.c | 8 +-
145
target/sparc/translate.c | 4 +-
146
target/tricore/helper.c | 2 +
147
target/xtensa/cpu.c | 4 +
148
target/xtensa/fpu_helper.c | 3 +-
149
tests/fp/fp-bench.c | 7 +
150
tests/fp/fp-test-log2.c | 1 +
151
tests/fp/fp-test.c | 7 +
152
fpu/softfloat-parts.c.inc | 152 +++++++++++---
153
fpu/softfloat-specialize.c.inc | 412 ++------------------------------------
154
.mailmap | 5 +-
155
hw/net/Kconfig | 5 +
156
hw/net/meson.build | 1 +
157
hw/net/trace-events | 10 +-
158
47 files changed, 778 insertions(+), 730 deletions(-)
159
create mode 100644 include/hw/net/lan9118_phy.h
160
create mode 100644 hw/net/lan9118_phy.c
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
This includes the build rules for the decoder, and the
3
A very similar implementation of the same device exists in imx_fec. Prepare for
4
new file for translation, but excludes any instructions.
4
a common implementation by extracting a device model into its own files.
5
5
6
Some migration state has been moved into the new device model which breaks
7
migration compatibility for the following machines:
8
* smdkc210
9
* realview-*
10
* vexpress-*
11
* kzm
12
* mps2-*
13
14
While breaking migration ABI, fix the size of the MII registers to be 16 bit,
15
as defined by IEEE 802.3u.
16
17
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
18
Tested-by: Guenter Roeck <linux@roeck-us.net>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
Message-id: 20241102125724.532843-2-shentey@gmail.com
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
22
---
11
target/arm/translate-a64.h | 1 +
23
include/hw/net/lan9118_phy.h | 37 ++++++++
12
target/arm/sme.decode | 20 ++++++++++++++++++++
24
hw/net/lan9118.c | 137 +++++-----------------------
13
target/arm/translate-a64.c | 7 ++++++-
25
hw/net/lan9118_phy.c | 169 +++++++++++++++++++++++++++++++++++
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
26
hw/net/Kconfig | 4 +
15
target/arm/meson.build | 2 ++
27
hw/net/meson.build | 1 +
16
5 files changed, 64 insertions(+), 1 deletion(-)
28
5 files changed, 233 insertions(+), 115 deletions(-)
17
create mode 100644 target/arm/sme.decode
29
create mode 100644 include/hw/net/lan9118_phy.h
18
create mode 100644 target/arm/translate-sme.c
30
create mode 100644 hw/net/lan9118_phy.c
19
31
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
32
diff --git a/include/hw/net/lan9118_phy.h b/include/hw/net/lan9118_phy.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/translate-a64.h
23
+++ b/target/arm/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
25
}
26
27
bool disas_sve(DisasContext *, uint32_t);
28
+bool disas_sme(DisasContext *, uint32_t);
29
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
33
new file mode 100644
33
new file mode 100644
34
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
35
--- /dev/null
35
--- /dev/null
36
+++ b/target/arm/sme.decode
36
+++ b/include/hw/net/lan9118_phy.h
37
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
38
+# AArch64 SME instruction descriptions
38
+/*
39
+#
39
+ * SMSC LAN9118 PHY emulation
40
+# Copyright (c) 2022 Linaro, Ltd
40
+ *
41
+#
41
+ * Copyright (c) 2009 CodeSourcery, LLC.
42
+# This library is free software; you can redistribute it and/or
42
+ * Written by Paul Brook
43
+# modify it under the terms of the GNU Lesser General Public
43
+ *
44
+# License as published by the Free Software Foundation; either
44
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
45
+# version 2.1 of the License, or (at your option) any later version.
45
+ * See the COPYING file in the top-level directory.
46
+#
46
+ */
47
+# This library is distributed in the hope that it will be useful,
47
+
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
48
+#ifndef HW_NET_LAN9118_PHY_H
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
49
+#define HW_NET_LAN9118_PHY_H
50
+# Lesser General Public License for more details.
50
+
51
+#
51
+#include "qom/object.h"
52
+# You should have received a copy of the GNU Lesser General Public
52
+#include "hw/sysbus.h"
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
53
+
54
+
54
+#define TYPE_LAN9118_PHY "lan9118-phy"
55
+#
55
+OBJECT_DECLARE_SIMPLE_TYPE(Lan9118PhyState, LAN9118_PHY)
56
+# This file is processed by scripts/decodetree.py
56
+
57
+#
57
+typedef struct Lan9118PhyState {
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
58
+ SysBusDevice parent_obj;
59
+
60
+ uint16_t status;
61
+ uint16_t control;
62
+ uint16_t advertise;
63
+ uint16_t ints;
64
+ uint16_t int_mask;
65
+ qemu_irq irq;
66
+ bool link_down;
67
+} Lan9118PhyState;
68
+
69
+void lan9118_phy_update_link(Lan9118PhyState *s, bool link_down);
70
+void lan9118_phy_reset(Lan9118PhyState *s);
71
+uint16_t lan9118_phy_read(Lan9118PhyState *s, int reg);
72
+void lan9118_phy_write(Lan9118PhyState *s, int reg, uint16_t val);
73
+
74
+#endif
75
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
59
index XXXXXXX..XXXXXXX 100644
76
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
77
--- a/hw/net/lan9118.c
61
+++ b/target/arm/translate-a64.c
78
+++ b/hw/net/lan9118.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
79
@@ -XXX,XX +XXX,XX @@
80
#include "net/net.h"
81
#include "net/eth.h"
82
#include "hw/irq.h"
83
+#include "hw/net/lan9118_phy.h"
84
#include "hw/net/lan9118.h"
85
#include "hw/ptimer.h"
86
#include "hw/qdev-properties.h"
87
@@ -XXX,XX +XXX,XX @@ do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0)
88
#define MAC_CR_RXEN 0x00000004
89
#define MAC_CR_RESERVED 0x7f404213
90
91
-#define PHY_INT_ENERGYON 0x80
92
-#define PHY_INT_AUTONEG_COMPLETE 0x40
93
-#define PHY_INT_FAULT 0x20
94
-#define PHY_INT_DOWN 0x10
95
-#define PHY_INT_AUTONEG_LP 0x08
96
-#define PHY_INT_PARFAULT 0x04
97
-#define PHY_INT_AUTONEG_PAGE 0x02
98
-
99
#define GPT_TIMER_EN 0x20000000
100
101
/*
102
@@ -XXX,XX +XXX,XX @@ struct lan9118_state {
103
uint32_t mac_mii_data;
104
uint32_t mac_flow;
105
106
- uint32_t phy_status;
107
- uint32_t phy_control;
108
- uint32_t phy_advertise;
109
- uint32_t phy_int;
110
- uint32_t phy_int_mask;
111
+ Lan9118PhyState mii;
112
+ IRQState mii_irq;
113
114
int32_t eeprom_writable;
115
uint8_t eeprom[128];
116
@@ -XXX,XX +XXX,XX @@ struct lan9118_state {
117
118
static const VMStateDescription vmstate_lan9118 = {
119
.name = "lan9118",
120
- .version_id = 2,
121
- .minimum_version_id = 1,
122
+ .version_id = 3,
123
+ .minimum_version_id = 3,
124
.fields = (const VMStateField[]) {
125
VMSTATE_PTIMER(timer, lan9118_state),
126
VMSTATE_UINT32(irq_cfg, lan9118_state),
127
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_lan9118 = {
128
VMSTATE_UINT32(mac_mii_acc, lan9118_state),
129
VMSTATE_UINT32(mac_mii_data, lan9118_state),
130
VMSTATE_UINT32(mac_flow, lan9118_state),
131
- VMSTATE_UINT32(phy_status, lan9118_state),
132
- VMSTATE_UINT32(phy_control, lan9118_state),
133
- VMSTATE_UINT32(phy_advertise, lan9118_state),
134
- VMSTATE_UINT32(phy_int, lan9118_state),
135
- VMSTATE_UINT32(phy_int_mask, lan9118_state),
136
VMSTATE_INT32(eeprom_writable, lan9118_state),
137
VMSTATE_UINT8_ARRAY(eeprom, lan9118_state, 128),
138
VMSTATE_INT32(tx_fifo_size, lan9118_state),
139
@@ -XXX,XX +XXX,XX @@ static void lan9118_reload_eeprom(lan9118_state *s)
140
lan9118_mac_changed(s);
141
}
142
143
-static void phy_update_irq(lan9118_state *s)
144
+static void lan9118_update_irq(void *opaque, int n, int level)
145
{
146
- if (s->phy_int & s->phy_int_mask) {
147
+ lan9118_state *s = opaque;
148
+
149
+ if (level) {
150
s->int_sts |= PHY_INT;
151
} else {
152
s->int_sts &= ~PHY_INT;
153
@@ -XXX,XX +XXX,XX @@ static void phy_update_irq(lan9118_state *s)
154
lan9118_update(s);
155
}
156
157
-static void phy_update_link(lan9118_state *s)
158
-{
159
- /* Autonegotiation status mirrors link status. */
160
- if (qemu_get_queue(s->nic)->link_down) {
161
- s->phy_status &= ~0x0024;
162
- s->phy_int |= PHY_INT_DOWN;
163
- } else {
164
- s->phy_status |= 0x0024;
165
- s->phy_int |= PHY_INT_ENERGYON;
166
- s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
167
- }
168
- phy_update_irq(s);
169
-}
170
-
171
static void lan9118_set_link(NetClientState *nc)
172
{
173
- phy_update_link(qemu_get_nic_opaque(nc));
174
-}
175
-
176
-static void phy_reset(lan9118_state *s)
177
-{
178
- s->phy_status = 0x7809;
179
- s->phy_control = 0x3000;
180
- s->phy_advertise = 0x01e1;
181
- s->phy_int_mask = 0;
182
- s->phy_int = 0;
183
- phy_update_link(s);
184
+ lan9118_phy_update_link(&LAN9118(qemu_get_nic_opaque(nc))->mii,
185
+ nc->link_down);
186
}
187
188
static void lan9118_reset(DeviceState *d)
189
@@ -XXX,XX +XXX,XX @@ static void lan9118_reset(DeviceState *d)
190
s->read_word_n = 0;
191
s->write_word_n = 0;
192
193
- phy_reset(s);
194
-
195
s->eeprom_writable = 0;
196
lan9118_reload_eeprom(s);
197
}
198
@@ -XXX,XX +XXX,XX @@ static void do_tx_packet(lan9118_state *s)
199
uint32_t status;
200
201
/* FIXME: Honor TX disable, and allow queueing of packets. */
202
- if (s->phy_control & 0x4000) {
203
+ if (s->mii.control & 0x4000) {
204
/* This assumes the receive routine doesn't touch the VLANClient. */
205
qemu_receive_packet(qemu_get_queue(s->nic), s->txp->data, s->txp->len);
206
} else {
207
@@ -XXX,XX +XXX,XX @@ static void tx_fifo_push(lan9118_state *s, uint32_t val)
63
}
208
}
64
209
}
65
switch (extract32(insn, 25, 4)) {
210
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
211
-static uint32_t do_phy_read(lan9118_state *s, int reg)
67
+ case 0x0:
212
-{
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
213
- uint32_t val;
69
+ unallocated_encoding(s);
214
-
70
+ }
215
- switch (reg) {
71
+ break;
216
- case 0: /* Basic Control */
72
+ case 0x1: case 0x3: /* UNALLOCATED */
217
- return s->phy_control;
73
unallocated_encoding(s);
218
- case 1: /* Basic Status */
219
- return s->phy_status;
220
- case 2: /* ID1 */
221
- return 0x0007;
222
- case 3: /* ID2 */
223
- return 0xc0d1;
224
- case 4: /* Auto-neg advertisement */
225
- return s->phy_advertise;
226
- case 5: /* Auto-neg Link Partner Ability */
227
- return 0x0f71;
228
- case 6: /* Auto-neg Expansion */
229
- return 1;
230
- /* TODO 17, 18, 27, 29, 30, 31 */
231
- case 29: /* Interrupt source. */
232
- val = s->phy_int;
233
- s->phy_int = 0;
234
- phy_update_irq(s);
235
- return val;
236
- case 30: /* Interrupt mask */
237
- return s->phy_int_mask;
238
- default:
239
- qemu_log_mask(LOG_GUEST_ERROR,
240
- "do_phy_read: PHY read reg %d\n", reg);
241
- return 0;
242
- }
243
-}
244
-
245
-static void do_phy_write(lan9118_state *s, int reg, uint32_t val)
246
-{
247
- switch (reg) {
248
- case 0: /* Basic Control */
249
- if (val & 0x8000) {
250
- phy_reset(s);
251
- break;
252
- }
253
- s->phy_control = val & 0x7980;
254
- /* Complete autonegotiation immediately. */
255
- if (val & 0x1000) {
256
- s->phy_status |= 0x0020;
257
- }
258
- break;
259
- case 4: /* Auto-neg advertisement */
260
- s->phy_advertise = (val & 0x2d7f) | 0x80;
261
- break;
262
- /* TODO 17, 18, 27, 31 */
263
- case 30: /* Interrupt mask */
264
- s->phy_int_mask = val & 0xff;
265
- phy_update_irq(s);
266
- break;
267
- default:
268
- qemu_log_mask(LOG_GUEST_ERROR,
269
- "do_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
270
- }
271
-}
272
-
273
static void do_mac_write(lan9118_state *s, int reg, uint32_t val)
274
{
275
switch (reg) {
276
@@ -XXX,XX +XXX,XX @@ static void do_mac_write(lan9118_state *s, int reg, uint32_t val)
277
if (val & 2) {
278
DPRINTF("PHY write %d = 0x%04x\n",
279
(val >> 6) & 0x1f, s->mac_mii_data);
280
- do_phy_write(s, (val >> 6) & 0x1f, s->mac_mii_data);
281
+ lan9118_phy_write(&s->mii, (val >> 6) & 0x1f, s->mac_mii_data);
282
} else {
283
- s->mac_mii_data = do_phy_read(s, (val >> 6) & 0x1f);
284
+ s->mac_mii_data = lan9118_phy_read(&s->mii, (val >> 6) & 0x1f);
285
DPRINTF("PHY read %d = 0x%04x\n",
286
(val >> 6) & 0x1f, s->mac_mii_data);
287
}
288
@@ -XXX,XX +XXX,XX @@ static void lan9118_writel(void *opaque, hwaddr offset,
74
break;
289
break;
75
case 0x2:
290
case CSR_PMT_CTRL:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
291
if (val & 0x400) {
292
- phy_reset(s);
293
+ lan9118_phy_reset(&s->mii);
294
}
295
s->pmt_ctrl &= ~0x34e;
296
s->pmt_ctrl |= (val & 0x34e);
297
@@ -XXX,XX +XXX,XX @@ static void lan9118_realize(DeviceState *dev, Error **errp)
298
const MemoryRegionOps *mem_ops =
299
s->mode_16bit ? &lan9118_16bit_mem_ops : &lan9118_mem_ops;
300
301
+ qemu_init_irq(&s->mii_irq, lan9118_update_irq, s, 0);
302
+ object_initialize_child(OBJECT(s), "mii", &s->mii, TYPE_LAN9118_PHY);
303
+ if (!sysbus_realize_and_unref(SYS_BUS_DEVICE(&s->mii), errp)) {
304
+ return;
305
+ }
306
+ qdev_connect_gpio_out(DEVICE(&s->mii), 0, &s->mii_irq);
307
+
308
memory_region_init_io(&s->mmio, OBJECT(dev), mem_ops, s,
309
"lan9118-mmio", 0x100);
310
sysbus_init_mmio(sbd, &s->mmio);
311
diff --git a/hw/net/lan9118_phy.c b/hw/net/lan9118_phy.c
77
new file mode 100644
312
new file mode 100644
78
index XXXXXXX..XXXXXXX
313
index XXXXXXX..XXXXXXX
79
--- /dev/null
314
--- /dev/null
80
+++ b/target/arm/translate-sme.c
315
+++ b/hw/net/lan9118_phy.c
81
@@ -XXX,XX +XXX,XX @@
316
@@ -XXX,XX +XXX,XX @@
82
+/*
317
+/*
83
+ * AArch64 SME translation
318
+ * SMSC LAN9118 PHY emulation
84
+ *
319
+ *
85
+ * Copyright (c) 2022 Linaro, Ltd
320
+ * Copyright (c) 2009 CodeSourcery, LLC.
321
+ * Written by Paul Brook
86
+ *
322
+ *
87
+ * This library is free software; you can redistribute it and/or
323
+ * This code is licensed under the GNU GPL v2
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
91
+ *
324
+ *
92
+ * This library is distributed in the hope that it will be useful,
325
+ * Contributions after 2012-01-13 are licensed under the terms of the
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
326
+ * GNU GPL, version 2 or (at your option) any later version.
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
99
+ */
327
+ */
100
+
328
+
101
+#include "qemu/osdep.h"
329
+#include "qemu/osdep.h"
102
+#include "cpu.h"
330
+#include "hw/net/lan9118_phy.h"
103
+#include "tcg/tcg-op.h"
331
+#include "hw/irq.h"
104
+#include "tcg/tcg-op-gvec.h"
332
+#include "hw/resettable.h"
105
+#include "tcg/tcg-gvec-desc.h"
333
+#include "migration/vmstate.h"
106
+#include "translate.h"
334
+#include "qemu/log.h"
107
+#include "exec/helper-gen.h"
335
+
108
+#include "translate-a64.h"
336
+#define PHY_INT_ENERGYON (1 << 7)
109
+#include "fpu/softfloat.h"
337
+#define PHY_INT_AUTONEG_COMPLETE (1 << 6)
110
+
338
+#define PHY_INT_FAULT (1 << 5)
111
+
339
+#define PHY_INT_DOWN (1 << 4)
112
+/*
340
+#define PHY_INT_AUTONEG_LP (1 << 3)
113
+ * Include the generated decoder.
341
+#define PHY_INT_PARFAULT (1 << 2)
114
+ */
342
+#define PHY_INT_AUTONEG_PAGE (1 << 1)
115
+
343
+
116
+#include "decode-sme.c.inc"
344
+static void lan9118_phy_update_irq(Lan9118PhyState *s)
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
345
+{
346
+ qemu_set_irq(s->irq, !!(s->ints & s->int_mask));
347
+}
348
+
349
+uint16_t lan9118_phy_read(Lan9118PhyState *s, int reg)
350
+{
351
+ uint16_t val;
352
+
353
+ switch (reg) {
354
+ case 0: /* Basic Control */
355
+ return s->control;
356
+ case 1: /* Basic Status */
357
+ return s->status;
358
+ case 2: /* ID1 */
359
+ return 0x0007;
360
+ case 3: /* ID2 */
361
+ return 0xc0d1;
362
+ case 4: /* Auto-neg advertisement */
363
+ return s->advertise;
364
+ case 5: /* Auto-neg Link Partner Ability */
365
+ return 0x0f71;
366
+ case 6: /* Auto-neg Expansion */
367
+ return 1;
368
+ /* TODO 17, 18, 27, 29, 30, 31 */
369
+ case 29: /* Interrupt source. */
370
+ val = s->ints;
371
+ s->ints = 0;
372
+ lan9118_phy_update_irq(s);
373
+ return val;
374
+ case 30: /* Interrupt mask */
375
+ return s->int_mask;
376
+ default:
377
+ qemu_log_mask(LOG_GUEST_ERROR,
378
+ "lan9118_phy_read: PHY read reg %d\n", reg);
379
+ return 0;
380
+ }
381
+}
382
+
383
+void lan9118_phy_write(Lan9118PhyState *s, int reg, uint16_t val)
384
+{
385
+ switch (reg) {
386
+ case 0: /* Basic Control */
387
+ if (val & 0x8000) {
388
+ lan9118_phy_reset(s);
389
+ break;
390
+ }
391
+ s->control = val & 0x7980;
392
+ /* Complete autonegotiation immediately. */
393
+ if (val & 0x1000) {
394
+ s->status |= 0x0020;
395
+ }
396
+ break;
397
+ case 4: /* Auto-neg advertisement */
398
+ s->advertise = (val & 0x2d7f) | 0x80;
399
+ break;
400
+ /* TODO 17, 18, 27, 31 */
401
+ case 30: /* Interrupt mask */
402
+ s->int_mask = val & 0xff;
403
+ lan9118_phy_update_irq(s);
404
+ break;
405
+ default:
406
+ qemu_log_mask(LOG_GUEST_ERROR,
407
+ "lan9118_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
408
+ }
409
+}
410
+
411
+void lan9118_phy_update_link(Lan9118PhyState *s, bool link_down)
412
+{
413
+ s->link_down = link_down;
414
+
415
+ /* Autonegotiation status mirrors link status. */
416
+ if (link_down) {
417
+ s->status &= ~0x0024;
418
+ s->ints |= PHY_INT_DOWN;
419
+ } else {
420
+ s->status |= 0x0024;
421
+ s->ints |= PHY_INT_ENERGYON;
422
+ s->ints |= PHY_INT_AUTONEG_COMPLETE;
423
+ }
424
+ lan9118_phy_update_irq(s);
425
+}
426
+
427
+void lan9118_phy_reset(Lan9118PhyState *s)
428
+{
429
+ s->control = 0x3000;
430
+ s->status = 0x7809;
431
+ s->advertise = 0x01e1;
432
+ s->int_mask = 0;
433
+ s->ints = 0;
434
+ lan9118_phy_update_link(s, s->link_down);
435
+}
436
+
437
+static void lan9118_phy_reset_hold(Object *obj, ResetType type)
438
+{
439
+ Lan9118PhyState *s = LAN9118_PHY(obj);
440
+
441
+ lan9118_phy_reset(s);
442
+}
443
+
444
+static void lan9118_phy_init(Object *obj)
445
+{
446
+ Lan9118PhyState *s = LAN9118_PHY(obj);
447
+
448
+ qdev_init_gpio_out(DEVICE(s), &s->irq, 1);
449
+}
450
+
451
+static const VMStateDescription vmstate_lan9118_phy = {
452
+ .name = "lan9118-phy",
453
+ .version_id = 1,
454
+ .minimum_version_id = 1,
455
+ .fields = (const VMStateField[]) {
456
+ VMSTATE_UINT16(control, Lan9118PhyState),
457
+ VMSTATE_UINT16(status, Lan9118PhyState),
458
+ VMSTATE_UINT16(advertise, Lan9118PhyState),
459
+ VMSTATE_UINT16(ints, Lan9118PhyState),
460
+ VMSTATE_UINT16(int_mask, Lan9118PhyState),
461
+ VMSTATE_BOOL(link_down, Lan9118PhyState),
462
+ VMSTATE_END_OF_LIST()
463
+ }
464
+};
465
+
466
+static void lan9118_phy_class_init(ObjectClass *klass, void *data)
467
+{
468
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
469
+ DeviceClass *dc = DEVICE_CLASS(klass);
470
+
471
+ rc->phases.hold = lan9118_phy_reset_hold;
472
+ dc->vmsd = &vmstate_lan9118_phy;
473
+}
474
+
475
+static const TypeInfo types[] = {
476
+ {
477
+ .name = TYPE_LAN9118_PHY,
478
+ .parent = TYPE_SYS_BUS_DEVICE,
479
+ .instance_size = sizeof(Lan9118PhyState),
480
+ .instance_init = lan9118_phy_init,
481
+ .class_init = lan9118_phy_class_init,
482
+ }
483
+};
484
+
485
+DEFINE_TYPES(types)
486
diff --git a/hw/net/Kconfig b/hw/net/Kconfig
118
index XXXXXXX..XXXXXXX 100644
487
index XXXXXXX..XXXXXXX 100644
119
--- a/target/arm/meson.build
488
--- a/hw/net/Kconfig
120
+++ b/target/arm/meson.build
489
+++ b/hw/net/Kconfig
121
@@ -XXX,XX +XXX,XX @@
490
@@ -XXX,XX +XXX,XX @@ config VMXNET3_PCI
122
gen = [
491
config SMC91C111
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
492
bool
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
493
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
494
+config LAN9118_PHY
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
495
+ bool
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
496
+
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
497
config LAN9118
129
'sme_helper.c',
498
bool
130
'translate-a64.c',
499
+ select LAN9118_PHY
131
'translate-sve.c',
500
select PTIMER
132
+ 'translate-sme.c',
501
133
))
502
config NE2000_ISA
134
503
diff --git a/hw/net/meson.build b/hw/net/meson.build
135
arm_softmmu_ss = ss.source_set()
504
index XXXXXXX..XXXXXXX 100644
505
--- a/hw/net/meson.build
506
+++ b/hw/net/meson.build
507
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VMXNET3_PCI', if_true: files('vmxnet3.c'))
508
509
system_ss.add(when: 'CONFIG_SMC91C111', if_true: files('smc91c111.c'))
510
system_ss.add(when: 'CONFIG_LAN9118', if_true: files('lan9118.c'))
511
+system_ss.add(when: 'CONFIG_LAN9118_PHY', if_true: files('lan9118_phy.c'))
512
system_ss.add(when: 'CONFIG_NE2000_ISA', if_true: files('ne2000-isa.c'))
513
system_ss.add(when: 'CONFIG_OPENCORES_ETH', if_true: files('opencores_eth.c'))
514
system_ss.add(when: 'CONFIG_XGMAC', if_true: files('xgmac.c'))
136
--
515
--
137
2.25.1
516
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
Mark these as a non-streaming instructions, which should trap
3
imx_fec models the same PHY as lan9118_phy. The code is almost the same with
4
if full a64 support is not enabled in streaming mode.
4
imx_fec having more logging and tracing. Merge these improvements into
5
lan9118_phy and reuse in imx_fec to fix the code duplication.
5
6
7
Some migration state how resides in the new device model which breaks migration
8
compatibility for the following machines:
9
* imx25-pdk
10
* sabrelite
11
* mcimx7d-sabre
12
* mcimx6ul-evk
13
14
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
15
Tested-by: Guenter Roeck <linux@roeck-us.net>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
17
Message-id: 20241102125724.532843-3-shentey@gmail.com
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
19
---
11
target/arm/sme-fa64.decode | 3 ---
20
include/hw/net/imx_fec.h | 9 ++-
12
target/arm/translate-sve.c | 15 +++++++++++----
21
hw/net/imx_fec.c | 146 ++++-----------------------------------
13
2 files changed, 11 insertions(+), 7 deletions(-)
22
hw/net/lan9118_phy.c | 82 ++++++++++++++++------
23
hw/net/Kconfig | 1 +
24
hw/net/trace-events | 10 +--
25
5 files changed, 85 insertions(+), 163 deletions(-)
14
26
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
27
diff --git a/include/hw/net/imx_fec.h b/include/hw/net/imx_fec.h
16
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
29
--- a/include/hw/net/imx_fec.h
18
+++ b/target/arm/sme-fa64.decode
30
+++ b/include/hw/net/imx_fec.h
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
31
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXFECState, IMX_FEC)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
32
#define TYPE_IMX_ENET "imx.enet"
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
33
22
34
#include "hw/sysbus.h"
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
35
+#include "hw/net/lan9118_phy.h"
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
36
+#include "hw/irq.h"
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
37
#include "net/net.h"
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
38
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
39
#define ENET_EIR 1
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
40
@@ -XXX,XX +XXX,XX @@ struct IMXFECState {
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
41
uint32_t tx_descriptor[ENET_TX_RING_NUM];
42
uint32_t tx_ring_num;
43
44
- uint32_t phy_status;
45
- uint32_t phy_control;
46
- uint32_t phy_advertise;
47
- uint32_t phy_int;
48
- uint32_t phy_int_mask;
49
+ Lan9118PhyState mii;
50
+ IRQState mii_irq;
51
uint32_t phy_num;
52
bool phy_connected;
53
struct IMXFECState *phy_consumer;
54
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
30
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
56
--- a/hw/net/imx_fec.c
32
+++ b/target/arm/translate-sve.c
57
+++ b/hw/net/imx_fec.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
58
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_imx_eth_txdescs = {
34
NULL, gen_helper_sve_ftmad_h,
59
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
60
static const VMStateDescription vmstate_imx_eth = {
61
.name = TYPE_IMX_FEC,
62
- .version_id = 2,
63
- .minimum_version_id = 2,
64
+ .version_id = 3,
65
+ .minimum_version_id = 3,
66
.fields = (const VMStateField[]) {
67
VMSTATE_UINT32_ARRAY(regs, IMXFECState, ENET_MAX),
68
VMSTATE_UINT32(rx_descriptor, IMXFECState),
69
VMSTATE_UINT32(tx_descriptor[0], IMXFECState),
70
- VMSTATE_UINT32(phy_status, IMXFECState),
71
- VMSTATE_UINT32(phy_control, IMXFECState),
72
- VMSTATE_UINT32(phy_advertise, IMXFECState),
73
- VMSTATE_UINT32(phy_int, IMXFECState),
74
- VMSTATE_UINT32(phy_int_mask, IMXFECState),
75
VMSTATE_END_OF_LIST()
76
},
77
.subsections = (const VMStateDescription * const []) {
78
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_imx_eth = {
79
},
36
};
80
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
81
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
82
-#define PHY_INT_ENERGYON (1 << 7)
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
83
-#define PHY_INT_AUTONEG_COMPLETE (1 << 6)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
84
-#define PHY_INT_FAULT (1 << 5)
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
85
-#define PHY_INT_DOWN (1 << 4)
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
86
-#define PHY_INT_AUTONEG_LP (1 << 3)
87
-#define PHY_INT_PARFAULT (1 << 2)
88
-#define PHY_INT_AUTONEG_PAGE (1 << 1)
89
-
90
static void imx_eth_update(IMXFECState *s);
43
91
44
/*
92
/*
45
*** SVE Floating Point Accumulating Reduction Group
93
@@ -XXX,XX +XXX,XX @@ static void imx_eth_update(IMXFECState *s);
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
94
* For now we don't handle any GPIO/interrupt line, so the OS will
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
95
* have to poll for the PHY status.
48
return false;
96
*/
97
-static void imx_phy_update_irq(IMXFECState *s)
98
+static void imx_phy_update_irq(void *opaque, int n, int level)
99
{
100
- imx_eth_update(s);
101
-}
102
-
103
-static void imx_phy_update_link(IMXFECState *s)
104
-{
105
- /* Autonegotiation status mirrors link status. */
106
- if (qemu_get_queue(s->nic)->link_down) {
107
- trace_imx_phy_update_link("down");
108
- s->phy_status &= ~0x0024;
109
- s->phy_int |= PHY_INT_DOWN;
110
- } else {
111
- trace_imx_phy_update_link("up");
112
- s->phy_status |= 0x0024;
113
- s->phy_int |= PHY_INT_ENERGYON;
114
- s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
115
- }
116
- imx_phy_update_irq(s);
117
+ imx_eth_update(opaque);
118
}
119
120
static void imx_eth_set_link(NetClientState *nc)
121
{
122
- imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
123
-}
124
-
125
-static void imx_phy_reset(IMXFECState *s)
126
-{
127
- trace_imx_phy_reset();
128
-
129
- s->phy_status = 0x7809;
130
- s->phy_control = 0x3000;
131
- s->phy_advertise = 0x01e1;
132
- s->phy_int_mask = 0;
133
- s->phy_int = 0;
134
- imx_phy_update_link(s);
135
+ lan9118_phy_update_link(&IMX_FEC(qemu_get_nic_opaque(nc))->mii,
136
+ nc->link_down);
137
}
138
139
static uint32_t imx_phy_read(IMXFECState *s, int reg)
140
{
141
- uint32_t val;
142
uint32_t phy = reg / 32;
143
144
if (!s->phy_connected) {
145
@@ -XXX,XX +XXX,XX @@ static uint32_t imx_phy_read(IMXFECState *s, int reg)
146
147
reg %= 32;
148
149
- switch (reg) {
150
- case 0: /* Basic Control */
151
- val = s->phy_control;
152
- break;
153
- case 1: /* Basic Status */
154
- val = s->phy_status;
155
- break;
156
- case 2: /* ID1 */
157
- val = 0x0007;
158
- break;
159
- case 3: /* ID2 */
160
- val = 0xc0d1;
161
- break;
162
- case 4: /* Auto-neg advertisement */
163
- val = s->phy_advertise;
164
- break;
165
- case 5: /* Auto-neg Link Partner Ability */
166
- val = 0x0f71;
167
- break;
168
- case 6: /* Auto-neg Expansion */
169
- val = 1;
170
- break;
171
- case 29: /* Interrupt source. */
172
- val = s->phy_int;
173
- s->phy_int = 0;
174
- imx_phy_update_irq(s);
175
- break;
176
- case 30: /* Interrupt mask */
177
- val = s->phy_int_mask;
178
- break;
179
- case 17:
180
- case 18:
181
- case 27:
182
- case 31:
183
- qemu_log_mask(LOG_UNIMP, "[%s.phy]%s: reg %d not implemented\n",
184
- TYPE_IMX_FEC, __func__, reg);
185
- val = 0;
186
- break;
187
- default:
188
- qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
189
- TYPE_IMX_FEC, __func__, reg);
190
- val = 0;
191
- break;
192
- }
193
-
194
- trace_imx_phy_read(val, phy, reg);
195
-
196
- return val;
197
+ return lan9118_phy_read(&s->mii, reg);
198
}
199
200
static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
201
@@ -XXX,XX +XXX,XX @@ static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
202
203
reg %= 32;
204
205
- trace_imx_phy_write(val, phy, reg);
206
-
207
- switch (reg) {
208
- case 0: /* Basic Control */
209
- if (val & 0x8000) {
210
- imx_phy_reset(s);
211
- } else {
212
- s->phy_control = val & 0x7980;
213
- /* Complete autonegotiation immediately. */
214
- if (val & 0x1000) {
215
- s->phy_status |= 0x0020;
216
- }
217
- }
218
- break;
219
- case 4: /* Auto-neg advertisement */
220
- s->phy_advertise = (val & 0x2d7f) | 0x80;
221
- break;
222
- case 30: /* Interrupt mask */
223
- s->phy_int_mask = val & 0xff;
224
- imx_phy_update_irq(s);
225
- break;
226
- case 17:
227
- case 18:
228
- case 27:
229
- case 31:
230
- qemu_log_mask(LOG_UNIMP, "[%s.phy)%s: reg %d not implemented\n",
231
- TYPE_IMX_FEC, __func__, reg);
232
- break;
233
- default:
234
- qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
235
- TYPE_IMX_FEC, __func__, reg);
236
- break;
237
- }
238
+ lan9118_phy_write(&s->mii, reg, val);
239
}
240
241
static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr)
242
@@ -XXX,XX +XXX,XX @@ static void imx_eth_reset(DeviceState *d)
243
244
s->rx_descriptor = 0;
245
memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor));
246
-
247
- /* We also reset the PHY */
248
- imx_phy_reset(s);
249
}
250
251
static uint32_t imx_default_read(IMXFECState *s, uint32_t index)
252
@@ -XXX,XX +XXX,XX @@ static void imx_eth_realize(DeviceState *dev, Error **errp)
253
sysbus_init_irq(sbd, &s->irq[0]);
254
sysbus_init_irq(sbd, &s->irq[1]);
255
256
+ qemu_init_irq(&s->mii_irq, imx_phy_update_irq, s, 0);
257
+ object_initialize_child(OBJECT(s), "mii", &s->mii, TYPE_LAN9118_PHY);
258
+ if (!sysbus_realize_and_unref(SYS_BUS_DEVICE(&s->mii), errp)) {
259
+ return;
260
+ }
261
+ qdev_connect_gpio_out(DEVICE(&s->mii), 0, &s->mii_irq);
262
+
263
qemu_macaddr_default_if_unset(&s->conf.macaddr);
264
265
s->nic = qemu_new_nic(&imx_eth_net_info, &s->conf,
266
diff --git a/hw/net/lan9118_phy.c b/hw/net/lan9118_phy.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/hw/net/lan9118_phy.c
269
+++ b/hw/net/lan9118_phy.c
270
@@ -XXX,XX +XXX,XX @@
271
* Copyright (c) 2009 CodeSourcery, LLC.
272
* Written by Paul Brook
273
*
274
+ * Copyright (c) 2013 Jean-Christophe Dubois. <jcd@tribudubois.net>
275
+ *
276
* This code is licensed under the GNU GPL v2
277
*
278
* Contributions after 2012-01-13 are licensed under the terms of the
279
@@ -XXX,XX +XXX,XX @@
280
#include "hw/resettable.h"
281
#include "migration/vmstate.h"
282
#include "qemu/log.h"
283
+#include "trace.h"
284
285
#define PHY_INT_ENERGYON (1 << 7)
286
#define PHY_INT_AUTONEG_COMPLETE (1 << 6)
287
@@ -XXX,XX +XXX,XX @@ uint16_t lan9118_phy_read(Lan9118PhyState *s, int reg)
288
289
switch (reg) {
290
case 0: /* Basic Control */
291
- return s->control;
292
+ val = s->control;
293
+ break;
294
case 1: /* Basic Status */
295
- return s->status;
296
+ val = s->status;
297
+ break;
298
case 2: /* ID1 */
299
- return 0x0007;
300
+ val = 0x0007;
301
+ break;
302
case 3: /* ID2 */
303
- return 0xc0d1;
304
+ val = 0xc0d1;
305
+ break;
306
case 4: /* Auto-neg advertisement */
307
- return s->advertise;
308
+ val = s->advertise;
309
+ break;
310
case 5: /* Auto-neg Link Partner Ability */
311
- return 0x0f71;
312
+ val = 0x0f71;
313
+ break;
314
case 6: /* Auto-neg Expansion */
315
- return 1;
316
- /* TODO 17, 18, 27, 29, 30, 31 */
317
+ val = 1;
318
+ break;
319
case 29: /* Interrupt source. */
320
val = s->ints;
321
s->ints = 0;
322
lan9118_phy_update_irq(s);
323
- return val;
324
+ break;
325
case 30: /* Interrupt mask */
326
- return s->int_mask;
327
+ val = s->int_mask;
328
+ break;
329
+ case 17:
330
+ case 18:
331
+ case 27:
332
+ case 31:
333
+ qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n",
334
+ __func__, reg);
335
+ val = 0;
336
+ break;
337
default:
338
- qemu_log_mask(LOG_GUEST_ERROR,
339
- "lan9118_phy_read: PHY read reg %d\n", reg);
340
- return 0;
341
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n",
342
+ __func__, reg);
343
+ val = 0;
344
+ break;
49
}
345
}
50
+ s->is_nonstreaming = true;
346
+
51
if (!sve_access_check(s)) {
347
+ trace_lan9118_phy_read(val, reg);
52
return true;
348
+
349
+ return val;
350
}
351
352
void lan9118_phy_write(Lan9118PhyState *s, int reg, uint16_t val)
353
{
354
+ trace_lan9118_phy_write(val, reg);
355
+
356
switch (reg) {
357
case 0: /* Basic Control */
358
if (val & 0x8000) {
359
lan9118_phy_reset(s);
360
- break;
361
- }
362
- s->control = val & 0x7980;
363
- /* Complete autonegotiation immediately. */
364
- if (val & 0x1000) {
365
- s->status |= 0x0020;
366
+ } else {
367
+ s->control = val & 0x7980;
368
+ /* Complete autonegotiation immediately. */
369
+ if (val & 0x1000) {
370
+ s->status |= 0x0020;
371
+ }
372
}
373
break;
374
case 4: /* Auto-neg advertisement */
375
s->advertise = (val & 0x2d7f) | 0x80;
376
break;
377
- /* TODO 17, 18, 27, 31 */
378
case 30: /* Interrupt mask */
379
s->int_mask = val & 0xff;
380
lan9118_phy_update_irq(s);
381
break;
382
+ case 17:
383
+ case 18:
384
+ case 27:
385
+ case 31:
386
+ qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n",
387
+ __func__, reg);
388
+ break;
389
default:
390
- qemu_log_mask(LOG_GUEST_ERROR,
391
- "lan9118_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
392
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n",
393
+ __func__, reg);
394
+ break;
53
}
395
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
396
}
55
DO_FP3(FADD_zzz, fadd)
397
56
DO_FP3(FSUB_zzz, fsub)
398
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_update_link(Lan9118PhyState *s, bool link_down)
57
DO_FP3(FMUL_zzz, fmul)
399
58
-DO_FP3(FTSMUL, ftsmul)
400
/* Autonegotiation status mirrors link status. */
59
DO_FP3(FRECPS, recps)
401
if (link_down) {
60
DO_FP3(FRSQRTS, rsqrts)
402
+ trace_lan9118_phy_update_link("down");
61
403
s->status &= ~0x0024;
62
#undef DO_FP3
404
s->ints |= PHY_INT_DOWN;
63
405
} else {
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
406
+ trace_lan9118_phy_update_link("up");
65
+ NULL, gen_helper_gvec_ftsmul_h,
407
s->status |= 0x0024;
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
408
s->ints |= PHY_INT_ENERGYON;
67
+};
409
s->ints |= PHY_INT_AUTONEG_COMPLETE;
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
410
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_update_link(Lan9118PhyState *s, bool link_down)
69
+ ftsmul_fns[a->esz], a, 0)
411
70
+
412
void lan9118_phy_reset(Lan9118PhyState *s)
71
/*
413
{
72
*** SVE Floating Point Arithmetic - Predicated Group
414
+ trace_lan9118_phy_reset();
73
*/
415
+
416
s->control = 0x3000;
417
s->status = 0x7809;
418
s->advertise = 0x01e1;
419
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_lan9118_phy = {
420
.version_id = 1,
421
.minimum_version_id = 1,
422
.fields = (const VMStateField[]) {
423
- VMSTATE_UINT16(control, Lan9118PhyState),
424
VMSTATE_UINT16(status, Lan9118PhyState),
425
+ VMSTATE_UINT16(control, Lan9118PhyState),
426
VMSTATE_UINT16(advertise, Lan9118PhyState),
427
VMSTATE_UINT16(ints, Lan9118PhyState),
428
VMSTATE_UINT16(int_mask, Lan9118PhyState),
429
diff --git a/hw/net/Kconfig b/hw/net/Kconfig
430
index XXXXXXX..XXXXXXX 100644
431
--- a/hw/net/Kconfig
432
+++ b/hw/net/Kconfig
433
@@ -XXX,XX +XXX,XX @@ config ALLWINNER_SUN8I_EMAC
434
435
config IMX_FEC
436
bool
437
+ select LAN9118_PHY
438
439
config CADENCE
440
bool
441
diff --git a/hw/net/trace-events b/hw/net/trace-events
442
index XXXXXXX..XXXXXXX 100644
443
--- a/hw/net/trace-events
444
+++ b/hw/net/trace-events
445
@@ -XXX,XX +XXX,XX @@ allwinner_sun8i_emac_set_link(bool active) "Set link: active=%u"
446
allwinner_sun8i_emac_read(uint64_t offset, uint64_t val) "MMIO read: offset=0x%" PRIx64 " value=0x%" PRIx64
447
allwinner_sun8i_emac_write(uint64_t offset, uint64_t val) "MMIO write: offset=0x%" PRIx64 " value=0x%" PRIx64
448
449
+# lan9118_phy.c
450
+lan9118_phy_read(uint16_t val, int reg) "[0x%02x] -> 0x%04" PRIx16
451
+lan9118_phy_write(uint16_t val, int reg) "[0x%02x] <- 0x%04" PRIx16
452
+lan9118_phy_update_link(const char *s) "%s"
453
+lan9118_phy_reset(void) ""
454
+
455
# lance.c
456
lance_mem_readw(uint64_t addr, uint32_t ret) "addr=0x%"PRIx64"val=0x%04x"
457
lance_mem_writew(uint64_t addr, uint32_t val) "addr=0x%"PRIx64"val=0x%04x"
458
@@ -XXX,XX +XXX,XX @@ i82596_set_multicast(uint16_t count) "Added %d multicast entries"
459
i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
460
461
# imx_fec.c
462
-imx_phy_read(uint32_t val, int phy, int reg) "0x%04"PRIx32" <= phy[%d].reg[%d]"
463
imx_phy_read_num(int phy, int configured) "read request from unconfigured phy %d (configured %d)"
464
-imx_phy_write(uint32_t val, int phy, int reg) "0x%04"PRIx32" => phy[%d].reg[%d]"
465
imx_phy_write_num(int phy, int configured) "write request to unconfigured phy %d (configured %d)"
466
-imx_phy_update_link(const char *s) "%s"
467
-imx_phy_reset(void) ""
468
imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
469
imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x"
470
imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit"
74
--
471
--
75
2.25.1
472
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
These prctl set the Streaming SVE vector length, which may
3
Turns 0x70 into 0xe0 (== 0x70 << 1) which adds the missing MII_ANLPAR_TX and
4
be completely different from the Normal SVE vector length.
4
fixes the MSB of selector field to be zero, as specified in the datasheet.
5
5
6
Fixes: 2a424990170b "LAN9118 emulation"
7
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
8
Tested-by: Guenter Roeck <linux@roeck-us.net>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20241102125724.532843-4-shentey@gmail.com
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
12
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
13
hw/net/lan9118_phy.c | 2 +-
12
linux-user/syscall.c | 16 +++++++++
14
1 file changed, 1 insertion(+), 1 deletion(-)
13
2 files changed, 70 insertions(+)
14
15
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
diff --git a/hw/net/lan9118_phy.c b/hw/net/lan9118_phy.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
--- a/hw/net/lan9118_phy.c
18
+++ b/linux-user/aarch64/target_prctl.h
19
+++ b/hw/net/lan9118_phy.c
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
@@ -XXX,XX +XXX,XX @@ uint16_t lan9118_phy_read(Lan9118PhyState *s, int reg)
20
{
21
val = s->advertise;
21
ARMCPU *cpu = env_archcpu(env);
22
break;
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
case 5: /* Auto-neg Link Partner Ability */
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
- val = 0x0f71;
24
return sve_vq(env) * 16;
25
+ val = 0x0fe1;
25
}
26
break;
26
return -TARGET_EINVAL;
27
case 6: /* Auto-neg Expansion */
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
val = 1;
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
107
+#endif
108
109
#include "target_prctl.h"
110
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
112
#ifndef do_prctl_set_unalign
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
135
--
29
--
136
2.25.1
30
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
2
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
3
Prefer named constants over magic values for better readability.
4
because those functions accept only a Zreg register number.
5
For SME, we want to pass a pointer into ZA storage.
6
4
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
7
Tested-by: Guenter Roeck <linux@roeck-us.net>
8
Message-id: 20241102125724.532843-5-shentey@gmail.com
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/helper-sme.h | 82 +++++
11
include/hw/net/mii.h | 6 +++++
13
target/arm/sme.decode | 9 +
12
hw/net/lan9118_phy.c | 63 ++++++++++++++++++++++++++++----------------
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
13
2 files changed, 46 insertions(+), 23 deletions(-)
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
17
14
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/helper-sme.h
17
--- a/include/hw/net/mii.h
21
+++ b/target/arm/helper-sme.h
18
+++ b/include/hw/net/mii.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
@@ -XXX,XX +XXX,XX @@
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
20
#define MII_BMSR_JABBER (1 << 1) /* Jabber detected */
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
#define MII_BMSR_EXTCAP (1 << 0) /* Ext-reg capability */
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
23
+#define MII_ANAR_RFAULT (1 << 13) /* Say we can detect faults */
24
#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymmetric pause */
25
#define MII_ANAR_PAUSE (1 << 10) /* Try for pause */
26
#define MII_ANAR_TXFD (1 << 8)
27
@@ -XXX,XX +XXX,XX @@
28
#define MII_ANAR_10FD (1 << 6)
29
#define MII_ANAR_10 (1 << 5)
30
#define MII_ANAR_CSMACD (1 << 0)
31
+#define MII_ANAR_SELECT (0x001f) /* Selector bits */
32
33
#define MII_ANLPAR_ACK (1 << 14)
34
#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */
35
@@ -XXX,XX +XXX,XX @@
36
#define RTL8201CP_PHYID1 0x0000
37
#define RTL8201CP_PHYID2 0x8201
38
39
+/* SMSC LAN9118 */
40
+#define SMSCLAN9118_PHYID1 0x0007
41
+#define SMSCLAN9118_PHYID2 0xc0d1
26
+
42
+
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
/* RealTek 8211E */
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
#define RTL8211E_PHYID1 0x001c
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
#define RTL8211E_PHYID2 0xc915
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
diff --git a/hw/net/lan9118_phy.c b/hw/net/lan9118_phy.c
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
48
--- a/hw/net/lan9118_phy.c
111
+++ b/target/arm/sme.decode
49
+++ b/hw/net/lan9118_phy.c
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
129
@@ -XXX,XX +XXX,XX @@
50
@@ -XXX,XX +XXX,XX @@
130
51
131
#include "qemu/osdep.h"
52
#include "qemu/osdep.h"
132
#include "cpu.h"
53
#include "hw/net/lan9118_phy.h"
133
+#include "internals.h"
54
+#include "hw/net/mii.h"
134
#include "tcg/tcg-gvec-desc.h"
55
#include "hw/irq.h"
135
#include "exec/helper-proto.h"
56
#include "hw/resettable.h"
136
+#include "exec/cpu_ldst.h"
57
#include "migration/vmstate.h"
137
+#include "exec/exec-all.h"
58
@@ -XXX,XX +XXX,XX @@ uint16_t lan9118_phy_read(Lan9118PhyState *s, int reg)
138
#include "qemu/int128.h"
59
uint16_t val;
139
#include "vec_internal.h"
60
140
+#include "sve_ldst_internal.h"
61
switch (reg) {
141
62
- case 0: /* Basic Control */
142
/* ResetSVEState */
63
+ case MII_BMCR:
143
void arm_reset_sve_state(CPUARMState *env)
64
val = s->control;
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
65
break;
145
}
66
- case 1: /* Basic Status */
146
67
+ case MII_BMSR:
147
#undef DO_MOVA_Z
68
val = s->status;
148
+
69
break;
149
+/*
70
- case 2: /* ID1 */
150
+ * Clear elements in a tile slice comprising len bytes.
71
- val = 0x0007;
151
+ */
72
+ case MII_PHYID1:
152
+
73
+ val = SMSCLAN9118_PHYID1;
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
74
break;
154
+
75
- case 3: /* ID2 */
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
76
- val = 0xc0d1;
156
+{
77
+ case MII_PHYID2:
157
+ memset(ptr + off, 0, len);
78
+ val = SMSCLAN9118_PHYID2;
158
+}
79
break;
159
+
80
- case 4: /* Auto-neg advertisement */
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
81
+ case MII_ANAR:
161
+{
82
val = s->advertise;
162
+ for (size_t i = 0; i < len; ++i) {
83
break;
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
84
- case 5: /* Auto-neg Link Partner Ability */
164
+ }
85
- val = 0x0fe1;
165
+}
86
+ case MII_ANLPAR:
166
+
87
+ val = MII_ANLPAR_PAUSEASY | MII_ANLPAR_PAUSE | MII_ANLPAR_T4 |
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
88
+ MII_ANLPAR_TXFD | MII_ANLPAR_TX | MII_ANLPAR_10FD |
168
+{
89
+ MII_ANLPAR_10 | MII_ANLPAR_CSMACD;
169
+ for (size_t i = 0; i < len; i += 2) {
90
break;
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
91
- case 6: /* Auto-neg Expansion */
171
+ }
92
- val = 1;
172
+}
93
+ case MII_ANER:
173
+
94
+ val = MII_ANER_NWAY;
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
95
break;
175
+{
96
case 29: /* Interrupt source. */
176
+ for (size_t i = 0; i < len; i += 4) {
97
val = s->ints;
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
98
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_write(Lan9118PhyState *s, int reg, uint16_t val)
178
+ }
99
trace_lan9118_phy_write(val, reg);
179
+}
100
180
+
101
switch (reg) {
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
102
- case 0: /* Basic Control */
182
+{
103
- if (val & 0x8000) {
183
+ for (size_t i = 0; i < len; i += 8) {
104
+ case MII_BMCR:
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
105
+ if (val & MII_BMCR_RESET) {
185
+ }
106
lan9118_phy_reset(s);
186
+}
107
} else {
187
+
108
- s->control = val & 0x7980;
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
109
+ s->control = val & (MII_BMCR_LOOPBACK | MII_BMCR_SPEED100 |
189
+{
110
+ MII_BMCR_AUTOEN | MII_BMCR_PDOWN | MII_BMCR_FD |
190
+ for (size_t i = 0; i < len; i += 16) {
111
+ MII_BMCR_CTST);
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
112
/* Complete autonegotiation immediately. */
192
+ }
113
- if (val & 0x1000) {
193
+}
114
- s->status |= 0x0020;
194
+
115
+ if (val & MII_BMCR_AUTOEN) {
195
+/*
116
+ s->status |= MII_BMSR_AN_COMP;
196
+ * Copy elements from an array into a tile slice comprising len bytes.
117
}
197
+ */
118
}
198
+
119
break;
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
120
- case 4: /* Auto-neg advertisement */
200
+
121
- s->advertise = (val & 0x2d7f) | 0x80;
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
122
+ case MII_ANAR:
202
+{
123
+ s->advertise = (val & (MII_ANAR_RFAULT | MII_ANAR_PAUSE_ASYM |
203
+ memcpy(dst, src, len);
124
+ MII_ANAR_PAUSE | MII_ANAR_10FD | MII_ANAR_10 |
204
+}
125
+ MII_ANAR_SELECT))
205
+
126
+ | MII_ANAR_TX;
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
127
break;
207
+{
128
case 30: /* Interrupt mask */
208
+ const uint8_t *src = vsrc;
129
s->int_mask = val & 0xff;
209
+ uint8_t *dst = vdst;
130
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_update_link(Lan9118PhyState *s, bool link_down)
210
+ size_t i;
131
/* Autonegotiation status mirrors link status. */
211
+
132
if (link_down) {
212
+ for (i = 0; i < len; ++i) {
133
trace_lan9118_phy_update_link("down");
213
+ dst[tile_vslice_index(i)] = src[i];
134
- s->status &= ~0x0024;
214
+ }
135
+ s->status &= ~(MII_BMSR_AN_COMP | MII_BMSR_LINK_ST);
215
+}
136
s->ints |= PHY_INT_DOWN;
216
+
137
} else {
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
138
trace_lan9118_phy_update_link("up");
218
+{
139
- s->status |= 0x0024;
219
+ const uint16_t *src = vsrc;
140
+ s->status |= MII_BMSR_AN_COMP | MII_BMSR_LINK_ST;
220
+ uint16_t *dst = vdst;
141
s->ints |= PHY_INT_ENERGYON;
221
+ size_t i;
142
s->ints |= PHY_INT_AUTONEG_COMPLETE;
222
+
143
}
223
+ for (i = 0; i < len / 2; ++i) {
144
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_reset(Lan9118PhyState *s)
224
+ dst[tile_vslice_index(i)] = src[i];
145
{
225
+ }
146
trace_lan9118_phy_reset();
226
+}
147
227
+
148
- s->control = 0x3000;
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
149
- s->status = 0x7809;
229
+{
150
- s->advertise = 0x01e1;
230
+ const uint32_t *src = vsrc;
151
+ s->control = MII_BMCR_AUTOEN | MII_BMCR_SPEED100;
231
+ uint32_t *dst = vdst;
152
+ s->status = MII_BMSR_100TX_FD
232
+ size_t i;
153
+ | MII_BMSR_100TX_HD
233
+
154
+ | MII_BMSR_10T_FD
234
+ for (i = 0; i < len / 4; ++i) {
155
+ | MII_BMSR_10T_HD
235
+ dst[tile_vslice_index(i)] = src[i];
156
+ | MII_BMSR_AUTONEG
236
+ }
157
+ | MII_BMSR_EXTCAP;
237
+}
158
+ s->advertise = MII_ANAR_TXFD
238
+
159
+ | MII_ANAR_TX
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
160
+ | MII_ANAR_10FD
240
+{
161
+ | MII_ANAR_10
241
+ const uint64_t *src = vsrc;
162
+ | MII_ANAR_CSMACD;
242
+ uint64_t *dst = vdst;
163
s->int_mask = 0;
243
+ size_t i;
164
s->ints = 0;
244
+
165
lan9118_phy_update_link(s, s->link_down);
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 1, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
817
--
166
--
818
2.25.1
167
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Bernhard Beschow <shentey@gmail.com>
2
3
The real device advertises this mode and the device model already advertises
4
100 mbps half duplex and 10 mbps full+half duplex. So advertise this mode to
5
make the model more realistic.
2
6
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
9
Tested-by: Guenter Roeck <linux@roeck-us.net>
10
Message-id: 20241102125724.532843-6-shentey@gmail.com
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
12
---
8
linux-user/aarch64/signal.c | 3 +++
13
hw/net/lan9118_phy.c | 4 ++--
9
1 file changed, 3 insertions(+)
14
1 file changed, 2 insertions(+), 2 deletions(-)
10
15
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
diff --git a/hw/net/lan9118_phy.c b/hw/net/lan9118_phy.c
12
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/signal.c
18
--- a/hw/net/lan9118_phy.c
14
+++ b/linux-user/aarch64/signal.c
19
+++ b/hw/net/lan9118_phy.c
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
20
@@ -XXX,XX +XXX,XX @@ void lan9118_phy_write(Lan9118PhyState *s, int reg, uint16_t val)
16
__get_user(extra_size,
21
break;
17
&((struct target_extra_context *)ctx)->size);
22
case MII_ANAR:
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
23
s->advertise = (val & (MII_ANAR_RFAULT | MII_ANAR_PAUSE_ASYM |
19
+ if (!extra) {
24
- MII_ANAR_PAUSE | MII_ANAR_10FD | MII_ANAR_10 |
20
+ return 1;
25
- MII_ANAR_SELECT))
21
+ }
26
+ MII_ANAR_PAUSE | MII_ANAR_TXFD | MII_ANAR_10FD |
22
break;
27
+ MII_ANAR_10 | MII_ANAR_SELECT))
23
28
| MII_ANAR_TX;
24
default:
29
break;
30
case 30: /* Interrupt mask */
25
--
31
--
26
2.25.1
32
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
For IEEE fused multiply-add, the (0 * inf) + NaN case should raise
2
Invalid for the multiplication of 0 by infinity. Currently we handle
3
this in the per-architecture ifdef ladder in pickNaNMulAdd().
4
However, since this isn't really architecture specific we can hoist
5
it up to the generic code.
2
6
3
This is an SVE instruction that operates using the SVE vector
7
For the cases where the infzero test in pickNaNMulAdd was
4
length but that it is present only if SME is implemented.
8
returning 2, we can delete the check entirely and allow the
9
code to fall into the normal pick-a-NaN handling, because this
10
will return 2 anyway (input 'c' being the only NaN in this case).
11
For the cases where infzero was returning 3 to indicate "return
12
the default NaN", we must retain that "return 3".
5
13
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
For Arm, this looks like it might be a behaviour change because we
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
used to set float_flag_invalid | float_flag_invalid_imz only if C is
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
16
a quiet NaN. However, it is not, because Arm target code never looks
17
at float_flag_invalid_imz, and for the (0 * inf) + SNaN case we
18
already raised float_flag_invalid via the "abc_mask &
19
float_cmask_snan" check in pick_nan_muladd.
20
21
For any target architecture using the "default implementation" at the
22
bottom of the ifdef, this is a behaviour change but will be fixing a
23
bug (where we failed to raise the Invalid exception for (0 * inf +
24
QNaN). The architectures using the default case are:
25
* hppa
26
* i386
27
* sh4
28
* tricore
29
30
The x86, Tricore and SH4 CPU architecture manuals are clear that this
31
should have raised Invalid; HPPA is a bit vaguer but still seems
32
clear enough.
33
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
34
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
35
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
36
Message-id: 20241202131347.498124-2-peter.maydell@linaro.org
10
---
37
---
11
target/arm/sve.decode | 20 +++++++++++++
38
fpu/softfloat-parts.c.inc | 13 +++++++------
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
39
fpu/softfloat-specialize.c.inc | 29 +----------------------------
13
2 files changed, 77 insertions(+)
40
2 files changed, 8 insertions(+), 34 deletions(-)
14
41
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
42
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
16
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sve.decode
44
--- a/fpu/softfloat-parts.c.inc
18
+++ b/target/arm/sve.decode
45
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
46
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
20
47
int ab_mask, int abc_mask)
21
### SVE2 floating-point bfloat16 dot-product (indexed)
48
{
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
49
int which;
23
+
50
+ bool infzero = (ab_mask == float_cmask_infzero);
24
+### SVE broadcast predicate element
51
25
+
52
if (unlikely(abc_mask & float_cmask_snan)) {
26
+&psel esz pd pn pm rv imm
53
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
27
+%psel_rv 16:2 !function=plus_12
54
}
28
+%psel_imm_b 22:2 19:2
55
29
+%psel_imm_h 22:2 20:1
56
- which = pickNaNMulAdd(a->cls, b->cls, c->cls,
30
+%psel_imm_s 22:2
57
- ab_mask == float_cmask_infzero, s);
31
+%psel_imm_d 23:1
58
+ if (infzero) {
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
59
+ /* This is (0 * inf) + NaN or (inf * 0) + NaN */
33
+ &psel rv=%psel_rv
60
+ float_raise(float_flag_invalid | float_flag_invalid_imz, s);
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
51
+
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
53
+{
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
65
+ }
61
+ }
66
+
62
+
67
+ tmp = tcg_temp_new_i64();
63
+ which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, s);
68
+ dbit = tcg_temp_new_i64();
64
69
+ didx = tcg_temp_new_i64();
65
if (s->default_nan_mode || which == 3) {
70
+ ptr = tcg_temp_new_ptr();
66
- /*
67
- * Note that this check is after pickNaNMulAdd so that function
68
- * has an opportunity to set the Invalid flag for infzero.
69
- */
70
parts_default_nan(a, s);
71
return a;
72
}
73
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
74
index XXXXXXX..XXXXXXX 100644
75
--- a/fpu/softfloat-specialize.c.inc
76
+++ b/fpu/softfloat-specialize.c.inc
77
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
78
* the default NaN
79
*/
80
if (infzero && is_qnan(c_cls)) {
81
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
82
return 3;
83
}
84
85
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
86
* case sets InvalidOp and returns the default NaN
87
*/
88
if (infzero) {
89
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
90
return 3;
91
}
92
/* Prefer sNaN over qNaN, in the a, b, c order. */
93
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
94
* For MIPS systems that conform to IEEE754-2008, the (inf,zero,nan)
95
* case sets InvalidOp and returns the input value 'c'
96
*/
97
- if (infzero) {
98
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
99
- return 2;
100
- }
101
/* Prefer sNaN over qNaN, in the c, a, b order. */
102
if (is_snan(c_cls)) {
103
return 2;
104
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
105
* For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
106
* case sets InvalidOp and returns the input value 'c'
107
*/
108
- if (infzero) {
109
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
110
- return 2;
111
- }
71
+
112
+
72
+ /* Compute the predicate element. */
113
/* Prefer sNaN over qNaN, in the c, a, b order. */
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
114
if (is_snan(c_cls)) {
74
+ if (is_power_of_2(elements)) {
115
return 2;
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
116
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
76
+ } else {
117
* to return an input NaN if we have one (ie c) rather than generating
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
118
* a default NaN
78
+ }
119
*/
79
+
120
- if (infzero) {
80
+ /* Extract the predicate byte and bit indices. */
121
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
122
- return 2;
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
123
- }
83
+ tcg_gen_shri_i64(didx, tmp, 3);
124
84
+ if (HOST_BIG_ENDIAN) {
125
/* If fRA is a NaN return it; otherwise if fRB is a NaN return it;
85
+ tcg_gen_xori_i64(didx, didx, 7);
126
* otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
86
+ }
127
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
87
+
128
return 1;
88
+ /* Load the predicate word. */
129
}
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
130
#elif defined(TARGET_RISCV)
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
131
- /* For RISC-V, InvalidOp is set when multiplicands are Inf and zero */
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
132
- if (infzero) {
92
+
133
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
93
+ /* Extract the predicate bit and replicate to MO_64. */
134
- }
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
135
return 3; /* default NaN */
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
136
#elif defined(TARGET_S390X)
96
+ tcg_gen_neg_i64(tmp, tmp);
137
if (infzero) {
97
+
138
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
98
+ /* Apply to either copy the source, or write zeros. */
139
return 3;
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
140
}
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
141
101
+
142
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
102
+ tcg_temp_free_i64(tmp);
143
return 2;
103
+ tcg_temp_free_i64(dbit);
144
}
104
+ tcg_temp_free_i64(didx);
145
#elif defined(TARGET_SPARC)
105
+ tcg_temp_free_ptr(ptr);
146
- /* For (inf,0,nan) return c. */
106
+ return true;
147
- if (infzero) {
107
+}
148
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
149
- return 2;
150
- }
151
/* Prefer SNaN over QNaN, order C, B, A. */
152
if (is_snan(c_cls)) {
153
return 2;
154
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
155
* For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns
156
* an input NaN if we have one (ie c).
157
*/
158
- if (infzero) {
159
- float_raise(float_flag_invalid | float_flag_invalid_imz, status);
160
- return 2;
161
- }
162
if (status->use_first_nan) {
163
if (is_nan(a_cls)) {
164
return 0;
108
--
165
--
109
2.25.1
166
2.34.1
diff view generated by jsdifflib
New patch
1
If the target sets default_nan_mode then we're always going to return
2
the default NaN, and pickNaNMulAdd() no longer has any side effects.
3
For consistency with pickNaN(), check for default_nan_mode before
4
calling pickNaNMulAdd().
1
5
6
When we convert pickNaNMulAdd() to allow runtime selection of the NaN
7
propagation rule, this means we won't have to make the targets which
8
use default_nan_mode also set a propagation rule.
9
10
Since RiscV always uses default_nan_mode, this allows us to remove
11
its ifdef case from pickNaNMulAdd().
12
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20241202131347.498124-3-peter.maydell@linaro.org
16
---
17
fpu/softfloat-parts.c.inc | 8 ++++++--
18
fpu/softfloat-specialize.c.inc | 9 +++++++--
19
2 files changed, 13 insertions(+), 4 deletions(-)
20
21
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
22
index XXXXXXX..XXXXXXX 100644
23
--- a/fpu/softfloat-parts.c.inc
24
+++ b/fpu/softfloat-parts.c.inc
25
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
26
float_raise(float_flag_invalid | float_flag_invalid_imz, s);
27
}
28
29
- which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, s);
30
+ if (s->default_nan_mode) {
31
+ which = 3;
32
+ } else {
33
+ which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, s);
34
+ }
35
36
- if (s->default_nan_mode || which == 3) {
37
+ if (which == 3) {
38
parts_default_nan(a, s);
39
return a;
40
}
41
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/fpu/softfloat-specialize.c.inc
44
+++ b/fpu/softfloat-specialize.c.inc
45
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
46
static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
47
bool infzero, float_status *status)
48
{
49
+ /*
50
+ * We guarantee not to require the target to tell us how to
51
+ * pick a NaN if we're always returning the default NaN.
52
+ * But if we're not in default-NaN mode then the target must
53
+ * specify.
54
+ */
55
+ assert(!status->default_nan_mode);
56
#if defined(TARGET_ARM)
57
/* For ARM, the (inf,zero,qnan) case sets InvalidOp and returns
58
* the default NaN
59
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
60
} else {
61
return 1;
62
}
63
-#elif defined(TARGET_RISCV)
64
- return 3; /* default NaN */
65
#elif defined(TARGET_S390X)
66
if (infzero) {
67
return 3;
68
--
69
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
IEEE 758 does not define a fixed rule for what NaN to return in
2
2
the case of a fused multiply-add of inf * 0 + NaN. Different
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
3
architectures thus do different things:
4
Restore SM and ZA state according to the records present on return.
4
* some return the default NaN
5
5
* some return the input NaN
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
* Arm returns the default NaN if the input NaN is quiet,
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
and the input NaN if it is signalling
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
8
9
We want to make this logic be runtime selected rather than
10
hardcoded into the binary, because:
11
* this will let us have multiple targets in one QEMU binary
12
* the Arm FEAT_AFP architectural feature includes letting
13
the guest select a NaN propagation rule at runtime
14
15
In this commit we add an enum for the propagation rule, the field in
16
float_status, and the corresponding getters and setters. We change
17
pickNaNMulAdd to honour this, but because all targets still leave
18
this field at its default 0 value, the fallback logic will pick the
19
rule type with the old ifdef ladder.
20
21
Note that four architectures both use the muladd softfloat functions
22
and did not have a branch of the ifdef ladder to specify their
23
behaviour (and so were ending up with the "default" case, probably
24
wrongly): i386, HPPA, SH4 and Tricore. SH4 and Tricore both set
25
default_nan_mode, and so will never get into pickNaNMulAdd(). For
26
HPPA and i386 we retain the same behaviour as the old default-case,
27
which is to not ever return the default NaN. This might not be
28
correct but it is not a behaviour change.
29
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
30
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
32
Message-id: 20241202131347.498124-4-peter.maydell@linaro.org
10
---
33
---
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
34
include/fpu/softfloat-helpers.h | 11 ++++
12
1 file changed, 154 insertions(+), 13 deletions(-)
35
include/fpu/softfloat-types.h | 23 +++++++++
13
36
fpu/softfloat-specialize.c.inc | 91 ++++++++++++++++++++++-----------
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
37
3 files changed, 95 insertions(+), 30 deletions(-)
38
39
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
15
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/aarch64/signal.c
41
--- a/include/fpu/softfloat-helpers.h
17
+++ b/linux-user/aarch64/signal.c
42
+++ b/include/fpu/softfloat-helpers.h
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
43
@@ -XXX,XX +XXX,XX @@ static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule,
19
44
status->float_2nan_prop_rule = rule;
20
#define TARGET_SVE_SIG_FLAG_SM 1
21
22
+#define TARGET_ZA_MAGIC 0x54366345
23
+
24
+struct target_za_context {
25
+ struct target_aarch64_ctx head;
26
+ uint16_t vl;
27
+ uint16_t reserved[3];
28
+ /* The actual ZA data immediately follows. */
29
+};
30
+
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
37
+
38
struct target_rt_sigframe {
39
struct target_siginfo info;
40
struct target_ucontext uc;
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
42
}
45
}
43
46
44
static void target_setup_sve_record(struct target_sve_context *sve,
47
+static inline void set_float_infzeronan_rule(FloatInfZeroNaNRule rule,
45
- CPUARMState *env, int vq, int size)
48
+ float_status *status)
46
+ CPUARMState *env, int size)
49
+{
50
+ status->float_infzeronan_rule = rule;
51
+}
52
+
53
static inline void set_flush_to_zero(bool val, float_status *status)
47
{
54
{
48
- int i, j;
55
status->flush_to_zero = val;
49
+ int i, j, vq = sve_vq(env);
56
@@ -XXX,XX +XXX,XX @@ static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status)
50
57
return status->float_2nan_prop_rule;
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
55
}
58
}
56
59
57
+static void target_setup_za_record(struct target_za_context *za,
60
+static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status)
58
+ CPUARMState *env, int size)
59
+{
61
+{
60
+ int vq = sme_vq(env);
62
+ return status->float_infzeronan_rule;
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
63
+}
62
+ int i, j;
64
+
63
+
65
static inline bool get_flush_to_zero(float_status *status)
64
+ memset(za, 0, sizeof(*za));
66
{
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
67
return status->flush_to_zero;
66
+ __put_user(size, &za->head.size);
68
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
67
+ __put_user(vl, &za->vl);
69
index XXXXXXX..XXXXXXX 100644
68
+
70
--- a/include/fpu/softfloat-types.h
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
71
+++ b/include/fpu/softfloat-types.h
70
+ return;
72
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
71
+ }
73
float_2nan_prop_x87,
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
74
} Float2NaNPropRule;
73
+
75
74
+ /*
76
+/*
75
+ * Note that ZA vectors are stored as a byte stream,
77
+ * Rule for result of fused multiply-add 0 * Inf + NaN.
76
+ * with each byte element at a subsequent address.
78
+ * This must be a NaN, but implementations differ on whether this
77
+ */
79
+ * is the input NaN or the default NaN.
78
+ for (i = 0; i < vl; ++i) {
80
+ *
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
81
+ * You don't need to set this if default_nan_mode is enabled.
80
+ for (j = 0; j < vq * 2; ++j) {
82
+ * When not in default-NaN mode, it is an error for the target
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
83
+ * not to set the rule in float_status if it uses muladd, and we
84
+ * will assert if we need to handle an input NaN and no rule was
85
+ * selected.
86
+ */
87
+typedef enum __attribute__((__packed__)) {
88
+ /* No propagation rule specified */
89
+ float_infzeronan_none = 0,
90
+ /* Result is never the default NaN (so always the input NaN) */
91
+ float_infzeronan_dnan_never,
92
+ /* Result is always the default NaN */
93
+ float_infzeronan_dnan_always,
94
+ /* Result is the default NaN if the input NaN is quiet */
95
+ float_infzeronan_dnan_if_qnan,
96
+} FloatInfZeroNaNRule;
97
+
98
/*
99
* Floating Point Status. Individual architectures may maintain
100
* several versions of float_status for different functions. The
101
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
102
FloatRoundMode float_rounding_mode;
103
FloatX80RoundPrec floatx80_rounding_precision;
104
Float2NaNPropRule float_2nan_prop_rule;
105
+ FloatInfZeroNaNRule float_infzeronan_rule;
106
bool tininess_before_rounding;
107
/* should denormalised results go to zero and set the inexact flag? */
108
bool flush_to_zero;
109
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
110
index XXXXXXX..XXXXXXX 100644
111
--- a/fpu/softfloat-specialize.c.inc
112
+++ b/fpu/softfloat-specialize.c.inc
113
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
114
static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
115
bool infzero, float_status *status)
116
{
117
+ FloatInfZeroNaNRule rule = status->float_infzeronan_rule;
118
+
119
/*
120
* We guarantee not to require the target to tell us how to
121
* pick a NaN if we're always returning the default NaN.
122
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
123
* specify.
124
*/
125
assert(!status->default_nan_mode);
126
+
127
+ if (rule == float_infzeronan_none) {
128
+ /*
129
+ * Temporarily fall back to ifdef ladder
130
+ */
131
#if defined(TARGET_ARM)
132
- /* For ARM, the (inf,zero,qnan) case sets InvalidOp and returns
133
- * the default NaN
134
- */
135
- if (infzero && is_qnan(c_cls)) {
136
- return 3;
137
+ /*
138
+ * For ARM, the (inf,zero,qnan) case returns the default NaN,
139
+ * but (inf,zero,snan) returns the input NaN.
140
+ */
141
+ rule = float_infzeronan_dnan_if_qnan;
142
+#elif defined(TARGET_MIPS)
143
+ if (snan_bit_is_one(status)) {
144
+ /*
145
+ * For MIPS systems that conform to IEEE754-1985, the (inf,zero,nan)
146
+ * case sets InvalidOp and returns the default NaN
147
+ */
148
+ rule = float_infzeronan_dnan_always;
149
+ } else {
150
+ /*
151
+ * For MIPS systems that conform to IEEE754-2008, the (inf,zero,nan)
152
+ * case sets InvalidOp and returns the input value 'c'
153
+ */
154
+ rule = float_infzeronan_dnan_never;
155
+ }
156
+#elif defined(TARGET_PPC) || defined(TARGET_SPARC) || \
157
+ defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
158
+ defined(TARGET_I386) || defined(TARGET_LOONGARCH)
159
+ /*
160
+ * For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
161
+ * case sets InvalidOp and returns the input value 'c'
162
+ */
163
+ /*
164
+ * For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
165
+ * to return an input NaN if we have one (ie c) rather than generating
166
+ * a default NaN
167
+ */
168
+ rule = float_infzeronan_dnan_never;
169
+#elif defined(TARGET_S390X)
170
+ rule = float_infzeronan_dnan_always;
171
+#endif
172
}
173
174
+ if (infzero) {
175
+ /*
176
+ * Inf * 0 + NaN -- some implementations return the default NaN here,
177
+ * and some return the input NaN.
178
+ */
179
+ switch (rule) {
180
+ case float_infzeronan_dnan_never:
181
+ return 2;
182
+ case float_infzeronan_dnan_always:
183
+ return 3;
184
+ case float_infzeronan_dnan_if_qnan:
185
+ return is_qnan(c_cls) ? 3 : 2;
186
+ default:
187
+ g_assert_not_reached();
82
+ }
188
+ }
83
+ }
189
+ }
84
+}
190
+
85
+
191
+#if defined(TARGET_ARM)
86
static void target_restore_general_frame(CPUARMState *env,
192
+
87
struct target_rt_sigframe *sf)
193
/* This looks different from the ARM ARM pseudocode, because the ARM ARM
88
{
194
* puts the operands to a fused mac operation (a*b)+c in the order c,a,b.
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
195
*/
90
196
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
91
static bool target_restore_sve_record(CPUARMState *env,
197
}
92
struct target_sve_context *sve,
198
#elif defined(TARGET_MIPS)
93
- int size)
199
if (snan_bit_is_one(status)) {
94
+ int size, int *svcr)
200
- /*
95
{
201
- * For MIPS systems that conform to IEEE754-1985, the (inf,zero,nan)
96
- int i, j, vl, vq;
202
- * case sets InvalidOp and returns the default NaN
97
+ int i, j, vl, vq, flags;
203
- */
98
+ bool sm;
204
- if (infzero) {
99
205
- return 3;
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
206
- }
101
+ __get_user(vl, &sve->vl);
207
/* Prefer sNaN over qNaN, in the a, b, c order. */
102
+ __get_user(flags, &sve->flags);
208
if (is_snan(a_cls)) {
103
+
209
return 0;
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
210
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
105
+
211
return 2;
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
212
}
107
+ if (sm
213
} else {
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
214
- /*
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
215
- * For MIPS systems that conform to IEEE754-2008, the (inf,zero,nan)
110
return false;
216
- * case sets InvalidOp and returns the input value 'c'
111
}
217
- */
112
218
/* Prefer sNaN over qNaN, in the c, a, b order. */
113
- __get_user(vl, &sve->vl);
219
if (is_snan(c_cls)) {
114
- vq = sve_vq(env);
220
return 2;
115
+ /*
221
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
116
+ * Note that we cannot use sve_vq() because that depends on the
222
}
117
+ * current setting of PSTATE.SM, not the state to be restored.
223
}
118
+ */
224
#elif defined(TARGET_LOONGARCH64)
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
225
- /*
120
226
- * For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
121
/* Reject mismatched VL. */
227
- * case sets InvalidOp and returns the input value 'c'
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
228
- */
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
229
-
124
return false;
230
/* Prefer sNaN over qNaN, in the c, a, b order. */
125
}
231
if (is_snan(c_cls)) {
126
232
return 2;
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
233
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
128
+
234
return 1;
129
/*
235
}
130
* Note that SVE regs are stored as a byte stream, with each byte element
236
#elif defined(TARGET_PPC)
131
* at a subsequent address. This corresponds to a little-endian load
237
- /* For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
238
- * to return an input NaN if we have one (ie c) rather than generating
133
return true;
239
- * a default NaN
134
}
240
- */
135
241
-
136
+static bool target_restore_za_record(CPUARMState *env,
242
/* If fRA is a NaN return it; otherwise if fRB is a NaN return it;
137
+ struct target_za_context *za,
243
* otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
138
+ int size, int *svcr)
244
*/
139
+{
245
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
140
+ int i, j, vl, vq;
246
return 1;
141
+
247
}
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
248
#elif defined(TARGET_S390X)
143
+ return false;
249
- if (infzero) {
144
+ }
250
- return 3;
145
+
251
- }
146
+ __get_user(vl, &za->vl);
252
-
147
+ vq = sme_vq(env);
253
if (is_snan(a_cls)) {
148
+
254
return 0;
149
+ /* Reject mismatched VL. */
255
} else if (is_snan(b_cls)) {
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
151
+ return false;
152
+ }
153
+
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
156
+ return true;
157
+ }
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
161
+ return false;
162
+ }
163
+
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
165
+
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
172
+ return true;
173
+}
174
+
175
static int target_restore_sigframe(CPUARMState *env,
176
struct target_rt_sigframe *sf)
177
{
178
struct target_aarch64_ctx *ctx, *extra = NULL;
179
struct target_fpsimd_context *fpsimd = NULL;
180
struct target_sve_context *sve = NULL;
181
+ struct target_za_context *za = NULL;
182
uint64_t extra_datap = 0;
183
bool used_extra = false;
184
int sve_size = 0;
185
+ int za_size = 0;
186
+ int svcr = 0;
187
188
target_restore_general_frame(env, sf);
189
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
191
sve_size = size;
192
break;
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
288
--
256
--
289
2.25.1
257
2.34.1
diff view generated by jsdifflib
New patch
1
Explicitly set a rule in the softfloat tests for the inf-zero-nan
2
muladd special case. In meson.build we put -DTARGET_ARM in fpcflags,
3
and so we should select here the Arm rule of
4
float_infzeronan_dnan_if_qnan.
1
5
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Message-id: 20241202131347.498124-5-peter.maydell@linaro.org
9
---
10
tests/fp/fp-bench.c | 5 +++++
11
tests/fp/fp-test.c | 5 +++++
12
2 files changed, 10 insertions(+)
13
14
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/fp/fp-bench.c
17
+++ b/tests/fp/fp-bench.c
18
@@ -XXX,XX +XXX,XX @@ static void run_bench(void)
19
{
20
bench_func_t f;
21
22
+ /*
23
+ * These implementation-defined choices for various things IEEE
24
+ * doesn't specify match those used by the Arm architecture.
25
+ */
26
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &soft_status);
27
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
28
29
f = bench_funcs[operation][precision];
30
g_assert(f);
31
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tests/fp/fp-test.c
34
+++ b/tests/fp/fp-test.c
35
@@ -XXX,XX +XXX,XX @@ void run_test(void)
36
{
37
unsigned int i;
38
39
+ /*
40
+ * These implementation-defined choices for various things IEEE
41
+ * doesn't specify match those used by the Arm architecture.
42
+ */
43
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf);
44
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &qsf);
45
46
genCases_setLevel(test_level);
47
verCases_maxErrorCount = n_max_errors;
48
--
49
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the FloatInfZeroNaNRule explicitly for the Arm target,
2
so we can remove the ifdef from pickNaNMulAdd().
2
3
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-6-peter.maydell@linaro.org
9
---
7
---
10
target/arm/cpu.c | 11 +++++++++++
8
target/arm/cpu.c | 3 +++
11
1 file changed, 11 insertions(+)
9
fpu/softfloat-specialize.c.inc | 8 +-------
10
2 files changed, 4 insertions(+), 7 deletions(-)
12
11
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
12
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
14
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
15
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
16
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
18
CPACR_EL1, ZEN, 3);
17
* * tininess-before-rounding
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
18
* * 2-input NaN propagation prefers SNaN over QNaN, and then
20
}
19
* operand A over operand B (see FPProcessNaNs() pseudocode)
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
20
+ * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
21
+ * and the input NaN if it is signalling
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
22
*/
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
static void arm_set_default_fp_behaviours(float_status *s)
25
+ CPACR_EL1, SMEN, 3);
24
{
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
25
set_float_detect_tininess(float_tininess_before_rounding, s);
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
26
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
27
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
29
+ SMCR, FA64, 1);
28
}
30
+ }
29
31
+ }
30
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
31
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
32
index XXXXXXX..XXXXXXX 100644
33
--- a/fpu/softfloat-specialize.c.inc
34
+++ b/fpu/softfloat-specialize.c.inc
35
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
32
/*
36
/*
33
* Enable 48-bit address space (TODO: take reserved_va into account).
37
* Temporarily fall back to ifdef ladder
34
* Enable TBI0 but not TBI1.
38
*/
39
-#if defined(TARGET_ARM)
40
- /*
41
- * For ARM, the (inf,zero,qnan) case returns the default NaN,
42
- * but (inf,zero,snan) returns the input NaN.
43
- */
44
- rule = float_infzeronan_dnan_if_qnan;
45
-#elif defined(TARGET_MIPS)
46
+#if defined(TARGET_MIPS)
47
if (snan_bit_is_one(status)) {
48
/*
49
* For MIPS systems that conform to IEEE754-1985, the (inf,zero,nan)
35
--
50
--
36
2.25.1
51
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for s390, so we
2
can remove the ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-7-peter.maydell@linaro.org
7
---
8
target/s390x/cpu.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 2 --
10
2 files changed, 2 insertions(+), 2 deletions(-)
11
12
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/s390x/cpu.c
15
+++ b/target/s390x/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_reset_hold(Object *obj, ResetType type)
17
set_float_detect_tininess(float_tininess_before_rounding,
18
&env->fpu_status);
19
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fpu_status);
20
+ set_float_infzeronan_rule(float_infzeronan_dnan_always,
21
+ &env->fpu_status);
22
/* fall through */
23
case RESET_TYPE_S390_CPU_NORMAL:
24
env->psw.mask &= ~PSW_MASK_RI;
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/fpu/softfloat-specialize.c.inc
28
+++ b/fpu/softfloat-specialize.c.inc
29
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
30
* a default NaN
31
*/
32
rule = float_infzeronan_dnan_never;
33
-#elif defined(TARGET_S390X)
34
- rule = float_infzeronan_dnan_always;
35
#endif
36
}
37
38
--
39
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the PPC target,
2
so we can remove the ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-8-peter.maydell@linaro.org
7
---
8
target/ppc/cpu_init.c | 7 +++++++
9
fpu/softfloat-specialize.c.inc | 7 +------
10
2 files changed, 8 insertions(+), 6 deletions(-)
11
12
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/ppc/cpu_init.c
15
+++ b/target/ppc/cpu_init.c
16
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
17
*/
18
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
19
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->vec_status);
20
+ /*
21
+ * For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
22
+ * to return an input NaN if we have one (ie c) rather than generating
23
+ * a default NaN
24
+ */
25
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
26
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->vec_status);
27
28
for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) {
29
ppc_spr_t *spr = &env->spr_cb[i];
30
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
31
index XXXXXXX..XXXXXXX 100644
32
--- a/fpu/softfloat-specialize.c.inc
33
+++ b/fpu/softfloat-specialize.c.inc
34
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
35
*/
36
rule = float_infzeronan_dnan_never;
37
}
38
-#elif defined(TARGET_PPC) || defined(TARGET_SPARC) || \
39
+#elif defined(TARGET_SPARC) || \
40
defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
41
defined(TARGET_I386) || defined(TARGET_LOONGARCH)
42
/*
43
* For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
44
* case sets InvalidOp and returns the input value 'c'
45
*/
46
- /*
47
- * For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
48
- * to return an input NaN if we have one (ie c) rather than generating
49
- * a default NaN
50
- */
51
rule = float_infzeronan_dnan_never;
52
#endif
53
}
54
--
55
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the MIPS target,
2
so we can remove the ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-9-peter.maydell@linaro.org
7
---
8
target/mips/fpu_helper.h | 9 +++++++++
9
target/mips/msa.c | 4 ++++
10
fpu/softfloat-specialize.c.inc | 16 +---------------
11
3 files changed, 14 insertions(+), 15 deletions(-)
12
13
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/mips/fpu_helper.h
16
+++ b/target/mips/fpu_helper.h
17
@@ -XXX,XX +XXX,XX @@ static inline void restore_flush_mode(CPUMIPSState *env)
18
static inline void restore_snan_bit_mode(CPUMIPSState *env)
19
{
20
bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);
21
+ FloatInfZeroNaNRule izn_rule;
22
23
/*
24
* With nan2008, SNaNs are silenced in the usual way.
25
@@ -XXX,XX +XXX,XX @@ static inline void restore_snan_bit_mode(CPUMIPSState *env)
26
*/
27
set_snan_bit_is_one(!nan2008, &env->active_fpu.fp_status);
28
set_default_nan_mode(!nan2008, &env->active_fpu.fp_status);
29
+ /*
30
+ * For MIPS systems that conform to IEEE754-1985, the (inf,zero,nan)
31
+ * case sets InvalidOp and returns the default NaN.
32
+ * For MIPS systems that conform to IEEE754-2008, the (inf,zero,nan)
33
+ * case sets InvalidOp and returns the input value 'c'.
34
+ */
35
+ izn_rule = nan2008 ? float_infzeronan_dnan_never : float_infzeronan_dnan_always;
36
+ set_float_infzeronan_rule(izn_rule, &env->active_fpu.fp_status);
37
}
38
39
static inline void restore_fp_status(CPUMIPSState *env)
40
diff --git a/target/mips/msa.c b/target/mips/msa.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/mips/msa.c
43
+++ b/target/mips/msa.c
44
@@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env)
45
46
/* set proper signanling bit meaning ("1" means "quiet") */
47
set_snan_bit_is_one(0, &env->active_tc.msa_fp_status);
48
+
49
+ /* Inf * 0 + NaN returns the input NaN */
50
+ set_float_infzeronan_rule(float_infzeronan_dnan_never,
51
+ &env->active_tc.msa_fp_status);
52
}
53
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/fpu/softfloat-specialize.c.inc
56
+++ b/fpu/softfloat-specialize.c.inc
57
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
58
/*
59
* Temporarily fall back to ifdef ladder
60
*/
61
-#if defined(TARGET_MIPS)
62
- if (snan_bit_is_one(status)) {
63
- /*
64
- * For MIPS systems that conform to IEEE754-1985, the (inf,zero,nan)
65
- * case sets InvalidOp and returns the default NaN
66
- */
67
- rule = float_infzeronan_dnan_always;
68
- } else {
69
- /*
70
- * For MIPS systems that conform to IEEE754-2008, the (inf,zero,nan)
71
- * case sets InvalidOp and returns the input value 'c'
72
- */
73
- rule = float_infzeronan_dnan_never;
74
- }
75
-#elif defined(TARGET_SPARC) || \
76
+#if defined(TARGET_SPARC) || \
77
defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
78
defined(TARGET_I386) || defined(TARGET_LOONGARCH)
79
/*
80
--
81
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the SPARC target,
2
so we can remove the ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-10-peter.maydell@linaro.org
7
---
8
target/sparc/cpu.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 3 +--
10
2 files changed, 3 insertions(+), 2 deletions(-)
11
12
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/sparc/cpu.c
15
+++ b/target/sparc/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp)
17
* the CPU state struct so it won't get zeroed on reset.
18
*/
19
set_float_2nan_prop_rule(float_2nan_prop_s_ba, &env->fp_status);
20
+ /* For inf * 0 + NaN, return the input NaN */
21
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
22
23
cpu_exec_realizefn(cs, &local_err);
24
if (local_err != NULL) {
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/fpu/softfloat-specialize.c.inc
28
+++ b/fpu/softfloat-specialize.c.inc
29
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
30
/*
31
* Temporarily fall back to ifdef ladder
32
*/
33
-#if defined(TARGET_SPARC) || \
34
- defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
35
+#if defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
36
defined(TARGET_I386) || defined(TARGET_LOONGARCH)
37
/*
38
* For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
39
--
40
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the xtensa target,
2
so we can remove the ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-11-peter.maydell@linaro.org
7
---
8
target/xtensa/cpu.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 2 +-
10
2 files changed, 3 insertions(+), 1 deletion(-)
11
12
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/xtensa/cpu.c
15
+++ b/target/xtensa/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_reset_hold(Object *obj, ResetType type)
17
reset_mmu(env);
18
cs->halted = env->runstall;
19
#endif
20
+ /* For inf * 0 + NaN, return the input NaN */
21
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
22
set_no_signaling_nans(!dfpu, &env->fp_status);
23
xtensa_use_first_nan(env, !dfpu);
24
}
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/fpu/softfloat-specialize.c.inc
28
+++ b/fpu/softfloat-specialize.c.inc
29
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
30
/*
31
* Temporarily fall back to ifdef ladder
32
*/
33
-#if defined(TARGET_XTENSA) || defined(TARGET_HPPA) || \
34
+#if defined(TARGET_HPPA) || \
35
defined(TARGET_I386) || defined(TARGET_LOONGARCH)
36
/*
37
* For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
38
--
39
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the x86 target.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-12-peter.maydell@linaro.org
6
---
7
target/i386/tcg/fpu_helper.c | 7 +++++++
8
fpu/softfloat-specialize.c.inc | 2 +-
9
2 files changed, 8 insertions(+), 1 deletion(-)
10
11
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/i386/tcg/fpu_helper.c
14
+++ b/target/i386/tcg/fpu_helper.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env)
16
*/
17
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
18
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
19
+ /*
20
+ * Only SSE has multiply-add instructions. In the SDM Section 14.5.2
21
+ * "Fused-Multiply-ADD (FMA) Numeric Behavior" the NaN handling is
22
+ * specified -- for 0 * inf + NaN the input NaN is selected, and if
23
+ * there are multiple input NaNs they are selected in the order a, b, c.
24
+ */
25
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->sse_status);
26
}
27
28
static inline uint8_t save_exception_flags(CPUX86State *env)
29
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
30
index XXXXXXX..XXXXXXX 100644
31
--- a/fpu/softfloat-specialize.c.inc
32
+++ b/fpu/softfloat-specialize.c.inc
33
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
34
* Temporarily fall back to ifdef ladder
35
*/
36
#if defined(TARGET_HPPA) || \
37
- defined(TARGET_I386) || defined(TARGET_LOONGARCH)
38
+ defined(TARGET_LOONGARCH)
39
/*
40
* For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
41
* case sets InvalidOp and returns the input value 'c'
42
--
43
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the loongarch target.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-13-peter.maydell@linaro.org
6
---
7
target/loongarch/tcg/fpu_helper.c | 5 +++++
8
fpu/softfloat-specialize.c.inc | 7 +------
9
2 files changed, 6 insertions(+), 6 deletions(-)
10
11
diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/loongarch/tcg/fpu_helper.c
14
+++ b/target/loongarch/tcg/fpu_helper.c
15
@@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env)
16
&env->fp_status);
17
set_flush_to_zero(0, &env->fp_status);
18
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
19
+ /*
20
+ * For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
21
+ * case sets InvalidOp and returns the input value 'c'
22
+ */
23
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
24
}
25
26
int ieee_ex_to_loongarch(int xcpt)
27
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
28
index XXXXXXX..XXXXXXX 100644
29
--- a/fpu/softfloat-specialize.c.inc
30
+++ b/fpu/softfloat-specialize.c.inc
31
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
32
/*
33
* Temporarily fall back to ifdef ladder
34
*/
35
-#if defined(TARGET_HPPA) || \
36
- defined(TARGET_LOONGARCH)
37
- /*
38
- * For LoongArch systems that conform to IEEE754-2008, the (inf,zero,nan)
39
- * case sets InvalidOp and returns the input value 'c'
40
- */
41
+#if defined(TARGET_HPPA)
42
rule = float_infzeronan_dnan_never;
43
#endif
44
}
45
--
46
2.34.1
diff view generated by jsdifflib
New patch
1
Set the FloatInfZeroNaNRule explicitly for the HPPA target,
2
so we can remove the ifdef from pickNaNMulAdd().
1
3
4
As this is the last target to be converted to explicitly setting
5
the rule, we can remove the fallback code in pickNaNMulAdd()
6
entirely.
7
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20241202131347.498124-14-peter.maydell@linaro.org
11
---
12
target/hppa/fpu_helper.c | 2 ++
13
fpu/softfloat-specialize.c.inc | 13 +------------
14
2 files changed, 3 insertions(+), 12 deletions(-)
15
16
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/hppa/fpu_helper.c
19
+++ b/target/hppa/fpu_helper.c
20
@@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
21
* HPPA does note implement a CPU reset method at all...
22
*/
23
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
24
+ /* For inf * 0 + NaN, return the input NaN */
25
+ set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
26
}
27
28
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
29
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
30
index XXXXXXX..XXXXXXX 100644
31
--- a/fpu/softfloat-specialize.c.inc
32
+++ b/fpu/softfloat-specialize.c.inc
33
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
34
static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
35
bool infzero, float_status *status)
36
{
37
- FloatInfZeroNaNRule rule = status->float_infzeronan_rule;
38
-
39
/*
40
* We guarantee not to require the target to tell us how to
41
* pick a NaN if we're always returning the default NaN.
42
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
43
*/
44
assert(!status->default_nan_mode);
45
46
- if (rule == float_infzeronan_none) {
47
- /*
48
- * Temporarily fall back to ifdef ladder
49
- */
50
-#if defined(TARGET_HPPA)
51
- rule = float_infzeronan_dnan_never;
52
-#endif
53
- }
54
-
55
if (infzero) {
56
/*
57
* Inf * 0 + NaN -- some implementations return the default NaN here,
58
* and some return the input NaN.
59
*/
60
- switch (rule) {
61
+ switch (status->float_infzeronan_rule) {
62
case float_infzeronan_dnan_never:
63
return 2;
64
case float_infzeronan_dnan_always:
65
--
66
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
The new implementation of pickNaNMulAdd() will find it convenient
2
to know whether at least one of the three arguments to the muladd
3
was a signaling NaN. We already calculate that in the caller,
4
so pass it in as a new bool have_snan.
2
5
3
Add "sve" to the sve prctl functions, to distinguish
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
them from the coming "sme" prctls with similar names.
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20241202131347.498124-15-peter.maydell@linaro.org
9
---
10
fpu/softfloat-parts.c.inc | 5 +++--
11
fpu/softfloat-specialize.c.inc | 2 +-
12
2 files changed, 4 insertions(+), 3 deletions(-)
5
13
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 8 ++++----
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
16
--- a/fpu/softfloat-parts.c.inc
18
+++ b/linux-user/aarch64/target_prctl.h
17
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
20
#ifndef AARCH64_TARGET_PRCTL_H
21
#define AARCH64_TARGET_PRCTL_H
22
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
{
19
{
26
ARMCPU *cpu = env_archcpu(env);
20
int which;
27
if (cpu_isar_feature(aa64_sve, cpu)) {
21
bool infzero = (ab_mask == float_cmask_infzero);
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
22
+ bool have_snan = (abc_mask & float_cmask_snan);
23
24
- if (unlikely(abc_mask & float_cmask_snan)) {
25
+ if (unlikely(have_snan)) {
26
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
29
}
27
}
30
return -TARGET_EINVAL;
28
31
}
29
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
32
-#define do_prctl_get_vl do_prctl_get_vl
30
if (s->default_nan_mode) {
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
31
which = 3;
34
32
} else {
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
33
- which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, s);
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
34
+ which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, have_snan, s);
35
}
36
37
if (which == 3) {
38
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
39
index XXXXXXX..XXXXXXX 100644
40
--- a/fpu/softfloat-specialize.c.inc
41
+++ b/fpu/softfloat-specialize.c.inc
42
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
43
| Return values : 0 : a; 1 : b; 2 : c; 3 : default-NaN
44
*----------------------------------------------------------------------------*/
45
static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
46
- bool infzero, float_status *status)
47
+ bool infzero, bool have_snan, float_status *status)
37
{
48
{
38
/*
49
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
50
* We guarantee not to require the target to tell us how to
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/linux-user/syscall.c
52
+++ b/linux-user/syscall.c
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
54
#ifndef do_prctl_set_fp_mode
55
#define do_prctl_set_fp_mode do_prctl_inval1
56
#endif
57
-#ifndef do_prctl_get_vl
58
-#define do_prctl_get_vl do_prctl_inval0
59
+#ifndef do_prctl_sve_get_vl
60
+#define do_prctl_sve_get_vl do_prctl_inval0
61
#endif
62
-#ifndef do_prctl_set_vl
63
-#define do_prctl_set_vl do_prctl_inval1
64
+#ifndef do_prctl_sve_set_vl
65
+#define do_prctl_sve_set_vl do_prctl_inval1
66
#endif
67
#ifndef do_prctl_reset_keys
68
#define do_prctl_reset_keys do_prctl_inval1
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
case PR_SET_FP_MODE:
71
return do_prctl_set_fp_mode(env, arg2);
72
case PR_SVE_GET_VL:
73
- return do_prctl_get_vl(env);
74
+ return do_prctl_sve_get_vl(env);
75
case PR_SVE_SET_VL:
76
- return do_prctl_set_vl(env, arg2);
77
+ return do_prctl_sve_set_vl(env, arg2);
78
case PR_PAC_RESET_KEYS:
79
if (arg3 || arg4 || arg5) {
80
return -TARGET_EINVAL;
81
--
51
--
82
2.25.1
52
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
IEEE 758 does not define a fixed rule for which NaN to pick as the
2
2
result if both operands of a 3-operand fused multiply-add operation
3
These functions will be used to verify that the cpu
3
are NaNs. As a result different architectures have ended up with
4
is in the correct state for a given instruction.
4
different rules for propagating NaNs.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
QEMU currently hardcodes the NaN propagation logic into the binary
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
because pickNaNMulAdd() has an ifdef ladder for different targets.
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
8
We want to make the propagation rule instead be selectable at
9
runtime, because:
10
* this will let us have multiple targets in one QEMU binary
11
* the Arm FEAT_AFP architectural feature includes letting
12
the guest select a NaN propagation rule at runtime
13
14
In this commit we add an enum for the propagation rule, the field in
15
float_status, and the corresponding getters and setters. We change
16
pickNaNMulAdd to honour this, but because all targets still leave
17
this field at its default 0 value, the fallback logic will pick the
18
rule type with the old ifdef ladder.
19
20
It's valid not to set a propagation rule if default_nan_mode is
21
enabled, because in that case there's no need to pick a NaN; all the
22
callers of pickNaNMulAdd() catch this case and skip calling it.
23
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
26
Message-id: 20241202131347.498124-16-peter.maydell@linaro.org
10
---
27
---
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
28
include/fpu/softfloat-helpers.h | 11 +++
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
29
include/fpu/softfloat-types.h | 55 +++++++++++
13
2 files changed, 55 insertions(+)
30
fpu/softfloat-specialize.c.inc | 167 ++++++++------------------------
14
31
3 files changed, 107 insertions(+), 126 deletions(-)
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
32
33
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
16
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.h
35
--- a/include/fpu/softfloat-helpers.h
18
+++ b/target/arm/translate-a64.h
36
+++ b/include/fpu/softfloat-helpers.h
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
37
@@ -XXX,XX +XXX,XX @@ static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule,
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
38
status->float_2nan_prop_rule = rule;
21
unsigned int imms, unsigned int immr);
39
}
22
bool sve_access_check(DisasContext *s);
40
23
+bool sme_enabled_check(DisasContext *s);
41
+static inline void set_float_3nan_prop_rule(Float3NaNPropRule rule,
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
42
+ float_status *status)
25
+
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
28
+{
43
+{
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
44
+ status->float_3nan_prop_rule = rule;
30
+}
45
+}
31
+
46
+
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
47
static inline void set_float_infzeronan_rule(FloatInfZeroNaNRule rule,
33
+static inline bool sme_za_enabled_check(DisasContext *s)
48
float_status *status)
49
{
50
@@ -XXX,XX +XXX,XX @@ static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status)
51
return status->float_2nan_prop_rule;
52
}
53
54
+static inline Float3NaNPropRule get_float_3nan_prop_rule(float_status *status)
34
+{
55
+{
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
56
+ return status->float_3nan_prop_rule;
36
+}
57
+}
37
+
58
+
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
59
static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status)
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
60
{
40
+{
61
return status->float_infzeronan_rule;
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
62
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
42
+}
43
+
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
48
index XXXXXXX..XXXXXXX 100644
63
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-a64.c
64
--- a/include/fpu/softfloat-types.h
50
+++ b/target/arm/translate-a64.c
65
+++ b/include/fpu/softfloat-types.h
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
66
@@ -XXX,XX +XXX,XX @@ this code that are retained.
52
return true;
67
#ifndef SOFTFLOAT_TYPES_H
68
#define SOFTFLOAT_TYPES_H
69
70
+#include "hw/registerfields.h"
71
+
72
/*
73
* Software IEC/IEEE floating-point types.
74
*/
75
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
76
float_2nan_prop_x87,
77
} Float2NaNPropRule;
78
79
+/*
80
+ * 3-input NaN propagation rule, for fused multiply-add. Individual
81
+ * architectures have different rules for which input NaN is
82
+ * propagated to the output when there is more than one NaN on the
83
+ * input.
84
+ *
85
+ * If default_nan_mode is enabled then it is valid not to set a NaN
86
+ * propagation rule, because the softfloat code guarantees not to try
87
+ * to pick a NaN to propagate in default NaN mode. When not in
88
+ * default-NaN mode, it is an error for the target not to set the rule
89
+ * in float_status if it uses a muladd, and we will assert if we need
90
+ * to handle an input NaN and no rule was selected.
91
+ *
92
+ * The naming scheme for Float3NaNPropRule values is:
93
+ * float_3nan_prop_s_abc:
94
+ * = "Prefer SNaN over QNaN, then operand A over B over C"
95
+ * float_3nan_prop_abc:
96
+ * = "Prefer A over B over C regardless of SNaN vs QNAN"
97
+ *
98
+ * For QEMU, the multiply-add operation is A * B + C.
99
+ */
100
+
101
+/*
102
+ * We set the Float3NaNPropRule enum values up so we can select the
103
+ * right value in pickNaNMulAdd in a data driven way.
104
+ */
105
+FIELD(3NAN, 1ST, 0, 2) /* which operand is most preferred ? */
106
+FIELD(3NAN, 2ND, 2, 2) /* which operand is next most preferred ? */
107
+FIELD(3NAN, 3RD, 4, 2) /* which operand is least preferred ? */
108
+FIELD(3NAN, SNAN, 6, 1) /* do we prefer SNaN over QNaN ? */
109
+
110
+#define PROPRULE(X, Y, Z) \
111
+ ((X << R_3NAN_1ST_SHIFT) | (Y << R_3NAN_2ND_SHIFT) | (Z << R_3NAN_3RD_SHIFT))
112
+
113
+typedef enum __attribute__((__packed__)) {
114
+ float_3nan_prop_none = 0, /* No propagation rule specified */
115
+ float_3nan_prop_abc = PROPRULE(0, 1, 2),
116
+ float_3nan_prop_acb = PROPRULE(0, 2, 1),
117
+ float_3nan_prop_bac = PROPRULE(1, 0, 2),
118
+ float_3nan_prop_bca = PROPRULE(1, 2, 0),
119
+ float_3nan_prop_cab = PROPRULE(2, 0, 1),
120
+ float_3nan_prop_cba = PROPRULE(2, 1, 0),
121
+ float_3nan_prop_s_abc = float_3nan_prop_abc | R_3NAN_SNAN_MASK,
122
+ float_3nan_prop_s_acb = float_3nan_prop_acb | R_3NAN_SNAN_MASK,
123
+ float_3nan_prop_s_bac = float_3nan_prop_bac | R_3NAN_SNAN_MASK,
124
+ float_3nan_prop_s_bca = float_3nan_prop_bca | R_3NAN_SNAN_MASK,
125
+ float_3nan_prop_s_cab = float_3nan_prop_cab | R_3NAN_SNAN_MASK,
126
+ float_3nan_prop_s_cba = float_3nan_prop_cba | R_3NAN_SNAN_MASK,
127
+} Float3NaNPropRule;
128
+
129
+#undef PROPRULE
130
+
131
/*
132
* Rule for result of fused multiply-add 0 * Inf + NaN.
133
* This must be a NaN, but implementations differ on whether this
134
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
135
FloatRoundMode float_rounding_mode;
136
FloatX80RoundPrec floatx80_rounding_precision;
137
Float2NaNPropRule float_2nan_prop_rule;
138
+ Float3NaNPropRule float_3nan_prop_rule;
139
FloatInfZeroNaNRule float_infzeronan_rule;
140
bool tininess_before_rounding;
141
/* should denormalised results go to zero and set the inexact flag? */
142
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
143
index XXXXXXX..XXXXXXX 100644
144
--- a/fpu/softfloat-specialize.c.inc
145
+++ b/fpu/softfloat-specialize.c.inc
146
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
147
static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
148
bool infzero, bool have_snan, float_status *status)
149
{
150
+ FloatClass cls[3] = { a_cls, b_cls, c_cls };
151
+ Float3NaNPropRule rule = status->float_3nan_prop_rule;
152
+ int which;
153
+
154
/*
155
* We guarantee not to require the target to tell us how to
156
* pick a NaN if we're always returning the default NaN.
157
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
158
}
159
}
160
161
+ if (rule == float_3nan_prop_none) {
162
#if defined(TARGET_ARM)
163
-
164
- /* This looks different from the ARM ARM pseudocode, because the ARM ARM
165
- * puts the operands to a fused mac operation (a*b)+c in the order c,a,b.
166
- */
167
- if (is_snan(c_cls)) {
168
- return 2;
169
- } else if (is_snan(a_cls)) {
170
- return 0;
171
- } else if (is_snan(b_cls)) {
172
- return 1;
173
- } else if (is_qnan(c_cls)) {
174
- return 2;
175
- } else if (is_qnan(a_cls)) {
176
- return 0;
177
- } else {
178
- return 1;
179
- }
180
+ /*
181
+ * This looks different from the ARM ARM pseudocode, because the ARM ARM
182
+ * puts the operands to a fused mac operation (a*b)+c in the order c,a,b
183
+ */
184
+ rule = float_3nan_prop_s_cab;
185
#elif defined(TARGET_MIPS)
186
- if (snan_bit_is_one(status)) {
187
- /* Prefer sNaN over qNaN, in the a, b, c order. */
188
- if (is_snan(a_cls)) {
189
- return 0;
190
- } else if (is_snan(b_cls)) {
191
- return 1;
192
- } else if (is_snan(c_cls)) {
193
- return 2;
194
- } else if (is_qnan(a_cls)) {
195
- return 0;
196
- } else if (is_qnan(b_cls)) {
197
- return 1;
198
+ if (snan_bit_is_one(status)) {
199
+ rule = float_3nan_prop_s_abc;
200
} else {
201
- return 2;
202
+ rule = float_3nan_prop_s_cab;
203
}
204
- } else {
205
- /* Prefer sNaN over qNaN, in the c, a, b order. */
206
- if (is_snan(c_cls)) {
207
- return 2;
208
- } else if (is_snan(a_cls)) {
209
- return 0;
210
- } else if (is_snan(b_cls)) {
211
- return 1;
212
- } else if (is_qnan(c_cls)) {
213
- return 2;
214
- } else if (is_qnan(a_cls)) {
215
- return 0;
216
- } else {
217
- return 1;
218
- }
219
- }
220
#elif defined(TARGET_LOONGARCH64)
221
- /* Prefer sNaN over qNaN, in the c, a, b order. */
222
- if (is_snan(c_cls)) {
223
- return 2;
224
- } else if (is_snan(a_cls)) {
225
- return 0;
226
- } else if (is_snan(b_cls)) {
227
- return 1;
228
- } else if (is_qnan(c_cls)) {
229
- return 2;
230
- } else if (is_qnan(a_cls)) {
231
- return 0;
232
- } else {
233
- return 1;
234
- }
235
+ rule = float_3nan_prop_s_cab;
236
#elif defined(TARGET_PPC)
237
- /* If fRA is a NaN return it; otherwise if fRB is a NaN return it;
238
- * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
239
- */
240
- if (is_nan(a_cls)) {
241
- return 0;
242
- } else if (is_nan(c_cls)) {
243
- return 2;
244
- } else {
245
- return 1;
246
- }
247
+ /*
248
+ * If fRA is a NaN return it; otherwise if fRB is a NaN return it;
249
+ * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
250
+ */
251
+ rule = float_3nan_prop_acb;
252
#elif defined(TARGET_S390X)
253
- if (is_snan(a_cls)) {
254
- return 0;
255
- } else if (is_snan(b_cls)) {
256
- return 1;
257
- } else if (is_snan(c_cls)) {
258
- return 2;
259
- } else if (is_qnan(a_cls)) {
260
- return 0;
261
- } else if (is_qnan(b_cls)) {
262
- return 1;
263
- } else {
264
- return 2;
265
- }
266
+ rule = float_3nan_prop_s_abc;
267
#elif defined(TARGET_SPARC)
268
- /* Prefer SNaN over QNaN, order C, B, A. */
269
- if (is_snan(c_cls)) {
270
- return 2;
271
- } else if (is_snan(b_cls)) {
272
- return 1;
273
- } else if (is_snan(a_cls)) {
274
- return 0;
275
- } else if (is_qnan(c_cls)) {
276
- return 2;
277
- } else if (is_qnan(b_cls)) {
278
- return 1;
279
- } else {
280
- return 0;
281
- }
282
+ rule = float_3nan_prop_s_cba;
283
#elif defined(TARGET_XTENSA)
284
- /*
285
- * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns
286
- * an input NaN if we have one (ie c).
287
- */
288
- if (status->use_first_nan) {
289
- if (is_nan(a_cls)) {
290
- return 0;
291
- } else if (is_nan(b_cls)) {
292
- return 1;
293
+ if (status->use_first_nan) {
294
+ rule = float_3nan_prop_abc;
295
} else {
296
- return 2;
297
+ rule = float_3nan_prop_cba;
298
}
299
- } else {
300
- if (is_nan(c_cls)) {
301
- return 2;
302
- } else if (is_nan(b_cls)) {
303
- return 1;
304
- } else {
305
- return 0;
306
- }
307
- }
308
#else
309
- /* A default implementation: prefer a to b to c.
310
- * This is unlikely to actually match any real implementation.
311
- */
312
- if (is_nan(a_cls)) {
313
- return 0;
314
- } else if (is_nan(b_cls)) {
315
- return 1;
316
- } else {
317
- return 2;
318
- }
319
+ rule = float_3nan_prop_abc;
320
#endif
321
+ }
322
+
323
+ assert(rule != float_3nan_prop_none);
324
+ if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
325
+ /* We have at least one SNaN input and should prefer it */
326
+ do {
327
+ which = rule & R_3NAN_1ST_MASK;
328
+ rule >>= R_3NAN_1ST_LENGTH;
329
+ } while (!is_snan(cls[which]));
330
+ } else {
331
+ do {
332
+ which = rule & R_3NAN_1ST_MASK;
333
+ rule >>= R_3NAN_1ST_LENGTH;
334
+ } while (!is_nan(cls[which]));
335
+ }
336
+ return which;
53
}
337
}
54
338
55
+/* This function corresponds to CheckSMEEnabled. */
339
/*----------------------------------------------------------------------------
56
+bool sme_enabled_check(DisasContext *s)
57
+{
58
+ /*
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
60
+ * to be zero when fp_excp_el has priority. This is because we need
61
+ * sme_excp_el by itself for cpregs access checks.
62
+ */
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
64
+ s->fp_access_checked = true;
65
+ return sme_access_check(s);
66
+ }
67
+ return fp_access_check_only(s);
68
+}
69
+
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
72
+{
73
+ if (!sme_enabled_check(s)) {
74
+ return false;
75
+ }
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
78
+ syn_smetrap(SME_ET_NotStreaming, false));
79
+ return false;
80
+ }
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
85
+ }
86
+ return true;
87
+}
88
+
89
/*
90
* This utility function is for doing register extension with an
91
* optional shift. You will likely want to pass a temporary for the
92
--
340
--
93
2.25.1
341
2.34.1
diff view generated by jsdifflib
New patch
1
Explicitly set a rule in the softfloat tests for propagating NaNs in
2
the muladd case. In meson.build we put -DTARGET_ARM in fpcflags, and
3
so we should select here the Arm rule of float_3nan_prop_s_cab.
1
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20241202131347.498124-17-peter.maydell@linaro.org
8
---
9
tests/fp/fp-bench.c | 1 +
10
tests/fp/fp-test.c | 1 +
11
2 files changed, 2 insertions(+)
12
13
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/fp/fp-bench.c
16
+++ b/tests/fp/fp-bench.c
17
@@ -XXX,XX +XXX,XX @@ static void run_bench(void)
18
* doesn't specify match those used by the Arm architecture.
19
*/
20
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &soft_status);
21
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
22
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
23
24
f = bench_funcs[operation][precision];
25
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tests/fp/fp-test.c
28
+++ b/tests/fp/fp-test.c
29
@@ -XXX,XX +XXX,XX @@ void run_test(void)
30
* doesn't specify match those used by the Arm architecture.
31
*/
32
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf);
33
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, &qsf);
34
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &qsf);
35
36
genCases_setLevel(test_level);
37
--
38
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the Float3NaNPropRule explicitly for Arm, and remove the
2
ifdef from pickNaNMulAdd().
2
3
3
There's no reason to set CPACR_EL1.ZEN if SVE disabled.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-18-peter.maydell@linaro.org
9
---
7
---
10
target/arm/cpu.c | 7 +++----
8
target/arm/cpu.c | 5 +++++
11
1 file changed, 3 insertions(+), 4 deletions(-)
9
fpu/softfloat-specialize.c.inc | 8 +-------
10
2 files changed, 6 insertions(+), 7 deletions(-)
12
11
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
12
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
14
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
15
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
16
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
18
/* and to the FP/Neon instructions */
17
* * tininess-before-rounding
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
18
* * 2-input NaN propagation prefers SNaN over QNaN, and then
20
CPACR_EL1, FPEN, 3);
19
* operand A over operand B (see FPProcessNaNs() pseudocode)
21
- /* and to the SVE instructions */
20
+ * * 3-input NaN propagation prefers SNaN over QNaN, and then
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
21
+ * operand C over A over B (see FPProcessNaNs3() pseudocode,
23
- CPACR_EL1, ZEN, 3);
22
+ * but note that for QEMU muladd is a * b + c, whereas for
24
- /* with reasonable vector length */
23
+ * the pseudocode function the arguments are in the order c, a, b.
25
+ /* and to the SVE instructions, with default vector length */
24
* * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
26
if (cpu_isar_feature(aa64_sve, cpu)) {
25
* and the input NaN if it is signalling
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
26
*/
28
+ CPACR_EL1, ZEN, 3);
27
@@ -XXX,XX +XXX,XX @@ static void arm_set_default_fp_behaviours(float_status *s)
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
28
{
30
}
29
set_float_detect_tininess(float_tininess_before_rounding, s);
31
/*
30
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
31
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
32
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
33
}
34
35
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
36
index XXXXXXX..XXXXXXX 100644
37
--- a/fpu/softfloat-specialize.c.inc
38
+++ b/fpu/softfloat-specialize.c.inc
39
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
40
}
41
42
if (rule == float_3nan_prop_none) {
43
-#if defined(TARGET_ARM)
44
- /*
45
- * This looks different from the ARM ARM pseudocode, because the ARM ARM
46
- * puts the operands to a fused mac operation (a*b)+c in the order c,a,b
47
- */
48
- rule = float_3nan_prop_s_cab;
49
-#elif defined(TARGET_MIPS)
50
+#if defined(TARGET_MIPS)
51
if (snan_bit_is_one(status)) {
52
rule = float_3nan_prop_s_abc;
53
} else {
32
--
54
--
33
2.25.1
55
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for loongarch, and remove the
2
ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-19-peter.maydell@linaro.org
7
---
8
target/loongarch/tcg/fpu_helper.c | 1 +
9
fpu/softfloat-specialize.c.inc | 2 --
10
2 files changed, 1 insertion(+), 2 deletions(-)
11
12
diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/loongarch/tcg/fpu_helper.c
15
+++ b/target/loongarch/tcg/fpu_helper.c
16
@@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env)
17
* case sets InvalidOp and returns the input value 'c'
18
*/
19
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
20
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, &env->fp_status);
21
}
22
23
int ieee_ex_to_loongarch(int xcpt)
24
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
25
index XXXXXXX..XXXXXXX 100644
26
--- a/fpu/softfloat-specialize.c.inc
27
+++ b/fpu/softfloat-specialize.c.inc
28
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
29
} else {
30
rule = float_3nan_prop_s_cab;
31
}
32
-#elif defined(TARGET_LOONGARCH64)
33
- rule = float_3nan_prop_s_cab;
34
#elif defined(TARGET_PPC)
35
/*
36
* If fRA is a NaN return it; otherwise if fRB is a NaN return it;
37
--
38
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for PPC, and remove the
2
ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-20-peter.maydell@linaro.org
7
---
8
target/ppc/cpu_init.c | 8 ++++++++
9
fpu/softfloat-specialize.c.inc | 6 ------
10
2 files changed, 8 insertions(+), 6 deletions(-)
11
12
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/ppc/cpu_init.c
15
+++ b/target/ppc/cpu_init.c
16
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
17
*/
18
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
19
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->vec_status);
20
+ /*
21
+ * NaN propagation for fused multiply-add:
22
+ * if fRA is a NaN return it; otherwise if fRB is a NaN return it;
23
+ * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
24
+ * whereas QEMU labels the operands as (a * b) + c.
25
+ */
26
+ set_float_3nan_prop_rule(float_3nan_prop_acb, &env->fp_status);
27
+ set_float_3nan_prop_rule(float_3nan_prop_acb, &env->vec_status);
28
/*
29
* For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
30
* to return an input NaN if we have one (ie c) rather than generating
31
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
32
index XXXXXXX..XXXXXXX 100644
33
--- a/fpu/softfloat-specialize.c.inc
34
+++ b/fpu/softfloat-specialize.c.inc
35
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
36
} else {
37
rule = float_3nan_prop_s_cab;
38
}
39
-#elif defined(TARGET_PPC)
40
- /*
41
- * If fRA is a NaN return it; otherwise if fRB is a NaN return it;
42
- * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
43
- */
44
- rule = float_3nan_prop_acb;
45
#elif defined(TARGET_S390X)
46
rule = float_3nan_prop_s_abc;
47
#elif defined(TARGET_SPARC)
48
--
49
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for s390x, and remove the
2
ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-21-peter.maydell@linaro.org
7
---
8
target/s390x/cpu.c | 1 +
9
fpu/softfloat-specialize.c.inc | 2 --
10
2 files changed, 1 insertion(+), 2 deletions(-)
11
12
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/s390x/cpu.c
15
+++ b/target/s390x/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_reset_hold(Object *obj, ResetType type)
17
set_float_detect_tininess(float_tininess_before_rounding,
18
&env->fpu_status);
19
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fpu_status);
20
+ set_float_3nan_prop_rule(float_3nan_prop_s_abc, &env->fpu_status);
21
set_float_infzeronan_rule(float_infzeronan_dnan_always,
22
&env->fpu_status);
23
/* fall through */
24
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
25
index XXXXXXX..XXXXXXX 100644
26
--- a/fpu/softfloat-specialize.c.inc
27
+++ b/fpu/softfloat-specialize.c.inc
28
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
29
} else {
30
rule = float_3nan_prop_s_cab;
31
}
32
-#elif defined(TARGET_S390X)
33
- rule = float_3nan_prop_s_abc;
34
#elif defined(TARGET_SPARC)
35
rule = float_3nan_prop_s_cba;
36
#elif defined(TARGET_XTENSA)
37
--
38
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the Float3NaNPropRule explicitly for SPARC, and remove the
2
ifdef from pickNaNMulAdd().
2
3
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-22-peter.maydell@linaro.org
10
---
7
---
11
target/arm/sme-fa64.decode | 2 --
8
target/sparc/cpu.c | 2 ++
12
target/arm/translate-sve.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 2 --
13
2 files changed, 2 insertions(+), 2 deletions(-)
10
2 files changed, 2 insertions(+), 2 deletions(-)
14
11
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
12
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
14
--- a/target/sparc/cpu.c
18
+++ b/target/arm/sme-fa64.decode
15
+++ b/target/sparc/cpu.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
16
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
17
* the CPU state struct so it won't get zeroed on reset.
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
18
*/
22
19
set_float_2nan_prop_rule(float_2nan_prop_s_ba, &env->fp_status);
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
20
+ /* For fused-multiply add, prefer SNaN over QNaN, then C->B->A */
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
21
+ set_float_3nan_prop_rule(float_3nan_prop_s_cba, &env->fp_status);
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
22
/* For inf * 0 + NaN, return the input NaN */
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
23
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
24
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
28
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
27
--- a/fpu/softfloat-specialize.c.inc
30
+++ b/target/arm/translate-sve.c
28
+++ b/fpu/softfloat-specialize.c.inc
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
29
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
32
if (!dc_isar_feature(aa64_sve, s)) {
30
} else {
33
return false;
31
rule = float_3nan_prop_s_cab;
34
}
32
}
35
+ s->is_nonstreaming = true;
33
-#elif defined(TARGET_SPARC)
36
if (sve_access_check(s)) {
34
- rule = float_3nan_prop_s_cba;
37
TCGv_i64 addr = new_tmp_a64(s);
35
#elif defined(TARGET_XTENSA)
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
36
if (status->use_first_nan) {
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
37
rule = float_3nan_prop_abc;
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
38
--
48
2.25.1
39
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for Arm, and remove the
2
ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-23-peter.maydell@linaro.org
7
---
8
target/mips/fpu_helper.h | 4 ++++
9
target/mips/msa.c | 3 +++
10
fpu/softfloat-specialize.c.inc | 8 +-------
11
3 files changed, 8 insertions(+), 7 deletions(-)
12
13
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/mips/fpu_helper.h
16
+++ b/target/mips/fpu_helper.h
17
@@ -XXX,XX +XXX,XX @@ static inline void restore_snan_bit_mode(CPUMIPSState *env)
18
{
19
bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);
20
FloatInfZeroNaNRule izn_rule;
21
+ Float3NaNPropRule nan3_rule;
22
23
/*
24
* With nan2008, SNaNs are silenced in the usual way.
25
@@ -XXX,XX +XXX,XX @@ static inline void restore_snan_bit_mode(CPUMIPSState *env)
26
*/
27
izn_rule = nan2008 ? float_infzeronan_dnan_never : float_infzeronan_dnan_always;
28
set_float_infzeronan_rule(izn_rule, &env->active_fpu.fp_status);
29
+ nan3_rule = nan2008 ? float_3nan_prop_s_cab : float_3nan_prop_s_abc;
30
+ set_float_3nan_prop_rule(nan3_rule, &env->active_fpu.fp_status);
31
+
32
}
33
34
static inline void restore_fp_status(CPUMIPSState *env)
35
diff --git a/target/mips/msa.c b/target/mips/msa.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/mips/msa.c
38
+++ b/target/mips/msa.c
39
@@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env)
40
set_float_2nan_prop_rule(float_2nan_prop_s_ab,
41
&env->active_tc.msa_fp_status);
42
43
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab,
44
+ &env->active_tc.msa_fp_status);
45
+
46
/* clear float_status exception flags */
47
set_float_exception_flags(0, &env->active_tc.msa_fp_status);
48
49
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
50
index XXXXXXX..XXXXXXX 100644
51
--- a/fpu/softfloat-specialize.c.inc
52
+++ b/fpu/softfloat-specialize.c.inc
53
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
54
}
55
56
if (rule == float_3nan_prop_none) {
57
-#if defined(TARGET_MIPS)
58
- if (snan_bit_is_one(status)) {
59
- rule = float_3nan_prop_s_abc;
60
- } else {
61
- rule = float_3nan_prop_s_cab;
62
- }
63
-#elif defined(TARGET_XTENSA)
64
+#if defined(TARGET_XTENSA)
65
if (status->use_first_nan) {
66
rule = float_3nan_prop_abc;
67
} else {
68
--
69
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for xtensa, and remove the
2
ifdef from pickNaNMulAdd().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-24-peter.maydell@linaro.org
7
---
8
target/xtensa/fpu_helper.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 8 --------
10
2 files changed, 2 insertions(+), 8 deletions(-)
11
12
diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/xtensa/fpu_helper.c
15
+++ b/target/xtensa/fpu_helper.c
16
@@ -XXX,XX +XXX,XX @@ void xtensa_use_first_nan(CPUXtensaState *env, bool use_first)
17
set_use_first_nan(use_first, &env->fp_status);
18
set_float_2nan_prop_rule(use_first ? float_2nan_prop_ab : float_2nan_prop_ba,
19
&env->fp_status);
20
+ set_float_3nan_prop_rule(use_first ? float_3nan_prop_abc : float_3nan_prop_cba,
21
+ &env->fp_status);
22
}
23
24
void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v)
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/fpu/softfloat-specialize.c.inc
28
+++ b/fpu/softfloat-specialize.c.inc
29
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
30
}
31
32
if (rule == float_3nan_prop_none) {
33
-#if defined(TARGET_XTENSA)
34
- if (status->use_first_nan) {
35
- rule = float_3nan_prop_abc;
36
- } else {
37
- rule = float_3nan_prop_cba;
38
- }
39
-#else
40
rule = float_3nan_prop_abc;
41
-#endif
42
}
43
44
assert(rule != float_3nan_prop_none);
45
--
46
2.34.1
diff view generated by jsdifflib
New patch
1
Set the Float3NaNPropRule explicitly for i386. We had no
2
i386-specific behaviour in the old ifdef ladder, so we were using the
3
default "prefer a then b then c" fallback; this is actually the
4
correct per-the-spec handling for i386.
1
5
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20241202131347.498124-25-peter.maydell@linaro.org
9
---
10
target/i386/tcg/fpu_helper.c | 1 +
11
1 file changed, 1 insertion(+)
12
13
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/i386/tcg/fpu_helper.c
16
+++ b/target/i386/tcg/fpu_helper.c
17
@@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env)
18
* there are multiple input NaNs they are selected in the order a, b, c.
19
*/
20
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->sse_status);
21
+ set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
22
}
23
24
static inline uint8_t save_exception_flags(CPUX86State *env)
25
--
26
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the Float3NaNPropRule explicitly for HPPA, and remove the
2
ifdef from pickNaNMulAdd().
2
3
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
HPPA is the only target that was using the default branch of the
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
5
ifdef ladder (other targets either do not use muladd or set
6
default_nan_mode), so we can remove the ifdef fallback entirely now
7
(allowing the "rule not set" case to fall into the default of the
8
switch statement and assert).
9
10
We add a TODO note that the HPPA rule is probably wrong; this is
11
not a behavioural change for this refactoring.
12
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20241202131347.498124-26-peter.maydell@linaro.org
7
---
16
---
8
target/arm/helper-sme.h | 2 ++
17
target/hppa/fpu_helper.c | 8 ++++++++
9
target/arm/sme.decode | 2 ++
18
fpu/softfloat-specialize.c.inc | 4 ----
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
19
2 files changed, 8 insertions(+), 4 deletions(-)
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
13
20
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
21
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
15
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
23
--- a/target/hppa/fpu_helper.c
17
+++ b/target/arm/helper-sme.h
24
+++ b/target/hppa/fpu_helper.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
25
@@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
26
* HPPA does note implement a CPU reset method at all...
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
27
*/
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
28
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
29
+ /*
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
+ * TODO: The HPPA architecture reference only documents its NaN
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
31
+ * propagation rule for 2-operand operations. Testing on real hardware
32
+ * might be necessary to confirm whether this order for muladd is correct.
33
+ * Not preferring the SNaN is almost certainly incorrect as it diverges
34
+ * from the documented rules for 2-operand operations.
35
+ */
36
+ set_float_3nan_prop_rule(float_3nan_prop_abc, &env->fp_status);
37
/* For inf * 0 + NaN, return the input NaN */
38
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
39
}
40
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
25
index XXXXXXX..XXXXXXX 100644
41
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
42
--- a/fpu/softfloat-specialize.c.inc
27
+++ b/target/arm/sme.decode
43
+++ b/fpu/softfloat-specialize.c.inc
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
44
@@ -XXX,XX +XXX,XX @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
45
}
40
}
46
}
41
}
47
42
+
48
- if (rule == float_3nan_prop_none) {
43
+/*
49
- rule = float_3nan_prop_abc;
44
+ * Alter PAIR as needed for controlling predicates being false,
50
- }
45
+ * and for NEG on an enabled row element.
51
-
46
+ */
52
assert(rule != float_3nan_prop_none);
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
53
if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
48
+{
54
/* We have at least one SNaN input and should prefer it */
49
+ /*
50
+ * The pseudocode uses a conditional negate after the conditional zero.
51
+ * It is simpler here to unconditionally negate before conditional zero.
52
+ */
53
+ pair ^= neg;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
61
+}
62
+
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
64
+ void *vpm, uint32_t desc)
65
+{
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
68
+ uint16_t *pn = vpn, *pm = vpm;
69
+
70
+ for (row = 0; row < oprsz; ) {
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
114
+ return true;
115
+ }
116
+
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
131
+}
132
+
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
135
{
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
137
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
140
+
141
+/* TODO: FEAT_EBF16 */
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
143
--
55
--
144
2.25.1
56
2.34.1
diff view generated by jsdifflib
New patch
1
The use_first_nan field in float_status was an xtensa-specific way to
2
select at runtime from two different NaN propagation rules. Now that
3
xtensa is using the target-agnostic NaN propagation rule selection
4
that we've just added, we can remove use_first_nan, because there is
5
no longer any code that reads it.
1
6
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20241202131347.498124-27-peter.maydell@linaro.org
10
---
11
include/fpu/softfloat-helpers.h | 5 -----
12
include/fpu/softfloat-types.h | 1 -
13
target/xtensa/fpu_helper.c | 1 -
14
3 files changed, 7 deletions(-)
15
16
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/fpu/softfloat-helpers.h
19
+++ b/include/fpu/softfloat-helpers.h
20
@@ -XXX,XX +XXX,XX @@ static inline void set_snan_bit_is_one(bool val, float_status *status)
21
status->snan_bit_is_one = val;
22
}
23
24
-static inline void set_use_first_nan(bool val, float_status *status)
25
-{
26
- status->use_first_nan = val;
27
-}
28
-
29
static inline void set_no_signaling_nans(bool val, float_status *status)
30
{
31
status->no_signaling_nans = val;
32
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/fpu/softfloat-types.h
35
+++ b/include/fpu/softfloat-types.h
36
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
37
* softfloat-specialize.inc.c)
38
*/
39
bool snan_bit_is_one;
40
- bool use_first_nan;
41
bool no_signaling_nans;
42
/* should overflowed results subtract re_bias to its exponent? */
43
bool rebias_overflow;
44
diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/xtensa/fpu_helper.c
47
+++ b/target/xtensa/fpu_helper.c
48
@@ -XXX,XX +XXX,XX @@ static const struct {
49
50
void xtensa_use_first_nan(CPUXtensaState *env, bool use_first)
51
{
52
- set_use_first_nan(use_first, &env->fp_status);
53
set_float_2nan_prop_rule(use_first ? float_2nan_prop_ab : float_2nan_prop_ba,
54
&env->fp_status);
55
set_float_3nan_prop_rule(use_first ? float_3nan_prop_abc : float_3nan_prop_cba,
56
--
57
2.34.1
diff view generated by jsdifflib
New patch
1
Currently m68k_cpu_reset_hold() calls floatx80_default_nan(NULL)
2
to get the NaN bit pattern to reset the FPU registers. This
3
works because it happens that our implementation of
4
floatx80_default_nan() doesn't actually look at the float_status
5
pointer except for TARGET_MIPS. However, this isn't guaranteed,
6
and to be able to remove the ifdef in floatx80_default_nan()
7
we're going to need a real float_status here.
1
8
9
Rearrange m68k_cpu_reset_hold() so that we initialize env->fp_status
10
earlier, and thus can pass it to floatx80_default_nan().
11
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20241202131347.498124-28-peter.maydell@linaro.org
15
---
16
target/m68k/cpu.c | 12 +++++++-----
17
1 file changed, 7 insertions(+), 5 deletions(-)
18
19
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/m68k/cpu.c
22
+++ b/target/m68k/cpu.c
23
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
24
CPUState *cs = CPU(obj);
25
M68kCPUClass *mcc = M68K_CPU_GET_CLASS(obj);
26
CPUM68KState *env = cpu_env(cs);
27
- floatx80 nan = floatx80_default_nan(NULL);
28
+ floatx80 nan;
29
int i;
30
31
if (mcc->parent_phases.hold) {
32
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
33
#else
34
cpu_m68k_set_sr(env, SR_S | SR_I);
35
#endif
36
- for (i = 0; i < 8; i++) {
37
- env->fregs[i].d = nan;
38
- }
39
- cpu_m68k_set_fpcr(env, 0);
40
/*
41
* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL
42
* 3.4 FLOATING-POINT INSTRUCTION DETAILS
43
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
44
* preceding paragraph for nonsignaling NaNs.
45
*/
46
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
47
+
48
+ nan = floatx80_default_nan(&env->fp_status);
49
+ for (i = 0; i < 8; i++) {
50
+ env->fregs[i].d = nan;
51
+ }
52
+ cpu_m68k_set_fpcr(env, 0);
53
env->fpsr = 0;
54
55
/* TODO: We should set PC from the interrupt vector. */
56
--
57
2.34.1
diff view generated by jsdifflib
New patch
1
We create our 128-bit default NaN by calling parts64_default_nan()
2
and then adjusting the result. We can do the same trick for creating
3
the floatx80 default NaN, which lets us drop a target ifdef.
1
4
5
floatx80 is used only by:
6
i386
7
m68k
8
arm nwfpe old floating-point emulation emulation support
9
(which is essentially dead, especially the parts involving floatx80)
10
PPC (only in the xsrqpxp instruction, which just rounds an input
11
value by converting to floatx80 and back, so will never generate
12
the default NaN)
13
14
The floatx80 default NaN as currently implemented is:
15
m68k: sign = 0, exp = 1...1, int = 1, frac = 1....1
16
i386: sign = 1, exp = 1...1, int = 1, frac = 10...0
17
18
These are the same as the parts64_default_nan for these architectures.
19
20
This is technically a possible behaviour change for arm linux-user
21
nwfpe emulation emulation, because the default NaN will now have the
22
sign bit clear. But we were already generating a different floatx80
23
default NaN from the real kernel emulation we are supposedly
24
following, which appears to use an all-bits-1 value:
25
https://elixir.bootlin.com/linux/v6.12/source/arch/arm/nwfpe/softfloat-specialize#L267
26
27
This won't affect the only "real" use of the nwfpe emulation, which
28
is ancient binaries that used it as part of the old floating point
29
calling convention; that only uses loads and stores of 32 and 64 bit
30
floats, not any of the floatx80 behaviour the original hardware had.
31
We also get the nwfpe float64 default NaN value wrong:
32
https://elixir.bootlin.com/linux/v6.12/source/arch/arm/nwfpe/softfloat-specialize#L166
33
so if we ever cared about this obscure corner the right fix would be
34
to correct that so nwfpe used its own default-NaN setting rather
35
than the Arm VFP one.
36
37
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
38
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
39
Message-id: 20241202131347.498124-29-peter.maydell@linaro.org
40
---
41
fpu/softfloat-specialize.c.inc | 20 ++++++++++----------
42
1 file changed, 10 insertions(+), 10 deletions(-)
43
44
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
45
index XXXXXXX..XXXXXXX 100644
46
--- a/fpu/softfloat-specialize.c.inc
47
+++ b/fpu/softfloat-specialize.c.inc
48
@@ -XXX,XX +XXX,XX @@ static void parts128_silence_nan(FloatParts128 *p, float_status *status)
49
floatx80 floatx80_default_nan(float_status *status)
50
{
51
floatx80 r;
52
+ /*
53
+ * Extrapolate from the choices made by parts64_default_nan to fill
54
+ * in the floatx80 format. We assume that floatx80's explicit
55
+ * integer bit is always set (this is true for i386 and m68k,
56
+ * which are the only real users of this format).
57
+ */
58
+ FloatParts64 p64;
59
+ parts64_default_nan(&p64, status);
60
61
- /* None of the targets that have snan_bit_is_one use floatx80. */
62
- assert(!snan_bit_is_one(status));
63
-#if defined(TARGET_M68K)
64
- r.low = UINT64_C(0xFFFFFFFFFFFFFFFF);
65
- r.high = 0x7FFF;
66
-#else
67
- /* X86 */
68
- r.low = UINT64_C(0xC000000000000000);
69
- r.high = 0xFFFF;
70
-#endif
71
+ r.high = 0x7FFF | (p64.sign << 15);
72
+ r.low = (1ULL << DECOMPOSED_BINARY_POINT) | p64.frac;
73
return r;
74
}
75
76
--
77
2.34.1
diff view generated by jsdifflib
New patch
1
In target/loongarch's helper_fclass_s() and helper_fclass_d() we pass
2
a zero-initialized float_status struct to float32_is_quiet_nan() and
3
float64_is_quiet_nan(), with the cryptic comment "for
4
snan_bit_is_one".
1
5
6
This pattern appears to have been copied from target/riscv, where it
7
is used because the functions there do not have ready access to the
8
CPU state struct. The comment presumably refers to the fact that the
9
main reason the is_quiet_nan() functions want the float_state is
10
because they want to know about the snan_bit_is_one config.
11
12
In the loongarch helpers, though, we have the CPU state struct
13
to hand. Use the usual env->fp_status here. This avoids our needing
14
to track that we need to update the initializer of the local
15
float_status structs when the core softfloat code adds new
16
options for targets to configure their behaviour.
17
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Message-id: 20241202131347.498124-30-peter.maydell@linaro.org
21
---
22
target/loongarch/tcg/fpu_helper.c | 6 ++----
23
1 file changed, 2 insertions(+), 4 deletions(-)
24
25
diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/loongarch/tcg/fpu_helper.c
28
+++ b/target/loongarch/tcg/fpu_helper.c
29
@@ -XXX,XX +XXX,XX @@ uint64_t helper_fclass_s(CPULoongArchState *env, uint64_t fj)
30
} else if (float32_is_zero_or_denormal(f)) {
31
return sign ? 1 << 4 : 1 << 8;
32
} else if (float32_is_any_nan(f)) {
33
- float_status s = { }; /* for snan_bit_is_one */
34
- return float32_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
35
+ return float32_is_quiet_nan(f, &env->fp_status) ? 1 << 1 : 1 << 0;
36
} else {
37
return sign ? 1 << 3 : 1 << 7;
38
}
39
@@ -XXX,XX +XXX,XX @@ uint64_t helper_fclass_d(CPULoongArchState *env, uint64_t fj)
40
} else if (float64_is_zero_or_denormal(f)) {
41
return sign ? 1 << 4 : 1 << 8;
42
} else if (float64_is_any_nan(f)) {
43
- float_status s = { }; /* for snan_bit_is_one */
44
- return float64_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
45
+ return float64_is_quiet_nan(f, &env->fp_status) ? 1 << 1 : 1 << 0;
46
} else {
47
return sign ? 1 << 3 : 1 << 7;
48
}
49
--
50
2.34.1
diff view generated by jsdifflib
New patch
1
In the frem helper, we have a local float_status because we want to
2
execute the floatx80_div() with a custom rounding mode. Instead of
3
zero-initializing the local float_status and then having to set it up
4
with the m68k standard behaviour (including the NaN propagation rule
5
and copying the rounding precision from env->fp_status), initialize
6
it as a complete copy of env->fp_status. This will avoid our having
7
to add new code in this function for every new config knob we add
8
to fp_status.
1
9
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20241202131347.498124-31-peter.maydell@linaro.org
13
---
14
target/m68k/fpu_helper.c | 6 ++----
15
1 file changed, 2 insertions(+), 4 deletions(-)
16
17
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/m68k/fpu_helper.c
20
+++ b/target/m68k/fpu_helper.c
21
@@ -XXX,XX +XXX,XX @@ void HELPER(frem)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1)
22
23
fp_rem = floatx80_rem(val1->d, val0->d, &env->fp_status);
24
if (!floatx80_is_any_nan(fp_rem)) {
25
- float_status fp_status = { };
26
+ /* Use local temporary fp_status to set different rounding mode */
27
+ float_status fp_status = env->fp_status;
28
uint32_t quotient;
29
int sign;
30
31
/* Calculate quotient directly using round to nearest mode */
32
- set_float_2nan_prop_rule(float_2nan_prop_ab, &fp_status);
33
set_float_rounding_mode(float_round_nearest_even, &fp_status);
34
- set_floatx80_rounding_precision(
35
- get_floatx80_rounding_precision(&env->fp_status), &fp_status);
36
fp_quot.d = floatx80_div(val1->d, val0->d, &fp_status);
37
38
sign = extractFloatx80Sign(fp_quot.d);
39
--
40
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In cf_fpu_gdb_get_reg() and cf_fpu_gdb_set_reg() we do the conversion
2
from float64 to floatx80 using a scratch float_status, because we
3
don't want the conversion to affect the CPU's floating point exception
4
status. Currently we use a zero-initialized float_status. This will
5
get steadily more awkward as we add config knobs to float_status
6
that the target must initialize. Avoid having to add any of that
7
configuration here by instead initializing our local float_status
8
from the env->fp_status.
2
9
3
Mark these as a non-streaming instructions, which should trap
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
if full a64 support is not enabled in streaming mode.
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20241202131347.498124-32-peter.maydell@linaro.org
13
---
14
target/m68k/helper.c | 6 ++++--
15
1 file changed, 4 insertions(+), 2 deletions(-)
5
16
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
17
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 9 ---------
12
target/arm/translate-sve.c | 6 ++++++
13
2 files changed, 6 insertions(+), 9 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
19
--- a/target/m68k/helper.c
18
+++ b/target/arm/sme-fa64.decode
20
+++ b/target/m68k/helper.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
@@ -XXX,XX +XXX,XX @@ static int cf_fpu_gdb_get_reg(CPUState *cs, GByteArray *mem_buf, int n)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
CPUM68KState *env = &cpu->env;
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
22
24
if (n < 8) {
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
- float_status s = {};
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
+ /* Use scratch float_status so any exceptions don't change CPU state */
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
+ float_status s = env->fp_status;
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
28
return gdb_get_reg64(mem_buf, floatx80_to_float64(env->fregs[n].d, &s));
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
if (!dc_isar_feature(aa64_sve, s)) {
45
return false;
46
}
29
}
47
+ s->is_nonstreaming = true;
30
switch (n) {
48
if (!sve_access_check(s)) {
31
@@ -XXX,XX +XXX,XX @@ static int cf_fpu_gdb_set_reg(CPUState *cs, uint8_t *mem_buf, int n)
49
return true;
32
CPUM68KState *env = &cpu->env;
50
}
33
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
34
if (n < 8) {
52
if (!dc_isar_feature(aa64_sve, s)) {
35
- float_status s = {};
53
return false;
36
+ /* Use scratch float_status so any exceptions don't change CPU state */
54
}
37
+ float_status s = env->fp_status;
55
+ s->is_nonstreaming = true;
38
env->fregs[n].d = float64_to_floatx80(ldq_be_p(mem_buf), &s);
56
if (!sve_access_check(s)) {
39
return 8;
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
60
if (!dc_isar_feature(aa64_sve2, s)) {
61
return false;
62
}
63
+ s->is_nonstreaming = true;
64
if (!sve_access_check(s)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
if (!dc_isar_feature(aa64_sve, s)) {
69
return false;
70
}
71
+ s->is_nonstreaming = true;
72
if (!sve_access_check(s)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
if (!dc_isar_feature(aa64_sve, s)) {
77
return false;
78
}
79
+ s->is_nonstreaming = true;
80
if (!sve_access_check(s)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
90
}
40
}
91
--
41
--
92
2.25.1
42
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
In the helper functions flcmps and flcmpd we use a scratch float_status
2
so that we don't change the CPU state if the comparison raises any
3
floating point exception flags. Instead of zero-initializing this
4
scratch float_status, initialize it as a copy of env->fp_status. This
5
avoids the need to explicitly initialize settings like the NaN
6
propagation rule or others we might add to softfloat in future.
2
7
3
These SME instructions are nominally within the SVE decode space,
8
To do this we need to pass the CPU env pointer in to the helper.
4
so we add them to sve.decode and translate-sve.c.
5
9
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20241202131347.498124-33-peter.maydell@linaro.org
10
---
13
---
11
target/arm/translate-a64.h | 12 ++++++++++++
14
target/sparc/helper.h | 4 ++--
12
target/arm/sve.decode | 5 ++++-
15
target/sparc/fop_helper.c | 8 ++++----
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
16
target/sparc/translate.c | 4 ++--
14
3 files changed, 54 insertions(+), 1 deletion(-)
17
3 files changed, 8 insertions(+), 8 deletions(-)
15
18
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
19
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
17
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a64.h
21
--- a/target/sparc/helper.h
19
+++ b/target/arm/translate-a64.h
22
+++ b/target/sparc/helper.h
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fcmpd, TCG_CALL_NO_WG, i32, env, f64, f64)
21
return s->vl;
24
DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64)
25
DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128)
26
DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128)
27
-DEF_HELPER_FLAGS_2(flcmps, TCG_CALL_NO_RWG_SE, i32, f32, f32)
28
-DEF_HELPER_FLAGS_2(flcmpd, TCG_CALL_NO_RWG_SE, i32, f64, f64)
29
+DEF_HELPER_FLAGS_3(flcmps, TCG_CALL_NO_RWG_SE, i32, env, f32, f32)
30
+DEF_HELPER_FLAGS_3(flcmpd, TCG_CALL_NO_RWG_SE, i32, env, f64, f64)
31
DEF_HELPER_2(raise_exception, noreturn, env, int)
32
33
DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
34
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/sparc/fop_helper.c
37
+++ b/target/sparc/fop_helper.c
38
@@ -XXX,XX +XXX,XX @@ uint32_t helper_fcmpeq(CPUSPARCState *env, Int128 src1, Int128 src2)
39
return finish_fcmp(env, r, GETPC());
22
}
40
}
23
41
24
+/* Return the byte size of the vector register, SVL / 8. */
42
-uint32_t helper_flcmps(float32 src1, float32 src2)
25
+static inline int streaming_vec_reg_size(DisasContext *s)
43
+uint32_t helper_flcmps(CPUSPARCState *env, float32 src1, float32 src2)
26
+{
44
{
27
+ return s->svl;
45
/*
28
+}
46
* FLCMP never raises an exception nor modifies any FSR fields.
29
+
47
* Perform the comparison with a dummy fp environment.
30
/*
48
*/
31
* Return the offset info CPUARMState of the predicate vector register Pn.
49
- float_status discard = { };
32
* Note for this purpose, FFR is P16.
50
+ float_status discard = env->fp_status;
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
51
FloatRelation r;
34
return s->vl >> 3;
52
53
set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard);
54
@@ -XXX,XX +XXX,XX @@ uint32_t helper_flcmps(float32 src1, float32 src2)
55
g_assert_not_reached();
35
}
56
}
36
57
37
+/* Return the byte size of the predicate register, SVL / 64. */
58
-uint32_t helper_flcmpd(float64 src1, float64 src2)
38
+static inline int streaming_pred_reg_size(DisasContext *s)
59
+uint32_t helper_flcmpd(CPUSPARCState *env, float64 src1, float64 src2)
39
+{
60
{
40
+ return s->svl >> 3;
61
- float_status discard = { };
41
+}
62
+ float_status discard = env->fp_status;
42
+
63
FloatRelation r;
43
/*
64
44
* Round up the size of a register to a size allowed by
65
set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard);
45
* the tcg vector infrastructure. Any operation which uses this
66
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
68
--- a/target/sparc/translate.c
49
+++ b/target/arm/sve.decode
69
+++ b/target/sparc/translate.c
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
70
@@ -XXX,XX +XXX,XX @@ static bool trans_FLCMPs(DisasContext *dc, arg_FLCMPs *a)
51
# SVE index generation (register start, register increment)
71
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
72
src1 = gen_load_fpr_F(dc, a->rs1);
53
73
src2 = gen_load_fpr_F(dc, a->rs2);
54
-### SVE Stack Allocation Group
74
- gen_helper_flcmps(cpu_fcc[a->cc], src1, src2);
55
+### SVE / Streaming SVE Stack Allocation Group
75
+ gen_helper_flcmps(cpu_fcc[a->cc], tcg_env, src1, src2);
56
76
return advance_pc(dc);
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
74
return true;
75
}
77
}
76
78
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
79
@@ -XXX,XX +XXX,XX @@ static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a)
78
+{
80
79
+ if (!dc_isar_feature(aa64_sme, s)) {
81
src1 = gen_load_fpr_D(dc, a->rs1);
80
+ return false;
82
src2 = gen_load_fpr_D(dc, a->rs2);
81
+ }
83
- gen_helper_flcmpd(cpu_fcc[a->cc], src1, src2);
82
+ if (sme_enabled_check(s)) {
84
+ gen_helper_flcmpd(cpu_fcc[a->cc], tcg_env, src1, src2);
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
85
return advance_pc(dc);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
86
+ }
87
+ return true;
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
91
{
92
if (!dc_isar_feature(aa64_sve, s)) {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
94
return true;
95
}
86
}
96
87
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
98
+{
99
+ if (!dc_isar_feature(aa64_sme, s)) {
100
+ return false;
101
+ }
102
+ if (sme_enabled_check(s)) {
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
106
+ }
107
+ return true;
108
+}
109
+
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
+{
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
127
+}
128
+
129
/*
130
*** SVE Compute Vector Address Group
131
*/
132
--
88
--
133
2.25.1
89
2.34.1
diff view generated by jsdifflib
New patch
1
In the helper_compute_fprf functions, we pass a dummy float_status
2
in to the is_signaling_nan() function. This is unnecessary, because
3
we have convenient access to the CPU env pointer here and that
4
is already set up with the correct values for the snan_bit_is_one
5
and no_signaling_nans config settings. is_signaling_nan() doesn't
6
ever update the fp_status with any exception flags, so there is
7
no reason not to use env->fp_status here.
1
8
9
Use env->fp_status instead of the dummy fp_status.
10
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20241202131347.498124-34-peter.maydell@linaro.org
14
---
15
target/ppc/fpu_helper.c | 3 +--
16
1 file changed, 1 insertion(+), 2 deletions(-)
17
18
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/ppc/fpu_helper.c
21
+++ b/target/ppc/fpu_helper.c
22
@@ -XXX,XX +XXX,XX @@ void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
23
} else if (tp##_is_infinity(arg)) { \
24
fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF; \
25
} else { \
26
- float_status dummy = { }; /* snan_bit_is_one = 0 */ \
27
- if (tp##_is_signaling_nan(arg, &dummy)) { \
28
+ if (tp##_is_signaling_nan(arg, &env->fp_status)) { \
29
fprf = 0x00 << FPSCR_FPRF; \
30
} else { \
31
fprf = 0x11 << FPSCR_FPRF; \
32
--
33
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Fold the return value setting into the goto, so each
3
Now that float_status has a bunch of fp parameters,
4
point of failure need not do both.
4
it is easier to copy an existing structure than create
5
one from scratch. Begin by copying the structure that
6
corresponds to the FPSR and make only the adjustments
7
required for BFloat16 semantics.
5
8
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20241203203949.483774-2-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
14
---
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
15
target/arm/tcg/vec_helper.c | 20 +++++++-------------
12
1 file changed, 11 insertions(+), 15 deletions(-)
16
1 file changed, 7 insertions(+), 13 deletions(-)
13
17
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
18
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/aarch64/signal.c
20
--- a/target/arm/tcg/vec_helper.c
17
+++ b/linux-user/aarch64/signal.c
21
+++ b/target/arm/tcg/vec_helper.c
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
22
@@ -XXX,XX +XXX,XX @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
19
struct target_sve_context *sve = NULL;
23
* no effect on AArch32 instructions.
20
uint64_t extra_datap = 0;
24
*/
21
bool used_extra = false;
25
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
22
- bool err = false;
26
- *statusp = (float_status){
23
int vq = 0, sve_size = 0;
27
- .tininess_before_rounding = float_tininess_before_rounding,
24
28
- .float_rounding_mode = float_round_to_odd_inf,
25
target_restore_general_frame(env, sf);
29
- .flush_to_zero = true,
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
30
- .flush_inputs_to_zero = true,
27
switch (magic) {
31
- .default_nan_mode = true,
28
case 0:
32
- };
29
if (size != 0) {
33
+
30
- err = true;
34
+ *statusp = env->vfp.fp_status;
31
- goto exit;
35
+ set_default_nan_mode(true, statusp);
32
+ goto err;
36
33
}
37
if (ebf) {
34
if (used_extra) {
38
- float_status *fpst = &env->vfp.fp_status;
35
ctx = NULL;
39
- set_flush_to_zero(get_flush_to_zero(fpst), statusp);
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
40
- set_flush_inputs_to_zero(get_flush_inputs_to_zero(fpst), statusp);
37
41
- set_float_rounding_mode(get_float_rounding_mode(fpst), statusp);
38
case TARGET_FPSIMD_MAGIC:
42
-
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
43
/* EBF=1 needs to do a step with round-to-odd semantics */
40
- err = true;
44
*oddstatusp = *statusp;
41
- goto exit;
45
set_float_rounding_mode(float_round_to_odd, oddstatusp);
42
+ goto err;
46
+ } else {
43
}
47
+ set_flush_to_zero(true, statusp);
44
fpsimd = (struct target_fpsimd_context *)ctx;
48
+ set_flush_inputs_to_zero(true, statusp);
45
break;
49
+ set_float_rounding_mode(float_round_to_odd_inf, statusp);
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
50
}
84
-
51
-
85
- exit:
52
return ebf;
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
89
+
90
+ err:
91
+ unlock_user(extra, extra_datap, 0);
92
+ return 1;
93
}
53
}
94
54
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
96
--
55
--
97
2.25.1
56
2.34.1
57
58
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Currently we hardcode the default NaN value in parts64_default_nan()
2
using a compile-time ifdef ladder. This is awkward for two cases:
3
* for single-QEMU-binary we can't hard-code target-specifics like this
4
* for Arm FEAT_AFP the default NaN value depends on FPCR.AH
5
(specifically the sign bit is different)
2
6
3
We can reuse the SVE functions for implementing moves to/from
7
Add a field to float_status to specify the default NaN value; fall
4
horizontal tile slices, but we need new ones for moves to/from
8
back to the old ifdef behaviour if these are not set.
5
vertical tile slices.
6
9
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
The default NaN value is specified by setting a uint8_t to a
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
pattern corresponding to the sign and upper fraction parts of
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
12
the NaN; the lower bits of the fraction are set from bit 0 of
13
the pattern.
14
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Message-id: 20241202131347.498124-35-peter.maydell@linaro.org
11
---
18
---
12
target/arm/helper-sme.h | 12 +++
19
include/fpu/softfloat-helpers.h | 11 +++++++
13
target/arm/helper-sve.h | 2 +
20
include/fpu/softfloat-types.h | 10 ++++++
14
target/arm/translate-a64.h | 8 ++
21
fpu/softfloat-specialize.c.inc | 55 ++++++++++++++++++++-------------
15
target/arm/translate.h | 5 ++
22
3 files changed, 54 insertions(+), 22 deletions(-)
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
21
23
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
24
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
23
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper-sme.h
26
--- a/include/fpu/softfloat-helpers.h
25
+++ b/target/arm/helper-sme.h
27
+++ b/include/fpu/softfloat-helpers.h
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
28
@@ -XXX,XX +XXX,XX @@ static inline void set_float_infzeronan_rule(FloatInfZeroNaNRule rule,
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
29
status->float_infzeronan_rule = rule;
28
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
30
+
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
30
}
62
31
63
+/* Return a newly allocated pointer to the predicate register. */
32
+static inline void set_float_default_nan_pattern(uint8_t dnan_pattern,
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
33
+ float_status *status)
65
+{
34
+{
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
35
+ status->default_nan_pattern = dnan_pattern;
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
68
+ return ret;
69
+}
36
+}
70
+
37
+
71
bool disas_sve(DisasContext *, uint32_t);
38
static inline void set_flush_to_zero(bool val, float_status *status)
72
bool disas_sme(DisasContext *, uint32_t);
39
{
73
40
status->flush_to_zero = val;
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
41
@@ -XXX,XX +XXX,XX @@ static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status
75
index XXXXXXX..XXXXXXX 100644
42
return status->float_infzeronan_rule;
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
80
}
43
}
81
44
82
+static inline int plus_12(DisasContext *s, int x)
45
+static inline uint8_t get_float_default_nan_pattern(float_status *status)
83
+{
46
+{
84
+ return x + 12;
47
+ return status->default_nan_pattern;
85
+}
48
+}
86
+
49
+
87
static inline int times_2(DisasContext *s, int x)
50
static inline bool get_flush_to_zero(float_status *status)
88
{
51
{
89
return x * 2;
52
return status->flush_to_zero;
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
53
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
91
index XXXXXXX..XXXXXXX 100644
54
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
55
--- a/include/fpu/softfloat-types.h
93
+++ b/target/arm/sme.decode
56
+++ b/include/fpu/softfloat-types.h
94
@@ -XXX,XX +XXX,XX @@
57
@@ -XXX,XX +XXX,XX @@ typedef struct float_status {
95
### SME Misc
58
/* should denormalised inputs go to zero and set the input_denormal flag? */
96
59
bool flush_inputs_to_zero;
97
ZERO 11000000 00 001 00000000000 imm:8
60
bool default_nan_mode;
61
+ /*
62
+ * The pattern to use for the default NaN. Here the high bit specifies
63
+ * the default NaN's sign bit, and bits 6..0 specify the high bits of the
64
+ * fractional part. The low bits of the fractional part are copies of bit 0.
65
+ * The exponent of the default NaN is (as for any NaN) always all 1s.
66
+ * Note that a value of 0 here is not a valid NaN. The target must set
67
+ * this to the correct non-zero value, or we will assert when trying to
68
+ * create a default NaN.
69
+ */
70
+ uint8_t default_nan_pattern;
71
/*
72
* The flags below are not used on all specializations and may
73
* constant fold away (see snan_bit_is_one()/no_signalling_nans() in
74
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
75
index XXXXXXX..XXXXXXX 100644
76
--- a/fpu/softfloat-specialize.c.inc
77
+++ b/fpu/softfloat-specialize.c.inc
78
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
79
{
80
bool sign = 0;
81
uint64_t frac;
82
+ uint8_t dnan_pattern = status->default_nan_pattern;
83
84
+ if (dnan_pattern == 0) {
85
#if defined(TARGET_SPARC) || defined(TARGET_M68K)
86
- /* !snan_bit_is_one, set all bits */
87
- frac = (1ULL << DECOMPOSED_BINARY_POINT) - 1;
88
-#elif defined(TARGET_I386) || defined(TARGET_X86_64) \
89
+ /* Sign bit clear, all frac bits set */
90
+ dnan_pattern = 0b01111111;
91
+#elif defined(TARGET_I386) || defined(TARGET_X86_64) \
92
|| defined(TARGET_MICROBLAZE)
93
- /* !snan_bit_is_one, set sign and msb */
94
- frac = 1ULL << (DECOMPOSED_BINARY_POINT - 1);
95
- sign = 1;
96
+ /* Sign bit set, most significant frac bit set */
97
+ dnan_pattern = 0b11000000;
98
#elif defined(TARGET_HPPA)
99
- /* snan_bit_is_one, set msb-1. */
100
- frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2);
101
+ /* Sign bit clear, msb-1 frac bit set */
102
+ dnan_pattern = 0b00100000;
103
#elif defined(TARGET_HEXAGON)
104
- sign = 1;
105
- frac = ~0ULL;
106
+ /* Sign bit set, all frac bits set. */
107
+ dnan_pattern = 0b11111111;
108
#else
109
- /*
110
- * This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V,
111
- * S390, SH4, TriCore, and Xtensa. Our other supported targets
112
- * do not have floating-point.
113
- */
114
- if (snan_bit_is_one(status)) {
115
- /* set all bits other than msb */
116
- frac = (1ULL << (DECOMPOSED_BINARY_POINT - 1)) - 1;
117
- } else {
118
- /* set msb */
119
- frac = 1ULL << (DECOMPOSED_BINARY_POINT - 1);
120
- }
121
+ /*
122
+ * This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V,
123
+ * S390, SH4, TriCore, and Xtensa. Our other supported targets
124
+ * do not have floating-point.
125
+ */
126
+ if (snan_bit_is_one(status)) {
127
+ /* sign bit clear, set all frac bits other than msb */
128
+ dnan_pattern = 0b00111111;
129
+ } else {
130
+ /* sign bit clear, set frac msb */
131
+ dnan_pattern = 0b01000000;
132
+ }
133
#endif
134
+ }
135
+ assert(dnan_pattern != 0);
98
+
136
+
99
+### SME Move into/from Array
137
+ sign = dnan_pattern >> 7;
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
138
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
139
+ * Place default_nan_pattern [6:0] into bits [62:56],
213
+ * the address arithmetic.
140
+ * and replecate bit [0] down into [55:0]
214
+ */
141
+ */
215
+ for (i = 0; i < oprsz; i++) {
142
+ frac = deposit64(0, DECOMPOSED_BINARY_POINT - 7, 7, dnan_pattern);
216
+ if (pg[H2(i)] & 1) {
143
+ frac = deposit64(frac, 0, DECOMPOSED_BINARY_POINT - 7, -(dnan_pattern & 1));
217
+ a[tile_vslice_index(i)] = n[i];
144
218
+ }
145
*p = (FloatParts64) {
219
+ }
146
.cls = float_class_qnan,
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
286
}
287
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
445
--
147
--
446
2.25.1
148
2.34.1
diff view generated by jsdifflib
New patch
1
Set the default NaN pattern explicitly for the tests/fp code.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-36-peter.maydell@linaro.org
6
---
7
tests/fp/fp-bench.c | 1 +
8
tests/fp/fp-test-log2.c | 1 +
9
tests/fp/fp-test.c | 1 +
10
3 files changed, 3 insertions(+)
11
12
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/fp/fp-bench.c
15
+++ b/tests/fp/fp-bench.c
16
@@ -XXX,XX +XXX,XX @@ static void run_bench(void)
17
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &soft_status);
18
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
19
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
20
+ set_float_default_nan_pattern(0b01000000, &soft_status);
21
22
f = bench_funcs[operation][precision];
23
g_assert(f);
24
diff --git a/tests/fp/fp-test-log2.c b/tests/fp/fp-test-log2.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tests/fp/fp-test-log2.c
27
+++ b/tests/fp/fp-test-log2.c
28
@@ -XXX,XX +XXX,XX @@ int main(int ac, char **av)
29
int i;
30
31
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf);
32
+ set_float_default_nan_pattern(0b01000000, &qsf);
33
set_float_rounding_mode(float_round_nearest_even, &qsf);
34
35
test.d = 0.0;
36
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tests/fp/fp-test.c
39
+++ b/tests/fp/fp-test.c
40
@@ -XXX,XX +XXX,XX @@ void run_test(void)
41
*/
42
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf);
43
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &qsf);
44
+ set_float_default_nan_pattern(0b01000000, &qsf);
45
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &qsf);
46
47
genCases_setLevel(test_level);
48
--
49
2.34.1
diff view generated by jsdifflib
New patch
1
Set the default NaN pattern explicitly, and remove the ifdef from
2
parts64_default_nan().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-37-peter.maydell@linaro.org
7
---
8
target/microblaze/cpu.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 3 +--
10
2 files changed, 3 insertions(+), 2 deletions(-)
11
12
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/microblaze/cpu.c
15
+++ b/target/microblaze/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_reset_hold(Object *obj, ResetType type)
17
* this architecture.
18
*/
19
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
20
+ /* Default NaN: sign bit set, most significant frac bit set */
21
+ set_float_default_nan_pattern(0b11000000, &env->fp_status);
22
23
#if defined(CONFIG_USER_ONLY)
24
/* start in user mode with interrupts enabled. */
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/fpu/softfloat-specialize.c.inc
28
+++ b/fpu/softfloat-specialize.c.inc
29
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
30
#if defined(TARGET_SPARC) || defined(TARGET_M68K)
31
/* Sign bit clear, all frac bits set */
32
dnan_pattern = 0b01111111;
33
-#elif defined(TARGET_I386) || defined(TARGET_X86_64) \
34
- || defined(TARGET_MICROBLAZE)
35
+#elif defined(TARGET_I386) || defined(TARGET_X86_64)
36
/* Sign bit set, most significant frac bit set */
37
dnan_pattern = 0b11000000;
38
#elif defined(TARGET_HPPA)
39
--
40
2.34.1
diff view generated by jsdifflib
New patch
1
Set the default NaN pattern explicitly, and remove the ifdef from
2
parts64_default_nan().
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-38-peter.maydell@linaro.org
7
---
8
target/i386/tcg/fpu_helper.c | 4 ++++
9
fpu/softfloat-specialize.c.inc | 3 ---
10
2 files changed, 4 insertions(+), 3 deletions(-)
11
12
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/i386/tcg/fpu_helper.c
15
+++ b/target/i386/tcg/fpu_helper.c
16
@@ -XXX,XX +XXX,XX @@ void cpu_init_fp_statuses(CPUX86State *env)
17
*/
18
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->sse_status);
19
set_float_3nan_prop_rule(float_3nan_prop_abc, &env->sse_status);
20
+ /* Default NaN: sign bit set, most significant frac bit set */
21
+ set_float_default_nan_pattern(0b11000000, &env->fp_status);
22
+ set_float_default_nan_pattern(0b11000000, &env->mmx_status);
23
+ set_float_default_nan_pattern(0b11000000, &env->sse_status);
24
}
25
26
static inline uint8_t save_exception_flags(CPUX86State *env)
27
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
28
index XXXXXXX..XXXXXXX 100644
29
--- a/fpu/softfloat-specialize.c.inc
30
+++ b/fpu/softfloat-specialize.c.inc
31
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
32
#if defined(TARGET_SPARC) || defined(TARGET_M68K)
33
/* Sign bit clear, all frac bits set */
34
dnan_pattern = 0b01111111;
35
-#elif defined(TARGET_I386) || defined(TARGET_X86_64)
36
- /* Sign bit set, most significant frac bit set */
37
- dnan_pattern = 0b11000000;
38
#elif defined(TARGET_HPPA)
39
/* Sign bit clear, msb-1 frac bit set */
40
dnan_pattern = 0b00100000;
41
--
42
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly, and remove the ifdef from
2
parts64_default_nan().
2
3
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-39-peter.maydell@linaro.org
10
---
7
---
11
target/arm/sme-fa64.decode | 3 ---
8
target/hppa/fpu_helper.c | 2 ++
12
target/arm/translate-sve.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 3 ---
13
2 files changed, 2 insertions(+), 3 deletions(-)
10
2 files changed, 2 insertions(+), 3 deletions(-)
14
11
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
12
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
14
--- a/target/hppa/fpu_helper.c
18
+++ b/target/arm/sme-fa64.decode
15
+++ b/target/hppa/fpu_helper.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
16
@@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
17
set_float_3nan_prop_rule(float_3nan_prop_abc, &env->fp_status);
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
18
/* For inf * 0 + NaN, return the input NaN */
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
19
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
23
-
20
+ /* Default NaN: sign bit clear, msb-1 frac bit set */
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
21
+ set_float_default_nan_pattern(0b00100000, &env->fp_status);
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
22
}
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
23
24
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
27
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-sve.c
27
--- a/fpu/softfloat-specialize.c.inc
29
+++ b/target/arm/translate-sve.c
28
+++ b/fpu/softfloat-specialize.c.inc
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
29
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
31
if (a->rm == 31) {
30
#if defined(TARGET_SPARC) || defined(TARGET_M68K)
32
return false;
31
/* Sign bit clear, all frac bits set */
33
}
32
dnan_pattern = 0b01111111;
34
+ s->is_nonstreaming = true;
33
-#elif defined(TARGET_HPPA)
35
if (sve_access_check(s)) {
34
- /* Sign bit clear, msb-1 frac bit set */
36
TCGv_i64 addr = new_tmp_a64(s);
35
- dnan_pattern = 0b00100000;
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
36
#elif defined(TARGET_HEXAGON)
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
37
/* Sign bit set, all frac bits set. */
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
38
dnan_pattern = 0b11111111;
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
46
--
39
--
47
2.25.1
40
2.34.1
diff view generated by jsdifflib
New patch
1
Set the default NaN pattern explicitly for the alpha target.
1
2
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-40-peter.maydell@linaro.org
6
---
7
target/alpha/cpu.c | 2 ++
8
1 file changed, 2 insertions(+)
9
10
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/alpha/cpu.c
13
+++ b/target/alpha/cpu.c
14
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj)
15
* operand in Fa. That is float_2nan_prop_ba.
16
*/
17
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
18
+ /* Default NaN: sign bit clear, msb frac bit set */
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
20
#if defined(CONFIG_USER_ONLY)
21
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
22
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
23
--
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for the arm target.
2
This includes setting it for the old linux-user nwfpe emulation.
3
For nwfpe, our default doesn't match the real kernel, but we
4
avoid making a behaviour change in this commit.
2
5
3
Dump SVCR, plus use the correct access check for Streaming Mode.
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20241202131347.498124-41-peter.maydell@linaro.org
9
---
10
linux-user/arm/nwfpe/fpa11.c | 5 +++++
11
target/arm/cpu.c | 2 ++
12
2 files changed, 7 insertions(+)
4
13
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/linux-user/arm/nwfpe/fpa11.c b/linux-user/arm/nwfpe/fpa11.c
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
index XXXXXXX..XXXXXXX 100644
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
16
--- a/linux-user/arm/nwfpe/fpa11.c
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
+++ b/linux-user/arm/nwfpe/fpa11.c
9
---
18
@@ -XXX,XX +XXX,XX @@ void resetFPA11(void)
10
target/arm/cpu.c | 17 ++++++++++++++++-
19
* this late date.
11
1 file changed, 16 insertions(+), 1 deletion(-)
20
*/
12
21
set_float_2nan_prop_rule(float_2nan_prop_s_ab, &fpa11->fp_status);
22
+ /*
23
+ * Use the same default NaN value as Arm VFP. This doesn't match
24
+ * the Linux kernel's nwfpe emulation, which uses an all-1s value.
25
+ */
26
+ set_float_default_nan_pattern(0b01000000, &fpa11->fp_status);
27
}
28
29
void SetRoundingMode(const unsigned int opcode)
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
30
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
32
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
33
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
34
@@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
18
int i;
35
* the pseudocode function the arguments are in the order c, a, b.
19
int el = arm_current_el(env);
36
* * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
20
const char *ns_status;
37
* and the input NaN if it is signalling
21
+ bool sve;
38
+ * * Default NaN has sign bit clear, msb frac bit set
22
39
*/
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
40
static void arm_set_default_fp_behaviours(float_status *s)
24
for (i = 0; i < 32; i++) {
41
{
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
42
@@ -XXX,XX +XXX,XX @@ static void arm_set_default_fp_behaviours(float_status *s)
26
el,
43
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
27
psr & PSTATE_SP ? 'h' : 't');
44
set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
28
45
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
46
+ set_float_default_nan_pattern(0b01000000, s);
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
47
}
31
+ env->svcr,
48
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
49
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
+ }
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
37
}
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
41
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
44
+ sve = sme_exception_el(env, el) == 0;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
49
+ }
50
+
51
+ if (sve) {
52
int j, zcr_len = sve_vqm1_for_el(env, el);
53
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
55
--
50
--
56
2.25.1
51
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for loongarch.
2
2
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
or records that are smaller than the header. We were silently
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
allowing these cases to pass, dropping the record.
5
Message-id: 20241202131347.498124-42-peter.maydell@linaro.org
6
---
7
target/loongarch/tcg/fpu_helper.c | 2 ++
8
1 file changed, 2 insertions(+)
6
9
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
linux-user/aarch64/signal.c | 5 ++++-
13
1 file changed, 4 insertions(+), 1 deletion(-)
14
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/signal.c
12
--- a/target/loongarch/tcg/fpu_helper.c
18
+++ b/linux-user/aarch64/signal.c
13
+++ b/target/loongarch/tcg/fpu_helper.c
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
14
@@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env)
20
break;
15
*/
21
16
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
22
case TARGET_SVE_MAGIC:
17
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &env->fp_status);
23
+ if (sve || size < sizeof(struct target_sve_context)) {
18
+ /* Default NaN: sign bit clear, msb frac bit set */
24
+ goto err;
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
25
+ }
20
}
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
21
27
vq = sve_vq(env);
22
int ieee_ex_to_loongarch(int xcpt)
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
29
- if (!sve && size == sve_size) {
30
+ if (size == sve_size) {
31
sve = (struct target_sve_context *)ctx;
32
break;
33
}
34
--
23
--
35
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for m68k.
2
2
3
Make sure to zero the currently reserved fields.
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-43-peter.maydell@linaro.org
6
---
7
target/m68k/cpu.c | 2 ++
8
fpu/softfloat-specialize.c.inc | 2 +-
9
2 files changed, 3 insertions(+), 1 deletion(-)
4
10
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
linux-user/aarch64/signal.c | 9 ++++++++-
11
1 file changed, 8 insertions(+), 1 deletion(-)
12
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/linux-user/aarch64/signal.c
13
--- a/target/m68k/cpu.c
16
+++ b/linux-user/aarch64/signal.c
14
+++ b/target/m68k/cpu.c
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
15
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
18
struct target_sve_context {
16
* preceding paragraph for nonsignaling NaNs.
19
struct target_aarch64_ctx head;
17
*/
20
uint16_t vl;
18
set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
21
- uint16_t reserved[3];
19
+ /* Default NaN: sign bit clear, all frac bits set */
22
+ uint16_t flags;
20
+ set_float_default_nan_pattern(0b01111111, &env->fp_status);
23
+ uint16_t reserved[2];
21
24
/* The actual SVE data immediately follows. It is laid out
22
nan = floatx80_default_nan(&env->fp_status);
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
23
for (i = 0; i < 8; i++) {
26
* the original struct pointer.
24
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
25
index XXXXXXX..XXXXXXX 100644
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
26
--- a/fpu/softfloat-specialize.c.inc
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
27
+++ b/fpu/softfloat-specialize.c.inc
30
28
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
31
+#define TARGET_SVE_SIG_FLAG_SM 1
29
uint8_t dnan_pattern = status->default_nan_pattern;
32
+
30
33
struct target_rt_sigframe {
31
if (dnan_pattern == 0) {
34
struct target_siginfo info;
32
-#if defined(TARGET_SPARC) || defined(TARGET_M68K)
35
struct target_ucontext uc;
33
+#if defined(TARGET_SPARC)
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
34
/* Sign bit clear, all frac bits set */
37
{
35
dnan_pattern = 0b01111111;
38
int i, j;
36
#elif defined(TARGET_HEXAGON)
39
40
+ memset(sve, 0, sizeof(*sve));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
42
__put_user(size, &sve->head.size);
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
46
+ }
47
48
/* Note that SVE regs are stored as a byte stream, with each byte element
49
* at a subsequent address. This corresponds to a little-endian store
50
--
37
--
51
2.25.1
38
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for MIPS. Note that this
2
is our only target which currently changes the default NaN
3
at runtime (which it was previously doing indirectly when it
4
changed the snan_bit_is_one setting).
2
5
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20241202131347.498124-44-peter.maydell@linaro.org
7
---
9
---
8
linux-user/elfload.c | 20 ++++++++++++++++++++
10
target/mips/fpu_helper.h | 7 +++++++
9
1 file changed, 20 insertions(+)
11
target/mips/msa.c | 3 +++
12
2 files changed, 10 insertions(+)
10
13
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
14
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/elfload.c
16
--- a/target/mips/fpu_helper.h
14
+++ b/linux-user/elfload.c
17
+++ b/target/mips/fpu_helper.h
15
@@ -XXX,XX +XXX,XX @@ enum {
18
@@ -XXX,XX +XXX,XX @@ static inline void restore_snan_bit_mode(CPUMIPSState *env)
16
ARM_HWCAP2_A64_RNG = 1 << 16,
19
set_float_infzeronan_rule(izn_rule, &env->active_fpu.fp_status);
17
ARM_HWCAP2_A64_BTI = 1 << 17,
20
nan3_rule = nan2008 ? float_3nan_prop_s_cab : float_3nan_prop_s_abc;
18
ARM_HWCAP2_A64_MTE = 1 << 18,
21
set_float_3nan_prop_rule(nan3_rule, &env->active_fpu.fp_status);
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
22
+ /*
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
23
+ * With nan2008, the default NaN value has the sign bit clear and the
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
24
+ * frac msb set; with the older mode, the sign bit is clear, and all
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
25
+ * frac bits except the msb are set.
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
26
+ */
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
27
+ set_float_default_nan_pattern(nan2008 ? 0b01000000 : 0b00111111,
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
28
+ &env->active_fpu.fp_status);
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
29
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
30
}
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
31
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
32
diff --git a/target/mips/msa.c b/target/mips/msa.c
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
33
index XXXXXXX..XXXXXXX 100644
31
};
34
--- a/target/mips/msa.c
32
35
+++ b/target/mips/msa.c
33
#define ELF_HWCAP get_elf_hwcap()
36
@@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env)
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
37
/* Inf * 0 + NaN returns the input NaN */
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
38
set_float_infzeronan_rule(float_infzeronan_dnan_never,
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
39
&env->active_tc.msa_fp_status);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
40
+ /* Default NaN: sign bit clear, frac msb set */
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
41
+ set_float_default_nan_pattern(0b01000000,
39
+ ARM_HWCAP2_A64_SME_F32F32 |
42
+ &env->active_tc.msa_fp_status);
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
43
}
49
--
44
--
50
2.25.1
45
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for openrisc.
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-45-peter.maydell@linaro.org
7
---
6
---
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
7
target/openrisc/cpu.c | 2 ++
9
1 file changed, 9 insertions(+)
8
1 file changed, 2 insertions(+)
10
9
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
10
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/cpu_loop.c
12
--- a/target/openrisc/cpu.c
14
+++ b/linux-user/aarch64/cpu_loop.c
13
+++ b/target/openrisc/cpu.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
14
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_reset_hold(Object *obj, ResetType type)
16
15
*/
17
switch (trapnr) {
16
set_float_2nan_prop_rule(float_2nan_prop_x87, &cpu->env.fp_status);
18
case EXCP_SWI:
17
19
+ /*
18
+ /* Default NaN: sign bit clear, frac msb set */
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
19
+ set_float_default_nan_pattern(0b01000000, &cpu->env.fp_status);
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
20
22
+ */
21
#ifndef CONFIG_USER_ONLY
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
22
cpu->env.picmr = 0x00000000;
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
25
+ arm_rebuild_hflags(env);
26
+ arm_reset_sve_state(env);
27
+ }
28
ret = do_syscall(env,
29
env->xregs[8],
30
env->xregs[0],
31
--
23
--
32
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for ppc.
2
2
3
Note that SME remains effectively disabled for user-only,
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
because we do not yet set CPACR_EL1.SMEN. This needs to
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
wait until the kernel ABI is implemented.
5
Message-id: 20241202131347.498124-46-peter.maydell@linaro.org
6
---
7
target/ppc/cpu_init.c | 4 ++++
8
1 file changed, 4 insertions(+)
6
9
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
docs/system/arm/emulation.rst | 4 ++++
13
target/arm/cpu64.c | 11 +++++++++++
14
2 files changed, 15 insertions(+)
15
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/docs/system/arm/emulation.rst
12
--- a/target/ppc/cpu_init.c
19
+++ b/docs/system/arm/emulation.rst
13
+++ b/target/ppc/cpu_init.c
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
14
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
15
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
16
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->vec_status);
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
17
24
+- FEAT_SME (Scalable Matrix Extension)
18
+ /* Default NaN: sign bit clear, set frac msb */
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
20
+ set_float_default_nan_pattern(0b01000000, &env->vec_status);
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
28
- FEAT_SPECRES (Speculation restriction instructions)
29
- FEAT_SSBS (Speculative Store Bypass Safe)
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/cpu64.c
34
+++ b/target/arm/cpu64.c
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
36
*/
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
41
cpu->isar.id_aa64pfr1 = t;
42
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
45
cpu->isar.id_aa64dfr0 = t;
46
47
+ t = cpu->isar.id_aa64smfr0;
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
55
+ cpu->isar.id_aa64smfr0 = t;
56
+
21
+
57
/* Replicate the same data to the 32-bit id registers. */
22
for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) {
58
aa32_max_features(cpu);
23
ppc_spr_t *spr = &env->spr_cb[i];
59
24
60
--
25
--
61
2.25.1
26
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for sh4. Note that sh4
2
is one of the only three targets (the others being HPPA and
3
sometimes MIPS) that has snan_bit_is_one set.
2
4
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20241202131347.498124-47-peter.maydell@linaro.org
7
---
8
---
8
linux-user/aarch64/target_cpu.h | 5 ++++-
9
target/sh4/cpu.c | 2 ++
9
1 file changed, 4 insertions(+), 1 deletion(-)
10
1 file changed, 2 insertions(+)
10
11
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
12
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/target_cpu.h
14
--- a/target/sh4/cpu.c
14
+++ b/linux-user/aarch64/target_cpu.h
15
+++ b/target/sh4/cpu.c
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
16
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
16
17
set_flush_to_zero(1, &env->fp_status);
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
18
#endif
18
{
19
set_default_nan_mode(1, &env->fp_status);
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
20
+ /* sign bit clear, set all frac bits other than msb */
20
+ /*
21
+ set_float_default_nan_pattern(0b00111111, &env->fp_status);
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
* different from AArch32 Linux, which uses TPIDRRO.
23
*/
24
env->cp15.tpidr_el[0] = newtls;
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
26
+ env->cp15.tpidr2_el0 = 0;
27
}
22
}
28
23
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
24
static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
30
--
25
--
31
2.25.1
26
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for rx.
2
2
3
Mark these as a non-streaming instructions, which should trap if full
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
a64 support is not enabled in streaming mode. In this case, introduce
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
PRF_ns (prefetch non-streaming) to handle the checks.
5
Message-id: 20241202131347.498124-48-peter.maydell@linaro.org
6
---
7
target/rx/cpu.c | 2 ++
8
1 file changed, 2 insertions(+)
6
9
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/sme-fa64.decode | 3 ---
13
target/arm/sve.decode | 10 +++++-----
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
16
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/sme-fa64.decode
12
--- a/target/rx/cpu.c
20
+++ b/target/arm/sme-fa64.decode
13
+++ b/target/rx/cpu.c
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
14
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
15
* then prefer dest over source", which is float_2nan_prop_s_ab.
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
16
*/
24
17
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
18
+ /* Default NaN value: sign bit clear, set frac msb */
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
72
}
20
}
73
21
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
22
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
75
+{
76
+ if (!dc_isar_feature(aa64_sve, s)) {
77
+ return false;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
83
+}
84
+
85
/*
86
* Move Prefix
87
*
88
--
23
--
89
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for s390x.
2
2
3
This is an SVE instruction that operates using the SVE vector
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
length but that it is present only if SME is implemented.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-49-peter.maydell@linaro.org
6
---
7
target/s390x/cpu.c | 2 ++
8
1 file changed, 2 insertions(+)
5
9
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper.h | 18 +++++++
12
target/arm/sve.decode | 5 ++
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
12
--- a/target/s390x/cpu.c
20
+++ b/target/arm/helper.h
13
+++ b/target/s390x/cpu.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
14
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_reset_hold(Object *obj, ResetType type)
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
15
set_float_3nan_prop_rule(float_3nan_prop_s_abc, &env->fpu_status);
23
void, ptr, ptr, ptr, ptr, ptr, i32)
16
set_float_infzeronan_rule(float_infzeronan_dnan_always,
24
17
&env->fpu_status);
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
18
+ /* Default NaN value: sign bit clear, frac msb set */
26
+ void, ptr, ptr, ptr, ptr, i32)
19
+ set_float_default_nan_pattern(0b01000000, &env->fpu_status);
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
20
/* fall through */
28
+ void, ptr, ptr, ptr, ptr, i32)
21
case RESET_TYPE_S390_CPU_NORMAL:
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
22
env->psw.mask &= ~PSW_MASK_RI;
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
66
}
67
+
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
69
+{
70
+ tcg_gen_smax_i32(d, a, n);
71
+ tcg_gen_smin_i32(d, d, m);
72
+}
73
+
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
75
+{
76
+ tcg_gen_smax_i64(d, a, n);
77
+ tcg_gen_smin_i64(d, d, m);
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
201
--
23
--
202
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for SPARC, and remove
2
the ifdef from parts64_default_nan.
2
3
3
This is SMOPA, SUMOPA, USMOPA_s, UMOPA, for both Int8 and Int16.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20241202131347.498124-50-peter.maydell@linaro.org
7
---
8
target/sparc/cpu.c | 2 ++
9
fpu/softfloat-specialize.c.inc | 5 +----
10
2 files changed, 3 insertions(+), 4 deletions(-)
4
11
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/helper-sme.h | 16 ++++++++
11
target/arm/sme.decode | 10 +++++
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
15
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
17
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-sme.h
14
--- a/target/sparc/cpu.c
19
+++ b/target/arm/helper-sme.h
15
+++ b/target/sparc/cpu.c
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
16
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp)
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
17
set_float_3nan_prop_rule(float_3nan_prop_s_cba, &env->fp_status);
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
18
/* For inf * 0 + NaN, return the input NaN */
23
void, ptr, ptr, ptr, ptr, ptr, i32)
19
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
20
+ /* Default NaN value: sign bit clear, all frac bits set */
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
21
+ set_float_default_nan_pattern(0b01111111, &env->fp_status);
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
22
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
23
cpu_exec_realizefn(cs, &local_err);
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
24
if (local_err != NULL) {
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
25
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
27
--- a/fpu/softfloat-specialize.c.inc
43
+++ b/target/arm/sme.decode
28
+++ b/fpu/softfloat-specialize.c.inc
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
29
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
45
30
uint8_t dnan_pattern = status->default_nan_pattern;
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
31
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
32
if (dnan_pattern == 0) {
48
+
33
-#if defined(TARGET_SPARC)
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
34
- /* Sign bit clear, all frac bits set */
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
35
- dnan_pattern = 0b01111111;
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
36
-#elif defined(TARGET_HEXAGON)
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
37
+#if defined(TARGET_HEXAGON)
53
+
38
/* Sign bit set, all frac bits set. */
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
39
dnan_pattern = 0b11111111;
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
40
#else
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/sme_helper.c
61
+++ b/target/arm/sme_helper.c
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
63
} while (row & 15);
64
}
65
}
66
+
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
68
+
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
72
+{
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
75
+
76
+ for (row = 0; row < oprsz; ++row) {
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
80
+
81
+ for (col = 0; col < oprsz; ++col) {
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
86
+ }
87
+ }
88
+}
89
+
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
92
+{ \
93
+ uint32_t sum0 = 0, sum1 = 0; \
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
95
+ n &= expand_pred_b(p); \
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
110
+}
111
+
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
114
+{ \
115
+ uint64_t sum = 0; \
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
117
+ n &= expand_pred_h(p); \
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
123
+}
124
+
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
129
+
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
134
+
135
+#define DEF_IMOPH(NAME) \
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
137
+ void *vpm, uint32_t desc) \
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
139
+
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
166
--
41
--
167
2.25.1
42
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for xtensa.
2
2
3
Mark ADR as a non-streaming instruction, which should trap
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
if full a64 support is not enabled in streaming mode.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-51-peter.maydell@linaro.org
6
---
7
target/xtensa/cpu.c | 2 ++
8
1 file changed, 2 insertions(+)
5
9
6
Removing entries from sme-fa64.decode is an easy way to see
10
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
7
what remains to be done.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 7 +++++++
15
target/arm/sme-fa64.decode | 1 -
16
target/arm/translate-sve.c | 8 ++++----
17
3 files changed, 11 insertions(+), 5 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
12
--- a/target/xtensa/cpu.c
22
+++ b/target/arm/translate.h
13
+++ b/target/xtensa/cpu.c
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
14
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_reset_hold(Object *obj, ResetType type)
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
15
/* For inf * 0 + NaN, return the input NaN */
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
16
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
26
17
set_no_signaling_nans(!dfpu, &env->fp_status);
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
18
+ /* Default NaN value: sign bit clear, set frac msb */
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
29
+ { \
20
xtensa_use_first_nan(env, !dfpu);
30
+ s->is_nonstreaming = true; \
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
+ }
33
+
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/sme-fa64.decode
38
+++ b/target/arm/sme-fa64.decode
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
42
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
53
}
21
}
54
22
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
63
64
/*
65
*** SVE Integer Misc - Unpredicated Group
66
--
23
--
67
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for hexagon.
2
Remove the ifdef from parts64_default_nan(); the only
3
remaining unconverted targets all use the default case.
2
4
3
This is an SVE instruction that operates using the SVE vector
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
length but that it is present only if SME is implemented.
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20241202131347.498124-52-peter.maydell@linaro.org
8
---
9
target/hexagon/cpu.c | 2 ++
10
fpu/softfloat-specialize.c.inc | 5 -----
11
2 files changed, 2 insertions(+), 5 deletions(-)
5
12
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper-sve.h | 2 ++
12
target/arm/sve.decode | 1 +
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
16
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
18
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-sve.h
15
--- a/target/hexagon/cpu.c
20
+++ b/target/arm/helper-sve.h
16
+++ b/target/hexagon/cpu.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type)
22
18
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
19
set_default_nan_mode(1, &env->fp_status);
24
20
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
+ /* Default NaN value: sign bit set, all frac bits set */
26
+
22
+ set_float_default_nan_pattern(0b11111111, &env->fp_status);
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
}
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info)
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
26
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
31
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/sve.decode
28
--- a/fpu/softfloat-specialize.c.inc
33
+++ b/target/arm/sve.decode
29
+++ b/fpu/softfloat-specialize.c.inc
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
30
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
31
uint8_t dnan_pattern = status->default_nan_pattern;
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
32
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
33
if (dnan_pattern == 0) {
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
34
-#if defined(TARGET_HEXAGON)
39
35
- /* Sign bit set, all frac bits set. */
40
# SVE vector splice (predicated, destructive)
36
- dnan_pattern = 0b11111111;
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
37
-#else
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
38
/*
43
index XXXXXXX..XXXXXXX 100644
39
* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V,
44
--- a/target/arm/sve_helper.c
40
* S390, SH4, TriCore, and Xtensa. Our other supported targets
45
+++ b/target/arm/sve_helper.c
41
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
42
/* sign bit clear, set frac msb */
47
43
dnan_pattern = 0b01000000;
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
44
}
49
45
-#endif
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
46
}
51
+{
47
assert(dnan_pattern != 0);
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
53
+ uint64_t *d = vd, *n = vn;
54
+ uint8_t *pg = vg;
55
+
56
+ for (i = 0; i < opr_sz; i += 2) {
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
63
+ }
64
+}
65
+
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
76
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
78
+
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
80
gen_helper_sve_splice, a, a->esz)
81
48
82
--
49
--
83
2.25.1
50
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for riscv.
2
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-53-peter.maydell@linaro.org
7
---
6
---
8
target/arm/helper-sme.h | 5 +++
7
target/riscv/cpu.c | 2 ++
9
target/arm/sme.decode | 9 +++++
8
1 file changed, 2 insertions(+)
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
13
9
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
10
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
12
--- a/target/riscv/cpu.c
17
+++ b/target/arm/helper-sme.h
13
+++ b/target/riscv/cpu.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
14
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
15
cs->exception_index = RISCV_EXCP_NONE;
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
16
env->load_res = -1;
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
17
set_default_nan_mode(1, &env->fp_status);
22
+
18
+ /* Default NaN value: sign bit clear, frac msb set */
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
20
env->vill = true;
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
21
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
#ifndef CONFIG_USER_ONLY
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
58
}
59
}
60
+
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
62
+ void *vpm, void *vst, uint32_t desc)
63
+{
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
65
+ uint32_t neg = simd_data(desc) << 31;
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+ float_status fpst;
68
+
69
+ /*
70
+ * Make a copy of float_status because this operation does not
71
+ * update the cumulative fp exception status. It also produces
72
+ * default nans.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
168
--
23
--
169
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Set the default NaN pattern explicitly for tricore.
2
2
3
Mark these as a non-streaming instructions, which should trap
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
if full a64 support is not enabled in streaming mode.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20241202131347.498124-54-peter.maydell@linaro.org
6
---
7
target/tricore/helper.c | 2 ++
8
1 file changed, 2 insertions(+)
5
9
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
diff --git a/target/tricore/helper.c b/target/tricore/helper.c
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
12
--- a/target/tricore/helper.c
18
+++ b/target/arm/sme-fa64.decode
13
+++ b/target/tricore/helper.c
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
14
@@ -XXX,XX +XXX,XX @@ void fpu_set_state(CPUTriCoreState *env)
20
15
set_flush_to_zero(1, &env->fp_status);
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
16
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
17
set_default_nan_mode(1, &env->fp_status);
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
18
+ /* Default NaN pattern: sign bit clear, frac msb set */
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
19
+ set_float_default_nan_pattern(0b01000000, &env->fp_status);
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
49
}
20
}
50
21
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
22
uint32_t psw_read(CPUTriCoreState *env)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
58
--
23
--
59
2.25.1
24
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Now that all our targets have bene converted to explicitly specify
2
their pattern for the default NaN value we can remove the remaining
3
fallback code in parts64_default_nan().
2
4
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20241202131347.498124-55-peter.maydell@linaro.org
7
---
8
---
8
target/arm/helper-sme.h | 5 +++
9
fpu/softfloat-specialize.c.inc | 14 --------------
9
target/arm/sme.decode | 11 +++++
10
1 file changed, 14 deletions(-)
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 31 +++++++++++++
12
4 files changed, 137 insertions(+)
13
11
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
12
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
14
--- a/fpu/softfloat-specialize.c.inc
17
+++ b/target/arm/helper-sme.h
15
+++ b/fpu/softfloat-specialize.c.inc
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
16
@@ -XXX,XX +XXX,XX @@ static void parts64_default_nan(FloatParts64 *p, float_status *status)
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
17
uint64_t frac;
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
18
uint8_t dnan_pattern = status->default_nan_pattern;
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
19
22
+
20
- if (dnan_pattern == 0) {
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
- /*
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
- * This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V,
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
23
- * S390, SH4, TriCore, and Xtensa. Our other supported targets
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
24
- * do not have floating-point.
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
- */
28
index XXXXXXX..XXXXXXX 100644
26
- if (snan_bit_is_one(status)) {
29
--- a/target/arm/sme.decode
27
- /* sign bit clear, set all frac bits other than msb */
30
+++ b/target/arm/sme.decode
28
- dnan_pattern = 0b00111111;
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
29
- } else {
32
30
- /* sign bit clear, set frac msb */
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
31
- dnan_pattern = 0b01000000;
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
32
- }
35
+
33
- }
36
+### SME Add Vector to Array
34
assert(dnan_pattern != 0);
37
+
35
38
+&adda zad zn pm pn
36
sign = dnan_pattern >> 7;
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
41
+
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sme_helper.c
49
+++ b/target/arm/sme_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
51
DO_ST(q, _le, MO_128)
52
53
#undef DO_ST
54
+
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
56
+ void *vpm, uint32_t desc)
57
+{
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
59
+ uint64_t *pn = vpn, *pm = vpm;
60
+ uint32_t *zda = vzda, *zn = vzn;
61
+
62
+ for (row = 0; row < oprsz; ) {
63
+ uint64_t pa = pn[row >> 4];
64
+ do {
65
+ if (pa & 1) {
66
+ for (col = 0; col < oprsz; ) {
67
+ uint64_t pb = pm[col >> 4];
68
+ do {
69
+ if (pb & 1) {
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
71
+ }
72
+ pb >>= 4;
73
+ } while (++col & 15);
74
+ }
75
+ }
76
+ pa >>= 4;
77
+ } while (++row & 15);
78
+ }
79
+}
80
+
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
83
+{
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
87
+
88
+ for (row = 0; row < oprsz; ++row) {
89
+ if (pn[H1(row)] & 1) {
90
+ for (col = 0; col < oprsz; ++col) {
91
+ if (pm[H1(col)] & 1) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
96
+ }
97
+}
98
+
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
101
+{
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
105
+
106
+ for (row = 0; row < oprsz; ) {
107
+ uint64_t pa = pn[row >> 4];
108
+ do {
109
+ if (pa & 1) {
110
+ uint32_t zn_row = zn[H4(row)];
111
+ for (col = 0; col < oprsz; ) {
112
+ uint64_t pb = pm[col >> 4];
113
+ do {
114
+ if (pb & 1) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
116
+ }
117
+ pb >>= 4;
118
+ } while (++col & 15);
119
+ }
120
+ }
121
+ pa >>= 4;
122
+ } while (++row & 15);
123
+ }
124
+}
125
+
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
127
+ void *vpm, uint32_t desc)
128
+{
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
132
+
133
+ for (row = 0; row < oprsz; ++row) {
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
177
+}
178
+
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
183
--
37
--
184
2.25.1
38
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
3
Inline pickNaNMulAdd into its only caller. This makes
4
We will reuse this for SME save and restore array insns.
4
one assert redundant with the immediately preceding IF.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20241203203949.483774-3-richard.henderson@linaro.org
9
[PMM: keep comment from old code in new location]
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
---
11
target/arm/translate-a64.h | 3 +++
12
fpu/softfloat-parts.c.inc | 41 +++++++++++++++++++++++++-
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
13
fpu/softfloat-specialize.c.inc | 54 ----------------------------------
13
2 files changed, 39 insertions(+), 12 deletions(-)
14
2 files changed, 40 insertions(+), 55 deletions(-)
14
15
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
16
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.h
18
--- a/fpu/softfloat-parts.c.inc
18
+++ b/target/arm/translate-a64.h
19
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
20
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
20
uint32_t rm_ofs, int64_t shift,
21
}
21
uint32_t opr_sz, uint32_t max_sz);
22
22
23
if (s->default_nan_mode) {
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
24
+ /*
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
25
+ * We guarantee not to require the target to tell us how to
26
+ * pick a NaN if we're always returning the default NaN.
27
+ * But if we're not in default-NaN mode then the target must
28
+ * specify.
29
+ */
30
which = 3;
31
+ } else if (infzero) {
32
+ /*
33
+ * Inf * 0 + NaN -- some implementations return the
34
+ * default NaN here, and some return the input NaN.
35
+ */
36
+ switch (s->float_infzeronan_rule) {
37
+ case float_infzeronan_dnan_never:
38
+ which = 2;
39
+ break;
40
+ case float_infzeronan_dnan_always:
41
+ which = 3;
42
+ break;
43
+ case float_infzeronan_dnan_if_qnan:
44
+ which = is_qnan(c->cls) ? 3 : 2;
45
+ break;
46
+ default:
47
+ g_assert_not_reached();
48
+ }
49
} else {
50
- which = pickNaNMulAdd(a->cls, b->cls, c->cls, infzero, have_snan, s);
51
+ FloatClass cls[3] = { a->cls, b->cls, c->cls };
52
+ Float3NaNPropRule rule = s->float_3nan_prop_rule;
25
+
53
+
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
54
+ assert(rule != float_3nan_prop_none);
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
55
+ if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
28
index XXXXXXX..XXXXXXX 100644
56
+ /* We have at least one SNaN input and should prefer it */
29
--- a/target/arm/translate-sve.c
57
+ do {
30
+++ b/target/arm/translate-sve.c
58
+ which = rule & R_3NAN_1ST_MASK;
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
59
+ rule >>= R_3NAN_1ST_LENGTH;
32
* The load should begin at the address Rn + IMM.
60
+ } while (!is_snan(cls[which]));
33
*/
61
+ } else {
34
62
+ do {
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
63
+ which = rule & R_3NAN_1ST_MASK;
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
64
+ rule >>= R_3NAN_1ST_LENGTH;
37
+ int len, int rn, int imm)
65
+ } while (!is_nan(cls[which]));
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
66
+ }
81
}
67
}
82
68
83
/*
69
if (which == 3) {
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
70
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
85
default:
71
index XXXXXXX..XXXXXXX 100644
86
g_assert_not_reached();
72
--- a/fpu/softfloat-specialize.c.inc
87
}
73
+++ b/fpu/softfloat-specialize.c.inc
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
74
@@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls,
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
91
}
75
}
92
}
76
}
93
77
94
/* Similarly for stores. */
78
-/*----------------------------------------------------------------------------
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
79
-| Select which NaN to propagate for a three-input operation.
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
80
-| For the moment we assume that no CPU needs the 'larger significand'
97
+ int len, int rn, int imm)
81
-| information.
98
{
82
-| Return values : 0 : a; 1 : b; 2 : c; 3 : default-NaN
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
83
-*----------------------------------------------------------------------------*/
100
int len_remain = len % 8;
84
-static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls,
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
85
- bool infzero, bool have_snan, float_status *status)
102
86
-{
103
t0 = tcg_temp_new_i64();
87
- FloatClass cls[3] = { a_cls, b_cls, c_cls };
104
for (i = 0; i < len_align; i += 8) {
88
- Float3NaNPropRule rule = status->float_3nan_prop_rule;
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
89
- int which;
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
90
-
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
91
- /*
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
92
- * We guarantee not to require the target to tell us how to
109
}
93
- * pick a NaN if we're always returning the default NaN.
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
94
- * But if we're not in default-NaN mode then the target must
111
clean_addr = new_tmp_a64_local(s);
95
- * specify.
112
tcg_gen_mov_i64(clean_addr, t0);
96
- */
113
97
- assert(!status->default_nan_mode);
114
+ if (base != cpu_env) {
98
-
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
99
- if (infzero) {
116
+ tcg_gen_mov_ptr(b, base);
100
- /*
117
+ base = b;
101
- * Inf * 0 + NaN -- some implementations return the default NaN here,
118
+ }
102
- * and some return the input NaN.
119
+
103
- */
120
gen_set_label(loop);
104
- switch (status->float_infzeronan_rule) {
121
105
- case float_infzeronan_dnan_never:
122
t0 = tcg_temp_new_i64();
106
- return 2;
123
tp = tcg_temp_new_ptr();
107
- case float_infzeronan_dnan_always:
124
- tcg_gen_add_ptr(tp, cpu_env, i);
108
- return 3;
125
+ tcg_gen_add_ptr(tp, base, i);
109
- case float_infzeronan_dnan_if_qnan:
126
tcg_gen_ld_i64(t0, tp, vofs);
110
- return is_qnan(c_cls) ? 3 : 2;
127
tcg_gen_addi_ptr(i, i, 8);
111
- default:
128
tcg_temp_free_ptr(tp);
112
- g_assert_not_reached();
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
113
- }
130
114
- }
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
115
-
132
tcg_temp_free_ptr(i);
116
- assert(rule != float_3nan_prop_none);
133
+
117
- if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
134
+ if (base != cpu_env) {
118
- /* We have at least one SNaN input and should prefer it */
135
+ tcg_temp_free_ptr(base);
119
- do {
136
+ assert(len_remain == 0);
120
- which = rule & R_3NAN_1ST_MASK;
137
+ }
121
- rule >>= R_3NAN_1ST_LENGTH;
138
}
122
- } while (!is_snan(cls[which]));
139
123
- } else {
140
/* Predicate register stores can be any multiple of 2. */
124
- do {
141
if (len_remain) {
125
- which = rule & R_3NAN_1ST_MASK;
142
t0 = tcg_temp_new_i64();
126
- rule >>= R_3NAN_1ST_LENGTH;
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
127
- } while (!is_nan(cls[which]));
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
128
- }
145
129
- return which;
146
switch (len_remain) {
130
-}
147
case 2:
131
-
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
132
/*----------------------------------------------------------------------------
149
if (sve_access_check(s)) {
133
| Returns 1 if the double-precision floating-point value `a' is a quiet
150
int size = vec_full_reg_size(s);
134
| NaN; otherwise returns 0.
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
154
}
155
return true;
156
}
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
158
if (sve_access_check(s)) {
159
int size = pred_full_reg_size(s);
160
int off = pred_full_reg_offset(s, a->rd);
161
- do_ldr(s, off, size, a->rn, a->imm * size);
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
184
--
135
--
185
2.25.1
136
2.34.1
137
138
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
3
Remove "3" as a special case for which and simply
4
SVE mode, and for SME present but SVE absent.
4
branch to return the desired value.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20241203203949.483774-4-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
11
fpu/softfloat-parts.c.inc | 20 ++++++++++----------
12
1 file changed, 16 insertions(+), 6 deletions(-)
12
1 file changed, 10 insertions(+), 10 deletions(-)
13
13
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-a64.c
16
--- a/fpu/softfloat-parts.c.inc
17
+++ b/target/arm/translate-a64.c
17
+++ b/fpu/softfloat-parts.c.inc
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
18
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
19
return true;
19
* But if we're not in default-NaN mode then the target must
20
* specify.
21
*/
22
- which = 3;
23
+ goto default_nan;
24
} else if (infzero) {
25
/*
26
* Inf * 0 + NaN -- some implementations return the
27
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
28
*/
29
switch (s->float_infzeronan_rule) {
30
case float_infzeronan_dnan_never:
31
- which = 2;
32
break;
33
case float_infzeronan_dnan_always:
34
- which = 3;
35
- break;
36
+ goto default_nan;
37
case float_infzeronan_dnan_if_qnan:
38
- which = is_qnan(c->cls) ? 3 : 2;
39
+ if (is_qnan(c->cls)) {
40
+ goto default_nan;
41
+ }
42
break;
43
default:
44
g_assert_not_reached();
45
}
46
+ which = 2;
47
} else {
48
FloatClass cls[3] = { a->cls, b->cls, c->cls };
49
Float3NaNPropRule rule = s->float_3nan_prop_rule;
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
51
}
52
}
53
54
- if (which == 3) {
55
- parts_default_nan(a, s);
56
- return a;
57
- }
58
-
59
switch (which) {
60
case 0:
61
break;
62
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
63
parts_silence_nan(a, s);
64
}
65
return a;
66
+
67
+ default_nan:
68
+ parts_default_nan(a, s);
69
+ return a;
20
}
70
}
21
22
-/* Check that SVE access is enabled. If it is, return true.
23
+/*
24
+ * Check that SVE access is enabled. If it is, return true.
25
* If not, emit code to generate an appropriate exception and return false.
26
+ * This function corresponds to CheckSVEEnabled().
27
*/
28
bool sve_access_check(DisasContext *s)
29
{
30
- if (s->sve_excp_el) {
31
- assert(!s->sve_access_checked);
32
- s->sve_access_checked = true;
33
-
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
35
+ assert(dc_isar_feature(aa64_sme, s));
36
+ if (!sme_sm_enabled_check(s)) {
37
+ goto fail_exit;
38
+ }
39
+ } else if (s->sve_excp_el) {
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
41
syn_sve_access_trap(), s->sve_excp_el);
42
- return false;
43
+ goto fail_exit;
44
}
45
s->sve_access_checked = true;
46
return fp_access_check(s);
47
+
48
+ fail_exit:
49
+ /* Assert that we only raise one exception per instruction. */
50
+ assert(!s->sve_access_checked);
51
+ s->sve_access_checked = true;
52
+ return false;
53
}
54
71
55
/*
72
/*
56
--
73
--
57
2.25.1
74
2.34.1
75
76
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We can reuse the SVE functions for LDR and STR, passing in the
3
Assign the pointer return value to 'a' directly,
4
base of the ZA vector and a zero offset.
4
rather than going through an intermediary index.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20241203203949.483774-5-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/sme.decode | 7 +++++++
11
fpu/softfloat-parts.c.inc | 32 ++++++++++----------------------
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
12
1 file changed, 10 insertions(+), 22 deletions(-)
13
2 files changed, 31 insertions(+)
14
13
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
14
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme.decode
16
--- a/fpu/softfloat-parts.c.inc
18
+++ b/target/arm/sme.decode
17
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
18
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
20
&ldst rs=%mova_rs
19
FloatPartsN *c, float_status *s,
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
20
int ab_mask, int abc_mask)
22
&ldst esz=4 rs=%mova_rs
21
{
23
+
22
- int which;
24
+&ldstr rv rn imm
23
bool infzero = (ab_mask == float_cmask_infzero);
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
24
bool have_snan = (abc_mask & float_cmask_snan);
26
+ &ldstr rv=%mova_rs
25
+ FloatPartsN *ret;
27
+
26
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
27
if (unlikely(have_snan)) {
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
28
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
29
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
31
index XXXXXXX..XXXXXXX 100644
30
default:
32
--- a/target/arm/translate-sme.c
31
g_assert_not_reached();
33
+++ b/target/arm/translate-sme.c
32
}
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
33
- which = 2;
35
tcg_temp_free_i64(addr);
34
+ ret = c;
36
return true;
35
} else {
37
}
36
- FloatClass cls[3] = { a->cls, b->cls, c->cls };
38
+
37
+ FloatPartsN *val[3] = { a, b, c };
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
38
Float3NaNPropRule rule = s->float_3nan_prop_rule;
40
+
39
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
40
assert(rule != float_3nan_prop_none);
42
+{
41
if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
43
+ int svl = streaming_vec_reg_size(s);
42
/* We have at least one SNaN input and should prefer it */
44
+ int imm = a->imm;
43
do {
45
+ TCGv_ptr base;
44
- which = rule & R_3NAN_1ST_MASK;
46
+
45
+ ret = val[rule & R_3NAN_1ST_MASK];
47
+ if (!sme_za_enabled_check(s)) {
46
rule >>= R_3NAN_1ST_LENGTH;
48
+ return true;
47
- } while (!is_snan(cls[which]));
49
+ }
48
+ } while (!is_snan(ret->cls));
50
+
49
} else {
51
+ /* ZA[n] equates to ZA0H.B[n]. */
50
do {
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
51
- which = rule & R_3NAN_1ST_MASK;
53
+
52
+ ret = val[rule & R_3NAN_1ST_MASK];
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
53
rule >>= R_3NAN_1ST_LENGTH;
55
+
54
- } while (!is_nan(cls[which]));
56
+ tcg_temp_free_ptr(base);
55
+ } while (!is_nan(ret->cls));
57
+ return true;
56
}
58
+}
57
}
59
+
58
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
59
- switch (which) {
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
60
- case 0:
61
- break;
62
- case 1:
63
- a = b;
64
- break;
65
- case 2:
66
- a = c;
67
- break;
68
- default:
69
- g_assert_not_reached();
70
+ if (is_snan(ret->cls)) {
71
+ parts_silence_nan(ret, s);
72
}
73
- if (is_snan(a->cls)) {
74
- parts_silence_nan(a, s);
75
- }
76
- return a;
77
+ return ret;
78
79
default_nan:
80
parts_default_nan(a, s);
62
--
81
--
63
2.25.1
82
2.34.1
83
84
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Mark these as non-streaming instructions, which should trap
3
While all indices into val[] should be in [0-2], the mask
4
if full a64 support is not enabled in streaming mode.
4
applied is two bits. To help static analysis see there is
5
no possibility of read beyond the end of the array, pad the
6
array to 4 entries, with the final being (implicitly) NULL.
5
7
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-id: 20241203203949.483774-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
12
---
11
target/arm/sme-fa64.decode | 1 -
13
fpu/softfloat-parts.c.inc | 2 +-
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
14
1 file changed, 1 insertion(+), 1 deletion(-)
13
2 files changed, 18 insertions(+), 18 deletions(-)
14
15
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
--- a/fpu/softfloat-parts.c.inc
18
+++ b/target/arm/sme-fa64.decode
19
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
}
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
ret = c;
22
23
} else {
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
- FloatPartsN *val[3] = { a, b, c };
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
+ FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
Float3NaNPropRule rule = s->float_3nan_prop_rule;
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
assert(rule != float_3nan_prop_none);
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
92
--
29
--
93
2.25.1
30
2.34.1
31
32
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Move the checks out of the parsing loop and into the
3
This function is part of the public interface and
4
restore function. This more closely mirrors the code
4
is not "specialized" to any target in any way.
5
structure in the kernel, and is slightly clearer.
6
5
7
Reject rather than silently skip incorrect VL and SVE record sizes,
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
bringing our checks in to line with those the kernel does.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20241203203949.483774-7-richard.henderson@linaro.org
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
10
---
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
11
fpu/softfloat.c | 52 ++++++++++++++++++++++++++++++++++
16
1 file changed, 35 insertions(+), 16 deletions(-)
12
fpu/softfloat-specialize.c.inc | 52 ----------------------------------
13
2 files changed, 52 insertions(+), 52 deletions(-)
17
14
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/linux-user/aarch64/signal.c
17
--- a/fpu/softfloat.c
21
+++ b/linux-user/aarch64/signal.c
18
+++ b/fpu/softfloat.c
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
19
@@ -XXX,XX +XXX,XX @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
23
}
20
*zExpPtr = 1 - shiftCount;
24
}
21
}
25
22
26
-static void target_restore_sve_record(CPUARMState *env,
23
+/*----------------------------------------------------------------------------
27
- struct target_sve_context *sve, int vq)
24
+| Takes two extended double-precision floating-point values `a' and `b', one
28
+static bool target_restore_sve_record(CPUARMState *env,
25
+| of which is a NaN, and returns the appropriate NaN result. If either `a' or
29
+ struct target_sve_context *sve,
26
+| `b' is a signaling NaN, the invalid exception is raised.
30
+ int size)
27
+*----------------------------------------------------------------------------*/
31
{
28
+
32
- int i, j;
29
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
33
+ int i, j, vl, vq;
30
+{
34
31
+ bool aIsLargerSignificand;
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
32
+ FloatClass a_cls, b_cls;
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
33
+
37
+ return false;
34
+ /* This is not complete, but is good enough for pickNaN. */
35
+ a_cls = (!floatx80_is_any_nan(a)
36
+ ? float_class_normal
37
+ : floatx80_is_signaling_nan(a, status)
38
+ ? float_class_snan
39
+ : float_class_qnan);
40
+ b_cls = (!floatx80_is_any_nan(b)
41
+ ? float_class_normal
42
+ : floatx80_is_signaling_nan(b, status)
43
+ ? float_class_snan
44
+ : float_class_qnan);
45
+
46
+ if (is_snan(a_cls) || is_snan(b_cls)) {
47
+ float_raise(float_flag_invalid, status);
38
+ }
48
+ }
39
+
49
+
40
+ __get_user(vl, &sve->vl);
50
+ if (status->default_nan_mode) {
41
+ vq = sve_vq(env);
51
+ return floatx80_default_nan(status);
42
+
43
+ /* Reject mismatched VL. */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
45
+ return false;
46
+ }
52
+ }
47
+
53
+
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
54
+ if (a.low < b.low) {
49
+ if (size <= sizeof(*sve)) {
55
+ aIsLargerSignificand = 0;
50
+ return true;
56
+ } else if (b.low < a.low) {
57
+ aIsLargerSignificand = 1;
58
+ } else {
59
+ aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
51
+ }
60
+ }
52
+
61
+
53
+ /* Reject non-empty but incomplete record. */
62
+ if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) {
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
63
+ if (is_snan(b_cls)) {
55
+ return false;
64
+ return floatx80_silence_nan(b, status);
65
+ }
66
+ return b;
67
+ } else {
68
+ if (is_snan(a_cls)) {
69
+ return floatx80_silence_nan(a, status);
70
+ }
71
+ return a;
56
+ }
72
+ }
73
+}
57
+
74
+
58
+ /*
75
/*----------------------------------------------------------------------------
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
76
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
60
* at a subsequent address. This corresponds to a little-endian load
77
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
61
* of our 64-bit hunks.
78
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
62
*/
79
index XXXXXXX..XXXXXXX 100644
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
80
--- a/fpu/softfloat-specialize.c.inc
64
}
81
+++ b/fpu/softfloat-specialize.c.inc
65
}
82
@@ -XXX,XX +XXX,XX @@ floatx80 floatx80_silence_nan(floatx80 a, float_status *status)
66
}
83
return a;
67
+ return true;
68
}
84
}
69
85
70
static int target_restore_sigframe(CPUARMState *env,
86
-/*----------------------------------------------------------------------------
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
87
-| Takes two extended double-precision floating-point values `a' and `b', one
72
struct target_sve_context *sve = NULL;
88
-| of which is a NaN, and returns the appropriate NaN result. If either `a' or
73
uint64_t extra_datap = 0;
89
-| `b' is a signaling NaN, the invalid exception is raised.
74
bool used_extra = false;
90
-*----------------------------------------------------------------------------*/
75
- int vq = 0, sve_size = 0;
91
-
76
+ int sve_size = 0;
92
-floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
77
93
-{
78
target_restore_general_frame(env, sf);
94
- bool aIsLargerSignificand;
79
95
- FloatClass a_cls, b_cls;
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
96
-
81
if (sve || size < sizeof(struct target_sve_context)) {
97
- /* This is not complete, but is good enough for pickNaN. */
82
goto err;
98
- a_cls = (!floatx80_is_any_nan(a)
83
}
99
- ? float_class_normal
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
100
- : floatx80_is_signaling_nan(a, status)
85
- vq = sve_vq(env);
101
- ? float_class_snan
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
102
- : float_class_qnan);
87
- if (size == sve_size) {
103
- b_cls = (!floatx80_is_any_nan(b)
88
- sve = (struct target_sve_context *)ctx;
104
- ? float_class_normal
89
- break;
105
- : floatx80_is_signaling_nan(b, status)
90
- }
106
- ? float_class_snan
91
- }
107
- : float_class_qnan);
92
- goto err;
108
-
93
+ sve = (struct target_sve_context *)ctx;
109
- if (is_snan(a_cls) || is_snan(b_cls)) {
94
+ sve_size = size;
110
- float_raise(float_flag_invalid, status);
95
+ break;
111
- }
96
112
-
97
case TARGET_EXTRA_MAGIC:
113
- if (status->default_nan_mode) {
98
if (extra || size != sizeof(struct target_extra_context)) {
114
- return floatx80_default_nan(status);
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
115
- }
100
}
116
-
101
117
- if (a.low < b.low) {
102
/* SVE data, if present, overwrites FPSIMD data. */
118
- aIsLargerSignificand = 0;
103
- if (sve) {
119
- } else if (b.low < a.low) {
104
- target_restore_sve_record(env, sve, vq);
120
- aIsLargerSignificand = 1;
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
121
- } else {
106
+ goto err;
122
- aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
107
}
123
- }
108
unlock_user(extra, extra_datap, 0);
124
-
109
return 0;
125
- if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) {
126
- if (is_snan(b_cls)) {
127
- return floatx80_silence_nan(b, status);
128
- }
129
- return b;
130
- } else {
131
- if (is_snan(a_cls)) {
132
- return floatx80_silence_nan(a, status);
133
- }
134
- return a;
135
- }
136
-}
137
-
138
/*----------------------------------------------------------------------------
139
| Returns 1 if the quadruple-precision floating-point value `a' is a quiet
140
| NaN; otherwise returns 0.
110
--
141
--
111
2.25.1
142
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Unpacking and repacking the parts may be slightly more work
4
than we did before, but we get to reuse more code. For a
5
code path handling exceptional values, this is an improvement.
6
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
8
Message-id: 20241203203949.483774-8-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
11
---
8
target/arm/helper-sme.h | 2 ++
12
fpu/softfloat.c | 43 +++++--------------------------------------
9
target/arm/sme.decode | 1 +
13
1 file changed, 5 insertions(+), 38 deletions(-)
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
13
14
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
--- a/fpu/softfloat.c
17
+++ b/target/arm/helper-sme.h
18
+++ b/fpu/softfloat.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
@@ -XXX,XX +XXX,XX @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status)
21
22
{
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
23
- bool aIsLargerSignificand;
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
24
- FloatClass a_cls, b_cls;
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
25
+ FloatParts128 pa, pb, *pr;
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
26
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
27
- /* This is not complete, but is good enough for pickNaN. */
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
- a_cls = (!floatx80_is_any_nan(a)
28
index XXXXXXX..XXXXXXX 100644
29
- ? float_class_normal
29
--- a/target/arm/sme.decode
30
- : floatx80_is_signaling_nan(a, status)
30
+++ b/target/arm/sme.decode
31
- ? float_class_snan
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
32
- : float_class_qnan);
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
33
- b_cls = (!floatx80_is_any_nan(b)
33
34
- ? float_class_normal
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
35
- : floatx80_is_signaling_nan(b, status)
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
36
- ? float_class_snan
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
- : float_class_qnan);
37
index XXXXXXX..XXXXXXX 100644
38
-
38
--- a/target/arm/sme_helper.c
39
- if (is_snan(a_cls) || is_snan(b_cls)) {
39
+++ b/target/arm/sme_helper.c
40
- float_raise(float_flag_invalid, status);
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
41
- }
41
return pair;
42
-
43
- if (status->default_nan_mode) {
44
+ if (!floatx80_unpack_canonical(&pa, a, status) ||
45
+ !floatx80_unpack_canonical(&pb, b, status)) {
46
return floatx80_default_nan(status);
47
}
48
49
- if (a.low < b.low) {
50
- aIsLargerSignificand = 0;
51
- } else if (b.low < a.low) {
52
- aIsLargerSignificand = 1;
53
- } else {
54
- aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
55
- }
56
-
57
- if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) {
58
- if (is_snan(b_cls)) {
59
- return floatx80_silence_nan(b, status);
60
- }
61
- return b;
62
- } else {
63
- if (is_snan(a_cls)) {
64
- return floatx80_silence_nan(a, status);
65
- }
66
- return a;
67
- }
68
+ pr = parts_pick_nan(&pa, &pb, status);
69
+ return floatx80_round_pack_canonical(pr, status);
42
}
70
}
43
71
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
72
/*----------------------------------------------------------------------------
45
+ float_status *s_std, float_status *s_odd)
46
+{
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
53
+
54
+ /*
55
+ * The ARM pseudocode function FPDot performs both multiplies
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
69
+}
70
+
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
120
{
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/arm/translate-sme.c
124
+++ b/target/arm/translate-sme.c
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
126
return true;
127
}
128
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
132
133
--
73
--
134
2.25.1
74
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Inline pickNaN into its only caller. This makes one assert
4
redundant with the immediately preceding IF.
5
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20241203203949.483774-9-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
10
---
8
target/arm/helper-sme.h | 2 ++
11
fpu/softfloat-parts.c.inc | 82 +++++++++++++++++++++++++----
9
target/arm/sme.decode | 4 ++++
12
fpu/softfloat-specialize.c.inc | 96 ----------------------------------
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
13
2 files changed, 73 insertions(+), 105 deletions(-)
11
target/arm/translate-sme.c | 13 +++++++++++++
14
12
4 files changed, 44 insertions(+)
15
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
--- a/fpu/softfloat-parts.c.inc
17
+++ b/target/arm/helper-sme.h
18
+++ b/fpu/softfloat-parts.c.inc
18
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19
20
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
21
float_status *s)
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
22
{
23
+ int cmp, which;
22
+
24
+
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
25
if (is_snan(a->cls) || is_snan(b->cls)) {
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
26
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
27
}
28
29
if (s->default_nan_mode) {
30
parts_default_nan(a, s);
31
- } else {
32
- int cmp = frac_cmp(a, b);
33
- if (cmp == 0) {
34
- cmp = a->sign < b->sign;
35
- }
36
+ return a;
37
+ }
38
39
- if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
40
- a = b;
41
- }
42
+ cmp = frac_cmp(a, b);
43
+ if (cmp == 0) {
44
+ cmp = a->sign < b->sign;
45
+ }
46
+
47
+ switch (s->float_2nan_prop_rule) {
48
+ case float_2nan_prop_s_ab:
49
if (is_snan(a->cls)) {
50
- parts_silence_nan(a, s);
51
+ which = 0;
52
+ } else if (is_snan(b->cls)) {
53
+ which = 1;
54
+ } else if (is_qnan(a->cls)) {
55
+ which = 0;
56
+ } else {
57
+ which = 1;
58
}
59
+ break;
60
+ case float_2nan_prop_s_ba:
61
+ if (is_snan(b->cls)) {
62
+ which = 1;
63
+ } else if (is_snan(a->cls)) {
64
+ which = 0;
65
+ } else if (is_qnan(b->cls)) {
66
+ which = 1;
67
+ } else {
68
+ which = 0;
69
+ }
70
+ break;
71
+ case float_2nan_prop_ab:
72
+ which = is_nan(a->cls) ? 0 : 1;
73
+ break;
74
+ case float_2nan_prop_ba:
75
+ which = is_nan(b->cls) ? 1 : 0;
76
+ break;
77
+ case float_2nan_prop_x87:
78
+ /*
79
+ * This implements x87 NaN propagation rules:
80
+ * SNaN + QNaN => return the QNaN
81
+ * two SNaNs => return the one with the larger significand, silenced
82
+ * two QNaNs => return the one with the larger significand
83
+ * SNaN and a non-NaN => return the SNaN, silenced
84
+ * QNaN and a non-NaN => return the QNaN
85
+ *
86
+ * If we get down to comparing significands and they are the same,
87
+ * return the NaN with the positive sign bit (if any).
88
+ */
89
+ if (is_snan(a->cls)) {
90
+ if (is_snan(b->cls)) {
91
+ which = cmp > 0 ? 0 : 1;
92
+ } else {
93
+ which = is_qnan(b->cls) ? 1 : 0;
94
+ }
95
+ } else if (is_qnan(a->cls)) {
96
+ if (is_snan(b->cls) || !is_qnan(b->cls)) {
97
+ which = 0;
98
+ } else {
99
+ which = cmp > 0 ? 0 : 1;
100
+ }
101
+ } else {
102
+ which = 1;
103
+ }
104
+ break;
105
+ default:
106
+ g_assert_not_reached();
107
+ }
108
+
109
+ if (which) {
110
+ a = b;
111
+ }
112
+ if (is_snan(a->cls)) {
113
+ parts_silence_nan(a, s);
114
}
115
return a;
116
}
117
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
25
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
119
--- a/fpu/softfloat-specialize.c.inc
27
+++ b/target/arm/sme.decode
120
+++ b/fpu/softfloat-specialize.c.inc
28
@@ -XXX,XX +XXX,XX @@
121
@@ -XXX,XX +XXX,XX @@ bool float32_is_signaling_nan(float32 a_, float_status *status)
29
#
30
# This file is processed by scripts/decodetree.py
31
#
32
+
33
+### SME Misc
34
+
35
+ZERO 11000000 00 001 00000000000 imm:8
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
122
}
43
}
123
}
44
+
124
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
125
-/*----------------------------------------------------------------------------
46
+{
126
-| Select which NaN to propagate for a two-input operation.
47
+ uint32_t i;
127
-| IEEE754 doesn't specify all the details of this, so the
48
+
128
-| algorithm is target-specific.
49
+ /*
129
-| The routine is passed various bits of information about the
50
+ * Special case clearing the entire ZA space.
130
-| two NaNs and should return 0 to select NaN a and 1 for NaN b.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
131
-| Note that signalling NaNs are always squashed to quiet NaNs
52
+ * parts of the ZA storage outside of SVL.
132
-| by the caller, by calling floatXX_silence_nan() before
53
+ */
133
-| returning them.
54
+ if (imm == 0xff) {
134
-|
55
+ memset(env->zarray, 0, sizeof(env->zarray));
135
-| aIsLargerSignificand is only valid if both a and b are NaNs
56
+ return;
136
-| of some kind, and is true if a has the larger significand,
57
+ }
137
-| or if both a and b have the same significand but a is
58
+
138
-| positive but b is negative. It is only needed for the x87
59
+ /*
139
-| tie-break rule.
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
140
-*----------------------------------------------------------------------------*/
61
+ * so each row is discontiguous within ZA[].
141
-
62
+ */
142
-static int pickNaN(FloatClass a_cls, FloatClass b_cls,
63
+ for (i = 0; i < svl; i++) {
143
- bool aIsLargerSignificand, float_status *status)
64
+ if (imm & (1 << (i % 8))) {
144
-{
65
+ memset(&env->zarray[i], 0, svl);
145
- /*
66
+ }
146
- * We guarantee not to require the target to tell us how to
67
+ }
147
- * pick a NaN if we're always returning the default NaN.
68
+}
148
- * But if we're not in default-NaN mode then the target must
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
- * specify via set_float_2nan_prop_rule().
70
index XXXXXXX..XXXXXXX 100644
150
- */
71
--- a/target/arm/translate-sme.c
151
- assert(!status->default_nan_mode);
72
+++ b/target/arm/translate-sme.c
152
-
73
@@ -XXX,XX +XXX,XX @@
153
- switch (status->float_2nan_prop_rule) {
74
*/
154
- case float_2nan_prop_s_ab:
75
155
- if (is_snan(a_cls)) {
76
#include "decode-sme.c.inc"
156
- return 0;
77
+
157
- } else if (is_snan(b_cls)) {
78
+
158
- return 1;
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
159
- } else if (is_qnan(a_cls)) {
80
+{
160
- return 0;
81
+ if (!dc_isar_feature(aa64_sme, s)) {
161
- } else {
82
+ return false;
162
- return 1;
83
+ }
163
- }
84
+ if (sme_za_enabled_check(s)) {
164
- break;
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
165
- case float_2nan_prop_s_ba:
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
166
- if (is_snan(b_cls)) {
87
+ }
167
- return 1;
88
+ return true;
168
- } else if (is_snan(a_cls)) {
89
+}
169
- return 0;
170
- } else if (is_qnan(b_cls)) {
171
- return 1;
172
- } else {
173
- return 0;
174
- }
175
- break;
176
- case float_2nan_prop_ab:
177
- if (is_nan(a_cls)) {
178
- return 0;
179
- } else {
180
- return 1;
181
- }
182
- break;
183
- case float_2nan_prop_ba:
184
- if (is_nan(b_cls)) {
185
- return 1;
186
- } else {
187
- return 0;
188
- }
189
- break;
190
- case float_2nan_prop_x87:
191
- /*
192
- * This implements x87 NaN propagation rules:
193
- * SNaN + QNaN => return the QNaN
194
- * two SNaNs => return the one with the larger significand, silenced
195
- * two QNaNs => return the one with the larger significand
196
- * SNaN and a non-NaN => return the SNaN, silenced
197
- * QNaN and a non-NaN => return the QNaN
198
- *
199
- * If we get down to comparing significands and they are the same,
200
- * return the NaN with the positive sign bit (if any).
201
- */
202
- if (is_snan(a_cls)) {
203
- if (is_snan(b_cls)) {
204
- return aIsLargerSignificand ? 0 : 1;
205
- }
206
- return is_qnan(b_cls) ? 1 : 0;
207
- } else if (is_qnan(a_cls)) {
208
- if (is_snan(b_cls) || !is_qnan(b_cls)) {
209
- return 0;
210
- } else {
211
- return aIsLargerSignificand ? 0 : 1;
212
- }
213
- } else {
214
- return 1;
215
- }
216
- default:
217
- g_assert_not_reached();
218
- }
219
-}
220
-
221
/*----------------------------------------------------------------------------
222
| Returns 1 if the double-precision floating-point value `a' is a quiet
223
| NaN; otherwise returns 0.
90
--
224
--
91
2.25.1
225
2.34.1
226
227
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Mark these as a non-streaming instructions, which should trap
3
Remember if there was an SNaN, and use that to simplify
4
if full a64 support is not enabled in streaming mode.
4
float_2nan_prop_s_{ab,ba} to only the snan component.
5
Then, fall through to the corresponding
6
float_2nan_prop_{ab,ba} case to handle any remaining
7
nans, which must be quiet.
5
8
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20241203203949.483774-10-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
13
---
11
target/arm/sme-fa64.decode | 1 -
14
fpu/softfloat-parts.c.inc | 32 ++++++++++++--------------------
12
target/arm/translate-sve.c | 12 ++++++------
15
1 file changed, 12 insertions(+), 20 deletions(-)
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
16
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
17
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
19
--- a/fpu/softfloat-parts.c.inc
18
+++ b/target/arm/sme-fa64.decode
20
+++ b/fpu/softfloat-parts.c.inc
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
@@ -XXX,XX +XXX,XX @@ static void partsN(return_nan)(FloatPartsN *a, float_status *s)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
float_status *s)
22
24
{
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
25
+ bool have_snan = false;
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
26
int cmp, which;
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
27
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
28
if (is_snan(a->cls) || is_snan(b->cls)) {
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
28
index XXXXXXX..XXXXXXX 100644
30
+ have_snan = true;
29
--- a/target/arm/translate-sve.c
31
}
30
+++ b/target/arm/translate-sve.c
32
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
33
if (s->default_nan_mode) {
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
34
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
35
34
36
switch (s->float_2nan_prop_rule) {
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
37
case float_2nan_prop_s_ab:
36
- gen_helper_gvec_smmla_b, a, 0)
38
- if (is_snan(a->cls)) {
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
39
- which = 0;
38
- gen_helper_gvec_usmmla_b, a, 0)
40
- } else if (is_snan(b->cls)) {
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
41
- which = 1;
40
- gen_helper_gvec_ummla_b, a, 0)
42
- } else if (is_qnan(a->cls)) {
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
43
- which = 0;
42
+ gen_helper_gvec_smmla_b, a, 0)
44
- } else {
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
45
- which = 1;
44
+ gen_helper_gvec_usmmla_b, a, 0)
46
+ if (have_snan) {
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
47
+ which = is_snan(a->cls) ? 0 : 1;
46
+ gen_helper_gvec_ummla_b, a, 0)
48
+ break;
47
49
}
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
50
- break;
49
gen_helper_gvec_bfdot, a, 0)
51
- case float_2nan_prop_s_ba:
52
- if (is_snan(b->cls)) {
53
- which = 1;
54
- } else if (is_snan(a->cls)) {
55
- which = 0;
56
- } else if (is_qnan(b->cls)) {
57
- which = 1;
58
- } else {
59
- which = 0;
60
- }
61
- break;
62
+ /* fall through */
63
case float_2nan_prop_ab:
64
which = is_nan(a->cls) ? 0 : 1;
65
break;
66
+ case float_2nan_prop_s_ba:
67
+ if (have_snan) {
68
+ which = is_snan(b->cls) ? 1 : 0;
69
+ break;
70
+ }
71
+ /* fall through */
72
case float_2nan_prop_ba:
73
which = is_nan(b->cls) ? 1 : 0;
74
break;
50
--
75
--
51
2.25.1
76
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We can handle both exception entry and exception return by
3
Move the fractional comparison to the end of the
4
hooking into aarch64_sve_change_el.
4
float_2nan_prop_x87 case. This is not required for
5
any other 2nan propagation rule. Reorganize the
6
x87 case itself to break out of the switch when the
7
fractional comparison is not required.
5
8
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20241203203949.483774-11-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
13
---
11
target/arm/helper.c | 15 +++++++++++++--
14
fpu/softfloat-parts.c.inc | 19 +++++++++----------
12
1 file changed, 13 insertions(+), 2 deletions(-)
15
1 file changed, 9 insertions(+), 10 deletions(-)
13
16
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
17
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
19
--- a/fpu/softfloat-parts.c.inc
17
+++ b/target/arm/helper.c
20
+++ b/fpu/softfloat-parts.c.inc
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
21
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
19
return;
22
return a;
20
}
23
}
21
24
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
25
- cmp = frac_cmp(a, b);
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
26
- if (cmp == 0) {
24
+
27
- cmp = a->sign < b->sign;
25
+ /*
28
- }
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
29
-
27
+ * invoke ResetSVEState when taking an exception from, or
30
switch (s->float_2nan_prop_rule) {
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
31
case float_2nan_prop_s_ab:
29
+ */
32
if (have_snan) {
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
33
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
31
+ arm_reset_sve_state(env);
34
* return the NaN with the positive sign bit (if any).
32
+ return;
35
*/
33
+ }
36
if (is_snan(a->cls)) {
34
+
37
- if (is_snan(b->cls)) {
35
/*
38
- which = cmp > 0 ? 0 : 1;
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
39
- } else {
37
* at ELx, or not available because the EL is in AArch32 state, then
40
+ if (!is_snan(b->cls)) {
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
41
which = is_qnan(b->cls) ? 1 : 0;
39
* we already have the correct register contents when encountering the
42
+ break;
40
* vq0->vq0 transition between EL0->EL1.
43
}
41
*/
44
} else if (is_qnan(a->cls)) {
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
45
if (is_snan(b->cls) || !is_qnan(b->cls)) {
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
46
which = 0;
44
? sve_vqm1_for_el(env, old_el) : 0);
47
- } else {
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
48
- which = cmp > 0 ? 0 : 1;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
49
+ break;
47
? sve_vqm1_for_el(env, new_el) : 0);
50
}
48
51
} else {
52
which = 1;
53
+ break;
54
}
55
+ cmp = frac_cmp(a, b);
56
+ if (cmp == 0) {
57
+ cmp = a->sign < b->sign;
58
+ }
59
+ which = cmp > 0 ? 0 : 1;
60
break;
61
default:
62
g_assert_not_reached();
49
--
63
--
50
2.25.1
64
2.34.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This new behaviour is in the ARM pseudocode function
3
Replace the "index" selecting between A and B with a result variable
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
4
of the proper type. This improves clarity within the function.
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
the trap would be delivered is in AArch64 mode.
7
5
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
detection ought to be trivially true, but the pseudocode still contains
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Message-id: 20241203203949.483774-12-richard.henderson@linaro.org
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
---
10
---
25
target/arm/cpu.h | 7 +++
11
fpu/softfloat-parts.c.inc | 28 +++++++++++++---------------
26
target/arm/translate.h | 4 ++
12
1 file changed, 13 insertions(+), 15 deletions(-)
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
28
target/arm/helper.c | 41 +++++++++++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
30
target/arm/translate-vfp.c | 12 +++++
31
target/arm/translate.c | 2 +
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
35
13
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
14
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
37
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.h
16
--- a/fpu/softfloat-parts.c.inc
39
+++ b/target/arm/cpu.h
17
+++ b/fpu/softfloat-parts.c.inc
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
18
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
41
* the same thing as the current security state of the processor!
19
float_status *s)
42
*/
20
{
43
FIELD(TBFLAG_A32, NS, 10, 1)
21
bool have_snan = false;
44
+/*
22
- int cmp, which;
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
23
+ FloatPartsN *ret;
46
+ * This requires an SME trap from AArch32 mode when using NEON.
24
+ int cmp;
47
+ */
25
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
26
if (is_snan(a->cls) || is_snan(b->cls)) {
49
27
float_raise(float_flag_invalid | float_flag_invalid_snan, s);
50
/*
28
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
51
* Bit usage when in AArch32 state, for M-profile only.
29
switch (s->float_2nan_prop_rule) {
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
30
case float_2nan_prop_s_ab:
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
31
if (have_snan) {
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
32
- which = is_snan(a->cls) ? 0 : 1;
55
FIELD(TBFLAG_A64, SVL, 24, 4)
33
+ ret = is_snan(a->cls) ? a : b;
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
34
break;
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
58
59
/*
60
* Helpers for using the above.
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.h
64
+++ b/target/arm/translate.h
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
66
bool pstate_sm;
67
/* True if PSTATE.ZA is set. */
68
bool pstate_za;
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
75
/*
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/sme-fa64.decode
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/target/arm/helper.c
175
+++ b/target/arm/helper.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
178
}
179
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
181
+static bool sme_fa64(CPUARMState *env, int el)
182
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
184
+ return false;
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
212
213
+ /*
214
+ * The SME exception we are testing for is raised via
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
217
+ */
218
+ if (el == 0
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
220
+ && (!arm_is_el2_enabled(env)
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
225
+ }
226
+
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
228
}
229
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
231
}
35
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
36
/* fall through */
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
37
case float_2nan_prop_ab:
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
38
- which = is_nan(a->cls) ? 0 : 1;
39
+ ret = is_nan(a->cls) ? a : b;
40
break;
41
case float_2nan_prop_s_ba:
42
if (have_snan) {
43
- which = is_snan(b->cls) ? 1 : 0;
44
+ ret = is_snan(b->cls) ? b : a;
45
break;
235
}
46
}
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
47
/* fall through */
237
}
48
case float_2nan_prop_ba:
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
49
- which = is_nan(b->cls) ? 1 : 0;
239
index XXXXXXX..XXXXXXX 100644
50
+ ret = is_nan(b->cls) ? b : a;
240
--- a/target/arm/translate-a64.c
51
break;
241
+++ b/target/arm/translate-a64.c
52
case float_2nan_prop_x87:
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
53
/*
243
* unallocated-encoding checks (otherwise the syndrome information
54
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
244
* for the resulting exception will be incorrect).
55
*/
245
*/
56
if (is_snan(a->cls)) {
246
-static bool fp_access_check(DisasContext *s)
57
if (!is_snan(b->cls)) {
247
+static bool fp_access_check_only(DisasContext *s)
58
- which = is_qnan(b->cls) ? 1 : 0;
248
{
59
+ ret = is_qnan(b->cls) ? b : a;
249
if (s->fp_excp_el) {
60
break;
250
assert(!s->fp_access_checked);
61
}
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
62
} else if (is_qnan(a->cls)) {
252
return true;
63
if (is_snan(b->cls) || !is_qnan(b->cls)) {
253
}
64
- which = 0;
254
65
+ ret = a;
255
+static bool fp_access_check(DisasContext *s)
66
break;
256
+{
67
}
257
+ if (!fp_access_check_only(s)) {
68
} else {
258
+ return false;
69
- which = 1;
259
+ }
70
+ ret = b;
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
71
break;
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
72
}
262
+ syn_smetrap(SME_ET_Streaming, false));
73
cmp = frac_cmp(a, b);
263
+ return false;
74
if (cmp == 0) {
264
+ }
75
cmp = a->sign < b->sign;
265
+ return true;
76
}
266
+}
77
- which = cmp > 0 ? 0 : 1;
267
+
78
+ ret = cmp > 0 ? a : b;
268
/* Check that SVE access is enabled. If it is, return true.
79
break;
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
80
default:
273
g_assert_not_reached();
81
g_assert_not_reached();
274
}
82
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
83
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
84
- if (which) {
277
return;
85
- a = b;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
86
+ if (is_snan(ret->cls)) {
279
return;
87
+ parts_silence_nan(ret, s);
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
88
}
89
- if (is_snan(a->cls)) {
90
- parts_silence_nan(a, s);
91
- }
92
- return a;
93
+ return ret;
282
}
94
}
283
95
284
+/*
96
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/arm/translate.c
350
+++ b/target/arm/translate.c
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
354
}
355
+ dc->sme_trap_nonstreaming =
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
357
}
358
dc->cp_regs = cpu->cp_regs;
359
dc->features = env->features;
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
361
index XXXXXXX..XXXXXXX 100644
362
--- a/target/arm/meson.build
363
+++ b/target/arm/meson.build
364
@@ -XXX,XX +XXX,XX @@
365
gen = [
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
372
--
97
--
373
2.25.1
98
2.34.1
99
100
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Leif Lindholm <quic_llindhol@quicinc.com>
2
2
3
Mark these as a non-streaming instructions, which should trap
3
I'm migrating to Qualcomm's new open source email infrastructure, so
4
if full a64 support is not enabled in streaming mode.
4
update my email address, and update the mailmap to match.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Leif Lindholm <leif.lindholm@oss.qualcomm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Leif Lindholm <quic_llindhol@quicinc.com>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
8
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Message-id: 20241205114047.1125842-1-leif.lindholm@oss.qualcomm.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
13
---
11
target/arm/sme-fa64.decode | 2 --
14
MAINTAINERS | 2 +-
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
15
.mailmap | 5 +++--
13
2 files changed, 15 insertions(+), 11 deletions(-)
16
2 files changed, 4 insertions(+), 3 deletions(-)
14
17
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
diff --git a/MAINTAINERS b/MAINTAINERS
16
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
20
--- a/MAINTAINERS
18
+++ b/target/arm/sme-fa64.decode
21
+++ b/MAINTAINERS
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
@@ -XXX,XX +XXX,XX @@ F: include/hw/ssi/imx_spi.h
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
SBSA-REF
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
M: Radoslaw Biernacki <rad@semihalf.com>
22
25
M: Peter Maydell <peter.maydell@linaro.org>
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
26
-R: Leif Lindholm <quic_llindhol@quicinc.com>
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
27
+R: Leif Lindholm <leif.lindholm@oss.qualcomm.com>
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
28
R: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
29
L: qemu-arm@nongnu.org
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
30
S: Maintained
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
diff --git a/.mailmap b/.mailmap
29
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
33
--- a/.mailmap
31
+++ b/target/arm/translate-sve.c
34
+++ b/.mailmap
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
35
@@ -XXX,XX +XXX,XX @@ Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
36
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
34
NULL, gen_helper_sve2_pmull_d,
37
James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com>
35
};
38
Juan Quintela <quintela@trasno.org> <quintela@redhat.com>
36
- if (a->esz == 0
39
-Leif Lindholm <quic_llindhol@quicinc.com> <leif.lindholm@linaro.org>
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
40
-Leif Lindholm <quic_llindhol@quicinc.com> <leif@nuviainc.com>
38
- : !dc_isar_feature(aa64_sve, s)) {
41
+Leif Lindholm <leif.lindholm@oss.qualcomm.com> <quic_llindhol@quicinc.com>
39
+
42
+Leif Lindholm <leif.lindholm@oss.qualcomm.com> <leif.lindholm@linaro.org>
40
+ if (a->esz == 0) {
43
+Leif Lindholm <leif.lindholm@oss.qualcomm.com> <leif@nuviainc.com>
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
44
Luc Michel <luc@lmichel.fr> <luc.michel@git.antfield.fr>
42
+ return false;
45
Luc Michel <luc@lmichel.fr> <luc.michel@greensocs.com>
43
+ }
46
Luc Michel <luc@lmichel.fr> <lmichel@kalray.eu>
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
46
return false;
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
51
*/
52
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
59
+ 0, FPST_FPCR)
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
62
+ 0, FPST_FPCR)
63
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
68
gen_helper_gvec_bfdot_idx, a)
69
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
71
- gen_helper_gvec_bfmmla, a, 0)
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
73
+ gen_helper_gvec_bfmmla, a, 0)
74
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
76
{
77
--
47
--
78
2.25.1
48
2.34.1
49
50
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Vikram Garhwal <vikram.garhwal@bytedance.com>
2
2
3
Mark these as a non-streaming instructions, which should trap
3
Previously, maintainer role was paused due to inactive email id. Commit id:
4
if full a64 support is not enabled in streaming mode.
4
c009d715721861984c4987bcc78b7ee183e86d75.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Vikram Garhwal <vikram.garhwal@bytedance.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Francisco Iglesias <francisco.iglesias@amd.com>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
8
Message-id: 20241204184205.12952-1-vikram.garhwal@bytedance.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/sme-fa64.decode | 3 ---
11
MAINTAINERS | 2 ++
12
target/arm/translate-sve.c | 22 ++++++++++++----------
12
1 file changed, 2 insertions(+)
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
13
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
14
diff --git a/MAINTAINERS b/MAINTAINERS
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
16
--- a/MAINTAINERS
18
+++ b/target/arm/sme-fa64.decode
17
+++ b/MAINTAINERS
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
18
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/fuzz-sb16-test.c
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
19
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
20
Xilinx CAN
22
21
M: Francisco Iglesias <francisco.iglesias@amd.com>
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
+M: Vikram Garhwal <vikram.garhwal@bytedance.com>
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
S: Maintained
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
24
F: hw/net/can/xlnx-*
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
25
F: include/hw/net/xlnx-*
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
26
@@ -XXX,XX +XXX,XX @@ F: include/hw/rx/
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
27
CAN bus subsystem and hardware
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
M: Pavel Pisa <pisa@cmp.felk.cvut.cz>
30
index XXXXXXX..XXXXXXX 100644
29
M: Francisco Iglesias <francisco.iglesias@amd.com>
31
--- a/target/arm/translate-sve.c
30
+M: Vikram Garhwal <vikram.garhwal@bytedance.com>
32
+++ b/target/arm/translate-sve.c
31
S: Maintained
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
32
W: https://canbus.pages.fel.cvut.cz/
34
NULL, gen_helper_sve_fexpa_h,
33
F: net/can/*
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
34
--
92
2.25.1
35
2.34.1
diff view generated by jsdifflib