1
The following changes since commit 553cf5d7c47bee05a3dec9461c1f8430316d516b:
1
The following changes since commit c5ea91da443b458352c1b629b490ee6631775cb4:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200626' into staging (2020-06-26 18:22:36 +0100)
3
Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20200626-1
7
https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20230911
8
8
9
for you to fetch changes up to b39d59434ea10649fdb9e0a339c30c76e38c5e17:
9
for you to fetch changes up to e7a03409f29e2da59297d55afbaec98c96e43e3a:
10
10
11
target/riscv: configure and turn on vector extension from command line (2020-06-26 14:22:15 -0700)
11
target/riscv: don't read CSR in riscv_csrrw_do64 (2023-09-11 11:45:55 +1000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
This PR contains two patches to improve PLIC support in QEMU.
14
First RISC-V PR for 8.2
15
15
16
The rest of the PR is adding support for the v0.7.1 RISC-V vector
16
* Remove 'host' CPU from TCG
17
extensions. This is experimental support as the vector extensions are
17
* riscv_htif Fixup printing on big endian hosts
18
still in a draft state.
18
* Add zmmul isa string
19
* Add smepmp isa string
20
* Fix page_check_range use in fault-only-first
21
* Use existing lookup tables for MixColumns
22
* Add RISC-V vector cryptographic instruction set support
23
* Implement WARL behaviour for mcountinhibit/mcounteren
24
* Add Zihintntl extension ISA string to DTS
25
* Fix zfa fleq.d and fltq.d
26
* Fix upper/lower mtime write calculation
27
* Make rtc variable names consistent
28
* Use abi type for linux-user target_ucontext
29
* Add RISC-V KVM AIA Support
30
* Fix riscv,pmu DT node path in the virt machine
31
* Update CSR bits name for svadu extension
32
* Mark zicond non-experimental
33
* Fix satp_mode_finalize() when satp_mode.supported = 0
34
* Fix non-KVM --enable-debug build
35
* Add new extensions to hwprobe
36
* Use accelerated helper for AES64KS1I
37
* Allocate itrigger timers only once
38
* Respect mseccfg.RLB for pmpaddrX changes
39
* Align the AIA model to v1.0 ratified spec
40
* Don't read the CSR in riscv_csrrw_do64
19
41
20
----------------------------------------------------------------
42
----------------------------------------------------------------
21
Jessica Clarke (2):
43
Akihiko Odaki (1):
22
riscv: plic: Honour source priorities
44
target/riscv: Allocate itrigger timers only once
23
riscv: plic: Add a couple of missing sifive_plic_update calls
24
45
25
LIU Zhiwei (61):
46
Ard Biesheuvel (2):
26
target/riscv: add vector extension field in CPURISCVState
47
target/riscv: Use existing lookup tables for MixColumns
27
target/riscv: implementation-defined constant parameters
48
target/riscv: Use accelerated helper for AES64KS1I
28
target/riscv: support vector extension csr
29
target/riscv: add vector configure instruction
30
target/riscv: add an internals.h header
31
target/riscv: add vector stride load and store instructions
32
target/riscv: add vector index load and store instructions
33
target/riscv: add fault-only-first unit stride load
34
target/riscv: add vector amo operations
35
target/riscv: vector single-width integer add and subtract
36
target/riscv: vector widening integer add and subtract
37
target/riscv: vector integer add-with-carry / subtract-with-borrow instructions
38
target/riscv: vector bitwise logical instructions
39
target/riscv: vector single-width bit shift instructions
40
target/riscv: vector narrowing integer right shift instructions
41
target/riscv: vector integer comparison instructions
42
target/riscv: vector integer min/max instructions
43
target/riscv: vector single-width integer multiply instructions
44
target/riscv: vector integer divide instructions
45
target/riscv: vector widening integer multiply instructions
46
target/riscv: vector single-width integer multiply-add instructions
47
target/riscv: vector widening integer multiply-add instructions
48
target/riscv: vector integer merge and move instructions
49
target/riscv: vector single-width saturating add and subtract
50
target/riscv: vector single-width averaging add and subtract
51
target/riscv: vector single-width fractional multiply with rounding and saturation
52
target/riscv: vector widening saturating scaled multiply-add
53
target/riscv: vector single-width scaling shift instructions
54
target/riscv: vector narrowing fixed-point clip instructions
55
target/riscv: vector single-width floating-point add/subtract instructions
56
target/riscv: vector widening floating-point add/subtract instructions
57
target/riscv: vector single-width floating-point multiply/divide instructions
58
target/riscv: vector widening floating-point multiply
59
target/riscv: vector single-width floating-point fused multiply-add instructions
60
target/riscv: vector widening floating-point fused multiply-add instructions
61
target/riscv: vector floating-point square-root instruction
62
target/riscv: vector floating-point min/max instructions
63
target/riscv: vector floating-point sign-injection instructions
64
target/riscv: vector floating-point compare instructions
65
target/riscv: vector floating-point classify instructions
66
target/riscv: vector floating-point merge instructions
67
target/riscv: vector floating-point/integer type-convert instructions
68
target/riscv: widening floating-point/integer type-convert instructions
69
target/riscv: narrowing floating-point/integer type-convert instructions
70
target/riscv: vector single-width integer reduction instructions
71
target/riscv: vector widening integer reduction instructions
72
target/riscv: vector single-width floating-point reduction instructions
73
target/riscv: vector widening floating-point reduction instructions
74
target/riscv: vector mask-register logical instructions
75
target/riscv: vector mask population count vmpopc
76
target/riscv: vmfirst find-first-set mask bit
77
target/riscv: set-X-first mask bit
78
target/riscv: vector iota instruction
79
target/riscv: vector element index instruction
80
target/riscv: integer extract instruction
81
target/riscv: integer scalar move instruction
82
target/riscv: floating-point scalar move instructions
83
target/riscv: vector slide instructions
84
target/riscv: vector register gather instruction
85
target/riscv: vector compress instruction
86
target/riscv: configure and turn on vector extension from command line
87
49
88
target/riscv/cpu.h | 82 +-
50
Conor Dooley (1):
89
target/riscv/cpu_bits.h | 15 +
51
hw/riscv: virt: Fix riscv,pmu DT node path
90
target/riscv/helper.h | 1069 +++++++
91
target/riscv/internals.h | 41 +
92
target/riscv/insn32-64.decode | 11 +
93
target/riscv/insn32.decode | 372 +++
94
hw/riscv/sifive_plic.c | 20 +-
95
target/riscv/cpu.c | 50 +
96
target/riscv/csr.c | 75 +-
97
target/riscv/fpu_helper.c | 33 +-
98
target/riscv/insn_trans/trans_rvv.inc.c | 2888 ++++++++++++++++++
99
target/riscv/translate.c | 27 +-
100
target/riscv/vector_helper.c | 4899 +++++++++++++++++++++++++++++++
101
target/riscv/Makefile.objs | 2 +-
102
14 files changed, 9534 insertions(+), 50 deletions(-)
103
create mode 100644 target/riscv/internals.h
104
create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
105
create mode 100644 target/riscv/vector_helper.c
106
52
53
Daniel Henrique Barboza (6):
54
target/riscv/cpu.c: do not run 'host' CPU with TCG
55
target/riscv/cpu.c: add zmmul isa string
56
target/riscv/cpu.c: add smepmp isa string
57
target/riscv: fix satp_mode_finalize() when satp_mode.supported = 0
58
hw/riscv/virt.c: fix non-KVM --enable-debug build
59
hw/intc/riscv_aplic.c fix non-KVM --enable-debug build
60
61
Dickon Hood (2):
62
target/riscv: Refactor translation of vector-widening instruction
63
target/riscv: Add Zvbb ISA extension support
64
65
Jason Chien (3):
66
target/riscv: Add Zihintntl extension ISA string to DTS
67
hw/intc: Fix upper/lower mtime write calculation
68
hw/intc: Make rtc variable names consistent
69
70
Kiran Ostrolenk (4):
71
target/riscv: Refactor some of the generic vector functionality
72
target/riscv: Refactor vector-vector translation macro
73
target/riscv: Refactor some of the generic vector functionality
74
target/riscv: Add Zvknh ISA extension support
75
76
LIU Zhiwei (3):
77
target/riscv: Fix page_check_range use in fault-only-first
78
target/riscv: Fix zfa fleq.d and fltq.d
79
linux-user/riscv: Use abi type for target_ucontext
80
81
Lawrence Hunter (2):
82
target/riscv: Add Zvbc ISA extension support
83
target/riscv: Add Zvksh ISA extension support
84
85
Leon Schuermann (1):
86
target/riscv/pmp.c: respect mseccfg.RLB for pmpaddrX changes
87
88
Max Chou (3):
89
crypto: Create sm4_subword
90
crypto: Add SM4 constant parameter CK
91
target/riscv: Add Zvksed ISA extension support
92
93
Nazar Kazakov (4):
94
target/riscv: Remove redundant "cpu_vl == 0" checks
95
target/riscv: Move vector translation checks
96
target/riscv: Add Zvkned ISA extension support
97
target/riscv: Add Zvkg ISA extension support
98
99
Nikita Shubin (1):
100
target/riscv: don't read CSR in riscv_csrrw_do64
101
102
Rob Bradford (1):
103
target/riscv: Implement WARL behaviour for mcountinhibit/mcounteren
104
105
Robbin Ehn (1):
106
linux-user/riscv: Add new extensions to hwprobe
107
108
Thomas Huth (2):
109
hw/char/riscv_htif: Fix printing of console characters on big endian hosts
110
hw/char/riscv_htif: Fix the console syscall on big endian hosts
111
112
Tommy Wu (1):
113
target/riscv: Align the AIA model to v1.0 ratified spec
114
115
Vineet Gupta (1):
116
riscv: zicond: make non-experimental
117
118
Weiwei Li (1):
119
target/riscv: Update CSR bits name for svadu extension
120
121
Yong-Xuan Wang (5):
122
target/riscv: support the AIA device emulation with KVM enabled
123
target/riscv: check the in-kernel irqchip support
124
target/riscv: Create an KVM AIA irqchip
125
target/riscv: update APLIC and IMSIC to support KVM AIA
126
target/riscv: select KVM AIA in riscv virt machine
127
128
include/crypto/aes.h | 7 +
129
include/crypto/sm4.h | 9 +
130
target/riscv/cpu_bits.h | 8 +-
131
target/riscv/cpu_cfg.h | 9 +
132
target/riscv/debug.h | 3 +-
133
target/riscv/helper.h | 98 +++
134
target/riscv/kvm_riscv.h | 5 +
135
target/riscv/vector_internals.h | 228 +++++++
136
target/riscv/insn32.decode | 58 ++
137
crypto/aes.c | 4 +-
138
crypto/sm4.c | 10 +
139
hw/char/riscv_htif.c | 12 +-
140
hw/intc/riscv_aclint.c | 11 +-
141
hw/intc/riscv_aplic.c | 52 +-
142
hw/intc/riscv_imsic.c | 25 +-
143
hw/riscv/virt.c | 374 ++++++------
144
linux-user/riscv/signal.c | 4 +-
145
linux-user/syscall.c | 14 +-
146
target/arm/tcg/crypto_helper.c | 10 +-
147
target/riscv/cpu.c | 83 ++-
148
target/riscv/cpu_helper.c | 6 +-
149
target/riscv/crypto_helper.c | 51 +-
150
target/riscv/csr.c | 54 +-
151
target/riscv/debug.c | 15 +-
152
target/riscv/kvm.c | 201 ++++++-
153
target/riscv/pmp.c | 4 +
154
target/riscv/translate.c | 1 +
155
target/riscv/vcrypto_helper.c | 970 ++++++++++++++++++++++++++++++
156
target/riscv/vector_helper.c | 245 +-------
157
target/riscv/vector_internals.c | 81 +++
158
target/riscv/insn_trans/trans_rvv.c.inc | 171 +++---
159
target/riscv/insn_trans/trans_rvvk.c.inc | 606 +++++++++++++++++++
160
target/riscv/insn_trans/trans_rvzfa.c.inc | 4 +-
161
target/riscv/meson.build | 4 +-
162
34 files changed, 2785 insertions(+), 652 deletions(-)
163
create mode 100644 target/riscv/vector_internals.h
164
create mode 100644 target/riscv/vcrypto_helper.c
165
create mode 100644 target/riscv/vector_internals.c
166
create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
The 'host' CPU is available in a CONFIG_KVM build and it's currently
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
available for all accels, but is a KVM only CPU. This means that in a
5
Message-id: 20200623215920.2594-60-zhiwei_liu@c-sky.com
5
RISC-V KVM capable host we can do things like this:
6
7
$ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic
8
qemu-system-riscv64: H extension requires priv spec 1.12.0
9
10
This CPU does not have a priv spec because we don't filter its extensions
11
via priv spec. We shouldn't be reaching riscv_cpu_realize_tcg() at all
12
with the 'host' CPU.
13
14
We don't have a way to filter the 'host' CPU out of the available CPU
15
options (-cpu help) if the build includes both KVM and TCG. What we can
16
do is to error out during riscv_cpu_realize_tcg() if the user chooses
17
the 'host' CPU with accel=tcg:
18
19
$ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic
20
qemu-system-riscv64: 'host' CPU is not compatible with TCG acceleration
21
22
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
23
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
24
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
25
Message-Id: <20230721133411.474105-1-dbarboza@ventanamicro.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
26
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
27
---
8
target/riscv/helper.h | 9 +++
28
target/riscv/cpu.c | 5 +++++
9
target/riscv/insn32.decode | 3 +
29
1 file changed, 5 insertions(+)
10
target/riscv/insn_trans/trans_rvv.inc.c | 78 +++++++++++++++++++++++++
11
target/riscv/vector_helper.c | 60 +++++++++++++++++++
12
4 files changed, 150 insertions(+)
13
30
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
31
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
15
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
33
--- a/target/riscv/cpu.c
17
+++ b/target/riscv/helper.h
34
+++ b/target/riscv/cpu.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32)
35
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp)
19
DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32)
36
CPURISCVState *env = &cpu->env;
20
DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32)
37
Error *local_err = NULL;
21
DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32)
38
22
+
39
+ if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) {
23
+DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
40
+ error_setg(errp, "'host' CPU is not compatible with TCG acceleration");
24
+DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
41
+ return;
25
+DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32)
28
+DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32)
29
+DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32)
30
+DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32)
31
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/riscv/insn32.decode
34
+++ b/target/riscv/insn32.decode
35
@@ -XXX,XX +XXX,XX @@ vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
36
vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
37
vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
38
vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
39
+vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
40
+vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
41
+vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
42
43
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
44
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
45
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/riscv/insn_trans/trans_rvv.inc.c
48
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
49
@@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check)
50
GEN_OPIVX_TRANS(vslidedown_vx, opivx_check)
51
GEN_OPIVX_TRANS(vslide1down_vx, opivx_check)
52
GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check)
53
+
54
+/* Vector Register Gather Instruction */
55
+static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
56
+{
57
+ return (vext_check_isa_ill(s) &&
58
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
59
+ vext_check_reg(s, a->rd, false) &&
60
+ vext_check_reg(s, a->rs1, false) &&
61
+ vext_check_reg(s, a->rs2, false) &&
62
+ (a->rd != a->rs2) && (a->rd != a->rs1));
63
+}
64
+
65
+GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
66
+
67
+static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
68
+{
69
+ return (vext_check_isa_ill(s) &&
70
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
71
+ vext_check_reg(s, a->rd, false) &&
72
+ vext_check_reg(s, a->rs2, false) &&
73
+ (a->rd != a->rs2));
74
+}
75
+
76
+/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
77
+static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
78
+{
79
+ if (!vrgather_vx_check(s, a)) {
80
+ return false;
81
+ }
42
+ }
82
+
43
+
83
+ if (a->vm && s->vl_eq_vlmax) {
44
riscv_cpu_validate_misa_mxl(cpu, &local_err);
84
+ int vlmax = s->vlen / s->mlen;
45
if (local_err != NULL) {
85
+ TCGv_i64 dest = tcg_temp_new_i64();
46
error_propagate(errp, local_err);
86
+
87
+ if (a->rs1 == 0) {
88
+ vec_element_loadi(s, dest, a->rs2, 0);
89
+ } else {
90
+ vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
91
+ }
92
+
93
+ tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
94
+ MAXSZ(s), MAXSZ(s), dest);
95
+ tcg_temp_free_i64(dest);
96
+ } else {
97
+ static gen_helper_opivx * const fns[4] = {
98
+ gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
99
+ gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
100
+ };
101
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);
102
+ }
103
+ return true;
104
+}
105
+
106
+/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
107
+static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
108
+{
109
+ if (!vrgather_vx_check(s, a)) {
110
+ return false;
111
+ }
112
+
113
+ if (a->vm && s->vl_eq_vlmax) {
114
+ if (a->rs1 >= s->vlen / s->mlen) {
115
+ tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd),
116
+ MAXSZ(s), MAXSZ(s), 0);
117
+ } else {
118
+ tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
119
+ endian_ofs(s, a->rs2, a->rs1),
120
+ MAXSZ(s), MAXSZ(s));
121
+ }
122
+ } else {
123
+ static gen_helper_opivx * const fns[4] = {
124
+ gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
125
+ gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
126
+ };
127
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1);
128
+ }
129
+ return true;
130
+}
131
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
132
index XXXXXXX..XXXXXXX 100644
133
--- a/target/riscv/vector_helper.c
134
+++ b/target/riscv/vector_helper.c
135
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
136
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
137
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
138
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
139
+
140
+/* Vector Register Gather Instruction */
141
+#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \
142
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
143
+ CPURISCVState *env, uint32_t desc) \
144
+{ \
145
+ uint32_t mlen = vext_mlen(desc); \
146
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
147
+ uint32_t vm = vext_vm(desc); \
148
+ uint32_t vl = env->vl; \
149
+ uint32_t index, i; \
150
+ \
151
+ for (i = 0; i < vl; i++) { \
152
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
153
+ continue; \
154
+ } \
155
+ index = *((ETYPE *)vs1 + H(i)); \
156
+ if (index >= vlmax) { \
157
+ *((ETYPE *)vd + H(i)) = 0; \
158
+ } else { \
159
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
160
+ } \
161
+ } \
162
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
163
+}
164
+
165
+/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
166
+GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb)
167
+GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh)
168
+GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl)
169
+GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
170
+
171
+#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \
172
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
173
+ CPURISCVState *env, uint32_t desc) \
174
+{ \
175
+ uint32_t mlen = vext_mlen(desc); \
176
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
177
+ uint32_t vm = vext_vm(desc); \
178
+ uint32_t vl = env->vl; \
179
+ uint32_t index = s1, i; \
180
+ \
181
+ for (i = 0; i < vl; i++) { \
182
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
183
+ continue; \
184
+ } \
185
+ if (index >= vlmax) { \
186
+ *((ETYPE *)vd + H(i)) = 0; \
187
+ } else { \
188
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
189
+ } \
190
+ } \
191
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
192
+}
193
+
194
+/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
195
+GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
196
+GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
197
+GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
198
+GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
199
--
47
--
200
2.27.0
48
2.41.0
201
49
202
50
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
The character that should be printed is stored in the 64 bit "payload"
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
variable. The code currently tries to print it by taking the address
5
Message-id: 20200623215920.2594-58-zhiwei_liu@c-sky.com
5
of the variable and passing this pointer to qemu_chr_fe_write(). However,
6
this only works on little endian hosts where the least significant bits
7
are stored on the lowest address. To do this in a portable way, we have
8
to store the value in an uint8_t variable instead.
9
10
Fixes: 5033606780 ("RISC-V HTIF Console")
11
Signed-off-by: Thomas Huth <thuth@redhat.com>
12
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
13
Reviewed-by: Bin Meng <bmeng@tinylab.org>
14
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Message-Id: <20230721094720.902454-2-thuth@redhat.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
17
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
18
---
8
target/riscv/insn32.decode | 3 ++
19
hw/char/riscv_htif.c | 3 ++-
9
target/riscv/insn_trans/trans_rvv.inc.c | 49 +++++++++++++++++++++++++
20
1 file changed, 2 insertions(+), 1 deletion(-)
10
2 files changed, 52 insertions(+)
11
21
12
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
22
diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
13
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
14
--- a/target/riscv/insn32.decode
24
--- a/hw/char/riscv_htif.c
15
+++ b/target/riscv/insn32.decode
25
+++ b/hw/char/riscv_htif.c
16
@@ -XXX,XX +XXX,XX @@
26
@@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
17
@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
27
s->tohost = 0; /* clear to indicate we read */
18
@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
28
return;
19
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
29
} else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
20
+@r2rd ....... ..... ..... ... ..... ....... %rs2 %rd
30
- qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1);
21
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
31
+ uint8_t ch = (uint8_t)payload;
22
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
32
+ qemu_chr_fe_write(&s->chr, &ch, 1);
23
@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd
33
resp = 0x100 | (uint8_t)payload;
24
@@ -XXX,XX +XXX,XX @@ viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
34
} else {
25
vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
35
qemu_log("HTIF device %d: unknown command\n", device);
26
vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
27
vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2
28
+vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd
29
+vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2
30
31
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
32
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
33
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/riscv/insn_trans/trans_rvv.inc.c
36
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
37
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
38
}
39
return false;
40
}
41
+
42
+/* Floating-Point Scalar Move Instructions */
43
+static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
44
+{
45
+ if (!s->vill && has_ext(s, RVF) &&
46
+ (s->mstatus_fs != 0) && (s->sew != 0)) {
47
+ unsigned int len = 8 << s->sew;
48
+
49
+ vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0);
50
+ if (len < 64) {
51
+ tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
52
+ MAKE_64BIT_MASK(len, 64 - len));
53
+ }
54
+
55
+ mark_fs_dirty(s);
56
+ return true;
57
+ }
58
+ return false;
59
+}
60
+
61
+/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
62
+static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
63
+{
64
+ if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) {
65
+ TCGv_i64 t1;
66
+ /* The instructions ignore LMUL and vector register group. */
67
+ uint32_t vlmax = s->vlen >> 3;
68
+
69
+ /* if vl == 0, skip vector register write back */
70
+ TCGLabel *over = gen_new_label();
71
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
72
+
73
+ /* zeroed all elements */
74
+ tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0);
75
+
76
+ /* NaN-box f[rs1] as necessary for SEW */
77
+ t1 = tcg_temp_new_i64();
78
+ if (s->sew == MO_64 && !has_ext(s, RVD)) {
79
+ tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32));
80
+ } else {
81
+ tcg_gen_mov_i64(t1, cpu_fpr[a->rs1]);
82
+ }
83
+ vec_element_storei(s, a->rd, 0, t1);
84
+ tcg_temp_free_i64(t1);
85
+ gen_set_label(over);
86
+ return true;
87
+ }
88
+ return false;
89
+}
90
--
36
--
91
2.27.0
37
2.41.0
92
38
93
39
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Values that have been read via cpu_physical_memory_read() from the
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
guest's memory have to be swapped in case the host endianness differs
5
Message-id: 20200623215920.2594-56-zhiwei_liu@c-sky.com
5
from the guest.
6
7
Fixes: a6e13e31d5 ("riscv_htif: Support console output via proxy syscall")
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
10
Reviewed-by: Bin Meng <bmeng@tinylab.org>
11
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
12
Message-Id: <20230721094720.902454-3-thuth@redhat.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
14
---
8
target/riscv/insn32.decode | 1 +
15
hw/char/riscv_htif.c | 9 +++++----
9
target/riscv/insn_trans/trans_rvv.inc.c | 116 ++++++++++++++++++++++++
16
1 file changed, 5 insertions(+), 4 deletions(-)
10
2 files changed, 117 insertions(+)
11
17
12
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
18
diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
13
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
14
--- a/target/riscv/insn32.decode
20
--- a/hw/char/riscv_htif.c
15
+++ b/target/riscv/insn32.decode
21
+++ b/hw/char/riscv_htif.c
16
@@ -XXX,XX +XXX,XX @@ vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
22
@@ -XXX,XX +XXX,XX @@
17
vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
23
#include "qemu/timer.h"
18
viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
24
#include "qemu/error-report.h"
19
vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
25
#include "exec/address-spaces.h"
20
+vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
26
+#include "exec/tswap.h"
21
27
#include "sysemu/dma.h"
22
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
28
23
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
29
#define RISCV_DEBUG_HTIF 0
24
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
30
@@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
25
index XXXXXXX..XXXXXXX 100644
31
} else {
26
--- a/target/riscv/insn_trans/trans_rvv.inc.c
32
uint64_t syscall[8];
27
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
33
cpu_physical_memory_read(payload, syscall, sizeof(syscall));
28
@@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
34
- if (syscall[0] == PK_SYS_WRITE &&
29
}
35
- syscall[1] == HTIF_DEV_CONSOLE &&
30
return false;
36
- syscall[3] == HTIF_CONSOLE_CMD_PUTC) {
31
}
37
+ if (tswap64(syscall[0]) == PK_SYS_WRITE &&
32
+
38
+ tswap64(syscall[1]) == HTIF_DEV_CONSOLE &&
33
+/*
39
+ tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
34
+ *** Vector Permutation Instructions
40
uint8_t ch;
35
+ */
41
- cpu_physical_memory_read(syscall[2], &ch, 1);
36
+
42
+ cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
37
+/* Integer Extract Instruction */
43
qemu_chr_fe_write(&s->chr, &ch, 1);
38
+
44
resp = 0x100 | (uint8_t)payload;
39
+static void load_element(TCGv_i64 dest, TCGv_ptr base,
45
} else {
40
+ int ofs, int sew)
41
+{
42
+ switch (sew) {
43
+ case MO_8:
44
+ tcg_gen_ld8u_i64(dest, base, ofs);
45
+ break;
46
+ case MO_16:
47
+ tcg_gen_ld16u_i64(dest, base, ofs);
48
+ break;
49
+ case MO_32:
50
+ tcg_gen_ld32u_i64(dest, base, ofs);
51
+ break;
52
+ case MO_64:
53
+ tcg_gen_ld_i64(dest, base, ofs);
54
+ break;
55
+ default:
56
+ g_assert_not_reached();
57
+ break;
58
+ }
59
+}
60
+
61
+/* offset of the idx element with base register r */
62
+static uint32_t endian_ofs(DisasContext *s, int r, int idx)
63
+{
64
+#ifdef HOST_WORDS_BIGENDIAN
65
+ return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
66
+#else
67
+ return vreg_ofs(s, r) + (idx << s->sew);
68
+#endif
69
+}
70
+
71
+/* adjust the index according to the endian */
72
+static void endian_adjust(TCGv_i32 ofs, int sew)
73
+{
74
+#ifdef HOST_WORDS_BIGENDIAN
75
+ tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
76
+#endif
77
+}
78
+
79
+/* Load idx >= VLMAX ? 0 : vreg[idx] */
80
+static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
81
+ int vreg, TCGv idx, int vlmax)
82
+{
83
+ TCGv_i32 ofs = tcg_temp_new_i32();
84
+ TCGv_ptr base = tcg_temp_new_ptr();
85
+ TCGv_i64 t_idx = tcg_temp_new_i64();
86
+ TCGv_i64 t_vlmax, t_zero;
87
+
88
+ /*
89
+ * Mask the index to the length so that we do
90
+ * not produce an out-of-range load.
91
+ */
92
+ tcg_gen_trunc_tl_i32(ofs, idx);
93
+ tcg_gen_andi_i32(ofs, ofs, vlmax - 1);
94
+
95
+ /* Convert the index to an offset. */
96
+ endian_adjust(ofs, s->sew);
97
+ tcg_gen_shli_i32(ofs, ofs, s->sew);
98
+
99
+ /* Convert the index to a pointer. */
100
+ tcg_gen_ext_i32_ptr(base, ofs);
101
+ tcg_gen_add_ptr(base, base, cpu_env);
102
+
103
+ /* Perform the load. */
104
+ load_element(dest, base,
105
+ vreg_ofs(s, vreg), s->sew);
106
+ tcg_temp_free_ptr(base);
107
+ tcg_temp_free_i32(ofs);
108
+
109
+ /* Flush out-of-range indexing to zero. */
110
+ t_vlmax = tcg_const_i64(vlmax);
111
+ t_zero = tcg_const_i64(0);
112
+ tcg_gen_extu_tl_i64(t_idx, idx);
113
+
114
+ tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
115
+ t_vlmax, dest, t_zero);
116
+
117
+ tcg_temp_free_i64(t_vlmax);
118
+ tcg_temp_free_i64(t_zero);
119
+ tcg_temp_free_i64(t_idx);
120
+}
121
+
122
+static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
123
+ int vreg, int idx)
124
+{
125
+ load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew);
126
+}
127
+
128
+static bool trans_vext_x_v(DisasContext *s, arg_r *a)
129
+{
130
+ TCGv_i64 tmp = tcg_temp_new_i64();
131
+ TCGv dest = tcg_temp_new();
132
+
133
+ if (a->rs1 == 0) {
134
+ /* Special case vmv.x.s rd, vs2. */
135
+ vec_element_loadi(s, tmp, a->rs2, 0);
136
+ } else {
137
+ /* This instruction ignores LMUL and vector register groups */
138
+ int vlmax = s->vlen >> (3 + s->sew);
139
+ vec_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax);
140
+ }
141
+ tcg_gen_trunc_i64_tl(dest, tmp);
142
+ gen_set_gpr(a->rd, dest);
143
+
144
+ tcg_temp_free(dest);
145
+ tcg_temp_free_i64(tmp);
146
+ return true;
147
+}
148
--
46
--
149
2.27.0
47
2.41.0
150
151
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
zmmul was promoted from experimental to ratified in commit 6d00ffad4e95.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
Add a riscv,isa string for it.
5
Message-id: 20200623215920.2594-55-zhiwei_liu@c-sky.com
5
6
Fixes: 6d00ffad4e95 ("target/riscv: move zmmul out of the experimental properties")
7
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
8
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
10
Message-Id: <20230720132424.371132-2-dbarboza@ventanamicro.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
12
---
8
target/riscv/helper.h | 5 +++++
13
target/riscv/cpu.c | 1 +
9
target/riscv/insn32.decode | 2 ++
14
1 file changed, 1 insertion(+)
10
target/riscv/insn_trans/trans_rvv.inc.c | 25 +++++++++++++++++++++++++
11
target/riscv/vector_helper.c | 24 ++++++++++++++++++++++++
12
4 files changed, 56 insertions(+)
13
15
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
18
--- a/target/riscv/cpu.c
17
+++ b/target/riscv/helper.h
19
+++ b/target/riscv/cpu.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32)
20
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
19
DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32)
21
ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
20
DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32)
22
ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
21
DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32)
23
ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
22
+
24
+ ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
23
+DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32)
25
ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
24
+DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32)
26
ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
25
+DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32)
27
ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin),
26
+DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32)
27
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/riscv/insn32.decode
30
+++ b/target/riscv/insn32.decode
31
@@ -XXX,XX +XXX,XX @@
32
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
33
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
34
@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
35
+@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
36
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
37
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
38
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
39
@@ -XXX,XX +XXX,XX @@ vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
40
vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
41
vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
42
viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
43
+vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
44
45
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
46
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
47
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/riscv/insn_trans/trans_rvv.inc.c
50
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
51
@@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
52
}
53
return false;
54
}
55
+
56
+/* Vector Element Index Instruction */
57
+static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
58
+{
59
+ if (vext_check_isa_ill(s) &&
60
+ vext_check_reg(s, a->rd, false) &&
61
+ vext_check_overlap_mask(s, a->rd, a->vm, false)) {
62
+ uint32_t data = 0;
63
+ TCGLabel *over = gen_new_label();
64
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
65
+
66
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
67
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
68
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
69
+ static gen_helper_gvec_2_ptr * const fns[4] = {
70
+ gen_helper_vid_v_b, gen_helper_vid_v_h,
71
+ gen_helper_vid_v_w, gen_helper_vid_v_d,
72
+ };
73
+ tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
74
+ cpu_env, 0, s->vlen / 8, data, fns[s->sew]);
75
+ gen_set_label(over);
76
+ return true;
77
+ }
78
+ return false;
79
+}
80
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/riscv/vector_helper.c
83
+++ b/target/riscv/vector_helper.c
84
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
85
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
86
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
87
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)
88
+
89
+/* Vector Element Index Instruction */
90
+#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \
91
+void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
92
+{ \
93
+ uint32_t mlen = vext_mlen(desc); \
94
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
95
+ uint32_t vm = vext_vm(desc); \
96
+ uint32_t vl = env->vl; \
97
+ int i; \
98
+ \
99
+ for (i = 0; i < vl; i++) { \
100
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
101
+ continue; \
102
+ } \
103
+ *((ETYPE *)vd + H(i)) = i; \
104
+ } \
105
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
106
+}
107
+
108
+GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
109
+GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
110
+GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
111
+GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
112
--
28
--
113
2.27.0
29
2.41.0
114
115
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
The cpu->cfg.epmp extension is still experimental, but it already has a
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
'smepmp' riscv,isa string. Add it.
5
Message-id: 20200623215920.2594-54-zhiwei_liu@c-sky.com
5
6
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
7
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Message-Id: <20230720132424.371132-3-dbarboza@ventanamicro.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
11
---
8
target/riscv/helper.h | 5 +++++
12
target/riscv/cpu.c | 1 +
9
target/riscv/insn32.decode | 1 +
13
1 file changed, 1 insertion(+)
10
target/riscv/insn_trans/trans_rvv.inc.c | 27 +++++++++++++++++++++++
11
target/riscv/vector_helper.c | 29 +++++++++++++++++++++++++
12
4 files changed, 62 insertions(+)
13
14
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
17
--- a/target/riscv/cpu.c
17
+++ b/target/riscv/helper.h
18
+++ b/target/riscv/cpu.c
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
19
DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32)
20
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
20
DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32)
21
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
21
DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32)
22
ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
22
+
23
+ ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp),
23
+DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32)
24
ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
24
+DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32)
25
ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
25
+DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32)
26
ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
26
+DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32)
27
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/riscv/insn32.decode
30
+++ b/target/riscv/insn32.decode
31
@@ -XXX,XX +XXX,XX @@ vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
32
vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
33
vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
34
vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
35
+viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
36
37
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
38
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
39
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/riscv/insn_trans/trans_rvv.inc.c
42
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
44
GEN_M_TRANS(vmsbf_m)
45
GEN_M_TRANS(vmsif_m)
46
GEN_M_TRANS(vmsof_m)
47
+
48
+/* Vector Iota Instruction */
49
+static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
50
+{
51
+ if (vext_check_isa_ill(s) &&
52
+ vext_check_reg(s, a->rd, false) &&
53
+ vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) &&
54
+ (a->vm != 0 || a->rd != 0)) {
55
+ uint32_t data = 0;
56
+ TCGLabel *over = gen_new_label();
57
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
58
+
59
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
60
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
61
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
62
+ static gen_helper_gvec_3_ptr * const fns[4] = {
63
+ gen_helper_viota_m_b, gen_helper_viota_m_h,
64
+ gen_helper_viota_m_w, gen_helper_viota_m_d,
65
+ };
66
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
67
+ vreg_ofs(s, a->rs2), cpu_env, 0,
68
+ s->vlen / 8, data, fns[s->sew]);
69
+ gen_set_label(over);
70
+ return true;
71
+ }
72
+ return false;
73
+}
74
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/riscv/vector_helper.c
77
+++ b/target/riscv/vector_helper.c
78
@@ -XXX,XX +XXX,XX @@ void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
79
{
80
vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
81
}
82
+
83
+/* Vector Iota Instruction */
84
+#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \
85
+void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
86
+ uint32_t desc) \
87
+{ \
88
+ uint32_t mlen = vext_mlen(desc); \
89
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
90
+ uint32_t vm = vext_vm(desc); \
91
+ uint32_t vl = env->vl; \
92
+ uint32_t sum = 0; \
93
+ int i; \
94
+ \
95
+ for (i = 0; i < vl; i++) { \
96
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
97
+ continue; \
98
+ } \
99
+ *((ETYPE *)vd + H(i)) = sum; \
100
+ if (vext_elem_mask(vs2, mlen, i)) { \
101
+ sum++; \
102
+ } \
103
+ } \
104
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
105
+}
106
+
107
+GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
108
+GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
109
+GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
110
+GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)
111
--
27
--
112
2.27.0
28
2.41.0
113
114
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Commit bef6f008b98 (accel/tcg: Return bool from page_check_range) converts
4
integer return value to bool type. However, it wrongly converted the use
5
of the API in riscv fault-only-first, where page_check_range <= 0 should
6
be converted to !page_check_range.
7
8
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20200623215920.2594-52-zhiwei_liu@c-sky.com
10
Message-ID: <20230729031618.821-1-zhiwei_liu@linux.alibaba.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
12
---
8
target/riscv/helper.h | 2 ++
13
target/riscv/vector_helper.c | 2 +-
9
target/riscv/insn32.decode | 1 +
14
1 file changed, 1 insertion(+), 1 deletion(-)
10
target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++
11
target/riscv/vector_helper.c | 19 +++++++++++++++
12
4 files changed, 54 insertions(+)
13
15
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
17
+++ b/target/riscv/helper.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32)
19
DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
20
21
DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32)
22
+
23
+DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32)
24
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/riscv/insn32.decode
27
+++ b/target/riscv/insn32.decode
28
@@ -XXX,XX +XXX,XX @@ vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
29
vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
30
vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
31
vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
32
+vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
33
34
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
35
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
36
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/riscv/insn_trans/trans_rvv.inc.c
39
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
40
@@ -XXX,XX +XXX,XX @@ static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a)
41
}
42
return false;
43
}
44
+
45
+/* vmfirst find-first-set mask bit */
46
+static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a)
47
+{
48
+ if (vext_check_isa_ill(s)) {
49
+ TCGv_ptr src2, mask;
50
+ TCGv dst;
51
+ TCGv_i32 desc;
52
+ uint32_t data = 0;
53
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
54
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
55
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
56
+
57
+ mask = tcg_temp_new_ptr();
58
+ src2 = tcg_temp_new_ptr();
59
+ dst = tcg_temp_new();
60
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
61
+
62
+ tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
63
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
64
+
65
+ gen_helper_vmfirst_m(dst, mask, src2, cpu_env, desc);
66
+ gen_set_gpr(a->rd, dst);
67
+
68
+ tcg_temp_free_ptr(mask);
69
+ tcg_temp_free_ptr(src2);
70
+ tcg_temp_free(dst);
71
+ tcg_temp_free_i32(desc);
72
+ return true;
73
+ }
74
+ return false;
75
+}
76
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
16
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
77
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
78
--- a/target/riscv/vector_helper.c
18
--- a/target/riscv/vector_helper.c
79
+++ b/target/riscv/vector_helper.c
19
+++ b/target/riscv/vector_helper.c
80
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
20
@@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base,
81
}
21
cpu_mmu_index(env, false));
82
return cnt;
22
if (host) {
83
}
23
#ifdef CONFIG_USER_ONLY
84
+
24
- if (page_check_range(addr, offset, PAGE_READ)) {
85
+/* vmfirst find-first-set mask bit*/
25
+ if (!page_check_range(addr, offset, PAGE_READ)) {
86
+target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
26
vl = i;
87
+ uint32_t desc)
27
goto ProbeSuccess;
88
+{
28
}
89
+ uint32_t mlen = vext_mlen(desc);
90
+ uint32_t vm = vext_vm(desc);
91
+ uint32_t vl = env->vl;
92
+ int i;
93
+
94
+ for (i = 0; i < vl; i++) {
95
+ if (vm || vext_elem_mask(v0, mlen, i)) {
96
+ if (vext_elem_mask(vs2, mlen, i)) {
97
+ return i;
98
+ }
99
+ }
100
+ }
101
+ return -1LL;
102
+}
103
--
29
--
104
2.27.0
30
2.41.0
105
106
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Ard Biesheuvel <ardb@kernel.org>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
The AES MixColumns and InvMixColumns operations are relatively
4
expensive 4x4 matrix multiplications in GF(2^8), which is why C
5
implementations usually rely on precomputed lookup tables rather than
6
performing the calculations on demand.
7
8
Given that we already carry those tables in QEMU, we can just grab the
9
right value in the implementation of the RISC-V AES32 instructions. Note
10
that the tables in question are permuted according to the respective
11
Sbox, so we can omit the Sbox lookup as well in this case.
12
13
Cc: Richard Henderson <richard.henderson@linaro.org>
14
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Cc: Zewen Ye <lustrew@foxmail.com>
16
Cc: Weiwei Li <liweiwei@iscas.ac.cn>
17
Cc: Junqiang Wang <wangjunqiang@iscas.ac.cn>
18
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20200623215920.2594-57-zhiwei_liu@c-sky.com
20
Message-ID: <20230731084043.1791984-1-ardb@kernel.org>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
21
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
22
---
8
target/riscv/internals.h | 6 +++
23
include/crypto/aes.h | 7 +++++++
9
target/riscv/insn32.decode | 1 +
24
crypto/aes.c | 4 ++--
10
target/riscv/insn_trans/trans_rvv.inc.c | 60 +++++++++++++++++++++++++
25
target/riscv/crypto_helper.c | 34 ++++------------------------------
11
3 files changed, 67 insertions(+)
26
3 files changed, 13 insertions(+), 32 deletions(-)
12
27
13
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
28
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
14
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
15
--- a/target/riscv/internals.h
30
--- a/include/crypto/aes.h
16
+++ b/target/riscv/internals.h
31
+++ b/include/crypto/aes.h
17
@@ -XXX,XX +XXX,XX @@ FIELD(VDATA, WD, 11, 1)
32
@@ -XXX,XX +XXX,XX @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
18
target_ulong fclass_h(uint64_t frs1);
33
extern const uint8_t AES_sbox[256];
19
target_ulong fclass_s(uint64_t frs1);
34
extern const uint8_t AES_isbox[256];
20
target_ulong fclass_d(uint64_t frs1);
35
36
+/*
37
+AES_Te0[x] = S [x].[02, 01, 01, 03];
38
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
39
+*/
21
+
40
+
22
+#define SEW8 0
41
+extern const uint32_t AES_Te0[256], AES_Td0[256];
23
+#define SEW16 1
24
+#define SEW32 2
25
+#define SEW64 3
26
+
42
+
27
#endif
43
#endif
28
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
44
diff --git a/crypto/aes.c b/crypto/aes.c
29
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
30
--- a/target/riscv/insn32.decode
46
--- a/crypto/aes.c
31
+++ b/target/riscv/insn32.decode
47
+++ b/crypto/aes.c
32
@@ -XXX,XX +XXX,XX @@ vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
48
@@ -XXX,XX +XXX,XX @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
33
viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
49
AES_Td4[x] = Si[x].[01, 01, 01, 01];
34
vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
50
*/
35
vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
51
36
+vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2
52
-static const uint32_t AES_Te0[256] = {
37
53
+const uint32_t AES_Te0[256] = {
38
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
54
0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
39
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
55
0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
40
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
56
0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
57
@@ -XXX,XX +XXX,XX @@ static const uint32_t AES_Te4[256] = {
58
0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
59
};
60
61
-static const uint32_t AES_Td0[256] = {
62
+const uint32_t AES_Td0[256] = {
63
0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
64
0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
65
0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
66
diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
41
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn_trans/trans_rvv.inc.c
68
--- a/target/riscv/crypto_helper.c
43
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
69
+++ b/target/riscv/crypto_helper.c
44
@@ -XXX,XX +XXX,XX @@ static bool trans_vext_x_v(DisasContext *s, arg_r *a)
70
@@ -XXX,XX +XXX,XX @@
45
tcg_temp_free_i64(tmp);
71
#include "crypto/aes-round.h"
46
return true;
72
#include "crypto/sm4.h"
47
}
73
48
+
74
-#define AES_XTIME(a) \
49
+/* Integer Scalar Move Instruction */
75
- ((a << 1) ^ ((a & 0x80) ? 0x1b : 0))
50
+
76
-
51
+static void store_element(TCGv_i64 val, TCGv_ptr base,
77
-#define AES_GFMUL(a, b) (( \
52
+ int ofs, int sew)
78
- (((b) & 0x1) ? (a) : 0) ^ \
53
+{
79
- (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \
54
+ switch (sew) {
80
- (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \
55
+ case MO_8:
81
- (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF)
56
+ tcg_gen_st8_i64(val, base, ofs);
82
-
57
+ break;
83
-static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd)
58
+ case MO_16:
84
-{
59
+ tcg_gen_st16_i64(val, base, ofs);
85
- uint32_t u;
60
+ break;
86
-
61
+ case MO_32:
87
- if (fwd) {
62
+ tcg_gen_st32_i64(val, base, ofs);
88
- u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) |
63
+ break;
89
- (AES_GFMUL(x, 2) << 0);
64
+ case MO_64:
90
- } else {
65
+ tcg_gen_st_i64(val, base, ofs);
91
- u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) |
66
+ break;
92
- (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0);
67
+ default:
93
- }
68
+ g_assert_not_reached();
94
- return u;
69
+ break;
95
-}
70
+ }
96
-
71
+}
97
#define sext32_xlen(x) (target_ulong)(int32_t)(x)
72
+
98
73
+/*
99
static inline target_ulong aes32_operation(target_ulong shamt,
74
+ * Store vreg[idx] = val.
100
@@ -XXX,XX +XXX,XX @@ static inline target_ulong aes32_operation(target_ulong shamt,
75
+ * The index must be in range of VLMAX.
101
bool enc, bool mix)
76
+ */
102
{
77
+static void vec_element_storei(DisasContext *s, int vreg,
103
uint8_t si = rs2 >> shamt;
78
+ int idx, TCGv_i64 val)
104
- uint8_t so;
79
+{
105
uint32_t mixed;
80
+ store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew);
106
target_ulong res;
81
+}
107
82
+
108
if (enc) {
83
+/* vmv.s.x vd, rs1 # vd[0] = rs1 */
109
- so = AES_sbox[si];
84
+static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
110
if (mix) {
85
+{
111
- mixed = aes_mixcolumn_byte(so, true);
86
+ if (vext_check_isa_ill(s)) {
112
+ mixed = be32_to_cpu(AES_Te0[si]);
87
+ /* This instruction ignores LMUL and vector register groups */
113
} else {
88
+ int maxsz = s->vlen >> 3;
114
- mixed = so;
89
+ TCGv_i64 t1;
115
+ mixed = AES_sbox[si];
90
+ TCGLabel *over = gen_new_label();
116
}
91
+
117
} else {
92
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
118
- so = AES_isbox[si];
93
+ tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0);
119
if (mix) {
94
+ if (a->rs1 == 0) {
120
- mixed = aes_mixcolumn_byte(so, false);
95
+ goto done;
121
+ mixed = be32_to_cpu(AES_Td0[si]);
96
+ }
122
} else {
97
+
123
- mixed = so;
98
+ t1 = tcg_temp_new_i64();
124
+ mixed = AES_isbox[si];
99
+ tcg_gen_extu_tl_i64(t1, cpu_gpr[a->rs1]);
125
}
100
+ vec_element_storei(s, a->rd, 0, t1);
126
}
101
+ tcg_temp_free_i64(t1);
127
mixed = rol32(mixed, shamt);
102
+ done:
103
+ gen_set_label(over);
104
+ return true;
105
+ }
106
+ return false;
107
+}
108
--
128
--
109
2.27.0
129
2.41.0
110
130
111
131
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Take some functions/macros out of `vector_helper` and put them in a new
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
module called `vector_internals`. This ensures they can be used by both
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
vector and vector-crypto helpers (the latter implemented in subsequent
6
Message-id: 20200623215920.2594-11-zhiwei_liu@c-sky.com
6
commits).
7
8
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
9
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
10
Signed-off-by: Max Chou <max.chou@sifive.com>
11
Acked-by: Alistair Francis <alistair.francis@wdc.com>
12
Message-ID: <20230711165917.2629866-2-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
14
---
9
target/riscv/helper.h | 25 ++
15
target/riscv/vector_internals.h | 182 +++++++++++++++++++++++++++++
10
target/riscv/insn32.decode | 10 +
16
target/riscv/vector_helper.c | 201 +-------------------------------
11
target/riscv/insn_trans/trans_rvv.inc.c | 291 ++++++++++++++++++++++++
17
target/riscv/vector_internals.c | 81 +++++++++++++
12
target/riscv/vector_helper.c | 183 +++++++++++++++
18
target/riscv/meson.build | 1 +
13
4 files changed, 509 insertions(+)
19
4 files changed, 265 insertions(+), 200 deletions(-)
20
create mode 100644 target/riscv/vector_internals.h
21
create mode 100644 target/riscv/vector_internals.c
14
22
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
23
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
16
index XXXXXXX..XXXXXXX 100644
24
new file mode 100644
17
--- a/target/riscv/helper.h
25
index XXXXXXX..XXXXXXX
18
+++ b/target/riscv/helper.h
26
--- /dev/null
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32)
27
+++ b/target/riscv/vector_internals.h
20
DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
45
+DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
46
+DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
47
+DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
48
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/riscv/insn32.decode
51
+++ b/target/riscv/insn32.decode
52
@@ -XXX,XX +XXX,XX @@
28
@@ -XXX,XX +XXX,XX @@
53
&u imm rd
29
+/*
54
&shift shamt rs1 rd
30
+ * RISC-V Vector Extension Internals
55
&atomic aq rl rs2 rs1 rd
31
+ *
56
+&rmrr vm rd rs1 rs2
32
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
57
&rwdvm vm wd rd rs1 rs2
33
+ *
58
&r2nfvm vm rd rs1 nf
34
+ * This program is free software; you can redistribute it and/or modify it
59
&rnfvm vm rd rs1 rs2 nf
35
+ * under the terms and conditions of the GNU General Public License,
60
@@ -XXX,XX +XXX,XX @@
36
+ * version 2 or later, as published by the Free Software Foundation.
61
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
37
+ *
62
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
38
+ * This program is distributed in the hope it will be useful, but WITHOUT
63
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
39
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
64
+@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
40
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
65
@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
41
+ * more details.
66
@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
42
+ *
67
43
+ * You should have received a copy of the GNU General Public License along with
68
@@ -XXX,XX +XXX,XX @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
44
+ * this program. If not, see <http://www.gnu.org/licenses/>.
69
vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
45
+ */
70
46
+
71
# *** new major opcode OP-V ***
47
+#ifndef TARGET_RISCV_VECTOR_INTERNALS_H
72
+vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
48
+#define TARGET_RISCV_VECTOR_INTERNALS_H
73
+vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
49
+
74
+vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
50
+#include "qemu/osdep.h"
75
+vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
51
+#include "qemu/bitops.h"
76
+vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
52
+#include "cpu.h"
77
+vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
53
+#include "tcg/tcg-gvec-desc.h"
78
+vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
54
+#include "internals.h"
79
+
55
+
80
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
56
+static inline uint32_t vext_nf(uint32_t desc)
81
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
57
+{
82
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
58
+ return FIELD_EX32(simd_data(desc), VDATA, NF);
83
index XXXXXXX..XXXXXXX 100644
59
+}
84
--- a/target/riscv/insn_trans/trans_rvv.inc.c
60
+
85
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
61
+/*
86
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check)
62
+ * Note that vector data is stored in host-endian 64-bit chunks,
87
GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check)
63
+ * so addressing units smaller than that needs a host-endian fixup.
88
GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check)
64
+ */
89
#endif
65
+#if HOST_BIG_ENDIAN
90
+
66
+#define H1(x) ((x) ^ 7)
91
+/*
67
+#define H1_2(x) ((x) ^ 6)
92
+ *** Vector Integer Arithmetic Instructions
68
+#define H1_4(x) ((x) ^ 4)
93
+ */
69
+#define H2(x) ((x) ^ 3)
94
+#define MAXSZ(s) (s->vlen >> (3 - s->lmul))
70
+#define H4(x) ((x) ^ 1)
95
+
71
+#define H8(x) ((x))
96
+static bool opivv_check(DisasContext *s, arg_rmrr *a)
72
+#else
97
+{
73
+#define H1(x) (x)
98
+ return (vext_check_isa_ill(s) &&
74
+#define H1_2(x) (x)
99
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
75
+#define H1_4(x) (x)
100
+ vext_check_reg(s, a->rd, false) &&
76
+#define H2(x) (x)
101
+ vext_check_reg(s, a->rs2, false) &&
77
+#define H4(x) (x)
102
+ vext_check_reg(s, a->rs1, false));
78
+#define H8(x) (x)
103
+}
79
+#endif
104
+
80
+
105
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
81
+/*
106
+ uint32_t, uint32_t, uint32_t);
82
+ * Encode LMUL to lmul as follows:
107
+
83
+ * LMUL vlmul lmul
108
+static inline bool
84
+ * 1 000 0
109
+do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
85
+ * 2 001 1
110
+ gen_helper_gvec_4_ptr *fn)
86
+ * 4 010 2
111
+{
87
+ * 8 011 3
112
+ TCGLabel *over = gen_new_label();
88
+ * - 100 -
113
+ if (!opivv_check(s, a)) {
89
+ * 1/8 101 -3
114
+ return false;
90
+ * 1/4 110 -2
115
+ }
91
+ * 1/2 111 -1
116
+
92
+ */
117
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
93
+static inline int32_t vext_lmul(uint32_t desc)
118
+
94
+{
119
+ if (a->vm && s->vl_eq_vlmax) {
95
+ return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
120
+ gvec_fn(s->sew, vreg_ofs(s, a->rd),
96
+}
121
+ vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
97
+
122
+ MAXSZ(s), MAXSZ(s));
98
+static inline uint32_t vext_vm(uint32_t desc)
123
+ } else {
99
+{
124
+ uint32_t data = 0;
100
+ return FIELD_EX32(simd_data(desc), VDATA, VM);
125
+
101
+}
126
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
102
+
127
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
103
+static inline uint32_t vext_vma(uint32_t desc)
128
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
104
+{
129
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
105
+ return FIELD_EX32(simd_data(desc), VDATA, VMA);
130
+ vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
106
+}
131
+ cpu_env, 0, s->vlen / 8, data, fn);
107
+
132
+ }
108
+static inline uint32_t vext_vta(uint32_t desc)
133
+ gen_set_label(over);
109
+{
134
+ return true;
110
+ return FIELD_EX32(simd_data(desc), VDATA, VTA);
135
+}
111
+}
136
+
112
+
137
+/* OPIVV with GVEC IR */
113
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
138
+#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \
114
+{
139
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
115
+ return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
140
+{ \
116
+}
141
+ static gen_helper_gvec_4_ptr * const fns[4] = { \
117
+
142
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
118
+/*
143
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
119
+ * Earlier designs (pre-0.9) had a varying number of bits
144
+ }; \
120
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
145
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
121
+ * (Section 4.5)
146
+}
122
+ */
147
+
123
+static inline int vext_elem_mask(void *v0, int index)
148
+GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
124
+{
149
+GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
125
+ int idx = index / 64;
150
+
126
+ int pos = index % 64;
151
+typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
127
+ return (((uint64_t *)v0)[idx] >> pos) & 1;
152
+ TCGv_env, TCGv_i32);
128
+}
153
+
129
+
154
+static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
130
+/*
155
+ gen_helper_opivx *fn, DisasContext *s)
131
+ * Get number of total elements, including prestart, body and tail elements.
156
+{
132
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
157
+ TCGv_ptr dest, src2, mask;
133
+ * are held in the same vector register.
158
+ TCGv src1;
134
+ */
159
+ TCGv_i32 desc;
135
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
160
+ uint32_t data = 0;
136
+ uint32_t esz)
161
+
137
+{
162
+ TCGLabel *over = gen_new_label();
138
+ uint32_t vlenb = simd_maxsz(desc);
163
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
139
+ uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
164
+
140
+ int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
165
+ dest = tcg_temp_new_ptr();
141
+ ctzl(esz) - ctzl(sew) + vext_lmul(desc);
166
+ mask = tcg_temp_new_ptr();
142
+ return (vlenb << emul) / esz;
167
+ src2 = tcg_temp_new_ptr();
143
+}
168
+ src1 = tcg_temp_new();
144
+
169
+ gen_get_gpr(src1, rs1);
145
+/* set agnostic elements to 1s */
170
+
146
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
171
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
147
+ uint32_t tot);
172
+ data = FIELD_DP32(data, VDATA, VM, vm);
173
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
174
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
175
+
176
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
177
+ tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
178
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
179
+
180
+ fn(dest, mask, src1, src2, cpu_env, desc);
181
+
182
+ tcg_temp_free_ptr(dest);
183
+ tcg_temp_free_ptr(mask);
184
+ tcg_temp_free_ptr(src2);
185
+ tcg_temp_free(src1);
186
+ tcg_temp_free_i32(desc);
187
+ gen_set_label(over);
188
+ return true;
189
+}
190
+
191
+static bool opivx_check(DisasContext *s, arg_rmrr *a)
192
+{
193
+ return (vext_check_isa_ill(s) &&
194
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
195
+ vext_check_reg(s, a->rd, false) &&
196
+ vext_check_reg(s, a->rs2, false));
197
+}
198
+
199
+typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
200
+ uint32_t, uint32_t);
201
+
202
+static inline bool
203
+do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
204
+ gen_helper_opivx *fn)
205
+{
206
+ if (!opivx_check(s, a)) {
207
+ return false;
208
+ }
209
+
210
+ if (a->vm && s->vl_eq_vlmax) {
211
+ TCGv_i64 src1 = tcg_temp_new_i64();
212
+ TCGv tmp = tcg_temp_new();
213
+
214
+ gen_get_gpr(tmp, a->rs1);
215
+ tcg_gen_ext_tl_i64(src1, tmp);
216
+ gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
217
+ src1, MAXSZ(s), MAXSZ(s));
218
+
219
+ tcg_temp_free_i64(src1);
220
+ tcg_temp_free(tmp);
221
+ return true;
222
+ }
223
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
224
+}
225
+
226
+/* OPIVX with GVEC IR */
227
+#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \
228
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
229
+{ \
230
+ static gen_helper_opivx * const fns[4] = { \
231
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
232
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
233
+ }; \
234
+ return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
235
+}
236
+
237
+GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
238
+GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
239
+
240
+static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
241
+{
242
+ tcg_gen_vec_sub8_i64(d, b, a);
243
+}
244
+
245
+static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
246
+{
247
+ /* Reverse subtract per 16-bit lane packed in an i64: d = b - a. */
+ tcg_gen_vec_sub16_i64(d, b, a);
248
+}
249
+
250
+static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
251
+{
252
+ tcg_gen_sub_i32(ret, arg2, arg1);
253
+}
254
+
255
+static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
256
+{
257
+ tcg_gen_sub_i64(ret, arg2, arg1);
258
+}
259
+
260
+static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
261
+{
262
+ tcg_gen_sub_vec(vece, r, b, a);
263
+}
264
+
265
+static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
266
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
267
+{
268
+ static const GVecGen2s rsub_op[4] = {
269
+ { .fni8 = gen_vec_rsub8_i64,
270
+ .fniv = gen_rsub_vec,
271
+ .fno = gen_helper_vec_rsubs8,
272
+ .vece = MO_8 },
273
+ { .fni8 = gen_vec_rsub16_i64,
274
+ .fniv = gen_rsub_vec,
275
+ .fno = gen_helper_vec_rsubs16,
276
+ .vece = MO_16 },
277
+ { .fni4 = gen_rsub_i32,
278
+ .fniv = gen_rsub_vec,
279
+ .fno = gen_helper_vec_rsubs32,
280
+ .vece = MO_32 },
281
+ { .fni8 = gen_rsub_i64,
282
+ .fniv = gen_rsub_vec,
283
+ .fno = gen_helper_vec_rsubs64,
284
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
285
+ .vece = MO_64 },
286
+ };
287
+
288
+ tcg_debug_assert(vece <= MO_64);
289
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
290
+}
291
+
292
+GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)
293
+
294
+static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
295
+ gen_helper_opivx *fn, DisasContext *s, int zx)
296
+{
297
+ TCGv_ptr dest, src2, mask;
298
+ TCGv src1;
299
+ TCGv_i32 desc;
300
+ uint32_t data = 0;
301
+
302
+ TCGLabel *over = gen_new_label();
303
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
304
+
305
+ dest = tcg_temp_new_ptr();
306
+ mask = tcg_temp_new_ptr();
307
+ src2 = tcg_temp_new_ptr();
308
+ if (zx) {
309
+ src1 = tcg_const_tl(imm);
310
+ } else {
311
+ src1 = tcg_const_tl(sextract64(imm, 0, 5));
312
+ }
313
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
314
+ data = FIELD_DP32(data, VDATA, VM, vm);
315
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
316
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
317
+
318
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
319
+ tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
320
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
321
+
322
+ fn(dest, mask, src1, src2, cpu_env, desc);
323
+
324
+ tcg_temp_free_ptr(dest);
325
+ tcg_temp_free_ptr(mask);
326
+ tcg_temp_free_ptr(src2);
327
+ tcg_temp_free(src1);
328
+ tcg_temp_free_i32(desc);
329
+ gen_set_label(over);
330
+ return true;
331
+}
332
+
333
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
334
+ uint32_t, uint32_t);
335
+
336
+static inline bool
337
+do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
338
+ gen_helper_opivx *fn, int zx)
339
+{
340
+ if (!opivx_check(s, a)) {
341
+ return false;
342
+ }
343
+
344
+ if (a->vm && s->vl_eq_vlmax) {
345
+ if (zx) {
346
+ gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
347
+ extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
348
+ } else {
349
+ gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
350
+ sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
351
+ }
352
+ } else {
353
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx);
354
+ }
355
+ return true;
356
+}
357
+
358
+/* OPIVI with GVEC IR */
359
+#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \
360
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
361
+{ \
362
+ static gen_helper_opivx * const fns[4] = { \
363
+ gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \
364
+ gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
365
+ }; \
366
+ return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \
367
+ fns[s->sew], ZX); \
368
+}
369
+
370
+GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi)
371
+
372
+static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
373
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
374
+{
375
+ TCGv_i64 tmp = tcg_const_i64(c);
376
+ tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
377
+ tcg_temp_free_i64(tmp);
378
+}
379
+
380
+GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi)
381
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
382
index XXXXXXX..XXXXXXX 100644
383
--- a/target/riscv/vector_helper.c
384
+++ b/target/riscv/vector_helper.c
385
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl)
386
GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl)
387
GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
388
GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
389
+
390
+/*
391
+ *** Vector Integer Arithmetic Instructions
392
+ */
393
+
148
+
394
+/* expand macro args before macro */
149
+/* expand macro args before macro */
395
+#define RVVCALL(macro, ...) macro(__VA_ARGS__)
150
+#define RVVCALL(macro, ...) macro(__VA_ARGS__)
396
+
151
+
397
+/* (TD, T1, T2, TX1, TX2) */
152
+/* (TD, T1, T2, TX1, TX2) */
398
+#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
153
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
399
+#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
154
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
400
+#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
155
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
401
+#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
156
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
402
+
157
+
403
+/* operation of two vector elements */
158
+/* operation of two vector elements */
404
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
159
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
405
+
160
+
406
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
161
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
407
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
162
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
408
+{ \
163
+{ \
409
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
164
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
410
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
165
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
411
+ *((TD *)vd + HD(i)) = OP(s2, s1); \
166
+ *((TD *)vd + HD(i)) = OP(s2, s1); \
412
+}
167
+}
413
+#define DO_SUB(N, M) (N - M)
168
+
414
+#define DO_RSUB(N, M) (M - N)
169
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
415
+
170
+ CPURISCVState *env, uint32_t desc,
416
+RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
171
+ opivv2_fn *fn, uint32_t esz);
417
+RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
418
+RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
419
+RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
420
+RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
421
+RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
422
+RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
423
+RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
424
+
425
+static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
426
+ CPURISCVState *env, uint32_t desc,
427
+ uint32_t esz, uint32_t dsz,
428
+ opivv2_fn *fn, clear_fn *clearfn)
429
+{
430
+ uint32_t vlmax = vext_maxsz(desc) / esz;
431
+ uint32_t mlen = vext_mlen(desc);
432
+ uint32_t vm = vext_vm(desc);
433
+ uint32_t vl = env->vl;
434
+ uint32_t i;
435
+
436
+ for (i = 0; i < vl; i++) {
437
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
438
+ continue;
439
+ }
440
+ fn(vd, vs1, vs2, i);
441
+ }
442
+ clearfn(vd, vl, vl * dsz, vlmax * dsz);
443
+}
444
+
172
+
445
+/* generate the helpers for OPIVV */
173
+/* generate the helpers for OPIVV */
446
+#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \
174
+#define GEN_VEXT_VV(NAME, ESZ) \
447
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
175
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
448
+ void *vs2, CPURISCVState *env, \
176
+ void *vs2, CPURISCVState *env, \
449
+ uint32_t desc) \
177
+ uint32_t desc) \
450
+{ \
178
+{ \
451
+ do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
179
+ do_vext_vv(vd, v0, vs1, vs2, env, desc, \
452
+ do_##NAME, CLEAR_FN); \
180
+ do_##NAME, ESZ); \
453
+}
181
+}
454
+
455
+GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
456
+GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
457
+GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
458
+GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
459
+GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
460
+GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
461
+GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
462
+GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
463
+
182
+
464
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
183
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
465
+
184
+
466
+/*
185
+/*
467
+ * (T1)s1 gives the real operator type.
186
+ * (T1)s1 gives the real operator type.
...
...
472
+{ \
191
+{ \
473
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
192
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
474
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
193
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
475
+}
194
+}
476
+
195
+
477
+RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
196
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
478
+RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
197
+ CPURISCVState *env, uint32_t desc,
479
+RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
198
+ opivx2_fn fn, uint32_t esz);
480
+RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
199
+
481
+RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
200
+/* generate the helpers for OPIVX */
482
+RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
201
+#define GEN_VEXT_VX(NAME, ESZ) \
483
+RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
202
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
484
+RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
203
+ void *vs2, CPURISCVState *env, \
485
+RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
204
+ uint32_t desc) \
486
+RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
205
+{ \
487
+RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
206
+ do_vext_vx(vd, v0, s1, vs2, env, desc, \
488
+RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
207
+ do_##NAME, ESZ); \
489
+
208
+}
490
+static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
209
+
491
+ CPURISCVState *env, uint32_t desc,
210
+#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
492
+ uint32_t esz, uint32_t dsz,
211
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
493
+ opivx2_fn fn, clear_fn *clearfn)
212
index XXXXXXX..XXXXXXX 100644
494
+{
213
--- a/target/riscv/vector_helper.c
495
+ uint32_t vlmax = vext_maxsz(desc) / esz;
214
+++ b/target/riscv/vector_helper.c
496
+ uint32_t mlen = vext_mlen(desc);
215
@@ -XXX,XX +XXX,XX @@
216
#include "fpu/softfloat.h"
217
#include "tcg/tcg-gvec-desc.h"
218
#include "internals.h"
219
+#include "vector_internals.h"
220
#include <math.h>
221
222
target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
223
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
224
return vl;
225
}
226
227
-/*
228
- * Note that vector data is stored in host-endian 64-bit chunks,
229
- * so addressing units smaller than that needs a host-endian fixup.
230
- */
231
-#if HOST_BIG_ENDIAN
232
-#define H1(x) ((x) ^ 7)
233
-#define H1_2(x) ((x) ^ 6)
234
-#define H1_4(x) ((x) ^ 4)
235
-#define H2(x) ((x) ^ 3)
236
-#define H4(x) ((x) ^ 1)
237
-#define H8(x) ((x))
238
-#else
239
-#define H1(x) (x)
240
-#define H1_2(x) (x)
241
-#define H1_4(x) (x)
242
-#define H2(x) (x)
243
-#define H4(x) (x)
244
-#define H8(x) (x)
245
-#endif
246
-
247
-static inline uint32_t vext_nf(uint32_t desc)
248
-{
249
- return FIELD_EX32(simd_data(desc), VDATA, NF);
250
-}
251
-
252
-static inline uint32_t vext_vm(uint32_t desc)
253
-{
254
- return FIELD_EX32(simd_data(desc), VDATA, VM);
255
-}
256
-
257
-/*
258
- * Encode LMUL to lmul as following:
259
- * LMUL vlmul lmul
260
- * 1 000 0
261
- * 2 001 1
262
- * 4 010 2
263
- * 8 011 3
264
- * - 100 -
265
- * 1/8 101 -3
266
- * 1/4 110 -2
267
- * 1/2 111 -1
268
- */
269
-static inline int32_t vext_lmul(uint32_t desc)
270
-{
271
- return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
272
-}
273
-
274
-static inline uint32_t vext_vta(uint32_t desc)
275
-{
276
- return FIELD_EX32(simd_data(desc), VDATA, VTA);
277
-}
278
-
279
-static inline uint32_t vext_vma(uint32_t desc)
280
-{
281
- return FIELD_EX32(simd_data(desc), VDATA, VMA);
282
-}
283
-
284
-static inline uint32_t vext_vta_all_1s(uint32_t desc)
285
-{
286
- return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
287
-}
288
-
289
/*
290
* Get the maximum number of elements can be operated.
291
*
292
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
293
return scale < 0 ? vlenb >> -scale : vlenb << scale;
294
}
295
296
-/*
297
- * Get number of total elements, including prestart, body and tail elements.
298
- * Note that when LMUL < 1, the tail includes the elements past VLMAX that
299
- * are held in the same vector register.
300
- */
301
-static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
302
- uint32_t esz)
303
-{
304
- uint32_t vlenb = simd_maxsz(desc);
305
- uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
306
- int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
307
- ctzl(esz) - ctzl(sew) + vext_lmul(desc);
308
- return (vlenb << emul) / esz;
309
-}
310
-
311
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
312
{
313
return (addr & ~env->cur_pmmask) | env->cur_pmbase;
314
@@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
315
}
316
}
317
318
-/* set agnostic elements to 1s */
319
-static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
320
- uint32_t tot)
321
-{
322
- if (is_agnostic == 0) {
323
- /* policy undisturbed */
324
- return;
325
- }
326
- if (tot - cnt == 0) {
327
- return;
328
- }
329
- memset(base + cnt, -1, tot - cnt);
330
-}
331
-
332
static inline void vext_set_elem_mask(void *v0, int index,
333
uint8_t value)
334
{
335
@@ -XXX,XX +XXX,XX @@ static inline void vext_set_elem_mask(void *v0, int index,
336
((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
337
}
338
339
-/*
340
- * Earlier designs (pre-0.9) had a varying number of bits
341
- * per mask value (MLEN). In the 0.9 design, MLEN=1.
342
- * (Section 4.5)
343
- */
344
-static inline int vext_elem_mask(void *v0, int index)
345
-{
346
- int idx = index / 64;
347
- int pos = index % 64;
348
- return (((uint64_t *)v0)[idx] >> pos) & 1;
349
-}
350
-
351
/* elements operations for load and store */
352
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
353
uint32_t idx, void *vd, uintptr_t retaddr);
354
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
355
* Vector Integer Arithmetic Instructions
356
*/
357
358
-/* expand macro args before macro */
359
-#define RVVCALL(macro, ...) macro(__VA_ARGS__)
360
-
361
/* (TD, T1, T2, TX1, TX2) */
362
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
363
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
364
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
365
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
366
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
367
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
368
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
369
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
370
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
371
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
372
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
373
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
374
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
375
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
376
377
-/* operation of two vector elements */
378
-typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
379
-
380
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
381
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
382
-{ \
383
- TX1 s1 = *((T1 *)vs1 + HS1(i)); \
384
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
385
- *((TD *)vd + HD(i)) = OP(s2, s1); \
386
-}
387
#define DO_SUB(N, M) (N - M)
388
#define DO_RSUB(N, M) (M - N)
389
390
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
391
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
392
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
393
394
-static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
395
- CPURISCVState *env, uint32_t desc,
396
- opivv2_fn *fn, uint32_t esz)
397
-{
398
- uint32_t vm = vext_vm(desc);
399
- uint32_t vl = env->vl;
400
- uint32_t total_elems = vext_get_total_elems(env, desc, esz);
401
- uint32_t vta = vext_vta(desc);
402
- uint32_t vma = vext_vma(desc);
403
- uint32_t i;
404
-
405
- for (i = env->vstart; i < vl; i++) {
406
- if (!vm && !vext_elem_mask(v0, i)) {
407
- /* set masked-off elements to 1s */
408
- vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
409
- continue;
410
- }
411
- fn(vd, vs1, vs2, i);
412
- }
413
- env->vstart = 0;
414
- /* set tail elements to 1s */
415
- vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
416
-}
417
-
418
-/* generate the helpers for OPIVV */
419
-#define GEN_VEXT_VV(NAME, ESZ) \
420
-void HELPER(NAME)(void *vd, void *v0, void *vs1, \
421
- void *vs2, CPURISCVState *env, \
422
- uint32_t desc) \
423
-{ \
424
- do_vext_vv(vd, v0, vs1, vs2, env, desc, \
425
- do_##NAME, ESZ); \
426
-}
427
-
428
GEN_VEXT_VV(vadd_vv_b, 1)
429
GEN_VEXT_VV(vadd_vv_h, 2)
430
GEN_VEXT_VV(vadd_vv_w, 4)
431
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VV(vsub_vv_h, 2)
432
GEN_VEXT_VV(vsub_vv_w, 4)
433
GEN_VEXT_VV(vsub_vv_d, 8)
434
435
-typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
436
-
437
-/*
438
- * (T1)s1 gives the real operator type.
439
- * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
440
- */
441
-#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
442
-static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
443
-{ \
444
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
445
- *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
446
-}
447
448
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
449
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
450
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
451
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
452
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
453
454
-static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
455
- CPURISCVState *env, uint32_t desc,
456
- opivx2_fn fn, uint32_t esz)
457
-{
458
- uint32_t vm = vext_vm(desc);
459
- uint32_t vl = env->vl;
460
- uint32_t total_elems = vext_get_total_elems(env, desc, esz);
461
- uint32_t vta = vext_vta(desc);
462
- uint32_t vma = vext_vma(desc);
463
- uint32_t i;
464
-
465
- for (i = env->vstart; i < vl; i++) {
466
- if (!vm && !vext_elem_mask(v0, i)) {
467
- /* set masked-off elements to 1s */
468
- vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
469
- continue;
470
- }
471
- fn(vd, s1, vs2, i);
472
- }
473
- env->vstart = 0;
474
- /* set tail elements to 1s */
475
- vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
476
-}
477
-
478
-/* generate the helpers for OPIVX */
479
-#define GEN_VEXT_VX(NAME, ESZ) \
480
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
481
- void *vs2, CPURISCVState *env, \
482
- uint32_t desc) \
483
-{ \
484
- do_vext_vx(vd, v0, s1, vs2, env, desc, \
485
- do_##NAME, ESZ); \
486
-}
487
-
488
GEN_VEXT_VX(vadd_vx_b, 1)
489
GEN_VEXT_VX(vadd_vx_h, 2)
490
GEN_VEXT_VX(vadd_vx_w, 4)
491
diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
492
new file mode 100644
493
index XXXXXXX..XXXXXXX
494
--- /dev/null
495
+++ b/target/riscv/vector_internals.c
496
@@ -XXX,XX +XXX,XX @@
497
+/*
498
+ * RISC-V Vector Extension Internals
499
+ *
500
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
501
+ *
502
+ * This program is free software; you can redistribute it and/or modify it
503
+ * under the terms and conditions of the GNU General Public License,
504
+ * version 2 or later, as published by the Free Software Foundation.
505
+ *
506
+ * This program is distributed in the hope it will be useful, but WITHOUT
507
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
508
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
509
+ * more details.
510
+ *
511
+ * You should have received a copy of the GNU General Public License along with
512
+ * this program. If not, see <http://www.gnu.org/licenses/>.
513
+ */
514
+
515
+#include "vector_internals.h"
516
+
517
+/* set agnostic elements to 1s */
518
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
519
+ uint32_t tot)
520
+{
521
+ if (is_agnostic == 0) {
522
+ /* policy undisturbed */
523
+ return;
524
+ }
525
+ if (tot - cnt == 0) {
526
+ return ;
527
+ }
528
+ memset(base + cnt, -1, tot - cnt);
529
+}
530
+
531
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
532
+ CPURISCVState *env, uint32_t desc,
533
+ opivv2_fn *fn, uint32_t esz)
534
+{
497
+ uint32_t vm = vext_vm(desc);
535
+ uint32_t vm = vext_vm(desc);
498
+ uint32_t vl = env->vl;
536
+ uint32_t vl = env->vl;
537
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
538
+ uint32_t vta = vext_vta(desc);
539
+ uint32_t vma = vext_vma(desc);
499
+ uint32_t i;
540
+ uint32_t i;
500
+
541
+
501
+ for (i = 0; i < vl; i++) {
542
+ for (i = env->vstart; i < vl; i++) {
502
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
543
+ if (!vm && !vext_elem_mask(v0, i)) {
544
+ /* set masked-off elements to 1s */
545
+ vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
546
+ continue;
547
+ }
548
+ fn(vd, vs1, vs2, i);
549
+ }
550
+ env->vstart = 0;
551
+ /* set tail elements to 1s */
552
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
553
+}
554
+
555
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
556
+ CPURISCVState *env, uint32_t desc,
557
+ opivx2_fn fn, uint32_t esz)
558
+{
559
+ uint32_t vm = vext_vm(desc);
560
+ uint32_t vl = env->vl;
561
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
562
+ uint32_t vta = vext_vta(desc);
563
+ uint32_t vma = vext_vma(desc);
564
+ uint32_t i;
565
+
566
+ for (i = env->vstart; i < vl; i++) {
567
+ if (!vm && !vext_elem_mask(v0, i)) {
568
+ /* set masked-off elements to 1s */
569
+ vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
503
+ continue;
570
+ continue;
504
+ }
571
+ }
505
+ fn(vd, s1, vs2, i);
572
+ fn(vd, s1, vs2, i);
506
+ }
573
+ }
507
+ clearfn(vd, vl, vl * dsz, vlmax * dsz);
574
+ env->vstart = 0;
508
+}
575
+ /* set tail elements to 1s */
509
+
576
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
510
+/* generate the helpers for OPIVX */
577
+}
511
+#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \
578
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
512
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
579
index XXXXXXX..XXXXXXX 100644
513
+ void *vs2, CPURISCVState *env, \
580
--- a/target/riscv/meson.build
514
+ uint32_t desc) \
581
+++ b/target/riscv/meson.build
515
+{ \
582
@@ -XXX,XX +XXX,XX @@ riscv_ss.add(files(
516
+ do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
583
'gdbstub.c',
517
+ do_##NAME, CLEAR_FN); \
584
'op_helper.c',
518
+}
585
'vector_helper.c',
519
+
586
+ 'vector_internals.c',
520
+GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
587
'bitmanip_helper.c',
521
+GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
588
'translate.c',
522
+GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
589
'm128_helper.c',
523
+GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
524
+GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
525
+GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
526
+GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
527
+GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
528
+GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
529
+GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
530
+GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
531
+GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
532
+
533
+void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
534
+{
535
+ intptr_t oprsz = simd_oprsz(desc);
536
+ intptr_t i;
537
+
538
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
539
+ *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
540
+ }
541
+}
542
+
543
+void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
544
+{
545
+ intptr_t oprsz = simd_oprsz(desc);
546
+ intptr_t i;
547
+
548
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
549
+ *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
550
+ }
551
+}
552
+
553
+void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
554
+{
555
+ intptr_t oprsz = simd_oprsz(desc);
556
+ intptr_t i;
557
+
558
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
559
+ *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
560
+ }
561
+}
562
+
563
+void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
564
+{
565
+ intptr_t oprsz = simd_oprsz(desc);
566
+ intptr_t i;
567
+
568
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
569
+ *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
570
+ }
571
+}
572
--
590
--
573
2.27.0
591
2.41.0
574
575
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Refactor the non SEW-specific stuff out of `GEN_OPIVV_TRANS` into
4
function `opivv_trans` (similar to `opivi_trans`). `opivv_trans` will be
5
used in subsequent vector-crypto commits.
6
7
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
6
Message-id: 20200623215920.2594-13-zhiwei_liu@c-sky.com
11
Signed-off-by: Max Chou <max.chou@sifive.com>
12
Message-ID: <20230711165917.2629866-3-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
14
---
9
target/riscv/helper.h | 33 ++++++
15
target/riscv/insn_trans/trans_rvv.c.inc | 62 +++++++++++++------------
10
target/riscv/insn32.decode | 11 ++
16
1 file changed, 32 insertions(+), 30 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 113 +++++++++++++++++++
12
target/riscv/vector_helper.c | 137 ++++++++++++++++++++++++
13
4 files changed, 294 insertions(+)
14
17
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
18
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
16
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
20
--- a/target/riscv/insn_trans/trans_rvv.c.inc
18
+++ b/target/riscv/helper.h
21
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32)
22
@@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
20
DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@
61
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
62
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
63
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
64
+@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
65
@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
66
@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
67
68
@@ -XXX,XX +XXX,XX @@ vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
69
vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
70
vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
71
vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
72
+vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r_vm_1
73
+vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r_vm_1
74
+vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r_vm_1
75
+vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r_vm_1
76
+vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r_vm_1
77
+vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r_vm_1
78
+vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1
79
+vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1
80
+vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1
81
+vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1
82
83
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
84
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
85
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/riscv/insn_trans/trans_rvv.inc.c
88
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
89
@@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwaddu_wx)
90
GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
91
GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
23
GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
92
GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
24
GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
25
26
+static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm,
27
+ gen_helper_gvec_4_ptr *fn, DisasContext *s)
28
+{
29
+ uint32_t data = 0;
30
+ TCGLabel *over = gen_new_label();
31
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
32
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
93
+
33
+
94
+/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
34
+ data = FIELD_DP32(data, VDATA, VM, vm);
95
+/* OPIVV without GVEC IR */
35
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
96
+#define GEN_OPIVV_TRANS(NAME, CHECK) \
36
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
97
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
37
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
98
+{ \
38
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
99
+ if (CHECK(s, a)) { \
39
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1),
100
+ uint32_t data = 0; \
40
+ vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8,
101
+ static gen_helper_gvec_4_ptr * const fns[4] = { \
41
+ s->cfg_ptr->vlen / 8, data, fn);
102
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
42
+ mark_vs_dirty(s);
103
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
43
+ gen_set_label(over);
104
+ }; \
44
+ return true;
105
+ TCGLabel *over = gen_new_label(); \
106
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
107
+ \
108
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
109
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
110
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
111
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
112
+ vreg_ofs(s, a->rs1), \
113
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
114
+ s->vlen / 8, data, fns[s->sew]); \
115
+ gen_set_label(over); \
116
+ return true; \
117
+ } \
118
+ return false; \
119
+}
45
+}
120
+
46
+
121
+/*
47
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
122
+ * For vadc and vsbc, an illegal instruction exception is raised if the
48
/* OPIVV without GVEC IR */
123
+ * destination vector register is v0 and LMUL > 1. (Section 12.3)
49
-#define GEN_OPIVV_TRANS(NAME, CHECK) \
124
+ */
50
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
125
+static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a)
51
-{ \
126
+{
52
- if (CHECK(s, a)) { \
127
+ return (vext_check_isa_ill(s) &&
53
- uint32_t data = 0; \
128
+ vext_check_reg(s, a->rd, false) &&
54
- static gen_helper_gvec_4_ptr * const fns[4] = { \
129
+ vext_check_reg(s, a->rs2, false) &&
55
- gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
130
+ vext_check_reg(s, a->rs1, false) &&
56
- gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
131
+ ((a->rd != 0) || (s->lmul == 0)));
57
- }; \
132
+}
58
- TCGLabel *over = gen_new_label(); \
133
+
59
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
134
+GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check)
60
- tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
135
+GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check)
61
- \
136
+
62
- data = FIELD_DP32(data, VDATA, VM, a->vm); \
137
+/*
63
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
138
+ * For vmadc and vmsbc, an illegal instruction exception is raised if the
64
- data = FIELD_DP32(data, VDATA, VTA, s->vta); \
139
+ * destination vector register overlaps a source vector register group.
65
- data = \
140
+ */
66
- FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
141
+static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a)
67
- data = FIELD_DP32(data, VDATA, VMA, s->vma); \
142
+{
68
- tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
143
+ return (vext_check_isa_ill(s) &&
69
- vreg_ofs(s, a->rs1), \
144
+ vext_check_reg(s, a->rs2, false) &&
70
- vreg_ofs(s, a->rs2), cpu_env, \
145
+ vext_check_reg(s, a->rs1, false) &&
71
- s->cfg_ptr->vlen / 8, \
146
+ vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
72
- s->cfg_ptr->vlen / 8, data, \
147
+ vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul));
73
- fns[s->sew]); \
148
+}
74
- mark_vs_dirty(s); \
149
+
75
- gen_set_label(over); \
150
+GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check)
76
- return true; \
151
+GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check)
77
- } \
152
+
78
- return false; \
153
+static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a)
79
+#define GEN_OPIVV_TRANS(NAME, CHECK) \
154
+{
155
+ return (vext_check_isa_ill(s) &&
156
+ vext_check_reg(s, a->rd, false) &&
157
+ vext_check_reg(s, a->rs2, false) &&
158
+ ((a->rd != 0) || (s->lmul == 0)));
159
+}
160
+
161
+/* OPIVX without GVEC IR */
162
+#define GEN_OPIVX_TRANS(NAME, CHECK) \
163
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
80
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
164
+{ \
81
+{ \
165
+ if (CHECK(s, a)) { \
82
+ if (CHECK(s, a)) { \
166
+ static gen_helper_opivx * const fns[4] = { \
83
+ static gen_helper_gvec_4_ptr * const fns[4] = { \
167
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
84
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
168
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
85
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
169
+ }; \
86
+ }; \
170
+ \
87
+ return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
171
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
172
+ } \
88
+ } \
173
+ return false; \
89
+ return false; \
174
+}
175
+
176
+GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check)
177
+GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check)
178
+
179
+static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a)
180
+{
181
+ return (vext_check_isa_ill(s) &&
182
+ vext_check_reg(s, a->rs2, false) &&
183
+ vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul));
184
+}
185
+
186
+GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check)
187
+GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check)
188
+
189
+/* OPIVI without GVEC IR */
190
+#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \
191
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
192
+{ \
193
+ if (CHECK(s, a)) { \
194
+ static gen_helper_opivx * const fns[4] = { \
195
+ gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \
196
+ gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
197
+ }; \
198
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \
199
+ fns[s->sew], s, ZX); \
200
+ } \
201
+ return false; \
202
+}
203
+
204
+GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check)
205
+GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check)
206
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
207
index XXXXXXX..XXXXXXX 100644
208
--- a/target/riscv/vector_helper.c
209
+++ b/target/riscv/vector_helper.c
210
@@ -XXX,XX +XXX,XX @@ static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
211
vext_clear(cur, cnt, tot);
212
}
90
}
213
91
214
+static inline void vext_set_elem_mask(void *v0, int mlen, int index,
92
/*
215
+ uint8_t value)
216
+{
217
+ int idx = (index * mlen) / 64;
218
+ int pos = (index * mlen) % 64;
219
+ uint64_t old = ((uint64_t *)v0)[idx];
220
+ ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value);
221
+}
222
223
static inline int vext_elem_mask(void *v0, int mlen, int index)
224
{
225
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
226
GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
227
GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
228
GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)
229
+
230
+/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
231
+#define DO_VADC(N, M, C) (N + M + C)
232
+#define DO_VSBC(N, M, C) (N - M - C)
233
+
234
+#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \
235
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
236
+ CPURISCVState *env, uint32_t desc) \
237
+{ \
238
+ uint32_t mlen = vext_mlen(desc); \
239
+ uint32_t vl = env->vl; \
240
+ uint32_t esz = sizeof(ETYPE); \
241
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
242
+ uint32_t i; \
243
+ \
244
+ for (i = 0; i < vl; i++) { \
245
+ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
246
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
247
+ uint8_t carry = vext_elem_mask(v0, mlen, i); \
248
+ \
249
+ *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
250
+ } \
251
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
252
+}
253
+
254
+GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb)
255
+GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh)
256
+GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl)
257
+GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq)
258
+
259
+GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb)
260
+GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh)
261
+GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl)
262
+GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq)
263
+
264
+#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \
265
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
266
+ CPURISCVState *env, uint32_t desc) \
267
+{ \
268
+ uint32_t mlen = vext_mlen(desc); \
269
+ uint32_t vl = env->vl; \
270
+ uint32_t esz = sizeof(ETYPE); \
271
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
272
+ uint32_t i; \
273
+ \
274
+ for (i = 0; i < vl; i++) { \
275
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
276
+ uint8_t carry = vext_elem_mask(v0, mlen, i); \
277
+ \
278
+ *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
279
+ } \
280
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
281
+}
282
+
283
+GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb)
284
+GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh)
285
+GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl)
286
+GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq)
287
+
288
+GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb)
289
+GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh)
290
+GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl)
291
+GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq)
292
+
293
+#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
294
+ (__typeof(N))(N + M) < N)
295
+#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
296
+
297
+#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
298
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
299
+ CPURISCVState *env, uint32_t desc) \
300
+{ \
301
+ uint32_t mlen = vext_mlen(desc); \
302
+ uint32_t vl = env->vl; \
303
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
304
+ uint32_t i; \
305
+ \
306
+ for (i = 0; i < vl; i++) { \
307
+ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
308
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
309
+ uint8_t carry = vext_elem_mask(v0, mlen, i); \
310
+ \
311
+ vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\
312
+ } \
313
+ for (; i < vlmax; i++) { \
314
+ vext_set_elem_mask(vd, mlen, i, 0); \
315
+ } \
316
+}
317
+
318
+GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
319
+GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
320
+GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
321
+GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
322
+
323
+GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
324
+GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
325
+GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
326
+GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
327
+
328
+#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
329
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
330
+ void *vs2, CPURISCVState *env, uint32_t desc) \
331
+{ \
332
+ uint32_t mlen = vext_mlen(desc); \
333
+ uint32_t vl = env->vl; \
334
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
335
+ uint32_t i; \
336
+ \
337
+ for (i = 0; i < vl; i++) { \
338
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
339
+ uint8_t carry = vext_elem_mask(v0, mlen, i); \
340
+ \
341
+ vext_set_elem_mask(vd, mlen, i, \
342
+ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
343
+ } \
344
+ for (; i < vlmax; i++) { \
345
+ vext_set_elem_mask(vd, mlen, i, 0); \
346
+ } \
347
+}
348
+
349
+GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
350
+GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
351
+GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
352
+GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
353
+
354
+GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
355
+GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
356
+GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
357
+GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
358
--
93
--
359
2.27.0
94
2.41.0
360
361
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Remove the redundant "vl == 0" check: when vl == 0, the unsigned "vstart >= vl" comparison is always true, so that check already covers the vl == 0 case.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
5
Message-id: 20200623215920.2594-50-zhiwei_liu@c-sky.com
5
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
6
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
7
Signed-off-by: Max Chou <max.chou@sifive.com>
8
Acked-by: Alistair Francis <alistair.francis@wdc.com>
9
Message-ID: <20230711165917.2629866-4-max.chou@sifive.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
11
---
8
target/riscv/helper.h | 9 ++++++
12
target/riscv/insn_trans/trans_rvv.c.inc | 31 +------------------------
9
target/riscv/insn32.decode | 8 +++++
13
1 file changed, 1 insertion(+), 30 deletions(-)
10
target/riscv/insn_trans/trans_rvv.inc.c | 35 ++++++++++++++++++++++
11
target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++
12
4 files changed, 92 insertions(+)
13
14
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
17
--- a/target/riscv/insn_trans/trans_rvv.c.inc
17
+++ b/target/riscv/helper.h
18
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
19
20
TCGv_i32 desc;
20
DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
21
21
DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
22
TCGLabel *over = gen_new_label();
22
+
23
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
23
+DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32)
24
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
24
+DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32)
25
25
+DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32)
26
dest = tcg_temp_new_ptr();
26
+DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32)
27
@@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
27
+DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32)
28
TCGv_i32 desc;
28
+DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
29
29
+DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32)
30
TCGLabel *over = gen_new_label();
30
+DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
31
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
31
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
32
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
32
index XXXXXXX..XXXXXXX 100644
33
33
--- a/target/riscv/insn32.decode
34
dest = tcg_temp_new_ptr();
34
+++ b/target/riscv/insn32.decode
35
@@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
35
@@ -XXX,XX +XXX,XX @@ vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
36
TCGv_i32 desc;
36
vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
37
37
# Vector widening ordered and unordered float reduction sum
38
TCGLabel *over = gen_new_label();
38
vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm
39
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
39
+vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
40
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
40
+vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
41
41
+vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
42
dest = tcg_temp_new_ptr();
42
+vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
43
@@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
43
+vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
44
TCGv_i32 desc;
44
+vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
45
45
+vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
46
TCGLabel *over = gen_new_label();
46
+vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
47
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
47
48
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
48
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
49
49
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
50
dest = tcg_temp_new_ptr();
50
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
51
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
51
index XXXXXXX..XXXXXXX 100644
52
return false;
52
--- a/target/riscv/insn_trans/trans_rvv.inc.c
53
}
53
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
54
54
@@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
55
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
55
56
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
56
/* Vector Widening Floating-Point Reduction Instructions */
57
57
GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
58
if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
58
+
59
@@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
59
+/*
60
uint32_t data = 0;
60
+ *** Vector Mask Operations
61
61
+ */
62
TCGLabel *over = gen_new_label();
62
+
63
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
63
+/* Vector Mask-Register Logical Instructions */
64
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
64
+#define GEN_MM_TRANS(NAME) \
65
65
+static bool trans_##NAME(DisasContext *s, arg_r *a) \
66
dest = tcg_temp_new_ptr();
66
+{ \
67
@@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
67
+ if (vext_check_isa_ill(s)) { \
68
uint32_t data = 0;
68
+ uint32_t data = 0; \
69
69
+ gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \
70
TCGLabel *over = gen_new_label();
70
+ TCGLabel *over = gen_new_label(); \
71
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
71
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
72
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
72
+ \
73
73
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
74
dest = tcg_temp_new_ptr();
74
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
75
@@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
75
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
76
if (checkfn(s, a)) {
76
+ vreg_ofs(s, a->rs1), \
77
uint32_t data = 0;
77
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
78
TCGLabel *over = gen_new_label();
78
+ s->vlen / 8, data, fn); \
79
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
79
+ gen_set_label(over); \
80
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
80
+ return true; \
81
81
+ } \
82
data = FIELD_DP32(data, VDATA, VM, a->vm);
82
+ return false; \
83
@@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
83
+}
84
if (opiwv_widen_check(s, a)) {
84
+
85
uint32_t data = 0;
85
+GEN_MM_TRANS(vmand_mm)
86
TCGLabel *over = gen_new_label();
86
+GEN_MM_TRANS(vmnand_mm)
87
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
87
+GEN_MM_TRANS(vmandnot_mm)
88
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
88
+GEN_MM_TRANS(vmxor_mm)
89
89
+GEN_MM_TRANS(vmor_mm)
90
data = FIELD_DP32(data, VDATA, VM, a->vm);
90
+GEN_MM_TRANS(vmnor_mm)
91
@@ -XXX,XX +XXX,XX @@ static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm,
91
+GEN_MM_TRANS(vmornot_mm)
92
{
92
+GEN_MM_TRANS(vmxnor_mm)
93
uint32_t data = 0;
93
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
94
TCGLabel *over = gen_new_label();
94
index XXXXXXX..XXXXXXX 100644
95
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
95
--- a/target/riscv/vector_helper.c
96
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
96
+++ b/target/riscv/vector_helper.c
97
97
@@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
98
data = FIELD_DP32(data, VDATA, VM, vm);
98
*((uint64_t *)vd) = s1;
99
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
99
clearq(vd, 1, sizeof(uint64_t), tot);
100
gen_helper_##NAME##_w, \
100
}
101
}; \
101
+
102
TCGLabel *over = gen_new_label(); \
102
+/*
103
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
103
+ *** Vector Mask Operations
104
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
104
+ */
105
\
105
+/* Vector Mask-Register Logical Instructions */
106
data = FIELD_DP32(data, VDATA, VM, a->vm); \
106
+#define GEN_VEXT_MASK_VV(NAME, OP) \
107
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
107
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
108
gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
108
+ void *vs2, CPURISCVState *env, \
109
};
109
+ uint32_t desc) \
110
TCGLabel *over = gen_new_label();
110
+{ \
111
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
111
+ uint32_t mlen = vext_mlen(desc); \
112
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
112
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
113
113
+ uint32_t vl = env->vl; \
114
tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
114
+ uint32_t i; \
115
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
115
+ int a, b; \
116
vext_check_ss(s, a->rd, 0, 1)) {
116
+ \
117
TCGv s1;
117
+ for (i = 0; i < vl; i++) { \
118
TCGLabel *over = gen_new_label();
118
+ a = vext_elem_mask(vs1, mlen, i); \
119
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
119
+ b = vext_elem_mask(vs2, mlen, i); \
120
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
120
+ vext_set_elem_mask(vd, mlen, i, OP(b, a)); \
121
121
+ } \
122
s1 = get_gpr(s, a->rs1, EXT_SIGN);
122
+ for (; i < vlmax; i++) { \
123
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
123
+ vext_set_elem_mask(vd, mlen, i, 0); \
124
gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
124
+ } \
125
};
125
+}
126
TCGLabel *over = gen_new_label();
126
+
127
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
127
+#define DO_NAND(N, M) (!(N & M))
128
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
128
+#define DO_ANDNOT(N, M) (N & !M)
129
129
+#define DO_NOR(N, M) (!(N | M))
130
s1 = tcg_constant_i64(simm);
130
+#define DO_ORNOT(N, M) (N | !M)
131
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
131
+#define DO_XNOR(N, M) (!(N ^ M))
132
}; \
132
+
133
TCGLabel *over = gen_new_label(); \
133
+GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
134
gen_set_rm(s, RISCV_FRM_DYN); \
134
+GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
135
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
135
+GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
136
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
136
+GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
137
\
137
+GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
138
data = FIELD_DP32(data, VDATA, VM, a->vm); \
138
+GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
139
@@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
139
+GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
140
TCGv_i64 t1;
140
+GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
141
142
TCGLabel *over = gen_new_label();
143
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
144
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
145
146
dest = tcg_temp_new_ptr();
147
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
148
}; \
149
TCGLabel *over = gen_new_label(); \
150
gen_set_rm(s, RISCV_FRM_DYN); \
151
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
152
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
153
\
154
data = FIELD_DP32(data, VDATA, VM, a->vm); \
155
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
156
}; \
157
TCGLabel *over = gen_new_label(); \
158
gen_set_rm(s, RISCV_FRM_DYN); \
159
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
160
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
161
\
162
data = FIELD_DP32(data, VDATA, VM, a->vm); \
163
@@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
164
uint32_t data = 0;
165
TCGLabel *over = gen_new_label();
166
gen_set_rm_chkfrm(s, rm);
167
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
168
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
169
170
data = FIELD_DP32(data, VDATA, VM, a->vm);
171
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
172
gen_helper_vmv_v_x_d,
173
};
174
TCGLabel *over = gen_new_label();
175
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
176
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
177
178
t1 = tcg_temp_new_i64();
179
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
180
}; \
181
TCGLabel *over = gen_new_label(); \
182
gen_set_rm_chkfrm(s, FRM); \
183
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
184
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
185
\
186
data = FIELD_DP32(data, VDATA, VM, a->vm); \
187
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
188
}; \
189
TCGLabel *over = gen_new_label(); \
190
gen_set_rm(s, RISCV_FRM_DYN); \
191
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
192
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
193
\
194
data = FIELD_DP32(data, VDATA, VM, a->vm); \
195
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
196
}; \
197
TCGLabel *over = gen_new_label(); \
198
gen_set_rm_chkfrm(s, FRM); \
199
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
200
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
201
\
202
data = FIELD_DP32(data, VDATA, VM, a->vm); \
203
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
204
}; \
205
TCGLabel *over = gen_new_label(); \
206
gen_set_rm_chkfrm(s, FRM); \
207
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
208
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
209
\
210
data = FIELD_DP32(data, VDATA, VM, a->vm); \
211
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \
212
uint32_t data = 0; \
213
gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \
214
TCGLabel *over = gen_new_label(); \
215
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
216
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
217
\
218
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
219
@@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
220
require_vm(a->vm, a->rd)) {
221
uint32_t data = 0;
222
TCGLabel *over = gen_new_label();
223
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
224
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
225
226
data = FIELD_DP32(data, VDATA, VM, a->vm);
227
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
228
TCGv s1;
229
TCGLabel *over = gen_new_label();
230
231
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
232
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
233
234
t1 = tcg_temp_new_i64();
235
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
236
TCGv_i64 t1;
237
TCGLabel *over = gen_new_label();
238
239
- /* if vl == 0 or vstart >= vl, skip vector register write back */
240
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
241
+ /* if vstart >= vl, skip vector register write back */
242
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
243
244
/* NaN-box f[rs1] */
245
@@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
246
uint32_t data = 0;
247
gen_helper_gvec_3_ptr *fn;
248
TCGLabel *over = gen_new_label();
249
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
250
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
251
252
static gen_helper_gvec_3_ptr * const fns[6][4] = {
141
--
253
--
142
2.27.0
254
2.41.0
143
144
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
2
2
3
vsetvl and vsetvli are the two configuration instructions for vl and vtype. TB flags
3
This commit adds support for the Zvbc vector-crypto extension, which
4
should be updated after these configuration instructions. The (vill, lmul, sew) fields of vtype
4
consists of the following instructions:
5
and the bit of (VSTART == 0 && VL == VLMAX) will be placed within tb_flags.
5
6
6
* vclmulh.[vx,vv]
7
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
7
* vclmul.[vx,vv]
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Translation functions are defined in
10
Message-id: 20200623215920.2594-5-zhiwei_liu@c-sky.com
10
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
11
`target/riscv/vcrypto_helper.c`.
12
13
Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
14
Co-authored-by: Max Chou <max.chou@sifive.com>
15
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
16
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
17
Signed-off-by: Max Chou <max.chou@sifive.com>
18
[max.chou@sifive.com: Exposed x-zvbc property]
19
Message-ID: <20230711165917.2629866-5-max.chou@sifive.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
20
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
---
21
---
13
target/riscv/cpu.h | 63 +++++++++++++++++---
22
target/riscv/cpu_cfg.h | 1 +
14
target/riscv/helper.h | 3 +
23
target/riscv/helper.h | 6 +++
15
target/riscv/insn32.decode | 5 ++
24
target/riscv/insn32.decode | 6 +++
16
target/riscv/insn_trans/trans_rvv.inc.c | 79 +++++++++++++++++++++++++
25
target/riscv/cpu.c | 9 ++++
17
target/riscv/translate.c | 17 +++++-
26
target/riscv/translate.c | 1 +
18
target/riscv/vector_helper.c | 53 +++++++++++++++++
27
target/riscv/vcrypto_helper.c | 59 ++++++++++++++++++++++
19
target/riscv/Makefile.objs | 2 +-
28
target/riscv/insn_trans/trans_rvvk.c.inc | 62 ++++++++++++++++++++++++
20
7 files changed, 210 insertions(+), 12 deletions(-)
29
target/riscv/meson.build | 3 +-
21
create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
30
8 files changed, 146 insertions(+), 1 deletion(-)
22
create mode 100644 target/riscv/vector_helper.c
31
create mode 100644 target/riscv/vcrypto_helper.c
23
32
create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc
24
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
33
25
index XXXXXXX..XXXXXXX 100644
34
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
26
--- a/target/riscv/cpu.h
35
index XXXXXXX..XXXXXXX 100644
27
+++ b/target/riscv/cpu.h
36
--- a/target/riscv/cpu_cfg.h
28
@@ -XXX,XX +XXX,XX @@
37
+++ b/target/riscv/cpu_cfg.h
29
#define RISCV_CPU_H
38
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
30
39
bool ext_zve32f;
31
#include "hw/core/cpu.h"
40
bool ext_zve64f;
32
+#include "hw/registerfields.h"
41
bool ext_zve64d;
33
#include "exec/cpu-defs.h"
42
+ bool ext_zvbc;
34
#include "fpu/softfloat-types.h"
43
bool ext_zmmul;
35
44
bool ext_zvfbfmin;
36
@@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState;
45
bool ext_zvfbfwma;
37
38
#define RV_VLEN_MAX 512
39
40
+FIELD(VTYPE, VLMUL, 0, 2)
41
+FIELD(VTYPE, VSEW, 2, 3)
42
+FIELD(VTYPE, VEDIV, 5, 2)
43
+FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9)
44
+FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 2, 1)
45
+
46
struct CPURISCVState {
47
target_ulong gpr[32];
48
uint64_t fpr[32]; /* assume both F and D extensions */
49
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
50
#define TB_FLAGS_MMU_MASK 3
51
#define TB_FLAGS_MSTATUS_FS MSTATUS_FS
52
53
+typedef CPURISCVState CPUArchState;
54
+typedef RISCVCPU ArchCPU;
55
+#include "exec/cpu-all.h"
56
+
57
+FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1)
58
+FIELD(TB_FLAGS, LMUL, 3, 2)
59
+FIELD(TB_FLAGS, SEW, 5, 3)
60
+FIELD(TB_FLAGS, VILL, 8, 1)
61
+
62
+/*
63
+ * A simplification for VLMAX
64
+ * = (1 << LMUL) * VLEN / (8 * (1 << SEW))
65
+ * = (VLEN << LMUL) / (8 << SEW)
66
+ * = (VLEN << LMUL) >> (SEW + 3)
67
+ * = VLEN >> (SEW + 3 - LMUL)
68
+ */
69
+static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype)
70
+{
71
+ uint8_t sew, lmul;
72
+
73
+ sew = FIELD_EX64(vtype, VTYPE, VSEW);
74
+ lmul = FIELD_EX64(vtype, VTYPE, VLMUL);
75
+ return cpu->cfg.vlen >> (sew + 3 - lmul);
76
+}
77
+
78
static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
79
- target_ulong *cs_base, uint32_t *flags)
80
+ target_ulong *cs_base, uint32_t *pflags)
81
{
82
+ uint32_t flags = 0;
83
+
84
*pc = env->pc;
85
*cs_base = 0;
86
+
87
+ if (riscv_has_ext(env, RVV)) {
88
+ uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype);
89
+ bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl);
90
+ flags = FIELD_DP32(flags, TB_FLAGS, VILL,
91
+ FIELD_EX64(env->vtype, VTYPE, VILL));
92
+ flags = FIELD_DP32(flags, TB_FLAGS, SEW,
93
+ FIELD_EX64(env->vtype, VTYPE, VSEW));
94
+ flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
95
+ FIELD_EX64(env->vtype, VTYPE, VLMUL));
96
+ flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
97
+ } else {
98
+ flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
99
+ }
100
+
101
#ifdef CONFIG_USER_ONLY
102
- *flags = TB_FLAGS_MSTATUS_FS;
103
+ flags |= TB_FLAGS_MSTATUS_FS;
104
#else
105
- *flags = cpu_mmu_index(env, 0);
106
+ flags |= cpu_mmu_index(env, 0);
107
if (riscv_cpu_fp_enabled(env)) {
108
- *flags |= env->mstatus & MSTATUS_FS;
109
+ flags |= env->mstatus & MSTATUS_FS;
110
}
111
#endif
112
+ *pflags = flags;
113
}
114
115
int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value,
116
@@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops);
117
118
void riscv_cpu_register_gdb_regs_for_features(CPUState *cs);
119
120
-typedef CPURISCVState CPUArchState;
121
-typedef RISCVCPU ArchCPU;
122
-
123
-#include "exec/cpu-all.h"
124
-
125
#endif /* RISCV_CPU_H */
126
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
46
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
127
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
128
--- a/target/riscv/helper.h
48
--- a/target/riscv/helper.h
129
+++ b/target/riscv/helper.h
49
+++ b/target/riscv/helper.h
130
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(tlb_flush, void, env)
50
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
131
#ifndef CONFIG_USER_ONLY
51
132
DEF_HELPER_1(hyp_tlb_flush, void, env)
52
DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
133
#endif
53
DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
134
+
54
+
135
+/* Vector functions */
55
+/* Vector crypto functions */
136
+DEF_HELPER_3(vsetvl, tl, env, tl, tl)
56
+DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
57
+DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
58
+DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
59
+DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
137
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
60
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
138
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
139
--- a/target/riscv/insn32.decode
62
--- a/target/riscv/insn32.decode
140
+++ b/target/riscv/insn32.decode
63
+++ b/target/riscv/insn32.decode
141
@@ -XXX,XX +XXX,XX @@
64
@@ -XXX,XX +XXX,XX @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm
142
@r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
65
# *** Zvfbfwma Standard Extension ***
143
@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
66
vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm
144
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
67
vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm
145
+@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
68
+
146
69
+# *** Zvbc vector crypto extension ***
147
@hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1
70
+vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm
148
@hfence_vvma ....... ..... ..... ... ..... ....... %rs2 %rs1
71
+vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm
149
@@ -XXX,XX +XXX,XX @@ fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
72
+vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm
150
# *** RV32H Base Instruction Set ***
73
+vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm
151
hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma
74
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
152
hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma
75
index XXXXXXX..XXXXXXX 100644
153
+
76
--- a/target/riscv/cpu.c
154
+# *** RV32V Extension ***
77
+++ b/target/riscv/cpu.c
155
+vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
78
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
156
+vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
79
ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
157
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
80
ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
81
ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
82
+ ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
83
ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
84
ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
85
ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d),
86
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
87
return;
88
}
89
90
+ if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) {
91
+ error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions");
92
+ return;
93
+ }
94
+
95
if (cpu->cfg.ext_zk) {
96
cpu->cfg.ext_zkn = true;
97
cpu->cfg.ext_zkr = true;
98
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
99
DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
100
DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
101
102
+ /* Vector cryptography extensions */
103
+ DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
104
+
105
DEFINE_PROP_END_OF_LIST(),
106
};
107
108
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/riscv/translate.c
111
+++ b/target/riscv/translate.c
112
@@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
113
#include "insn_trans/trans_rvzfa.c.inc"
114
#include "insn_trans/trans_rvzfh.c.inc"
115
#include "insn_trans/trans_rvk.c.inc"
116
+#include "insn_trans/trans_rvvk.c.inc"
117
#include "insn_trans/trans_privileged.c.inc"
118
#include "insn_trans/trans_svinval.c.inc"
119
#include "insn_trans/trans_rvbf16.c.inc"
120
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
158
new file mode 100644
121
new file mode 100644
159
index XXXXXXX..XXXXXXX
122
index XXXXXXX..XXXXXXX
160
--- /dev/null
123
--- /dev/null
161
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
124
+++ b/target/riscv/vcrypto_helper.c
162
@@ -XXX,XX +XXX,XX @@
125
@@ -XXX,XX +XXX,XX @@
163
+/*
126
+/*
164
+ * RISC-V translation routines for the RVV Standard Extension.
127
+ * RISC-V Vector Crypto Extension Helpers for QEMU.
165
+ *
128
+ *
166
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
129
+ * Copyright (C) 2023 SiFive, Inc.
130
+ * Written by Codethink Ltd and SiFive.
167
+ *
131
+ *
168
+ * This program is free software; you can redistribute it and/or modify it
132
+ * This program is free software; you can redistribute it and/or modify it
169
+ * under the terms and conditions of the GNU General Public License,
133
+ * under the terms and conditions of the GNU General Public License,
170
+ * version 2 or later, as published by the Free Software Foundation.
134
+ * version 2 or later, as published by the Free Software Foundation.
171
+ *
135
+ *
...
...
176
+ *
140
+ *
177
+ * You should have received a copy of the GNU General Public License along with
141
+ * You should have received a copy of the GNU General Public License along with
178
+ * this program. If not, see <http://www.gnu.org/licenses/>.
142
+ * this program. If not, see <http://www.gnu.org/licenses/>.
179
+ */
143
+ */
180
+
144
+
181
+static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a)
145
+#include "qemu/osdep.h"
182
+{
146
+#include "qemu/host-utils.h"
183
+ TCGv s1, s2, dst;
147
+#include "qemu/bitops.h"
184
+
148
+#include "cpu.h"
185
+ if (!has_ext(ctx, RVV)) {
149
+#include "exec/memop.h"
186
+ return false;
150
+#include "exec/exec-all.h"
187
+ }
151
+#include "exec/helper-proto.h"
188
+
152
+#include "internals.h"
189
+ s2 = tcg_temp_new();
153
+#include "vector_internals.h"
190
+ dst = tcg_temp_new();
154
+
191
+
155
+static uint64_t clmul64(uint64_t y, uint64_t x)
192
+ /* Using x0 as the rs1 register specifier, encodes an infinite AVL */
156
+{
193
+ if (a->rs1 == 0) {
157
+ uint64_t result = 0;
194
+ /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
158
+ for (int j = 63; j >= 0; j--) {
195
+ s1 = tcg_const_tl(RV_VLEN_MAX);
159
+ if ((y >> j) & 1) {
196
+ } else {
160
+ result ^= (x << j);
197
+ s1 = tcg_temp_new();
161
+ }
198
+ gen_get_gpr(s1, a->rs1);
162
+ }
199
+ }
163
+ return result;
200
+ gen_get_gpr(s2, a->rs2);
164
+}
201
+ gen_helper_vsetvl(dst, cpu_env, s1, s2);
165
+
202
+ gen_set_gpr(a->rd, dst);
166
+static uint64_t clmulh64(uint64_t y, uint64_t x)
203
+ tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
167
+{
204
+ lookup_and_goto_ptr(ctx);
168
+ uint64_t result = 0;
205
+ ctx->base.is_jmp = DISAS_NORETURN;
169
+ for (int j = 63; j >= 1; j--) {
206
+
170
+ if ((y >> j) & 1) {
207
+ tcg_temp_free(s1);
171
+ result ^= (x >> (64 - j));
208
+ tcg_temp_free(s2);
172
+ }
209
+ tcg_temp_free(dst);
173
+ }
210
+ return true;
174
+ return result;
211
+}
175
+}
212
+
176
+
213
+static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a)
177
+RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64)
214
+{
178
+GEN_VEXT_VV(vclmul_vv, 8)
215
+ TCGv s1, s2, dst;
179
+RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64)
216
+
180
+GEN_VEXT_VX(vclmul_vx, 8)
217
+ if (!has_ext(ctx, RVV)) {
181
+RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64)
218
+ return false;
182
+GEN_VEXT_VV(vclmulh_vv, 8)
219
+ }
183
+RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64)
220
+
184
+GEN_VEXT_VX(vclmulh_vx, 8)
221
+ s2 = tcg_const_tl(a->zimm);
185
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
222
+ dst = tcg_temp_new();
223
+
224
+ /* Using x0 as the rs1 register specifier, encodes an infinite AVL */
225
+ if (a->rs1 == 0) {
226
+ /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
227
+ s1 = tcg_const_tl(RV_VLEN_MAX);
228
+ } else {
229
+ s1 = tcg_temp_new();
230
+ gen_get_gpr(s1, a->rs1);
231
+ }
232
+ gen_helper_vsetvl(dst, cpu_env, s1, s2);
233
+ gen_set_gpr(a->rd, dst);
234
+ gen_goto_tb(ctx, 0, ctx->pc_succ_insn);
235
+ ctx->base.is_jmp = DISAS_NORETURN;
236
+
237
+ tcg_temp_free(s1);
238
+ tcg_temp_free(s2);
239
+ tcg_temp_free(dst);
240
+ return true;
241
+}
242
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
243
index XXXXXXX..XXXXXXX 100644
244
--- a/target/riscv/translate.c
245
+++ b/target/riscv/translate.c
246
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
247
to reset this known value. */
248
int frm;
249
bool ext_ifencei;
250
+ /* vector extension */
251
+ bool vill;
252
+ uint8_t lmul;
253
+ uint8_t sew;
254
+ uint16_t vlen;
255
+ bool vl_eq_vlmax;
256
} DisasContext;
257
258
#ifdef TARGET_RISCV64
259
@@ -XXX,XX +XXX,XX @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
260
#include "insn_trans/trans_rvf.inc.c"
261
#include "insn_trans/trans_rvd.inc.c"
262
#include "insn_trans/trans_rvh.inc.c"
263
+#include "insn_trans/trans_rvv.inc.c"
264
#include "insn_trans/trans_privileged.inc.c"
265
266
/* Include the auto-generated decoder for 16 bit insn */
267
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
268
DisasContext *ctx = container_of(dcbase, DisasContext, base);
269
CPURISCVState *env = cs->env_ptr;
270
RISCVCPU *cpu = RISCV_CPU(cs);
271
+ uint32_t tb_flags = ctx->base.tb->flags;
272
273
ctx->pc_succ_insn = ctx->base.pc_first;
274
- ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK;
275
- ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS;
276
+ ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK;
277
+ ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
278
ctx->priv_ver = env->priv_ver;
279
#if !defined(CONFIG_USER_ONLY)
280
if (riscv_has_ext(env, RVH)) {
281
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
282
ctx->misa = env->misa;
283
ctx->frm = -1; /* unknown rounding mode */
284
ctx->ext_ifencei = cpu->cfg.ext_ifencei;
285
+ ctx->vlen = cpu->cfg.vlen;
286
+ ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
287
+ ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
288
+ ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
289
+ ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
290
}
291
292
static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
293
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
294
new file mode 100644
186
new file mode 100644
295
index XXXXXXX..XXXXXXX
187
index XXXXXXX..XXXXXXX
296
--- /dev/null
188
--- /dev/null
297
+++ b/target/riscv/vector_helper.c
189
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
298
@@ -XXX,XX +XXX,XX @@
190
@@ -XXX,XX +XXX,XX @@
299
+/*
191
+/*
300
+ * RISC-V Vector Extension Helpers for QEMU.
192
+ * RISC-V translation routines for the vector crypto extension.
301
+ *
193
+ *
302
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
194
+ * Copyright (C) 2023 SiFive, Inc.
195
+ * Written by Codethink Ltd and SiFive.
303
+ *
196
+ *
304
+ * This program is free software; you can redistribute it and/or modify it
197
+ * This program is free software; you can redistribute it and/or modify it
305
+ * under the terms and conditions of the GNU General Public License,
198
+ * under the terms and conditions of the GNU General Public License,
306
+ * version 2 or later, as published by the Free Software Foundation.
199
+ * version 2 or later, as published by the Free Software Foundation.
307
+ *
200
+ *
...
...
312
+ *
205
+ *
313
+ * You should have received a copy of the GNU General Public License along with
206
+ * You should have received a copy of the GNU General Public License along with
314
+ * this program. If not, see <http://www.gnu.org/licenses/>.
207
+ * this program. If not, see <http://www.gnu.org/licenses/>.
315
+ */
208
+ */
316
+
209
+
317
+#include "qemu/osdep.h"
210
+/*
318
+#include "cpu.h"
211
+ * Zvbc
319
+#include "exec/exec-all.h"
212
+ */
320
+#include "exec/helper-proto.h"
213
+
321
+#include <math.h>
214
+#define GEN_VV_MASKED_TRANS(NAME, CHECK) \
322
+
215
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
323
+target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
216
+ { \
324
+ target_ulong s2)
217
+ if (CHECK(s, a)) { \
325
+{
218
+ return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \
326
+ int vlmax, vl;
219
+ gen_helper_##NAME, s); \
327
+ RISCVCPU *cpu = env_archcpu(env);
220
+ } \
328
+ uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
221
+ return false; \
329
+ uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
222
+ }
330
+ bool vill = FIELD_EX64(s2, VTYPE, VILL);
223
+
331
+ target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
224
+static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a)
332
+
225
+{
333
+ if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
226
+ return opivv_check(s, a) &&
334
+ /* only set vill bit. */
227
+ s->cfg_ptr->ext_zvbc == true &&
335
+ env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
228
+ s->sew == MO_64;
336
+ env->vl = 0;
229
+}
337
+ env->vstart = 0;
230
+
338
+ return 0;
231
+GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check)
339
+ }
232
+GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check)
340
+
233
+
341
+ vlmax = vext_get_vlmax(cpu, s2);
234
+#define GEN_VX_MASKED_TRANS(NAME, CHECK) \
342
+ if (s1 <= vlmax) {
235
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
343
+ vl = s1;
236
+ { \
344
+ } else {
237
+ if (CHECK(s, a)) { \
345
+ vl = vlmax;
238
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \
346
+ }
239
+ gen_helper_##NAME, s); \
347
+ env->vl = vl;
240
+ } \
348
+ env->vtype = s2;
241
+ return false; \
349
+ env->vstart = 0;
242
+ }
350
+ return vl;
243
+
351
+}
244
+static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
352
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
245
+{
353
index XXXXXXX..XXXXXXX 100644
246
+ return opivx_check(s, a) &&
354
--- a/target/riscv/Makefile.objs
247
+ s->cfg_ptr->ext_zvbc == true &&
355
+++ b/target/riscv/Makefile.objs
248
+ s->sew == MO_64;
356
@@ -XXX,XX +XXX,XX @@
249
+}
357
-obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o
250
+
358
+obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o
251
+GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
359
obj-$(CONFIG_SOFTMMU) += pmp.o
252
+GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
360
253
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
361
ifeq ($(CONFIG_SOFTMMU),y)
254
index XXXXXXX..XXXXXXX 100644
255
--- a/target/riscv/meson.build
256
+++ b/target/riscv/meson.build
257
@@ -XXX,XX +XXX,XX @@ riscv_ss.add(files(
258
'translate.c',
259
'm128_helper.c',
260
'crypto_helper.c',
261
- 'zce_helper.c'
262
+ 'zce_helper.c',
263
+ 'vcrypto_helper.c'
264
))
265
riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c'))
266
362
--
267
--
363
2.27.0
268
2.41.0
364
365
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Move the checks out of `do_opiv{v,x,i}_gvec{,_shift}` functions
4
and into the corresponding macros. This enables the functions to be
5
reused in subsequent commits without check duplication.
6
7
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
6
Message-id: 20200623215920.2594-49-zhiwei_liu@c-sky.com
10
Signed-off-by: Max Chou <max.chou@sifive.com>
11
Message-ID: <20230711165917.2629866-6-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
13
---
9
target/riscv/helper.h | 3 ++
14
target/riscv/insn_trans/trans_rvv.c.inc | 28 +++++++++++--------------
10
target/riscv/insn32.decode | 2 ++
15
1 file changed, 12 insertions(+), 16 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 3 ++
12
target/riscv/vector_helper.c | 46 +++++++++++++++++++++++++
13
4 files changed, 54 insertions(+)
14
16
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
17
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
19
--- a/target/riscv/insn_trans/trans_rvv.c.inc
18
+++ b/target/riscv/helper.h
20
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
21
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
20
DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
22
gen_helper_gvec_4_ptr *fn)
21
DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
23
{
22
DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
24
TCGLabel *over = gen_new_label();
23
+
25
- if (!opivv_check(s, a)) {
24
+DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
26
- return false;
25
+DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
27
- }
26
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
28
27
index XXXXXXX..XXXXXXX 100644
29
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
28
--- a/target/riscv/insn32.decode
30
29
+++ b/target/riscv/insn32.decode
31
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
30
@@ -XXX,XX +XXX,XX @@ vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
32
gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
31
vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm
33
gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
32
vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
34
}; \
33
vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
35
+ if (!opivv_check(s, a)) { \
34
+# Vector widening ordered and unordered float reduction sum
36
+ return false; \
35
+vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm
37
+ } \
36
38
return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
37
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
39
}
38
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
40
39
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
41
@@ -XXX,XX +XXX,XX @@ static inline bool
40
index XXXXXXX..XXXXXXX 100644
42
do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
41
--- a/target/riscv/insn_trans/trans_rvv.inc.c
43
gen_helper_opivx *fn)
42
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
44
{
43
@@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check)
45
- if (!opivx_check(s, a)) {
44
GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
46
- return false;
45
GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
47
- }
46
GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
48
-
47
+
49
if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
48
+/* Vector Widening Floating-Point Reduction Instructions */
50
TCGv_i64 src1 = tcg_temp_new_i64();
49
+GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
51
50
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
52
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
51
index XXXXXXX..XXXXXXX 100644
53
gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
52
--- a/target/riscv/vector_helper.c
54
gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
53
+++ b/target/riscv/vector_helper.c
55
}; \
54
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq)
56
+ if (!opivx_check(s, a)) { \
55
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh)
57
+ return false; \
56
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl)
58
+ } \
57
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq)
59
return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
58
+
60
}
59
+/* Vector Widening Floating-Point Reduction Instructions */
61
60
+/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
62
@@ -XXX,XX +XXX,XX @@ static inline bool
61
+void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
63
do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
62
+ void *vs2, CPURISCVState *env, uint32_t desc)
64
gen_helper_opivx *fn, imm_mode_t imm_mode)
63
+{
65
{
64
+ uint32_t mlen = vext_mlen(desc);
66
- if (!opivx_check(s, a)) {
65
+ uint32_t vm = vext_vm(desc);
67
- return false;
66
+ uint32_t vl = env->vl;
68
- }
67
+ uint32_t i;
69
-
68
+ uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
70
if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
69
+ uint32_t s1 = *((uint32_t *)vs1 + H4(0));
71
gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
70
+
72
extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
71
+ for (i = 0; i < vl; i++) {
73
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
72
+ uint16_t s2 = *((uint16_t *)vs2 + H2(i));
74
gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \
73
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
75
gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
74
+ continue;
76
}; \
75
+ }
77
+ if (!opivx_check(s, a)) { \
76
+ s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
78
+ return false; \
77
+ &env->fp_status);
79
+ } \
78
+ }
80
return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \
79
+ *((uint32_t *)vd + H4(0)) = s1;
81
fns[s->sew], IMM_MODE); \
80
+ clearl(vd, 1, sizeof(uint32_t), tot);
82
}
81
+}
83
@@ -XXX,XX +XXX,XX @@ static inline bool
82
+
84
do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
83
+void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
85
gen_helper_opivx *fn)
84
+ void *vs2, CPURISCVState *env, uint32_t desc)
86
{
85
+{
87
- if (!opivx_check(s, a)) {
86
+ uint32_t mlen = vext_mlen(desc);
88
- return false;
87
+ uint32_t vm = vext_vm(desc);
89
- }
88
+ uint32_t vl = env->vl;
90
-
89
+ uint32_t i;
91
if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
90
+ uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
92
TCGv_i32 src1 = tcg_temp_new_i32();
91
+ uint64_t s1 = *((uint64_t *)vs1);
93
92
+
94
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
93
+ for (i = 0; i < vl; i++) {
95
gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
94
+ uint32_t s2 = *((uint32_t *)vs2 + H4(i));
96
gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
95
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
97
}; \
96
+ continue;
98
- \
97
+ }
99
+ if (!opivx_check(s, a)) { \
98
+ s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
100
+ return false; \
99
+ &env->fp_status);
101
+ } \
100
+ }
102
return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
101
+ *((uint64_t *)vd) = s1;
103
}
102
+ clearq(vd, 1, sizeof(uint64_t), tot);
104
103
+}
104
--
105
--
105
2.27.0
106
2.41.0
106
107
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Dickon Hood <dickon.hood@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Zvbb (implemented in a later commit) has a widening instruction, which
4
requires an extra check on the enabled extensions. Refactor
5
GEN_OPIVX_WIDEN_TRANS() to take a check function to avoid reimplementing
6
it.
7
8
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
10
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
6
Message-id: 20200623215920.2594-15-zhiwei_liu@c-sky.com
11
Signed-off-by: Max Chou <max.chou@sifive.com>
12
Message-ID: <20230711165917.2629866-7-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
14
---
9
target/riscv/helper.h | 25 ++++++++
15
target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++--------------
10
target/riscv/insn32.decode | 9 +++
16
1 file changed, 23 insertions(+), 29 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 52 ++++++++++++++++
12
target/riscv/vector_helper.c | 79 +++++++++++++++++++++++++
13
4 files changed, 165 insertions(+)
14
17
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
18
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
16
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
20
--- a/target/riscv/insn_trans/trans_rvv.c.inc
18
+++ b/target/riscv/helper.h
21
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32)
22
@@ -XXX,XX +XXX,XX @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
20
DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32)
23
vext_check_ds(s, a->rd, a->rs2, a->vm);
21
DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32)
24
}
22
DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32)
25
23
+
26
-static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
24
+DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
27
- gen_helper_opivx *fn)
25
+DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
-{
26
+DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
- if (opivx_widen_check(s, a)) {
27
+DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
- return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
28
+DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
31
- }
29
+DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
32
- return false;
30
+DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
33
-}
31
+DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
34
-
32
+DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
35
-#define GEN_OPIVX_WIDEN_TRANS(NAME) \
33
+DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
36
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
34
+DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
37
-{ \
35
+DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
38
- static gen_helper_opivx * const fns[3] = { \
36
+DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32)
39
- gen_helper_##NAME##_b, \
37
+DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32)
40
- gen_helper_##NAME##_h, \
38
+DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32)
41
- gen_helper_##NAME##_w \
39
+DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32)
42
- }; \
40
+DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32)
43
- return do_opivx_widen(s, a, fns[s->sew]); \
41
+DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32)
44
+#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \
42
+DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/riscv/insn32.decode
51
+++ b/target/riscv/insn32.decode
52
@@ -XXX,XX +XXX,XX @@ vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
53
vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
54
vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
55
vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
56
+vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm
57
+vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm
58
+vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm
59
+vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm
60
+vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm
61
+vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
62
+vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
63
+vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
64
+vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
65
66
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
67
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
68
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/riscv/insn_trans/trans_rvv.inc.c
71
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
72
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vxor_vx, xors)
73
GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi)
74
GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori)
75
GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori)
76
+
77
+/* Vector Single-Width Bit Shift Instructions */
78
+GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv)
79
+GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv)
80
+GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv)
81
+
82
+typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32,
83
+ uint32_t, uint32_t);
84
+
85
+static inline bool
86
+do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
87
+ gen_helper_opivx *fn)
88
+{
89
+ if (!opivx_check(s, a)) {
90
+ return false;
91
+ }
92
+
93
+ if (a->vm && s->vl_eq_vlmax) {
94
+ TCGv_i32 src1 = tcg_temp_new_i32();
95
+ TCGv tmp = tcg_temp_new();
96
+
97
+ gen_get_gpr(tmp, a->rs1);
98
+ tcg_gen_trunc_tl_i32(src1, tmp);
99
+ tcg_gen_extract_i32(src1, src1, 0, s->sew + 3);
100
+ gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
101
+ src1, MAXSZ(s), MAXSZ(s));
102
+
103
+ tcg_temp_free_i32(src1);
104
+ tcg_temp_free(tmp);
105
+ return true;
106
+ }
107
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
108
+}
109
+
110
+#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \
111
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
45
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
112
+{ \
46
+{ \
113
+ static gen_helper_opivx * const fns[4] = { \
47
+ if (CHECK(s, a)) { \
114
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
48
+ static gen_helper_opivx * const fns[3] = { \
115
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
49
+ gen_helper_##NAME##_b, \
116
+ }; \
50
+ gen_helper_##NAME##_h, \
117
+ \
51
+ gen_helper_##NAME##_w \
118
+ return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
52
+ }; \
119
+}
53
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \
120
+
121
+GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls)
122
+GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs)
123
+GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars)
124
+
125
+GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli)
126
+GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri)
127
+GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari)
128
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/riscv/vector_helper.c
131
+++ b/target/riscv/vector_helper.c
132
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb)
133
GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh)
134
GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl)
135
GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq)
136
+
137
+/* Vector Single-Width Bit Shift Instructions */
138
+#define DO_SLL(N, M) (N << (M))
139
+#define DO_SRL(N, M) (N >> (M))
140
+
141
+/* generate the helpers for shift instructions with two vector operators */
142
+#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \
143
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
144
+ void *vs2, CPURISCVState *env, uint32_t desc) \
145
+{ \
146
+ uint32_t mlen = vext_mlen(desc); \
147
+ uint32_t vm = vext_vm(desc); \
148
+ uint32_t vl = env->vl; \
149
+ uint32_t esz = sizeof(TS1); \
150
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
151
+ uint32_t i; \
152
+ \
153
+ for (i = 0; i < vl; i++) { \
154
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
155
+ continue; \
156
+ } \
157
+ TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
158
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
159
+ *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
160
+ } \
54
+ } \
161
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
55
+ return false; \
162
+}
56
}
163
+
57
164
+GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb)
58
-GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
165
+GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh)
59
-GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
166
+GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl)
60
-GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
167
+GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq)
61
-GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
168
+
62
+GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check)
169
+GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
63
+GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check)
170
+GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
64
+GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check)
171
+GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
65
+GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check)
172
+GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
66
173
+
67
/* WIDEN OPIVV with WIDEN */
174
+GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
68
static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
175
+GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
69
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check)
176
+GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
70
GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
177
+GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
71
GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
178
+
72
GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
179
+/* generate the helpers for shift instructions with one vector and one scalar */
73
-GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
180
+#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \
74
-GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
181
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
75
-GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
182
+ void *vs2, CPURISCVState *env, uint32_t desc) \
76
+GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check)
183
+{ \
77
+GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check)
184
+ uint32_t mlen = vext_mlen(desc); \
78
+GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check)
185
+ uint32_t vm = vext_vm(desc); \
79
186
+ uint32_t vl = env->vl; \
80
/* Vector Single-Width Integer Multiply-Add Instructions */
187
+ uint32_t esz = sizeof(TD); \
81
GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
188
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
82
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
189
+ uint32_t i; \
83
GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
190
+ \
84
GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
191
+ for (i = 0; i < vl; i++) { \
85
GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
192
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
86
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
193
+ continue; \
87
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
194
+ } \
88
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
195
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
89
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
196
+ *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
90
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
197
+ } \
91
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
198
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
92
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
199
+}
93
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)
200
+
94
201
+GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb)
95
/* Vector Integer Merge and Move Instructions */
202
+GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh)
96
static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
203
+GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl)
204
+GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq)
205
+
206
+GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
207
+GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
208
+GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
209
+GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
210
+
211
+GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
212
+GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
213
+GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
214
+GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
215
--
97
--
216
2.27.0
98
2.41.0
217
218
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Move some macros out of `vector_helper` and into `vector_internals`.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
This ensures they can be used by both vector and vector-crypto helpers
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
(the latter implemented in subsequent commits).
6
Message-id: 20200623215920.2594-41-zhiwei_liu@c-sky.com
6
7
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
8
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
9
Signed-off-by: Max Chou <max.chou@sifive.com>
10
Message-ID: <20230711165917.2629866-8-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
12
---
9
target/riscv/helper.h | 4 ++
13
target/riscv/vector_internals.h | 46 +++++++++++++++++++++++++++++++++
10
target/riscv/internals.h | 5 ++
14
target/riscv/vector_helper.c | 42 ------------------------------
11
target/riscv/insn32.decode | 1 +
15
2 files changed, 46 insertions(+), 42 deletions(-)
12
target/riscv/fpu_helper.c | 33 +--------
13
target/riscv/insn_trans/trans_rvv.inc.c | 3 +
14
target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++
15
6 files changed, 107 insertions(+), 30 deletions(-)
16
16
17
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
17
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/riscv/helper.h
19
--- a/target/riscv/vector_internals.h
20
+++ b/target/riscv/helper.h
20
+++ b/target/riscv/vector_internals.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
21
@@ -XXX,XX +XXX,XX @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
22
DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32)
22
/* expand macro args before macro */
23
DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32)
23
#define RVVCALL(macro, ...) macro(__VA_ARGS__)
24
DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32)
24
25
+/* (TD, T2, TX2) */
26
+#define OP_UU_B uint8_t, uint8_t, uint8_t
27
+#define OP_UU_H uint16_t, uint16_t, uint16_t
28
+#define OP_UU_W uint32_t, uint32_t, uint32_t
29
+#define OP_UU_D uint64_t, uint64_t, uint64_t
25
+
30
+
26
+DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32)
31
/* (TD, T1, T2, TX1, TX2) */
27
+DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32)
32
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
28
+DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32)
33
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
29
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
34
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
30
index XXXXXXX..XXXXXXX 100644
35
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
31
--- a/target/riscv/internals.h
36
32
+++ b/target/riscv/internals.h
33
@@ -XXX,XX +XXX,XX @@ FIELD(VDATA, VM, 8, 1)
34
FIELD(VDATA, LMUL, 9, 2)
35
FIELD(VDATA, NF, 11, 4)
36
FIELD(VDATA, WD, 11, 1)
37
+
38
+/* float point classify helpers */
39
+target_ulong fclass_h(uint64_t frs1);
40
+target_ulong fclass_s(uint64_t frs1);
41
+target_ulong fclass_d(uint64_t frs1);
42
#endif
43
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/riscv/insn32.decode
46
+++ b/target/riscv/insn32.decode
47
@@ -XXX,XX +XXX,XX @@ vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
48
vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
49
vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
50
vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
51
+vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
52
53
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
54
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
55
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/riscv/fpu_helper.c
58
+++ b/target/riscv/fpu_helper.c
59
@@ -XXX,XX +XXX,XX @@
60
#include "exec/exec-all.h"
61
#include "exec/helper-proto.h"
62
#include "fpu/softfloat.h"
63
+#include "internals.h"
64
65
target_ulong riscv_cpu_get_fflags(CPURISCVState *env)
66
{
67
@@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1)
68
69
target_ulong helper_fclass_s(uint64_t frs1)
70
{
71
- float32 f = frs1;
72
- bool sign = float32_is_neg(f);
73
-
74
- if (float32_is_infinity(f)) {
75
- return sign ? 1 << 0 : 1 << 7;
76
- } else if (float32_is_zero(f)) {
77
- return sign ? 1 << 3 : 1 << 4;
78
- } else if (float32_is_zero_or_denormal(f)) {
79
- return sign ? 1 << 2 : 1 << 5;
80
- } else if (float32_is_any_nan(f)) {
81
- float_status s = { }; /* for snan_bit_is_one */
82
- return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
83
- } else {
84
- return sign ? 1 << 1 : 1 << 6;
85
- }
86
+ return fclass_s(frs1);
87
}
88
89
uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
90
@@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1)
91
92
target_ulong helper_fclass_d(uint64_t frs1)
93
{
94
- float64 f = frs1;
95
- bool sign = float64_is_neg(f);
96
-
97
- if (float64_is_infinity(f)) {
98
- return sign ? 1 << 0 : 1 << 7;
99
- } else if (float64_is_zero(f)) {
100
- return sign ? 1 << 3 : 1 << 4;
101
- } else if (float64_is_zero_or_denormal(f)) {
102
- return sign ? 1 << 2 : 1 << 5;
103
- } else if (float64_is_any_nan(f)) {
104
- float_status s = { }; /* for snan_bit_is_one */
105
- return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
106
- } else {
107
- return sign ? 1 << 1 : 1 << 6;
108
- }
109
+ return fclass_d(frs1);
110
}
111
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/target/riscv/insn_trans/trans_rvv.inc.c
114
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
115
@@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
116
GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
117
GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)
118
GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)
119
+
120
+/* Vector Floating-Point Classify Instruction */
121
+GEN_OPFV_TRANS(vfclass_v, opfv_check)
122
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/target/riscv/vector_helper.c
125
+++ b/target/riscv/vector_helper.c
126
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
127
GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
128
GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
129
GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
130
+
131
+/* Vector Floating-Point Classify Instruction */
132
+#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
37
+#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
133
+static void do_##NAME(void *vd, void *vs2, int i) \
38
+static void do_##NAME(void *vd, void *vs2, int i) \
134
+{ \
39
+{ \
135
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
40
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
136
+ *((TD *)vd + HD(i)) = OP(s2); \
41
+ *((TD *)vd + HD(i)) = OP(s2); \
137
+}
42
+}
138
+
43
+
139
+#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \
44
+#define GEN_VEXT_V(NAME, ESZ) \
140
+void HELPER(NAME)(void *vd, void *v0, void *vs2, \
45
+void HELPER(NAME)(void *vd, void *v0, void *vs2, \
141
+ CPURISCVState *env, uint32_t desc) \
46
+ CPURISCVState *env, uint32_t desc) \
142
+{ \
47
+{ \
143
+ uint32_t vlmax = vext_maxsz(desc) / ESZ; \
144
+ uint32_t mlen = vext_mlen(desc); \
145
+ uint32_t vm = vext_vm(desc); \
48
+ uint32_t vm = vext_vm(desc); \
146
+ uint32_t vl = env->vl; \
49
+ uint32_t vl = env->vl; \
50
+ uint32_t total_elems = \
51
+ vext_get_total_elems(env, desc, ESZ); \
52
+ uint32_t vta = vext_vta(desc); \
53
+ uint32_t vma = vext_vma(desc); \
147
+ uint32_t i; \
54
+ uint32_t i; \
148
+ \
55
+ \
149
+ for (i = 0; i < vl; i++) { \
56
+ for (i = env->vstart; i < vl; i++) { \
150
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
57
+ if (!vm && !vext_elem_mask(v0, i)) { \
58
+ /* set masked-off elements to 1s */ \
59
+ vext_set_elems_1s(vd, vma, i * ESZ, \
60
+ (i + 1) * ESZ); \
151
+ continue; \
61
+ continue; \
152
+ } \
62
+ } \
153
+ do_##NAME(vd, vs2, i); \
63
+ do_##NAME(vd, vs2, i); \
154
+ } \
64
+ } \
155
+ CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
65
+ env->vstart = 0; \
66
+ /* set tail elements to 1s */ \
67
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
68
+ total_elems * ESZ); \
156
+}
69
+}
157
+
70
+
158
+target_ulong fclass_h(uint64_t frs1)
71
/* operation of two vector elements */
159
+{
72
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
160
+ float16 f = frs1;
73
161
+ bool sign = float16_is_neg(f);
74
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
75
do_##NAME, ESZ); \
76
}
77
78
+/* Three of the widening shortening macros: */
79
+/* (TD, T1, T2, TX1, TX2) */
80
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
81
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
82
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
162
+
83
+
163
+ if (float16_is_infinity(f)) {
84
#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
164
+ return sign ? 1 << 0 : 1 << 7;
85
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
165
+ } else if (float16_is_zero(f)) {
86
index XXXXXXX..XXXXXXX 100644
166
+ return sign ? 1 << 3 : 1 << 4;
87
--- a/target/riscv/vector_helper.c
167
+ } else if (float16_is_zero_or_denormal(f)) {
88
+++ b/target/riscv/vector_helper.c
168
+ return sign ? 1 << 2 : 1 << 5;
89
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
169
+ } else if (float16_is_any_nan(f)) {
90
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
170
+ float_status s = { }; /* for snan_bit_is_one */
91
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
171
+ return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
92
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
172
+ } else {
93
-#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
173
+ return sign ? 1 << 1 : 1 << 6;
94
-#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
174
+ }
95
-#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
175
+}
96
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
176
+
97
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
177
+target_ulong fclass_s(uint64_t frs1)
98
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
178
+{
99
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4)
179
+ float32 f = frs1;
100
GEN_VEXT_VF(vfwnmsac_vf_w, 8)
180
+ bool sign = float32_is_neg(f);
101
181
+
102
/* Vector Floating-Point Square-Root Instruction */
182
+ if (float32_is_infinity(f)) {
103
-/* (TD, T2, TX2) */
183
+ return sign ? 1 << 0 : 1 << 7;
104
-#define OP_UU_H uint16_t, uint16_t, uint16_t
184
+ } else if (float32_is_zero(f)) {
105
-#define OP_UU_W uint32_t, uint32_t, uint32_t
185
+ return sign ? 1 << 3 : 1 << 4;
106
-#define OP_UU_D uint64_t, uint64_t, uint64_t
186
+ } else if (float32_is_zero_or_denormal(f)) {
107
-
187
+ return sign ? 1 << 2 : 1 << 5;
108
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
188
+ } else if (float32_is_any_nan(f)) {
109
static void do_##NAME(void *vd, void *vs2, int i, \
189
+ float_status s = { }; /* for snan_bit_is_one */
110
CPURISCVState *env) \
190
+ return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
111
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
191
+ } else {
112
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
192
+ return sign ? 1 << 1 : 1 << 6;
113
193
+ }
114
/* Vector Floating-Point Classify Instruction */
194
+}
115
-#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
195
+
116
-static void do_##NAME(void *vd, void *vs2, int i) \
196
+target_ulong fclass_d(uint64_t frs1)
117
-{ \
197
+{
118
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
198
+ float64 f = frs1;
119
- *((TD *)vd + HD(i)) = OP(s2); \
199
+ bool sign = float64_is_neg(f);
120
-}
200
+
121
-
201
+ if (float64_is_infinity(f)) {
122
-#define GEN_VEXT_V(NAME, ESZ) \
202
+ return sign ? 1 << 0 : 1 << 7;
123
-void HELPER(NAME)(void *vd, void *v0, void *vs2, \
203
+ } else if (float64_is_zero(f)) {
124
- CPURISCVState *env, uint32_t desc) \
204
+ return sign ? 1 << 3 : 1 << 4;
125
-{ \
205
+ } else if (float64_is_zero_or_denormal(f)) {
126
- uint32_t vm = vext_vm(desc); \
206
+ return sign ? 1 << 2 : 1 << 5;
127
- uint32_t vl = env->vl; \
207
+ } else if (float64_is_any_nan(f)) {
128
- uint32_t total_elems = \
208
+ float_status s = { }; /* for snan_bit_is_one */
129
- vext_get_total_elems(env, desc, ESZ); \
209
+ return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
130
- uint32_t vta = vext_vta(desc); \
210
+ } else {
131
- uint32_t vma = vext_vma(desc); \
211
+ return sign ? 1 << 1 : 1 << 6;
132
- uint32_t i; \
212
+ }
133
- \
213
+}
134
- for (i = env->vstart; i < vl; i++) { \
214
+
135
- if (!vm && !vext_elem_mask(v0, i)) { \
215
+RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
136
- /* set masked-off elements to 1s */ \
216
+RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
137
- vext_set_elems_1s(vd, vma, i * ESZ, \
217
+RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
138
- (i + 1) * ESZ); \
218
+GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
139
- continue; \
219
+GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
140
- } \
220
+GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
141
- do_##NAME(vd, vs2, i); \
142
- } \
143
- env->vstart = 0; \
144
- /* set tail elements to 1s */ \
145
- vext_set_elems_1s(vd, vta, vl * ESZ, \
146
- total_elems * ESZ); \
147
-}
148
-
149
target_ulong fclass_h(uint64_t frs1)
150
{
151
float16 f = frs1;
221
--
152
--
222
2.27.0
153
2.41.0
223
224
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Dickon Hood <dickon.hood@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This commit adds support for the Zvbb vector-crypto extension, which
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
consists of the following instructions:
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
6
Message-id: 20200623215920.2594-24-zhiwei_liu@c-sky.com
6
* vrol.[vv,vx]
7
* vror.[vv,vx,vi]
8
* vbrev8.v
9
* vrev8.v
10
* vandn.[vv,vx]
11
* vbrev.v
12
* vclz.v
13
* vctz.v
14
* vcpop.v
15
* vwsll.[vv,vx,vi]
16
17
Translation functions are defined in
18
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
19
`target/riscv/vcrypto_helper.c`.
20
21
Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
22
Co-authored-by: William Salmon <will.salmon@codethink.co.uk>
23
Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
24
[max.chou@sifive.com: Fix imm mode of vror.vi]
25
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
26
Signed-off-by: William Salmon <will.salmon@codethink.co.uk>
27
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
28
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
29
Signed-off-by: Max Chou <max.chou@sifive.com>
30
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
31
[max.chou@sifive.com: Exposed x-zvbb property]
32
Message-ID: <20230711165917.2629866-9-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
33
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
34
---
9
target/riscv/helper.h | 17 ++++
35
target/riscv/cpu_cfg.h | 1 +
10
target/riscv/insn32.decode | 7 ++
36
target/riscv/helper.h | 62 +++++++++
11
target/riscv/insn_trans/trans_rvv.inc.c | 113 ++++++++++++++++++++++++
37
target/riscv/insn32.decode | 20 +++
12
target/riscv/vector_helper.c | 88 ++++++++++++++++++
38
target/riscv/cpu.c | 12 ++
13
4 files changed, 225 insertions(+)
39
target/riscv/vcrypto_helper.c | 138 +++++++++++++++++++
40
target/riscv/insn_trans/trans_rvvk.c.inc | 164 +++++++++++++++++++++++
41
6 files changed, 397 insertions(+)
14
42
43
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/riscv/cpu_cfg.h
46
+++ b/target/riscv/cpu_cfg.h
47
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
48
bool ext_zve32f;
49
bool ext_zve64f;
50
bool ext_zve64d;
51
+ bool ext_zvbb;
52
bool ext_zvbc;
53
bool ext_zmmul;
54
bool ext_zvfbfmin;
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
55
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
57
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
58
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
59
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
20
DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
60
DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
61
DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
22
DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
62
DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
23
+
63
+
24
+DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32)
64
+DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32)
65
+DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32)
66
+DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32)
67
+DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
68
+
29
+DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
69
+DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32)
30
+DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
70
+DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32)
31
+DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
71
+DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32)
32
+DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32)
72
+DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32)
73
+
34
+DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32)
74
+DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32)
75
+DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32)
76
+DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32)
77
+DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32)
78
+
39
+DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32)
79
+DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32)
80
+DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32)
81
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32)
82
+DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32)
83
+
84
+DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32)
85
+DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32)
86
+DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32)
87
+DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32)
88
+DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32)
89
+DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32)
90
+DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32)
91
+DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32)
92
+DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32)
93
+DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32)
94
+DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32)
95
+DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32)
96
+
97
+DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32)
98
+DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32)
99
+DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32)
100
+DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32)
101
+DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32)
102
+DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32)
103
+DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32)
104
+DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32)
105
+DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32)
106
+DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32)
107
+DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32)
108
+DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32)
109
+
110
+DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
111
+DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
112
+DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
113
+DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32)
114
+DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32)
115
+DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32)
116
+
117
+DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
118
+DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
119
+DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
120
+DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
121
+DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32)
122
+DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32)
123
+DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32)
124
+DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32)
40
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
125
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
41
index XXXXXXX..XXXXXXX 100644
126
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn32.decode
127
--- a/target/riscv/insn32.decode
43
+++ b/target/riscv/insn32.decode
128
+++ b/target/riscv/insn32.decode
44
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@
45
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
130
%imm_u 12:s20 !function=ex_shift_12
131
%imm_bs 30:2 !function=ex_shift_3
132
%imm_rnum 20:4
133
+%imm_z6 26:1 15:5
134
135
# Argument sets:
136
&empty
137
@@ -XXX,XX +XXX,XX @@
46
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
138
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
47
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
139
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
48
+@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd
140
@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd
49
@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
141
+@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd
50
@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
142
@r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd
51
143
@r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd
52
@@ -XXX,XX +XXX,XX @@ vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
144
@r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1
53
vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
145
@@ -XXX,XX +XXX,XX @@ vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm
54
vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
146
vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm
55
vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
147
vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm
56
+vmv_v_v 010111 1 00000 ..... 000 ..... 1010111 @r2
148
vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm
57
+vmv_v_x 010111 1 00000 ..... 100 ..... 1010111 @r2
149
+
58
+vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2
150
+# *** Zvbb vector crypto extension ***
59
+vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0
151
+vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm
60
+vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0
152
+vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm
61
+vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0
153
+vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm
62
154
+vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm
63
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
155
+vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6
64
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
156
+vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm
65
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
157
+vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm
158
+vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm
159
+vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm
160
+vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm
161
+vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm
162
+vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm
163
+vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm
164
+vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm
165
+vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm
166
+vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm
167
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
66
index XXXXXXX..XXXXXXX 100644
168
index XXXXXXX..XXXXXXX 100644
67
--- a/target/riscv/insn_trans/trans_rvv.inc.c
169
--- a/target/riscv/cpu.c
68
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
170
+++ b/target/riscv/cpu.c
69
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
171
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
70
GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
172
ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
71
GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
173
ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
72
GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
174
ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
73
+
175
+ ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
74
+/* Vector Integer Merge and Move Instructions */
176
ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
75
+static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
177
ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
178
ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
179
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
180
return;
181
}
182
183
+ /*
184
+ * In principle Zve*x would also suffice here, were they supported
185
+ * in qemu
186
+ */
187
+ if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) {
188
+ error_setg(errp,
189
+ "Vector crypto extensions require V or Zve* extensions");
190
+ return;
191
+ }
192
+
193
if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) {
194
error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions");
195
return;
196
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
197
DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
198
199
/* Vector cryptography extensions */
200
+ DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
201
DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
202
203
DEFINE_PROP_END_OF_LIST(),
204
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/target/riscv/vcrypto_helper.c
207
+++ b/target/riscv/vcrypto_helper.c
208
@@ -XXX,XX +XXX,XX @@
209
#include "qemu/osdep.h"
210
#include "qemu/host-utils.h"
211
#include "qemu/bitops.h"
212
+#include "qemu/bswap.h"
213
#include "cpu.h"
214
#include "exec/memop.h"
215
#include "exec/exec-all.h"
216
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64)
217
GEN_VEXT_VV(vclmulh_vv, 8)
218
RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64)
219
GEN_VEXT_VX(vclmulh_vx, 8)
220
+
221
+RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8)
222
+RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16)
223
+RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32)
224
+RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64)
225
+GEN_VEXT_VV(vror_vv_b, 1)
226
+GEN_VEXT_VV(vror_vv_h, 2)
227
+GEN_VEXT_VV(vror_vv_w, 4)
228
+GEN_VEXT_VV(vror_vv_d, 8)
229
+
230
+RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8)
231
+RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16)
232
+RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32)
233
+RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64)
234
+GEN_VEXT_VX(vror_vx_b, 1)
235
+GEN_VEXT_VX(vror_vx_h, 2)
236
+GEN_VEXT_VX(vror_vx_w, 4)
237
+GEN_VEXT_VX(vror_vx_d, 8)
238
+
239
+RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8)
240
+RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16)
241
+RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32)
242
+RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64)
243
+GEN_VEXT_VV(vrol_vv_b, 1)
244
+GEN_VEXT_VV(vrol_vv_h, 2)
245
+GEN_VEXT_VV(vrol_vv_w, 4)
246
+GEN_VEXT_VV(vrol_vv_d, 8)
247
+
248
+RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8)
249
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16)
250
+RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32)
251
+RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64)
252
+GEN_VEXT_VX(vrol_vx_b, 1)
253
+GEN_VEXT_VX(vrol_vx_h, 2)
254
+GEN_VEXT_VX(vrol_vx_w, 4)
255
+GEN_VEXT_VX(vrol_vx_d, 8)
256
+
257
+static uint64_t brev8(uint64_t val)
76
+{
258
+{
77
+ if (vext_check_isa_ill(s) &&
259
+ val = ((val & 0x5555555555555555ull) << 1) |
78
+ vext_check_reg(s, a->rd, false) &&
260
+ ((val & 0xAAAAAAAAAAAAAAAAull) >> 1);
79
+ vext_check_reg(s, a->rs1, false)) {
261
+ val = ((val & 0x3333333333333333ull) << 2) |
80
+
262
+ ((val & 0xCCCCCCCCCCCCCCCCull) >> 2);
81
+ if (s->vl_eq_vlmax) {
263
+ val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) |
82
+ tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
264
+ ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4);
83
+ vreg_ofs(s, a->rs1),
265
+
84
+ MAXSZ(s), MAXSZ(s));
266
+ return val;
85
+ } else {
86
+ uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
87
+ static gen_helper_gvec_2_ptr * const fns[4] = {
88
+ gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
89
+ gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
90
+ };
91
+ TCGLabel *over = gen_new_label();
92
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
93
+
94
+ tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
95
+ cpu_env, 0, s->vlen / 8, data, fns[s->sew]);
96
+ gen_set_label(over);
97
+ }
98
+ return true;
99
+ }
100
+ return false;
101
+}
267
+}
102
+
268
+
103
+typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
269
+RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8)
104
+static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
270
+RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8)
271
+RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8)
272
+RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8)
273
+GEN_VEXT_V(vbrev8_v_b, 1)
274
+GEN_VEXT_V(vbrev8_v_h, 2)
275
+GEN_VEXT_V(vbrev8_v_w, 4)
276
+GEN_VEXT_V(vbrev8_v_d, 8)
277
+
278
+#define DO_IDENTITY(a) (a)
279
+RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY)
280
+RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16)
281
+RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32)
282
+RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64)
283
+GEN_VEXT_V(vrev8_v_b, 1)
284
+GEN_VEXT_V(vrev8_v_h, 2)
285
+GEN_VEXT_V(vrev8_v_w, 4)
286
+GEN_VEXT_V(vrev8_v_d, 8)
287
+
288
+#define DO_ANDN(a, b) ((a) & ~(b))
289
+RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN)
290
+RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN)
291
+RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN)
292
+RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN)
293
+GEN_VEXT_VV(vandn_vv_b, 1)
294
+GEN_VEXT_VV(vandn_vv_h, 2)
295
+GEN_VEXT_VV(vandn_vv_w, 4)
296
+GEN_VEXT_VV(vandn_vv_d, 8)
297
+
298
+RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN)
299
+RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN)
300
+RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN)
301
+RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN)
302
+GEN_VEXT_VX(vandn_vx_b, 1)
303
+GEN_VEXT_VX(vandn_vx_h, 2)
304
+GEN_VEXT_VX(vandn_vx_w, 4)
305
+GEN_VEXT_VX(vandn_vx_d, 8)
306
+
307
+RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8)
308
+RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16)
309
+RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32)
310
+RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64)
311
+GEN_VEXT_V(vbrev_v_b, 1)
312
+GEN_VEXT_V(vbrev_v_h, 2)
313
+GEN_VEXT_V(vbrev_v_w, 4)
314
+GEN_VEXT_V(vbrev_v_d, 8)
315
+
316
+RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8)
317
+RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16)
318
+RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32)
319
+RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64)
320
+GEN_VEXT_V(vclz_v_b, 1)
321
+GEN_VEXT_V(vclz_v_h, 2)
322
+GEN_VEXT_V(vclz_v_w, 4)
323
+GEN_VEXT_V(vclz_v_d, 8)
324
+
325
+RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8)
326
+RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16)
327
+RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32)
328
+RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64)
329
+GEN_VEXT_V(vctz_v_b, 1)
330
+GEN_VEXT_V(vctz_v_h, 2)
331
+GEN_VEXT_V(vctz_v_w, 4)
332
+GEN_VEXT_V(vctz_v_d, 8)
333
+
334
+RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8)
335
+RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16)
336
+RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32)
337
+RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64)
338
+GEN_VEXT_V(vcpop_v_b, 1)
339
+GEN_VEXT_V(vcpop_v_h, 2)
340
+GEN_VEXT_V(vcpop_v_w, 4)
341
+GEN_VEXT_V(vcpop_v_d, 8)
342
+
343
+#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1)))
344
+RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL)
345
+RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL)
346
+RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL)
347
+GEN_VEXT_VV(vwsll_vv_b, 2)
348
+GEN_VEXT_VV(vwsll_vv_h, 4)
349
+GEN_VEXT_VV(vwsll_vv_w, 8)
350
+
351
+RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL)
352
+RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL)
353
+RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
354
+GEN_VEXT_VX(vwsll_vx_b, 2)
355
+GEN_VEXT_VX(vwsll_vx_h, 4)
356
+GEN_VEXT_VX(vwsll_vx_w, 8)
357
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
358
index XXXXXXX..XXXXXXX 100644
359
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
360
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
361
@@ -XXX,XX +XXX,XX @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
362
363
GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
364
GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
365
+
366
+/*
367
+ * Zvbb
368
+ */
369
+
370
+#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \
371
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
372
+ { \
373
+ if (CHECK(s, a)) { \
374
+ static gen_helper_opivx *const fns[4] = { \
375
+ gen_helper_##OPIVX##_b, \
376
+ gen_helper_##OPIVX##_h, \
377
+ gen_helper_##OPIVX##_w, \
378
+ gen_helper_##OPIVX##_d, \
379
+ }; \
380
+ return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \
381
+ IMM_MODE); \
382
+ } \
383
+ return false; \
384
+ }
385
+
386
+#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \
387
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
388
+ { \
389
+ if (CHECK(s, a)) { \
390
+ static gen_helper_gvec_4_ptr *const fns[4] = { \
391
+ gen_helper_##NAME##_b, \
392
+ gen_helper_##NAME##_h, \
393
+ gen_helper_##NAME##_w, \
394
+ gen_helper_##NAME##_d, \
395
+ }; \
396
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
397
+ } \
398
+ return false; \
399
+ }
400
+
401
+#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \
402
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
403
+ { \
404
+ if (CHECK(s, a)) { \
405
+ static gen_helper_opivx *const fns[4] = { \
406
+ gen_helper_##NAME##_b, \
407
+ gen_helper_##NAME##_h, \
408
+ gen_helper_##NAME##_w, \
409
+ gen_helper_##NAME##_d, \
410
+ }; \
411
+ return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \
412
+ fns[s->sew]); \
413
+ } \
414
+ return false; \
415
+ }
416
+
417
+static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a)
105
+{
418
+{
106
+ if (vext_check_isa_ill(s) &&
419
+ return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true;
107
+ vext_check_reg(s, a->rd, false)) {
108
+
109
+ TCGv s1;
110
+ TCGLabel *over = gen_new_label();
111
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
112
+
113
+ s1 = tcg_temp_new();
114
+ gen_get_gpr(s1, a->rs1);
115
+
116
+ if (s->vl_eq_vlmax) {
117
+ tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
118
+ MAXSZ(s), MAXSZ(s), s1);
119
+ } else {
120
+ TCGv_i32 desc ;
121
+ TCGv_i64 s1_i64 = tcg_temp_new_i64();
122
+ TCGv_ptr dest = tcg_temp_new_ptr();
123
+ uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
124
+ static gen_helper_vmv_vx * const fns[4] = {
125
+ gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
126
+ gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
127
+ };
128
+
129
+ tcg_gen_ext_tl_i64(s1_i64, s1);
130
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
131
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
132
+ fns[s->sew](dest, s1_i64, cpu_env, desc);
133
+
134
+ tcg_temp_free_ptr(dest);
135
+ tcg_temp_free_i32(desc);
136
+ tcg_temp_free_i64(s1_i64);
137
+ }
138
+
139
+ tcg_temp_free(s1);
140
+ gen_set_label(over);
141
+ return true;
142
+ }
143
+ return false;
144
+}
420
+}
145
+
421
+
146
+static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
422
+static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a)
147
+{
423
+{
148
+ if (vext_check_isa_ill(s) &&
424
+ return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true;
149
+ vext_check_reg(s, a->rd, false)) {
150
+
151
+ int64_t simm = sextract64(a->rs1, 0, 5);
152
+ if (s->vl_eq_vlmax) {
153
+ tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
154
+ MAXSZ(s), MAXSZ(s), simm);
155
+ } else {
156
+ TCGv_i32 desc;
157
+ TCGv_i64 s1;
158
+ TCGv_ptr dest;
159
+ uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
160
+ static gen_helper_vmv_vx * const fns[4] = {
161
+ gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
162
+ gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
163
+ };
164
+ TCGLabel *over = gen_new_label();
165
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
166
+
167
+ s1 = tcg_const_i64(simm);
168
+ dest = tcg_temp_new_ptr();
169
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
170
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
171
+ fns[s->sew](dest, s1, cpu_env, desc);
172
+
173
+ tcg_temp_free_ptr(dest);
174
+ tcg_temp_free_i32(desc);
175
+ tcg_temp_free_i64(s1);
176
+ gen_set_label(over);
177
+ }
178
+ return true;
179
+ }
180
+ return false;
181
+}
425
+}
182
+
426
+
183
+GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check)
427
+/* vrol.v[vx] */
184
+GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check)
428
+GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check)
185
+GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check)
429
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check)
186
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
430
+
187
index XXXXXXX..XXXXXXX 100644
431
+/* vror.v[vxi] */
188
--- a/target/riscv/vector_helper.c
432
+GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check)
189
+++ b/target/riscv/vector_helper.c
433
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check)
190
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
434
+GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check)
191
GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
435
+
192
GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
436
+#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \
193
GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
437
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
194
+
438
+ { \
195
+/* Vector Integer Merge and Move Instructions */
439
+ if (CHECK(s, a)) { \
196
+#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \
440
+ static gen_helper_opivx *const fns[4] = { \
197
+void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
441
+ gen_helper_##NAME##_b, \
198
+ uint32_t desc) \
442
+ gen_helper_##NAME##_h, \
199
+{ \
443
+ gen_helper_##NAME##_w, \
200
+ uint32_t vl = env->vl; \
444
+ gen_helper_##NAME##_d, \
201
+ uint32_t esz = sizeof(ETYPE); \
445
+ }; \
202
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
446
+ return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
203
+ uint32_t i; \
447
+ } \
204
+ \
448
+ return false; \
205
+ for (i = 0; i < vl; i++) { \
449
+ }
206
+ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
450
+
207
+ *((ETYPE *)vd + H(i)) = s1; \
451
+/* vandn.v[vx] */
208
+ } \
452
+GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check)
209
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
453
+GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check)
454
+
455
+#define GEN_OPIV_TRANS(NAME, CHECK) \
456
+ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
457
+ { \
458
+ if (CHECK(s, a)) { \
459
+ uint32_t data = 0; \
460
+ static gen_helper_gvec_3_ptr *const fns[4] = { \
461
+ gen_helper_##NAME##_b, \
462
+ gen_helper_##NAME##_h, \
463
+ gen_helper_##NAME##_w, \
464
+ gen_helper_##NAME##_d, \
465
+ }; \
466
+ TCGLabel *over = gen_new_label(); \
467
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
468
+ \
469
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
470
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
471
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
472
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
473
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
474
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
475
+ vreg_ofs(s, a->rs2), cpu_env, \
476
+ s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \
477
+ data, fns[s->sew]); \
478
+ mark_vs_dirty(s); \
479
+ gen_set_label(over); \
480
+ return true; \
481
+ } \
482
+ return false; \
483
+ }
484
+
485
+static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a)
486
+{
487
+ return s->cfg_ptr->ext_zvbb == true &&
488
+ require_rvv(s) &&
489
+ vext_check_isa_ill(s) &&
490
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
210
+}
491
+}
211
+
492
+
212
+GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb)
493
+GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check)
213
+GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh)
494
+GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check)
214
+GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl)
495
+GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check)
215
+GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq)
496
+GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check)
216
+
497
+GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check)
217
+#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \
498
+GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check)
218
+void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
499
+
219
+ uint32_t desc) \
500
+static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a)
220
+{ \
501
+{
221
+ uint32_t vl = env->vl; \
502
+ return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a);
222
+ uint32_t esz = sizeof(ETYPE); \
223
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
224
+ uint32_t i; \
225
+ \
226
+ for (i = 0; i < vl; i++) { \
227
+ *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
228
+ } \
229
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
230
+}
503
+}
231
+
504
+
232
+GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb)
505
+static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a)
233
+GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh)
506
+{
234
+GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl)
507
+ return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a);
235
+GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq)
236
+
237
+#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \
238
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
239
+ CPURISCVState *env, uint32_t desc) \
240
+{ \
241
+ uint32_t mlen = vext_mlen(desc); \
242
+ uint32_t vl = env->vl; \
243
+ uint32_t esz = sizeof(ETYPE); \
244
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
245
+ uint32_t i; \
246
+ \
247
+ for (i = 0; i < vl; i++) { \
248
+ ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1); \
249
+ *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
250
+ } \
251
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
252
+}
508
+}
253
+
509
+
254
+GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb)
510
+/* OPIVI without GVEC IR */
255
+GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh)
511
+#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \
256
+GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl)
512
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
257
+GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq)
513
+ { \
258
+
514
+ if (CHECK(s, a)) { \
259
+#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \
515
+ static gen_helper_opivx *const fns[3] = { \
260
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
516
+ gen_helper_##OPIVX##_b, \
261
+ void *vs2, CPURISCVState *env, uint32_t desc) \
517
+ gen_helper_##OPIVX##_h, \
262
+{ \
518
+ gen_helper_##OPIVX##_w, \
263
+ uint32_t mlen = vext_mlen(desc); \
519
+ }; \
264
+ uint32_t vl = env->vl; \
520
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \
265
+ uint32_t esz = sizeof(ETYPE); \
521
+ IMM_MODE); \
266
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
522
+ } \
267
+ uint32_t i; \
523
+ return false; \
268
+ \
524
+ }
269
+ for (i = 0; i < vl; i++) { \
525
+
270
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
526
+GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check)
271
+ ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \
527
+GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check)
272
+ (ETYPE)(target_long)s1); \
528
+GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check)
273
+ *((ETYPE *)vd + H(i)) = d; \
274
+ } \
275
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
276
+}
277
+
278
+GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb)
279
+GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
280
+GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
281
+GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
282
--
529
--
283
2.27.0
530
2.41.0
284
285
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
2
3
Vector strided operations access the first memory element at the base address,
3
This commit adds support for the Zvkned vector-crypto extension, which
4
and then access subsequent elements at address increments given by the byte
4
consists of the following instructions:
5
offset contained in the x register specified by rs2.
6
5
7
Vector unit-stride operations access elements stored contiguously in memory
6
* vaesef.[vv,vs]
8
starting from the base effective address. It can be seen as a special
7
* vaesdf.[vv,vs]
9
case of strided operations.
8
* vaesdm.[vv,vs]
9
* vaesz.vs
10
* vaesem.[vv,vs]
11
* vaeskf1.vi
12
* vaeskf2.vi
10
13
11
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
14
Translation functions are defined in
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
13
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
16
`target/riscv/vcrypto_helper.c`.
14
Message-id: 20200623215920.2594-7-zhiwei_liu@c-sky.com
17
18
Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
19
Co-authored-by: William Salmon <will.salmon@codethink.co.uk>
20
[max.chou@sifive.com: Replaced vstart checking by TCG op]
21
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
22
Signed-off-by: William Salmon <will.salmon@codethink.co.uk>
23
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
24
Signed-off-by: Max Chou <max.chou@sifive.com>
25
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
26
[max.chou@sifive.com: Imported aes-round.h and exposed x-zvkned
27
property]
28
[max.chou@sifive.com: Fixed endian issues and replaced the vstart & vl
29
egs checking by helper function]
30
[max.chou@sifive.com: Replaced bswap32 calls in aes key expanding]
31
Message-ID: <20230711165917.2629866-10-max.chou@sifive.com>
15
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
32
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
16
---
33
---
17
target/riscv/helper.h | 105 ++++++
34
target/riscv/cpu_cfg.h | 1 +
18
target/riscv/internals.h | 5 +
35
target/riscv/helper.h | 14 ++
19
target/riscv/insn32.decode | 32 ++
36
target/riscv/insn32.decode | 14 ++
20
target/riscv/insn_trans/trans_rvv.inc.c | 355 ++++++++++++++++++++
37
target/riscv/cpu.c | 4 +-
21
target/riscv/translate.c | 7 +
38
target/riscv/vcrypto_helper.c | 202 +++++++++++++++++++++++
22
target/riscv/vector_helper.c | 410 ++++++++++++++++++++++++
39
target/riscv/insn_trans/trans_rvvk.c.inc | 147 +++++++++++++++++
23
6 files changed, 914 insertions(+)
40
6 files changed, 381 insertions(+), 1 deletion(-)
24
41
42
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/riscv/cpu_cfg.h
45
+++ b/target/riscv/cpu_cfg.h
46
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
47
bool ext_zve64d;
48
bool ext_zvbb;
49
bool ext_zvbc;
50
+ bool ext_zvkned;
51
bool ext_zmmul;
52
bool ext_zvfbfmin;
53
bool ext_zvfbfwma;
25
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
54
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
26
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
27
--- a/target/riscv/helper.h
56
--- a/target/riscv/helper.h
28
+++ b/target/riscv/helper.h
57
+++ b/target/riscv/helper.h
29
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(hyp_tlb_flush, void, env)
58
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32)
30
59
DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32)
31
/* Vector functions */
60
DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32)
32
DEF_HELPER_3(vsetvl, tl, env, tl, tl)
61
DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32)
62
+
34
+DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32)
63
+DEF_HELPER_2(egs_check, void, i32, env)
35
+DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32)
64
+
36
+DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32)
65
+DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32)
37
+DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32)
66
+DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32)
38
+DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32)
67
+DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32)
39
+DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32)
68
+DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32)
40
+DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32)
69
+DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32)
41
+DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32)
70
+DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32)
42
+DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32)
71
+DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32)
43
+DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32)
72
+DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32)
44
+DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32)
73
+DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32)
45
+DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32)
74
+DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32)
46
+DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32)
75
+DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32)
47
+DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32)
48
+DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32)
49
+DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32)
50
+DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32)
51
+DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32)
52
+DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32)
53
+DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32)
54
+DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32)
55
+DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32)
56
+DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32)
57
+DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32)
58
+DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32)
59
+DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32)
60
+DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32)
61
+DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32)
62
+DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32)
63
+DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32)
64
+DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32)
65
+DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32)
66
+DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32)
67
+DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32)
68
+DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32)
69
+DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32)
70
+DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32)
71
+DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32)
72
+DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32)
73
+DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32)
74
+DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32)
75
+DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32)
76
+DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32)
77
+DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32)
78
+DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32)
79
+DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32)
80
+DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32)
81
+DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32)
82
+DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32)
83
+DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32)
84
+DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32)
85
+DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32)
86
+DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32)
87
+DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32)
88
+DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32)
89
+DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32)
90
+DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32)
91
+DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32)
92
+DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32)
93
+DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32)
94
+DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32)
95
+DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32)
96
+DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32)
97
+DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32)
98
+DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32)
99
+DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32)
100
+DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32)
101
+DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32)
102
+DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32)
103
+DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32)
104
+DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32)
105
+DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32)
106
+DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32)
107
+DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32)
108
+DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32)
109
+DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32)
110
+DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32)
111
+DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32)
112
+DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32)
113
+DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32)
114
+DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32)
115
+DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32)
116
+DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32)
117
+DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32)
118
+DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32)
119
+DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32)
120
+DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32)
121
+DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32)
122
+DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32)
123
+DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32)
124
+DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32)
125
+DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32)
126
+DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32)
127
+DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32)
128
+DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32)
129
+DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32)
130
+DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32)
131
+DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32)
132
+DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32)
133
+DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32)
134
+DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32)
135
+DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32)
136
+DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32)
137
+DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32)
138
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
139
index XXXXXXX..XXXXXXX 100644
140
--- a/target/riscv/internals.h
141
+++ b/target/riscv/internals.h
142
@@ -XXX,XX +XXX,XX @@
143
144
#include "hw/registerfields.h"
145
146
+/* share data between vector helpers and decode code */
147
+FIELD(VDATA, MLEN, 0, 8)
148
+FIELD(VDATA, VM, 8, 1)
149
+FIELD(VDATA, LMUL, 9, 2)
150
+FIELD(VDATA, NF, 11, 4)
151
#endif
152
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
76
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
153
index XXXXXXX..XXXXXXX 100644
77
index XXXXXXX..XXXXXXX 100644
154
--- a/target/riscv/insn32.decode
78
--- a/target/riscv/insn32.decode
155
+++ b/target/riscv/insn32.decode
79
+++ b/target/riscv/insn32.decode
156
@@ -XXX,XX +XXX,XX @@
80
@@ -XXX,XX +XXX,XX @@
157
%sh10 20:10
158
%csr 20:12
159
%rm 12:3
160
+%nf 29:3 !function=ex_plus_1
161
162
# immediates:
163
%imm_i 20:s12
164
@@ -XXX,XX +XXX,XX @@
165
&u imm rd
166
&shift shamt rs1 rd
167
&atomic aq rl rs2 rs1 rd
168
+&r2nfvm vm rd rs1 nf
169
+&rnfvm vm rd rs1 rs2 nf
170
171
# Formats 32:
172
@r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
173
@@ -XXX,XX +XXX,XX @@
174
@r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
81
@r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
175
@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
82
@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
176
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
83
@r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd
177
+@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
84
+@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd
178
+@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
85
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
179
@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
86
@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
180
87
@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
181
@hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1
88
@@ -XXX,XX +XXX,XX @@ vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm
182
@@ -XXX,XX +XXX,XX @@ hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma
89
vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm
183
hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma
90
vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm
184
91
vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm
185
# *** RV32V Extension ***
92
+
186
+
93
+# *** Zvkned vector crypto extension ***
187
+# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
94
+vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1
188
+vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
95
+vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1
189
+vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
96
+vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1
190
+vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
97
+vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1
191
+vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
98
+vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1
192
+vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
99
+vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1
193
+vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
100
+vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1
194
+vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
101
+vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1
195
+vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
102
+vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1
196
+vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
103
+vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1
197
+vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
104
+vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
198
+vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
105
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
199
+
200
+vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
201
+vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
202
+vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
203
+vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
204
+vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
205
+vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
206
+vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
207
+vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
208
+vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
209
+vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
210
+vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
211
+
212
+# *** new major opcode OP-V ***
213
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
214
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
215
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
216
index XXXXXXX..XXXXXXX 100644
106
index XXXXXXX..XXXXXXX 100644
217
--- a/target/riscv/insn_trans/trans_rvv.inc.c
107
--- a/target/riscv/cpu.c
218
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
108
+++ b/target/riscv/cpu.c
109
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
110
ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
111
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
112
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
113
+ ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
114
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
115
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
116
ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
117
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
118
* In principle Zve*x would also suffice here, were they supported
119
* in qemu
120
*/
121
- if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) {
122
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) {
123
error_setg(errp,
124
"Vector crypto extensions require V or Zve* extensions");
125
return;
126
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
127
/* Vector cryptography extensions */
128
DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
129
DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
130
+ DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
131
132
DEFINE_PROP_END_OF_LIST(),
133
};
134
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/riscv/vcrypto_helper.c
137
+++ b/target/riscv/vcrypto_helper.c
219
@@ -XXX,XX +XXX,XX @@
138
@@ -XXX,XX +XXX,XX @@
220
* You should have received a copy of the GNU General Public License along with
139
#include "qemu/bitops.h"
221
* this program. If not, see <http://www.gnu.org/licenses/>.
140
#include "qemu/bswap.h"
222
*/
223
+#include "tcg/tcg-op-gvec.h"
224
+#include "tcg/tcg-gvec-desc.h"
225
+#include "internals.h"
226
227
static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a)
228
{
229
@@ -XXX,XX +XXX,XX @@ static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a)
230
tcg_temp_free(dst);
231
return true;
232
}
233
+
234
+/* vector register offset from env */
235
+static uint32_t vreg_ofs(DisasContext *s, int reg)
236
+{
237
+ return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8;
238
+}
239
+
240
+/* check functions */
241
+
242
+/*
243
+ * In cpu_get_tb_cpu_state(), set VILL if RVV was not present.
244
+ * So RVV is also checked in this function.
245
+ */
246
+static bool vext_check_isa_ill(DisasContext *s)
247
+{
248
+ return !s->vill;
249
+}
250
+
251
+/*
252
+ * There are two rules checked here.
253
+ *
254
+ * 1. Vector register numbers are multiples of LMUL. (Section 3.2)
255
+ *
256
+ * 2. For all widening instructions, the destination LMUL value must also be
257
+ * a supported LMUL value. (Section 11.2)
258
+ */
259
+static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen)
260
+{
261
+ /*
262
+ * The destination vector register group results are arranged as if both
263
+ * SEW and LMUL were at twice their current settings. (Section 11.2).
264
+ */
265
+ int legal = widen ? 2 << s->lmul : 1 << s->lmul;
266
+
267
+ return !((s->lmul == 0x3 && widen) || (reg % legal));
268
+}
269
+
270
+/*
271
+ * There are two rules checked here.
272
+ *
273
+ * 1. The destination vector register group for a masked vector instruction can
274
+ * only overlap the source mask register (v0) when LMUL=1. (Section 5.3)
275
+ *
276
+ * 2. In widening instructions and some other instructions, like vslideup.vx,
277
+ * there is no need to check whether LMUL=1.
278
+ */
279
+static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm,
280
+ bool force)
281
+{
282
+ return (vm != 0 || vd != 0) || (!force && (s->lmul == 0));
283
+}
284
+
285
+/* The LMUL setting must be such that LMUL * NFIELDS <= 8. (Section 7.8) */
286
+static bool vext_check_nf(DisasContext *s, uint32_t nf)
287
+{
288
+ return (1 << s->lmul) * nf <= 8;
289
+}
290
+
291
+/* common translation macro */
292
+#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \
293
+static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\
294
+{ \
295
+ if (CHECK(s, a)) { \
296
+ return OP(s, a, SEQ); \
297
+ } \
298
+ return false; \
299
+}
300
+
301
+/*
302
+ *** unit stride load and store
303
+ */
304
+typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
305
+ TCGv_env, TCGv_i32);
306
+
307
+static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
308
+ gen_helper_ldst_us *fn, DisasContext *s)
309
+{
310
+ TCGv_ptr dest, mask;
311
+ TCGv base;
312
+ TCGv_i32 desc;
313
+
314
+ TCGLabel *over = gen_new_label();
315
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
316
+
317
+ dest = tcg_temp_new_ptr();
318
+ mask = tcg_temp_new_ptr();
319
+ base = tcg_temp_new();
320
+
321
+ /*
322
+ * As simd_desc supports at most 256 bytes, and in this implementation,
323
+ * the max vector group length is 2048 bytes. So split it into two parts.
324
+ *
325
+ * The first part is vlen in bytes, encoded in maxsz of simd_desc.
326
+ * The second part is lmul, encoded in data of simd_desc.
327
+ */
328
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
329
+
330
+ gen_get_gpr(base, rs1);
331
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
332
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
333
+
334
+ fn(dest, mask, base, cpu_env, desc);
335
+
336
+ tcg_temp_free_ptr(dest);
337
+ tcg_temp_free_ptr(mask);
338
+ tcg_temp_free(base);
339
+ tcg_temp_free_i32(desc);
340
+ gen_set_label(over);
341
+ return true;
342
+}
343
+
344
+static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
345
+{
346
+ uint32_t data = 0;
347
+ gen_helper_ldst_us *fn;
348
+ static gen_helper_ldst_us * const fns[2][7][4] = {
349
+ /* masked unit stride load */
350
+ { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask,
351
+ gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask },
352
+ { NULL, gen_helper_vlh_v_h_mask,
353
+ gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask },
354
+ { NULL, NULL,
355
+ gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask },
356
+ { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask,
357
+ gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask },
358
+ { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask,
359
+ gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask },
360
+ { NULL, gen_helper_vlhu_v_h_mask,
361
+ gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask },
362
+ { NULL, NULL,
363
+ gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } },
364
+ /* unmasked unit stride load */
365
+ { { gen_helper_vlb_v_b, gen_helper_vlb_v_h,
366
+ gen_helper_vlb_v_w, gen_helper_vlb_v_d },
367
+ { NULL, gen_helper_vlh_v_h,
368
+ gen_helper_vlh_v_w, gen_helper_vlh_v_d },
369
+ { NULL, NULL,
370
+ gen_helper_vlw_v_w, gen_helper_vlw_v_d },
371
+ { gen_helper_vle_v_b, gen_helper_vle_v_h,
372
+ gen_helper_vle_v_w, gen_helper_vle_v_d },
373
+ { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h,
374
+ gen_helper_vlbu_v_w, gen_helper_vlbu_v_d },
375
+ { NULL, gen_helper_vlhu_v_h,
376
+ gen_helper_vlhu_v_w, gen_helper_vlhu_v_d },
377
+ { NULL, NULL,
378
+ gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } }
379
+ };
380
+
381
+ fn = fns[a->vm][seq][s->sew];
382
+ if (fn == NULL) {
383
+ return false;
384
+ }
385
+
386
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
387
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
388
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
389
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
390
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s);
391
+}
392
+
393
+static bool ld_us_check(DisasContext *s, arg_r2nfvm* a)
394
+{
395
+ return (vext_check_isa_ill(s) &&
396
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
397
+ vext_check_reg(s, a->rd, false) &&
398
+ vext_check_nf(s, a->nf));
399
+}
400
+
401
+GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check)
402
+GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check)
403
+GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check)
404
+GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check)
405
+GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check)
406
+GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check)
407
+GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check)
408
+
409
+static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
410
+{
411
+ uint32_t data = 0;
412
+ gen_helper_ldst_us *fn;
413
+ static gen_helper_ldst_us * const fns[2][4][4] = {
414
+ /* masked unit stride store */
415
+ { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask,
416
+ gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask },
417
+ { NULL, gen_helper_vsh_v_h_mask,
418
+ gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask },
419
+ { NULL, NULL,
420
+ gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask },
421
+ { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask,
422
+ gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } },
423
+ /* unmasked unit stride store */
424
+ { { gen_helper_vsb_v_b, gen_helper_vsb_v_h,
425
+ gen_helper_vsb_v_w, gen_helper_vsb_v_d },
426
+ { NULL, gen_helper_vsh_v_h,
427
+ gen_helper_vsh_v_w, gen_helper_vsh_v_d },
428
+ { NULL, NULL,
429
+ gen_helper_vsw_v_w, gen_helper_vsw_v_d },
430
+ { gen_helper_vse_v_b, gen_helper_vse_v_h,
431
+ gen_helper_vse_v_w, gen_helper_vse_v_d } }
432
+ };
433
+
434
+ fn = fns[a->vm][seq][s->sew];
435
+ if (fn == NULL) {
436
+ return false;
437
+ }
438
+
439
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
440
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
441
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
442
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
443
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s);
444
+}
445
+
446
+static bool st_us_check(DisasContext *s, arg_r2nfvm* a)
447
+{
448
+ return (vext_check_isa_ill(s) &&
449
+ vext_check_reg(s, a->rd, false) &&
450
+ vext_check_nf(s, a->nf));
451
+}
452
+
453
+GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check)
454
+GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check)
455
+GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check)
456
+GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check)
457
+
458
+/*
459
+ *** stride load and store
460
+ */
461
+typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
462
+ TCGv, TCGv_env, TCGv_i32);
463
+
464
+static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
465
+ uint32_t data, gen_helper_ldst_stride *fn,
466
+ DisasContext *s)
467
+{
468
+ TCGv_ptr dest, mask;
469
+ TCGv base, stride;
470
+ TCGv_i32 desc;
471
+
472
+ TCGLabel *over = gen_new_label();
473
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
474
+
475
+ dest = tcg_temp_new_ptr();
476
+ mask = tcg_temp_new_ptr();
477
+ base = tcg_temp_new();
478
+ stride = tcg_temp_new();
479
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
480
+
481
+ gen_get_gpr(base, rs1);
482
+ gen_get_gpr(stride, rs2);
483
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
484
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
485
+
486
+ fn(dest, mask, base, stride, cpu_env, desc);
487
+
488
+ tcg_temp_free_ptr(dest);
489
+ tcg_temp_free_ptr(mask);
490
+ tcg_temp_free(base);
491
+ tcg_temp_free(stride);
492
+ tcg_temp_free_i32(desc);
493
+ gen_set_label(over);
494
+ return true;
495
+}
496
+
497
+static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
498
+{
499
+ uint32_t data = 0;
500
+ gen_helper_ldst_stride *fn;
501
+ static gen_helper_ldst_stride * const fns[7][4] = {
502
+ { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h,
503
+ gen_helper_vlsb_v_w, gen_helper_vlsb_v_d },
504
+ { NULL, gen_helper_vlsh_v_h,
505
+ gen_helper_vlsh_v_w, gen_helper_vlsh_v_d },
506
+ { NULL, NULL,
507
+ gen_helper_vlsw_v_w, gen_helper_vlsw_v_d },
508
+ { gen_helper_vlse_v_b, gen_helper_vlse_v_h,
509
+ gen_helper_vlse_v_w, gen_helper_vlse_v_d },
510
+ { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h,
511
+ gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d },
512
+ { NULL, gen_helper_vlshu_v_h,
513
+ gen_helper_vlshu_v_w, gen_helper_vlshu_v_d },
514
+ { NULL, NULL,
515
+ gen_helper_vlswu_v_w, gen_helper_vlswu_v_d },
516
+ };
517
+
518
+ fn = fns[seq][s->sew];
519
+ if (fn == NULL) {
520
+ return false;
521
+ }
522
+
523
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
524
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
525
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
526
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
527
+ return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
528
+}
529
+
530
+static bool ld_stride_check(DisasContext *s, arg_rnfvm* a)
531
+{
532
+ return (vext_check_isa_ill(s) &&
533
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
534
+ vext_check_reg(s, a->rd, false) &&
535
+ vext_check_nf(s, a->nf));
536
+}
537
+
538
+GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check)
539
+GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check)
540
+GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check)
541
+GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check)
542
+GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check)
543
+GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check)
544
+GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check)
545
+
546
+static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
547
+{
548
+ uint32_t data = 0;
549
+ gen_helper_ldst_stride *fn;
550
+ static gen_helper_ldst_stride * const fns[4][4] = {
551
+ /* masked stride store */
552
+ { gen_helper_vssb_v_b, gen_helper_vssb_v_h,
553
+ gen_helper_vssb_v_w, gen_helper_vssb_v_d },
554
+ { NULL, gen_helper_vssh_v_h,
555
+ gen_helper_vssh_v_w, gen_helper_vssh_v_d },
556
+ { NULL, NULL,
557
+ gen_helper_vssw_v_w, gen_helper_vssw_v_d },
558
+ { gen_helper_vsse_v_b, gen_helper_vsse_v_h,
559
+ gen_helper_vsse_v_w, gen_helper_vsse_v_d }
560
+ };
561
+
562
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
563
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
564
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
565
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
566
+ fn = fns[seq][s->sew];
567
+ if (fn == NULL) {
568
+ return false;
569
+ }
570
+
571
+ return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
572
+}
573
+
574
+static bool st_stride_check(DisasContext *s, arg_rnfvm* a)
575
+{
576
+ return (vext_check_isa_ill(s) &&
577
+ vext_check_reg(s, a->rd, false) &&
578
+ vext_check_nf(s, a->nf));
579
+}
580
+
581
+GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check)
582
+GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check)
583
+GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check)
584
+GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check)
585
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
586
index XXXXXXX..XXXXXXX 100644
587
--- a/target/riscv/translate.c
588
+++ b/target/riscv/translate.c
589
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
590
uint8_t lmul;
591
uint8_t sew;
592
uint16_t vlen;
593
+ uint16_t mlen;
594
bool vl_eq_vlmax;
595
} DisasContext;
596
597
@@ -XXX,XX +XXX,XX @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode)
598
}
599
}
600
601
+static int ex_plus_1(DisasContext *ctx, int nf)
602
+{
603
+ return nf + 1;
604
+}
605
+
606
#define EX_SH(amount) \
607
static int ex_shift_##amount(DisasContext *ctx, int imm) \
608
{ \
609
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
610
ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
611
ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
612
ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
613
+ ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul);
614
ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
615
}
616
617
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
618
index XXXXXXX..XXXXXXX 100644
619
--- a/target/riscv/vector_helper.c
620
+++ b/target/riscv/vector_helper.c
621
@@ -XXX,XX +XXX,XX @@
622
623
#include "qemu/osdep.h"
624
#include "cpu.h"
141
#include "cpu.h"
625
+#include "exec/memop.h"
142
+#include "crypto/aes.h"
143
+#include "crypto/aes-round.h"
144
#include "exec/memop.h"
626
#include "exec/exec-all.h"
145
#include "exec/exec-all.h"
627
#include "exec/helper-proto.h"
146
#include "exec/helper-proto.h"
628
+#include "tcg/tcg-gvec-desc.h"
147
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
629
+#include "internals.h"
148
GEN_VEXT_VX(vwsll_vx_b, 2)
630
#include <math.h>
149
GEN_VEXT_VX(vwsll_vx_h, 4)
631
150
GEN_VEXT_VX(vwsll_vx_w, 8)
632
target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
151
+
633
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
152
+void HELPER(egs_check)(uint32_t egs, CPURISCVState *env)
634
env->vstart = 0;
153
+{
635
return vl;
154
+ uint32_t vl = env->vl;
636
}
155
+ uint32_t vstart = env->vstart;
156
+
157
+ if (vl % egs != 0 || vstart % egs != 0) {
158
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
159
+ }
160
+}
161
+
162
+static inline void xor_round_key(AESState *round_state, AESState *round_key)
163
+{
164
+ round_state->v = round_state->v ^ round_key->v;
165
+}
166
+
167
+#define GEN_ZVKNED_HELPER_VV(NAME, ...) \
168
+ void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
169
+ uint32_t desc) \
170
+ { \
171
+ uint32_t vl = env->vl; \
172
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
173
+ uint32_t vta = vext_vta(desc); \
174
+ \
175
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
176
+ AESState round_key; \
177
+ round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \
178
+ round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \
179
+ AESState round_state; \
180
+ round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \
181
+ round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \
182
+ __VA_ARGS__; \
183
+ *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \
184
+ *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \
185
+ } \
186
+ env->vstart = 0; \
187
+ /* set tail elements to 1s */ \
188
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \
189
+ }
190
+
191
+#define GEN_ZVKNED_HELPER_VS(NAME, ...) \
192
+ void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
193
+ uint32_t desc) \
194
+ { \
195
+ uint32_t vl = env->vl; \
196
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
197
+ uint32_t vta = vext_vta(desc); \
198
+ \
199
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
200
+ AESState round_key; \
201
+ round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \
202
+ round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \
203
+ AESState round_state; \
204
+ round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \
205
+ round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \
206
+ __VA_ARGS__; \
207
+ *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \
208
+ *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \
209
+ } \
210
+ env->vstart = 0; \
211
+ /* set tail elements to 1s */ \
212
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \
213
+ }
214
+
215
+GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state,
216
+ &round_state,
217
+ &round_key,
218
+ false);)
219
+GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state,
220
+ &round_state,
221
+ &round_key,
222
+ false);)
223
+GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state,
224
+ &round_state,
225
+ &round_key,
226
+ false);)
227
+GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state,
228
+ &round_state,
229
+ &round_key,
230
+ false);)
231
+GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state,
232
+ &round_state,
233
+ &round_key,
234
+ false);)
235
+GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state,
236
+ &round_state,
237
+ &round_key,
238
+ false);)
239
+GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state,
240
+ &round_state,
241
+ &round_key,
242
+ false);)
243
+GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state,
244
+ &round_state,
245
+ &round_key,
246
+ false);)
247
+GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);)
248
+
249
+void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
250
+ CPURISCVState *env, uint32_t desc)
251
+{
252
+ uint32_t *vd = vd_vptr;
253
+ uint32_t *vs2 = vs2_vptr;
254
+ uint32_t vl = env->vl;
255
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
256
+ uint32_t vta = vext_vta(desc);
257
+
258
+ uimm &= 0b1111;
259
+ if (uimm > 10 || uimm == 0) {
260
+ uimm ^= 0b1000;
261
+ }
262
+
263
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
264
+ uint32_t rk[8], tmp;
265
+ static const uint32_t rcon[] = {
266
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
267
+ 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
268
+ };
269
+
270
+ rk[0] = vs2[i * 4 + H4(0)];
271
+ rk[1] = vs2[i * 4 + H4(1)];
272
+ rk[2] = vs2[i * 4 + H4(2)];
273
+ rk[3] = vs2[i * 4 + H4(3)];
274
+ tmp = ror32(rk[3], 8);
275
+
276
+ rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
277
+ ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
278
+ ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
279
+ ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
280
+ ^ rcon[uimm - 1];
281
+ rk[5] = rk[1] ^ rk[4];
282
+ rk[6] = rk[2] ^ rk[5];
283
+ rk[7] = rk[3] ^ rk[6];
284
+
285
+ vd[i * 4 + H4(0)] = rk[4];
286
+ vd[i * 4 + H4(1)] = rk[5];
287
+ vd[i * 4 + H4(2)] = rk[6];
288
+ vd[i * 4 + H4(3)] = rk[7];
289
+ }
290
+ env->vstart = 0;
291
+ /* set tail elements to 1s */
292
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
293
+}
294
+
295
+void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
296
+ CPURISCVState *env, uint32_t desc)
297
+{
298
+ uint32_t *vd = vd_vptr;
299
+ uint32_t *vs2 = vs2_vptr;
300
+ uint32_t vl = env->vl;
301
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
302
+ uint32_t vta = vext_vta(desc);
303
+
304
+ uimm &= 0b1111;
305
+ if (uimm > 14 || uimm < 2) {
306
+ uimm ^= 0b1000;
307
+ }
308
+
309
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
310
+ uint32_t rk[12], tmp;
311
+ static const uint32_t rcon[] = {
312
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
313
+ 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
314
+ };
315
+
316
+ rk[0] = vd[i * 4 + H4(0)];
317
+ rk[1] = vd[i * 4 + H4(1)];
318
+ rk[2] = vd[i * 4 + H4(2)];
319
+ rk[3] = vd[i * 4 + H4(3)];
320
+ rk[4] = vs2[i * 4 + H4(0)];
321
+ rk[5] = vs2[i * 4 + H4(1)];
322
+ rk[6] = vs2[i * 4 + H4(2)];
323
+ rk[7] = vs2[i * 4 + H4(3)];
324
+
325
+ if (uimm % 2 == 0) {
326
+ tmp = ror32(rk[7], 8);
327
+ rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
328
+ ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
329
+ ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
330
+ ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
331
+ ^ rcon[(uimm - 1) / 2];
332
+ } else {
333
+ rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) |
334
+ ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) |
335
+ ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) |
336
+ ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0));
337
+ }
338
+ rk[9] = rk[1] ^ rk[8];
339
+ rk[10] = rk[2] ^ rk[9];
340
+ rk[11] = rk[3] ^ rk[10];
341
+
342
+ vd[i * 4 + H4(0)] = rk[8];
343
+ vd[i * 4 + H4(1)] = rk[9];
344
+ vd[i * 4 + H4(2)] = rk[10];
345
+ vd[i * 4 + H4(3)] = rk[11];
346
+ }
347
+ env->vstart = 0;
348
+ /* set tail elements to 1s */
349
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
350
+}
351
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
352
index XXXXXXX..XXXXXXX 100644
353
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
354
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
355
@@ -XXX,XX +XXX,XX @@ static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a)
356
GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check)
357
GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check)
358
GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check)
637
+
359
+
638
+/*
360
+/*
639
+ * Note that vector data is stored in host-endian 64-bit chunks,
361
+ * Zvkned
640
+ * so addressing units smaller than that need a host-endian fixup.
641
+ */
362
+ */
642
+#ifdef HOST_WORDS_BIGENDIAN
363
+
643
+#define H1(x) ((x) ^ 7)
364
+#define ZVKNED_EGS 4
644
+#define H1_2(x) ((x) ^ 6)
365
+
645
+#define H1_4(x) ((x) ^ 4)
366
+#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \
646
+#define H2(x) ((x) ^ 3)
367
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
647
+#define H4(x) ((x) ^ 1)
368
+ { \
648
+#define H8(x) ((x))
369
+ if (CHECK(s, a)) { \
649
+#else
370
+ TCGv_ptr rd_v, rs2_v; \
650
+#define H1(x) (x)
371
+ TCGv_i32 desc, egs; \
651
+#define H1_2(x) (x)
372
+ uint32_t data = 0; \
652
+#define H1_4(x) (x)
373
+ TCGLabel *over = gen_new_label(); \
653
+#define H2(x) (x)
374
+ \
654
+#define H4(x) (x)
375
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
655
+#define H8(x) (x)
376
+ /* save opcode for unwinding in case we throw an exception */ \
656
+#endif
377
+ decode_save_opc(s); \
657
+
378
+ egs = tcg_constant_i32(EGS); \
658
+static inline uint32_t vext_nf(uint32_t desc)
379
+ gen_helper_egs_check(egs, cpu_env); \
659
+{
380
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
660
+ return FIELD_EX32(simd_data(desc), VDATA, NF);
381
+ } \
661
+}
382
+ \
662
+
383
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
663
+static inline uint32_t vext_mlen(uint32_t desc)
384
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
664
+{
385
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
665
+ return FIELD_EX32(simd_data(desc), VDATA, MLEN);
386
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
666
+}
387
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
667
+
388
+ rd_v = tcg_temp_new_ptr(); \
668
+static inline uint32_t vext_vm(uint32_t desc)
389
+ rs2_v = tcg_temp_new_ptr(); \
669
+{
390
+ desc = tcg_constant_i32( \
670
+ return FIELD_EX32(simd_data(desc), VDATA, VM);
391
+ simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
671
+}
392
+ tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \
672
+
393
+ tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \
673
+static inline uint32_t vext_lmul(uint32_t desc)
394
+ gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \
674
+{
395
+ mark_vs_dirty(s); \
675
+ return FIELD_EX32(simd_data(desc), VDATA, LMUL);
396
+ gen_set_label(over); \
676
+}
397
+ return true; \
677
+
398
+ } \
678
+/*
399
+ return false; \
679
+ * Get vector group length in bytes. Its range is [64, 2048].
400
+ }
680
+ *
401
+
681
+ * As simd_desc supports at most 256, the max vlen is 512 bits.
402
+static bool vaes_check_vv(DisasContext *s, arg_rmr *a)
682
+ * So vlen in bytes is encoded as maxsz.
403
+{
683
+ */
404
+ int egw_bytes = ZVKNED_EGS << s->sew;
684
+static inline uint32_t vext_maxsz(uint32_t desc)
405
+ return s->cfg_ptr->ext_zvkned == true &&
685
+{
406
+ require_rvv(s) &&
686
+ return simd_maxsz(desc) << vext_lmul(desc);
407
+ vext_check_isa_ill(s) &&
687
+}
408
+ MAXSZ(s) >= egw_bytes &&
688
+
409
+ require_align(a->rd, s->lmul) &&
689
+/*
410
+ require_align(a->rs2, s->lmul) &&
690
+ * This function checks watchpoints before the real load operation.
411
+ s->sew == MO_32;
691
+ *
412
+}
692
+ * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
413
+
693
+ * In user mode, there is no watchpoint support now.
414
+static bool vaes_check_overlap(DisasContext *s, int vd, int vs2)
694
+ *
415
+{
695
+ * It will trigger an exception if there is no mapping in TLB
416
+ int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul;
696
+ * and page table walk can't fill the TLB entry. Then the guest
417
+ return !is_overlapped(vd, op_size, vs2, 1);
697
+ * software can return here after processing the exception or never return.
418
+}
698
+ */
419
+
699
+static void probe_pages(CPURISCVState *env, target_ulong addr,
420
+static bool vaes_check_vs(DisasContext *s, arg_rmr *a)
700
+ target_ulong len, uintptr_t ra,
421
+{
701
+ MMUAccessType access_type)
422
+ int egw_bytes = ZVKNED_EGS << s->sew;
702
+{
423
+ return vaes_check_overlap(s, a->rd, a->rs2) &&
703
+ target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
424
+ MAXSZ(s) >= egw_bytes &&
704
+ target_ulong curlen = MIN(pagelen, len);
425
+ s->cfg_ptr->ext_zvkned == true &&
705
+
426
+ require_rvv(s) &&
706
+ probe_access(env, addr, curlen, access_type,
427
+ vext_check_isa_ill(s) &&
707
+ cpu_mmu_index(env, false), ra);
428
+ require_align(a->rd, s->lmul) &&
708
+ if (len > curlen) {
429
+ s->sew == MO_32;
709
+ addr += curlen;
430
+}
710
+ curlen = len - curlen;
431
+
711
+ probe_access(env, addr, curlen, access_type,
432
+GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS)
712
+ cpu_mmu_index(env, false), ra);
433
+GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS)
713
+ }
434
+GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS)
714
+}
435
+GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS)
715
+
436
+GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS)
716
+#ifdef HOST_WORDS_BIGENDIAN
437
+GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS)
717
+static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
438
+GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS)
718
+{
439
+GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS)
719
+ /*
440
+GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS)
720
+ * Split the remaining range into two parts.
441
+
721
+ * The first part is in the last uint64_t unit.
442
+#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \
722
+ * The second part starts from the next uint64_t unit.
443
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
723
+ */
444
+ { \
724
+ int part1 = 0, part2 = tot - cnt;
445
+ if (CHECK(s, a)) { \
725
+ if (cnt % 8) {
446
+ TCGv_ptr rd_v, rs2_v; \
726
+ part1 = 8 - (cnt % 8);
447
+ TCGv_i32 uimm_v, desc, egs; \
727
+ part2 = tot - cnt - part1;
448
+ uint32_t data = 0; \
728
+ memset(tail & ~(7ULL), 0, part1);
449
+ TCGLabel *over = gen_new_label(); \
729
+ memset((tail + 8) & ~(7ULL), 0, part2);
450
+ \
730
+ } else {
451
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
731
+ memset(tail, 0, part2);
452
+ /* save opcode for unwinding in case we throw an exception */ \
732
+ }
453
+ decode_save_opc(s); \
733
+}
454
+ egs = tcg_constant_i32(EGS); \
734
+#else
455
+ gen_helper_egs_check(egs, cpu_env); \
735
+static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
456
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
736
+{
457
+ } \
737
+ memset(tail, 0, tot - cnt);
458
+ \
738
+}
459
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
739
+#endif
460
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
740
+
461
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
741
+static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
462
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
742
+{
463
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
743
+ int8_t *cur = ((int8_t *)vd + H1(idx));
464
+ \
744
+ vext_clear(cur, cnt, tot);
465
+ rd_v = tcg_temp_new_ptr(); \
745
+}
466
+ rs2_v = tcg_temp_new_ptr(); \
746
+
467
+ uimm_v = tcg_constant_i32(a->rs1); \
747
+static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
468
+ desc = tcg_constant_i32( \
748
+{
469
+ simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
749
+ int16_t *cur = ((int16_t *)vd + H2(idx));
470
+ tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \
750
+ vext_clear(cur, cnt, tot);
471
+ tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \
751
+}
472
+ gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \
752
+
473
+ mark_vs_dirty(s); \
753
+static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
474
+ gen_set_label(over); \
754
+{
475
+ return true; \
755
+ int32_t *cur = ((int32_t *)vd + H4(idx));
476
+ } \
756
+ vext_clear(cur, cnt, tot);
477
+ return false; \
757
+}
478
+ }
758
+
479
+
759
+static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
480
+static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a)
760
+{
481
+{
761
+ int64_t *cur = (int64_t *)vd + idx;
482
+ int egw_bytes = ZVKNED_EGS << s->sew;
762
+ vext_clear(cur, cnt, tot);
483
+ return s->cfg_ptr->ext_zvkned == true &&
763
+}
484
+ require_rvv(s) &&
764
+
485
+ vext_check_isa_ill(s) &&
765
+
486
+ MAXSZ(s) >= egw_bytes &&
766
+static inline int vext_elem_mask(void *v0, int mlen, int index)
487
+ s->sew == MO_32 &&
767
+{
488
+ require_align(a->rd, s->lmul) &&
768
+ int idx = (index * mlen) / 64;
489
+ require_align(a->rs2, s->lmul);
769
+ int pos = (index * mlen) % 64;
490
+}
770
+ return (((uint64_t *)v0)[idx] >> pos) & 1;
491
+
771
+}
492
+static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a)
772
+
493
+{
773
+/* elements operations for load and store */
494
+ int egw_bytes = ZVKNED_EGS << s->sew;
774
+typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
495
+ return s->cfg_ptr->ext_zvkned == true &&
775
+ uint32_t idx, void *vd, uintptr_t retaddr);
496
+ require_rvv(s) &&
776
+typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);
497
+ vext_check_isa_ill(s) &&
777
+
498
+ MAXSZ(s) >= egw_bytes &&
778
+#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \
499
+ s->sew == MO_32 &&
779
+static void NAME(CPURISCVState *env, abi_ptr addr, \
500
+ require_align(a->rd, s->lmul) &&
780
+ uint32_t idx, void *vd, uintptr_t retaddr)\
501
+ require_align(a->rs2, s->lmul);
781
+{ \
502
+}
782
+ MTYPE data; \
503
+
783
+ ETYPE *cur = ((ETYPE *)vd + H(idx)); \
504
+GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS)
784
+ data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
505
+GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS)
785
+ *cur = data; \
786
+} \
787
+
788
+GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb)
789
+GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb)
790
+GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb)
791
+GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb)
792
+GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
793
+GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
794
+GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
795
+GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
796
+GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
797
+GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb)
798
+GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
799
+GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
800
+GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
801
+GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub)
802
+GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub)
803
+GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub)
804
+GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub)
805
+GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
806
+GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
807
+GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
808
+GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
809
+GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
810
+
811
+#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
812
+static void NAME(CPURISCVState *env, abi_ptr addr, \
813
+ uint32_t idx, void *vd, uintptr_t retaddr)\
814
+{ \
815
+ ETYPE data = *((ETYPE *)vd + H(idx)); \
816
+ cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
817
+}
818
+
819
+GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb)
820
+GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
821
+GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
822
+GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
823
+GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
824
+GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
825
+GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
826
+GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
827
+GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
828
+GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
829
+GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
830
+GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
831
+GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
832
+
833
+/*
834
+ *** stride: access vector element from strided memory
835
+ */
836
+static void
837
+vext_ldst_stride(void *vd, void *v0, target_ulong base,
838
+ target_ulong stride, CPURISCVState *env,
839
+ uint32_t desc, uint32_t vm,
840
+ vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
841
+ uint32_t esz, uint32_t msz, uintptr_t ra,
842
+ MMUAccessType access_type)
843
+{
844
+ uint32_t i, k;
845
+ uint32_t nf = vext_nf(desc);
846
+ uint32_t mlen = vext_mlen(desc);
847
+ uint32_t vlmax = vext_maxsz(desc) / esz;
848
+
849
+ /* probe every access */
850
+ for (i = 0; i < env->vl; i++) {
851
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
852
+ continue;
853
+ }
854
+ probe_pages(env, base + stride * i, nf * msz, ra, access_type);
855
+ }
856
+ /* do real access */
857
+ for (i = 0; i < env->vl; i++) {
858
+ k = 0;
859
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
860
+ continue;
861
+ }
862
+ while (k < nf) {
863
+ target_ulong addr = base + stride * i + k * msz;
864
+ ldst_elem(env, addr, i + k * vlmax, vd, ra);
865
+ k++;
866
+ }
867
+ }
868
+ /* clear tail elements */
869
+ if (clear_elem) {
870
+ for (k = 0; k < nf; k++) {
871
+ clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
872
+ }
873
+ }
874
+}
875
+
876
+#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
877
+void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
878
+ target_ulong stride, CPURISCVState *env, \
879
+ uint32_t desc) \
880
+{ \
881
+ uint32_t vm = vext_vm(desc); \
882
+ vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
883
+ CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
884
+ GETPC(), MMU_DATA_LOAD); \
885
+}
886
+
887
+GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb)
888
+GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh)
889
+GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl)
890
+GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq)
891
+GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh)
892
+GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl)
893
+GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq)
894
+GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl)
895
+GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq)
896
+GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb)
897
+GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh)
898
+GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl)
899
+GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq)
900
+GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb)
901
+GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh)
902
+GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl)
903
+GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq)
904
+GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
905
+GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
906
+GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
907
+GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
908
+GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
909
+
910
+#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \
911
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
912
+ target_ulong stride, CPURISCVState *env, \
913
+ uint32_t desc) \
914
+{ \
915
+ uint32_t vm = vext_vm(desc); \
916
+ vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
917
+ NULL, sizeof(ETYPE), sizeof(MTYPE), \
918
+ GETPC(), MMU_DATA_STORE); \
919
+}
920
+
921
+GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b)
922
+GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h)
923
+GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w)
924
+GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d)
925
+GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
926
+GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
927
+GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
928
+GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
929
+GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
930
+GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b)
931
+GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
932
+GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
933
+GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
934
+
935
+/*
936
+ *** unit-stride: access elements stored contiguously in memory
937
+ */
938
+
939
+/* unmasked unit-stride load and store operation */
940
+static void
941
+vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
942
+ vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
943
+ uint32_t esz, uint32_t msz, uintptr_t ra,
944
+ MMUAccessType access_type)
945
+{
946
+ uint32_t i, k;
947
+ uint32_t nf = vext_nf(desc);
948
+ uint32_t vlmax = vext_maxsz(desc) / esz;
949
+
950
+ /* probe every access */
951
+ probe_pages(env, base, env->vl * nf * msz, ra, access_type);
952
+ /* load bytes from guest memory */
953
+ for (i = 0; i < env->vl; i++) {
954
+ k = 0;
955
+ while (k < nf) {
956
+ target_ulong addr = base + (i * nf + k) * msz;
957
+ ldst_elem(env, addr, i + k * vlmax, vd, ra);
958
+ k++;
959
+ }
960
+ }
961
+ /* clear tail elements */
962
+ if (clear_elem) {
963
+ for (k = 0; k < nf; k++) {
964
+ clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
965
+ }
966
+ }
967
+}
968
+
969
+/*
970
+ * masked unit-stride load and store operation will be a special case of stride,
971
+ * stride = NF * sizeof (MTYPE)
972
+ */
973
+
974
+#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
975
+void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
976
+ CPURISCVState *env, uint32_t desc) \
977
+{ \
978
+ uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
979
+ vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
980
+ CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
981
+ GETPC(), MMU_DATA_LOAD); \
982
+} \
983
+ \
984
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
985
+ CPURISCVState *env, uint32_t desc) \
986
+{ \
987
+ vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \
988
+ sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
989
+}
990
+
991
+GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb)
992
+GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh)
993
+GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl)
994
+GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq)
995
+GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh)
996
+GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl)
997
+GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq)
998
+GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl)
999
+GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq)
1000
+GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb)
1001
+GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh)
1002
+GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl)
1003
+GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq)
1004
+GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb)
1005
+GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh)
1006
+GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl)
1007
+GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq)
1008
+GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
1009
+GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
1010
+GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
1011
+GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
1012
+GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
1013
+
1014
+#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \
1015
+void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
1016
+ CPURISCVState *env, uint32_t desc) \
1017
+{ \
1018
+ uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
1019
+ vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
1020
+ NULL, sizeof(ETYPE), sizeof(MTYPE), \
1021
+ GETPC(), MMU_DATA_STORE); \
1022
+} \
1023
+ \
1024
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
1025
+ CPURISCVState *env, uint32_t desc) \
1026
+{ \
1027
+ vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \
1028
+ sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
1029
+}
1030
+
1031
+GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b)
1032
+GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h)
1033
+GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w)
1034
+GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d)
1035
+GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
1036
+GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
1037
+GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
1038
+GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
1039
+GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
1040
+GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b)
1041
+GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
1042
+GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
1043
+GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
1044
--
506
--
1045
2.27.0
507
2.41.0
1046
1047
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This commit adds support for the Zvknh vector-crypto extension, which
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
consists of the following instructions:
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
6
Message-id: 20200623215920.2594-12-zhiwei_liu@c-sky.com
6
* vsha2ms.vv
7
* vsha2c[hl].vv
8
9
Translation functions are defined in
10
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
11
`target/riscv/vcrypto_helper.c`.
12
13
Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
14
Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
15
[max.chou@sifive.com: Replaced vstart checking by TCG op]
16
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
17
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
18
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
19
Signed-off-by: Max Chou <max.chou@sifive.com>
20
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
21
[max.chou@sifive.com: Exposed x-zvknha & x-zvknhb properties]
22
[max.chou@sifive.com: Replaced SEW selection to happened during
23
translation]
24
Message-ID: <20230711165917.2629866-11-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
25
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
26
---
9
target/riscv/helper.h | 49 +++++++
27
target/riscv/cpu_cfg.h | 2 +
10
target/riscv/insn32.decode | 16 ++
28
target/riscv/helper.h | 6 +
11
target/riscv/insn_trans/trans_rvv.inc.c | 186 ++++++++++++++++++++++++
29
target/riscv/insn32.decode | 5 +
12
target/riscv/vector_helper.c | 111 ++++++++++++++
30
target/riscv/cpu.c | 13 +-
13
4 files changed, 362 insertions(+)
31
target/riscv/vcrypto_helper.c | 238 +++++++++++++++++++++++
32
target/riscv/insn_trans/trans_rvvk.c.inc | 129 ++++++++++++
33
6 files changed, 390 insertions(+), 3 deletions(-)
14
34
35
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/riscv/cpu_cfg.h
38
+++ b/target/riscv/cpu_cfg.h
39
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
40
bool ext_zvbb;
41
bool ext_zvbc;
42
bool ext_zvkned;
43
+ bool ext_zvknha;
44
+ bool ext_zvknhb;
45
bool ext_zmmul;
46
bool ext_zvfbfmin;
47
bool ext_zvfbfwma;
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
48
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
50
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
51
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
52
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32)
20
DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
53
DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32)
21
DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
54
DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32)
22
DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
55
DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32)
23
+
56
+
24
+DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
57
+DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
58
+DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
59
+DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
60
+DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
61
+DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
49
+DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
50
+DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
51
+DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
52
+DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
53
+DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
54
+DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
55
+DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
56
+DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
57
+DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
58
+DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
59
+DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
60
+DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32)
61
+DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32)
62
+DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32)
63
+DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32)
64
+DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32)
65
+DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32)
66
+DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32)
67
+DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32)
68
+DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32)
69
+DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32)
70
+DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32)
71
+DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32)
72
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
62
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
73
index XXXXXXX..XXXXXXX 100644
63
index XXXXXXX..XXXXXXX 100644
74
--- a/target/riscv/insn32.decode
64
--- a/target/riscv/insn32.decode
75
+++ b/target/riscv/insn32.decode
65
+++ b/target/riscv/insn32.decode
76
@@ -XXX,XX +XXX,XX @@ vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
66
@@ -XXX,XX +XXX,XX @@ vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1
77
vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
67
vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1
78
vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
68
vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1
79
vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
69
vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
80
+vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm
70
+
81
+vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm
71
+# *** Zvknh vector crypto extension ***
82
+vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm
72
+vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1
83
+vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm
73
+vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1
84
+vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm
74
+vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
85
+vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm
75
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
86
+vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm
87
+vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm
88
+vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm
89
+vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm
90
+vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm
91
+vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm
92
+vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
93
+vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
94
+vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
95
+vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
96
97
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
98
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
99
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
100
index XXXXXXX..XXXXXXX 100644
76
index XXXXXXX..XXXXXXX 100644
101
--- a/target/riscv/insn_trans/trans_rvv.inc.c
77
--- a/target/riscv/cpu.c
102
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/cpu.c
103
@@ -XXX,XX +XXX,XX @@ static bool vext_check_nf(DisasContext *s, uint32_t nf)
79
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
104
return (1 << s->lmul) * nf <= 8;
80
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
81
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
82
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
83
+ ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
84
+ ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
85
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
86
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
87
ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
88
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
89
* In principle Zve*x would also suffice here, were they supported
90
* in qemu
91
*/
92
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) {
93
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) &&
94
+ !cpu->cfg.ext_zve32f) {
95
error_setg(errp,
96
"Vector crypto extensions require V or Zve* extensions");
97
return;
98
}
99
100
- if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) {
101
- error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions");
102
+ if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) {
103
+ error_setg(
104
+ errp,
105
+ "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions");
106
return;
107
}
108
109
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
110
DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
111
DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
112
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
113
+ DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
114
+ DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
115
116
DEFINE_PROP_END_OF_LIST(),
117
};
118
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/target/riscv/vcrypto_helper.c
121
+++ b/target/riscv/vcrypto_helper.c
122
@@ -XXX,XX +XXX,XX @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
123
/* set tail elements to 1s */
124
vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
105
}
125
}
106
126
+
127
+static inline uint32_t sig0_sha256(uint32_t x)
128
+{
129
+ return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
130
+}
131
+
132
+static inline uint32_t sig1_sha256(uint32_t x)
133
+{
134
+ return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
135
+}
136
+
137
+static inline uint64_t sig0_sha512(uint64_t x)
138
+{
139
+ return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
140
+}
141
+
142
+static inline uint64_t sig1_sha512(uint64_t x)
143
+{
144
+ return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
145
+}
146
+
147
+static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2)
148
+{
149
+ uint32_t res[4];
150
+ res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) +
151
+ vd[H4(0)];
152
+ res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) +
153
+ vd[H4(1)];
154
+ res[2] =
155
+ sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)];
156
+ res[3] =
157
+ sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)];
158
+ vd[H4(3)] = res[3];
159
+ vd[H4(2)] = res[2];
160
+ vd[H4(1)] = res[1];
161
+ vd[H4(0)] = res[0];
162
+}
163
+
164
+static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2)
165
+{
166
+ uint64_t res[4];
167
+ res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0];
168
+ res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1];
169
+ res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2];
170
+ res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3];
171
+ vd[3] = res[3];
172
+ vd[2] = res[2];
173
+ vd[1] = res[1];
174
+ vd[0] = res[0];
175
+}
176
+
177
+void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
178
+ uint32_t desc)
179
+{
180
+ uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW);
181
+ uint32_t esz = sew == MO_32 ? 4 : 8;
182
+ uint32_t total_elems;
183
+ uint32_t vta = vext_vta(desc);
184
+
185
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
186
+ if (sew == MO_32) {
187
+ vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4,
188
+ ((uint32_t *)vs2) + i * 4);
189
+ } else {
190
+ /* If not 32 then SEW should be 64 */
191
+ vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4,
192
+ ((uint64_t *)vs2) + i * 4);
193
+ }
194
+ }
195
+ /* set tail elements to 1s */
196
+ total_elems = vext_get_total_elems(env, desc, esz);
197
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
198
+ env->vstart = 0;
199
+}
200
+
201
+static inline uint64_t sum0_64(uint64_t x)
202
+{
203
+ return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
204
+}
205
+
206
+static inline uint32_t sum0_32(uint32_t x)
207
+{
208
+ return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
209
+}
210
+
211
+static inline uint64_t sum1_64(uint64_t x)
212
+{
213
+ return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
214
+}
215
+
216
+static inline uint32_t sum1_32(uint32_t x)
217
+{
218
+ return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
219
+}
220
+
221
+#define ch(x, y, z) ((x & y) ^ ((~x) & z))
222
+
223
+#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
224
+
225
+static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1)
226
+{
227
+ uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0];
228
+ uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0];
229
+ uint64_t W0 = vs1[0], W1 = vs1[1];
230
+ uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0;
231
+ uint64_t T2 = sum0_64(a) + maj(a, b, c);
232
+
233
+ h = g;
234
+ g = f;
235
+ f = e;
236
+ e = d + T1;
237
+ d = c;
238
+ c = b;
239
+ b = a;
240
+ a = T1 + T2;
241
+
242
+ T1 = h + sum1_64(e) + ch(e, f, g) + W1;
243
+ T2 = sum0_64(a) + maj(a, b, c);
244
+ h = g;
245
+ g = f;
246
+ f = e;
247
+ e = d + T1;
248
+ d = c;
249
+ c = b;
250
+ b = a;
251
+ a = T1 + T2;
252
+
253
+ vd[0] = f;
254
+ vd[1] = e;
255
+ vd[2] = b;
256
+ vd[3] = a;
257
+}
258
+
259
+static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1)
260
+{
261
+ uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)];
262
+ uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)];
263
+ uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)];
264
+ uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0;
265
+ uint32_t T2 = sum0_32(a) + maj(a, b, c);
266
+
267
+ h = g;
268
+ g = f;
269
+ f = e;
270
+ e = d + T1;
271
+ d = c;
272
+ c = b;
273
+ b = a;
274
+ a = T1 + T2;
275
+
276
+ T1 = h + sum1_32(e) + ch(e, f, g) + W1;
277
+ T2 = sum0_32(a) + maj(a, b, c);
278
+ h = g;
279
+ g = f;
280
+ f = e;
281
+ e = d + T1;
282
+ d = c;
283
+ c = b;
284
+ b = a;
285
+ a = T1 + T2;
286
+
287
+ vd[H4(0)] = f;
288
+ vd[H4(1)] = e;
289
+ vd[H4(2)] = b;
290
+ vd[H4(3)] = a;
291
+}
292
+
293
+void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
294
+ uint32_t desc)
295
+{
296
+ const uint32_t esz = 4;
297
+ uint32_t total_elems;
298
+ uint32_t vta = vext_vta(desc);
299
+
300
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
301
+ vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
302
+ ((uint32_t *)vs1) + 4 * i + 2);
303
+ }
304
+
305
+ /* set tail elements to 1s */
306
+ total_elems = vext_get_total_elems(env, desc, esz);
307
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
308
+ env->vstart = 0;
309
+}
310
+
311
+void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
312
+ uint32_t desc)
313
+{
314
+ const uint32_t esz = 8;
315
+ uint32_t total_elems;
316
+ uint32_t vta = vext_vta(desc);
317
+
318
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
319
+ vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
320
+ ((uint64_t *)vs1) + 4 * i + 2);
321
+ }
322
+
323
+ /* set tail elements to 1s */
324
+ total_elems = vext_get_total_elems(env, desc, esz);
325
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
326
+ env->vstart = 0;
327
+}
328
+
329
+void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
330
+ uint32_t desc)
331
+{
332
+ const uint32_t esz = 4;
333
+ uint32_t total_elems;
334
+ uint32_t vta = vext_vta(desc);
335
+
336
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
337
+ vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
338
+ (((uint32_t *)vs1) + 4 * i));
339
+ }
340
+
341
+ /* set tail elements to 1s */
342
+ total_elems = vext_get_total_elems(env, desc, esz);
343
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
344
+ env->vstart = 0;
345
+}
346
+
347
+void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
348
+ uint32_t desc)
349
+{
350
+ uint32_t esz = 8;
351
+ uint32_t total_elems;
352
+ uint32_t vta = vext_vta(desc);
353
+
354
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
355
+ vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
356
+ (((uint64_t *)vs1) + 4 * i));
357
+ }
358
+
359
+ /* set tail elements to 1s */
360
+ total_elems = vext_get_total_elems(env, desc, esz);
361
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
362
+ env->vstart = 0;
363
+}
364
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
365
index XXXXXXX..XXXXXXX 100644
366
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
367
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
368
@@ -XXX,XX +XXX,XX @@ static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a)
369
370
GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS)
371
GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS)
372
+
107
+/*
373
+/*
108
+ * The destination vector register group cannot overlap a source vector register
374
+ * Zvknh
109
+ * group of a different element width. (Section 11.2)
110
+ */
375
+ */
111
+static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen)
376
+
112
+{
377
+#define ZVKNH_EGS 4
113
+ return ((rd >= rs + slen) || (rs >= rd + dlen));
378
+
114
+}
379
+#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \
115
/* common translation macro */
380
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
116
#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \
381
+ { \
117
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\
382
+ if (CHECK(s, a)) { \
118
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
383
+ uint32_t data = 0; \
119
}
384
+ TCGLabel *over = gen_new_label(); \
120
385
+ TCGv_i32 egs; \
121
GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi)
386
+ \
122
+
387
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
123
+/* Vector Widening Integer Add/Subtract */
388
+ /* save opcode for unwinding in case we throw an exception */ \
124
+
389
+ decode_save_opc(s); \
125
+/* OPIVV with WIDEN */
390
+ egs = tcg_constant_i32(EGS); \
126
+static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
391
+ gen_helper_egs_check(egs, cpu_env); \
127
+{
392
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
128
+ return (vext_check_isa_ill(s) &&
393
+ } \
129
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
394
+ \
130
+ vext_check_reg(s, a->rd, true) &&
395
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
131
+ vext_check_reg(s, a->rs2, false) &&
396
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
132
+ vext_check_reg(s, a->rs1, false) &&
397
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
133
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
398
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
134
+ 1 << s->lmul) &&
399
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
135
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
400
+ \
136
+ 1 << s->lmul) &&
401
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \
137
+ (s->lmul < 0x3) && (s->sew < 0x3));
402
+ vreg_ofs(s, a->rs2), cpu_env, \
138
+}
403
+ s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \
139
+
404
+ data, gen_helper_##NAME); \
140
+static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
405
+ \
141
+ gen_helper_gvec_4_ptr *fn,
406
+ mark_vs_dirty(s); \
142
+ bool (*checkfn)(DisasContext *, arg_rmrr *))
407
+ gen_set_label(over); \
143
+{
408
+ return true; \
144
+ if (checkfn(s, a)) {
409
+ } \
410
+ return false; \
411
+ }
412
+
413
+static bool vsha_check_sew(DisasContext *s)
414
+{
415
+ return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) ||
416
+ (s->cfg_ptr->ext_zvknhb == true &&
417
+ (s->sew == MO_32 || s->sew == MO_64));
418
+}
419
+
420
+static bool vsha_check(DisasContext *s, arg_rmrr *a)
421
+{
422
+ int egw_bytes = ZVKNH_EGS << s->sew;
423
+ int mult = 1 << MAX(s->lmul, 0);
424
+ return opivv_check(s, a) &&
425
+ vsha_check_sew(s) &&
426
+ MAXSZ(s) >= egw_bytes &&
427
+ !is_overlapped(a->rd, mult, a->rs1, mult) &&
428
+ !is_overlapped(a->rd, mult, a->rs2, mult) &&
429
+ s->lmul >= 0;
430
+}
431
+
432
+GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS)
433
+
434
+static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a)
435
+{
436
+ if (vsha_check(s, a)) {
145
+ uint32_t data = 0;
437
+ uint32_t data = 0;
146
+ TCGLabel *over = gen_new_label();
438
+ TCGLabel *over = gen_new_label();
147
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
439
+ TCGv_i32 egs;
148
+
440
+
149
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
441
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
442
+ /* save opcode for unwinding in case we throw an exception */
443
+ decode_save_opc(s);
444
+ egs = tcg_constant_i32(ZVKNH_EGS);
445
+ gen_helper_egs_check(egs, cpu_env);
446
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
447
+ }
448
+
150
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
449
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
151
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
450
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
152
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
451
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
153
+ vreg_ofs(s, a->rs1),
452
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
154
+ vreg_ofs(s, a->rs2),
453
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
155
+ cpu_env, 0, s->vlen / 8,
454
+
156
+ data, fn);
455
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
456
+ vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
457
+ s->cfg_ptr->vlen / 8, data,
458
+ s->sew == MO_32 ?
459
+ gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv);
460
+
461
+ mark_vs_dirty(s);
157
+ gen_set_label(over);
462
+ gen_set_label(over);
158
+ return true;
463
+ return true;
159
+ }
464
+ }
160
+ return false;
465
+ return false;
161
+}
466
+}
162
+
467
+
163
+#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \
468
+static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a)
164
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
469
+{
165
+{ \
470
+ if (vsha_check(s, a)) {
166
+ static gen_helper_gvec_4_ptr * const fns[3] = { \
167
+ gen_helper_##NAME##_b, \
168
+ gen_helper_##NAME##_h, \
169
+ gen_helper_##NAME##_w \
170
+ }; \
171
+ return do_opivv_widen(s, a, fns[s->sew], CHECK); \
172
+}
173
+
174
+GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check)
175
+GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check)
176
+GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check)
177
+GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check)
178
+
179
+/* OPIVX with WIDEN */
180
+static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
181
+{
182
+ return (vext_check_isa_ill(s) &&
183
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
184
+ vext_check_reg(s, a->rd, true) &&
185
+ vext_check_reg(s, a->rs2, false) &&
186
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
187
+ 1 << s->lmul) &&
188
+ (s->lmul < 0x3) && (s->sew < 0x3));
189
+}
190
+
191
+static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
192
+ gen_helper_opivx *fn)
193
+{
194
+ if (opivx_widen_check(s, a)) {
195
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
196
+ }
197
+ return true;
198
+}
199
+
200
+#define GEN_OPIVX_WIDEN_TRANS(NAME) \
201
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
202
+{ \
203
+ static gen_helper_opivx * const fns[3] = { \
204
+ gen_helper_##NAME##_b, \
205
+ gen_helper_##NAME##_h, \
206
+ gen_helper_##NAME##_w \
207
+ }; \
208
+ return do_opivx_widen(s, a, fns[s->sew]); \
209
+}
210
+
211
+GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
212
+GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
213
+GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
214
+GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
215
+
216
+/* WIDEN OPIVV with WIDEN */
217
+static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
218
+{
219
+ return (vext_check_isa_ill(s) &&
220
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
221
+ vext_check_reg(s, a->rd, true) &&
222
+ vext_check_reg(s, a->rs2, true) &&
223
+ vext_check_reg(s, a->rs1, false) &&
224
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
225
+ 1 << s->lmul) &&
226
+ (s->lmul < 0x3) && (s->sew < 0x3));
227
+}
228
+
229
+static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
230
+ gen_helper_gvec_4_ptr *fn)
231
+{
232
+ if (opiwv_widen_check(s, a)) {
233
+ uint32_t data = 0;
471
+ uint32_t data = 0;
234
+ TCGLabel *over = gen_new_label();
472
+ TCGLabel *over = gen_new_label();
235
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
473
+ TCGv_i32 egs;
236
+
474
+
237
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
475
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
476
+ /* save opcode for unwinding in case we throw an exception */
477
+ decode_save_opc(s);
478
+ egs = tcg_constant_i32(ZVKNH_EGS);
479
+ gen_helper_egs_check(egs, cpu_env);
480
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
481
+ }
482
+
238
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
483
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
239
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
484
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
240
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
485
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
241
+ vreg_ofs(s, a->rs1),
486
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
242
+ vreg_ofs(s, a->rs2),
487
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
243
+ cpu_env, 0, s->vlen / 8, data, fn);
488
+
489
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
490
+ vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
491
+ s->cfg_ptr->vlen / 8, data,
492
+ s->sew == MO_32 ?
493
+ gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv);
494
+
495
+ mark_vs_dirty(s);
244
+ gen_set_label(over);
496
+ gen_set_label(over);
245
+ return true;
497
+ return true;
246
+ }
498
+ }
247
+ return false;
499
+ return false;
248
+}
500
+}
249
+
250
+#define GEN_OPIWV_WIDEN_TRANS(NAME) \
251
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
252
+{ \
253
+ static gen_helper_gvec_4_ptr * const fns[3] = { \
254
+ gen_helper_##NAME##_b, \
255
+ gen_helper_##NAME##_h, \
256
+ gen_helper_##NAME##_w \
257
+ }; \
258
+ return do_opiwv_widen(s, a, fns[s->sew]); \
259
+}
260
+
261
+GEN_OPIWV_WIDEN_TRANS(vwaddu_wv)
262
+GEN_OPIWV_WIDEN_TRANS(vwadd_wv)
263
+GEN_OPIWV_WIDEN_TRANS(vwsubu_wv)
264
+GEN_OPIWV_WIDEN_TRANS(vwsub_wv)
265
+
266
+/* WIDEN OPIVX with WIDEN */
267
+static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a)
268
+{
269
+ return (vext_check_isa_ill(s) &&
270
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
271
+ vext_check_reg(s, a->rd, true) &&
272
+ vext_check_reg(s, a->rs2, true) &&
273
+ (s->lmul < 0x3) && (s->sew < 0x3));
274
+}
275
+
276
+static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a,
277
+ gen_helper_opivx *fn)
278
+{
279
+ if (opiwx_widen_check(s, a)) {
280
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
281
+ }
282
+ return false;
283
+}
284
+
285
+#define GEN_OPIWX_WIDEN_TRANS(NAME) \
286
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
287
+{ \
288
+ static gen_helper_opivx * const fns[3] = { \
289
+ gen_helper_##NAME##_b, \
290
+ gen_helper_##NAME##_h, \
291
+ gen_helper_##NAME##_w \
292
+ }; \
293
+ return do_opiwx_widen(s, a, fns[s->sew]); \
294
+}
295
+
296
+GEN_OPIWX_WIDEN_TRANS(vwaddu_wx)
297
+GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
298
+GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
299
+GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
300
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
301
index XXXXXXX..XXXXXXX 100644
302
--- a/target/riscv/vector_helper.c
303
+++ b/target/riscv/vector_helper.c
304
@@ -XXX,XX +XXX,XX @@ void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
305
*(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
306
}
307
}
308
+
309
+/* Vector Widening Integer Add/Subtract */
310
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
311
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
312
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
313
+#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
314
+#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
315
+#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
316
+#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
317
+#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
318
+#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
319
+#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
320
+#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
321
+#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
322
+RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
323
+RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
324
+RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
325
+RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
326
+RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
327
+RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
328
+RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
329
+RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
330
+RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
331
+RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
332
+RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
333
+RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
334
+RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
335
+RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
336
+RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
337
+RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
338
+RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
339
+RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
340
+RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
341
+RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
342
+RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
343
+RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
344
+RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
345
+RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
346
+GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh)
347
+GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl)
348
+GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq)
349
+GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh)
350
+GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl)
351
+GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq)
352
+GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh)
353
+GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl)
354
+GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq)
355
+GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh)
356
+GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl)
357
+GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq)
358
+GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh)
359
+GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl)
360
+GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq)
361
+GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh)
362
+GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl)
363
+GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq)
364
+GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh)
365
+GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl)
366
+GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq)
367
+GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh)
368
+GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl)
369
+GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq)
370
+
371
+RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
372
+RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
373
+RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
374
+RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
375
+RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
376
+RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
377
+RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
378
+RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
379
+RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
380
+RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
381
+RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
382
+RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
383
+RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
384
+RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
385
+RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
386
+RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
387
+RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
388
+RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
389
+RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
390
+RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
391
+RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
392
+RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
393
+RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
394
+RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
395
+GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh)
396
+GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl)
397
+GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq)
398
+GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh)
399
+GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl)
400
+GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq)
401
+GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh)
402
+GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl)
403
+GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq)
404
+GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh)
405
+GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl)
406
+GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq)
407
+GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh)
408
+GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl)
409
+GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq)
410
+GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh)
411
+GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl)
412
+GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq)
413
+GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh)
414
+GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl)
415
+GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
416
+GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
417
+GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
418
+GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)
419
--
501
--
420
2.27.0
502
2.41.0
421
422
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This commit adds support for the Zvksh vector-crypto extension, which
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
consists of the following instructions:
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
6
Message-id: 20200623215920.2594-43-zhiwei_liu@c-sky.com
6
* vsm3me.vv
7
* vsm3c.vi
8
9
Translation functions are defined in
10
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
11
`target/riscv/vcrypto_helper.c`.
12
13
Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
14
[max.chou@sifive.com: Replaced vstart checking by TCG op]
15
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
16
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
17
Signed-off-by: Max Chou <max.chou@sifive.com>
18
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
19
[max.chou@sifive.com: Exposed x-zvksh property]
20
Message-ID: <20230711165917.2629866-12-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
21
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
22
---
9
target/riscv/helper.h | 13 ++++++++++
23
target/riscv/cpu_cfg.h | 1 +
10
target/riscv/insn32.decode | 4 +++
24
target/riscv/helper.h | 3 +
11
target/riscv/insn_trans/trans_rvv.inc.c | 6 +++++
25
target/riscv/insn32.decode | 4 +
12
target/riscv/vector_helper.c | 33 +++++++++++++++++++++++++
26
target/riscv/cpu.c | 6 +-
13
4 files changed, 56 insertions(+)
27
target/riscv/vcrypto_helper.c | 134 +++++++++++++++++++++++
14
28
target/riscv/insn_trans/trans_rvvk.c.inc | 31 ++++++
29
6 files changed, 177 insertions(+), 2 deletions(-)
30
31
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/riscv/cpu_cfg.h
34
+++ b/target/riscv/cpu_cfg.h
35
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
36
bool ext_zvkned;
37
bool ext_zvknha;
38
bool ext_zvknhb;
39
+ bool ext_zvksh;
40
bool ext_zmmul;
41
bool ext_zvfbfmin;
42
bool ext_zvfbfwma;
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
43
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
45
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
46
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32)
47
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32)
20
DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32)
48
DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32)
21
DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32)
49
DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32)
22
DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32)
50
DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
23
+
51
+
24
+DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
52
+DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
53
+DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
26
+DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32)
36
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
54
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
37
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
38
--- a/target/riscv/insn32.decode
56
--- a/target/riscv/insn32.decode
39
+++ b/target/riscv/insn32.decode
57
+++ b/target/riscv/insn32.decode
40
@@ -XXX,XX +XXX,XX @@ vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
58
@@ -XXX,XX +XXX,XX @@ vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
41
vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
59
vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1
42
vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0
60
vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1
43
vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2
61
vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
44
+vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
62
+
45
+vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
63
+# *** Zvksh vector crypto extension ***
46
+vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
64
+vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1
47
+vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
65
+vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
48
66
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
49
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
67
index XXXXXXX..XXXXXXX 100644
50
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
68
--- a/target/riscv/cpu.c
51
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
69
+++ b/target/riscv/cpu.c
52
index XXXXXXX..XXXXXXX 100644
70
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
53
--- a/target/riscv/insn_trans/trans_rvv.inc.c
71
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
54
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
72
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
55
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
73
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
74
+ ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
75
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
76
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
77
ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
78
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
79
* In principle Zve*x would also suffice here, were they supported
80
* in qemu
81
*/
82
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) &&
83
- !cpu->cfg.ext_zve32f) {
84
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha ||
85
+ cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
86
error_setg(errp,
87
"Vector crypto extensions require V or Zve* extensions");
88
return;
89
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
90
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
91
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
92
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
93
+ DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false),
94
95
DEFINE_PROP_END_OF_LIST(),
96
};
97
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/target/riscv/vcrypto_helper.c
100
+++ b/target/riscv/vcrypto_helper.c
101
@@ -XXX,XX +XXX,XX @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
102
vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
103
env->vstart = 0;
104
}
105
+
106
+static inline uint32_t p1(uint32_t x)
107
+{
108
+ return x ^ rol32(x, 15) ^ rol32(x, 23);
109
+}
110
+
111
+static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3,
112
+ uint32_t m13, uint32_t m6)
113
+{
114
+ return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6;
115
+}
116
+
117
+void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
118
+ CPURISCVState *env, uint32_t desc)
119
+{
120
+ uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
121
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
122
+ uint32_t vta = vext_vta(desc);
123
+ uint32_t *vd = vd_vptr;
124
+ uint32_t *vs1 = vs1_vptr;
125
+ uint32_t *vs2 = vs2_vptr;
126
+
127
+ for (int i = env->vstart / 8; i < env->vl / 8; i++) {
128
+ uint32_t w[24];
129
+ for (int j = 0; j < 8; j++) {
130
+ w[j] = bswap32(vs1[H4((i * 8) + j)]);
131
+ w[j + 8] = bswap32(vs2[H4((i * 8) + j)]);
132
+ }
133
+ for (int j = 0; j < 8; j++) {
134
+ w[j + 16] =
135
+ zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]);
136
+ }
137
+ for (int j = 0; j < 8; j++) {
138
+ vd[(i * 8) + j] = bswap32(w[H4(j + 16)]);
139
+ }
140
+ }
141
+ vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
142
+ env->vstart = 0;
143
+}
144
+
145
+static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z)
146
+{
147
+ return x ^ y ^ z;
148
+}
149
+
150
+static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z)
151
+{
152
+ return (x & y) | (x & z) | (y & z);
153
+}
154
+
155
+static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
156
+{
157
+ return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z);
158
+}
159
+
160
+static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z)
161
+{
162
+ return x ^ y ^ z;
163
+}
164
+
165
+static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z)
166
+{
167
+ return (x & y) | (~x & z);
168
+}
169
+
170
+static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
171
+{
172
+ return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z);
173
+}
174
+
175
+static inline uint32_t t_j(uint32_t j)
176
+{
177
+ return (j <= 15) ? 0x79cc4519 : 0x7a879d8a;
178
+}
179
+
180
+static inline uint32_t p_0(uint32_t x)
181
+{
182
+ return x ^ rol32(x, 9) ^ rol32(x, 17);
183
+}
184
+
185
+static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm)
186
+{
187
+ uint32_t x0, x1;
188
+ uint32_t j;
189
+ uint32_t ss1, ss2, tt1, tt2;
190
+ x0 = vs2[0] ^ vs2[4];
191
+ x1 = vs2[1] ^ vs2[5];
192
+ j = 2 * uimm;
193
+ ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7);
194
+ ss2 = ss1 ^ rol32(vs1[0], 12);
195
+ tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0;
196
+ tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0];
197
+ vs1[3] = vs1[2];
198
+ vd[3] = rol32(vs1[1], 9);
199
+ vs1[1] = vs1[0];
200
+ vd[1] = tt1;
201
+ vs1[7] = vs1[6];
202
+ vd[7] = rol32(vs1[5], 19);
203
+ vs1[5] = vs1[4];
204
+ vd[5] = p_0(tt2);
205
+ j = 2 * uimm + 1;
206
+ ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7);
207
+ ss2 = ss1 ^ rol32(vd[1], 12);
208
+ tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1;
209
+ tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1];
210
+ vd[2] = rol32(vs1[1], 9);
211
+ vd[0] = tt1;
212
+ vd[6] = rol32(vs1[5], 19);
213
+ vd[4] = p_0(tt2);
214
+}
215
+
216
+void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
217
+ CPURISCVState *env, uint32_t desc)
218
+{
219
+ uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
220
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
221
+ uint32_t vta = vext_vta(desc);
222
+ uint32_t *vd = vd_vptr;
223
+ uint32_t *vs2 = vs2_vptr;
224
+ uint32_t v1[8], v2[8], v3[8];
225
+
226
+ for (int i = env->vstart / 8; i < env->vl / 8; i++) {
227
+ for (int k = 0; k < 8; k++) {
228
+ v2[k] = bswap32(vd[H4(i * 8 + k)]);
229
+ v3[k] = bswap32(vs2[H4(i * 8 + k)]);
230
+ }
231
+ sm3c(v1, v2, v3, uimm);
232
+ for (int k = 0; k < 8; k++) {
233
+ vd[i * 8 + k] = bswap32(v1[H4(k)]);
234
+ }
235
+ }
236
+ vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
237
+ env->vstart = 0;
238
+}
239
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
240
index XXXXXXX..XXXXXXX 100644
241
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
242
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
243
@@ -XXX,XX +XXX,XX @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a)
56
}
244
}
57
return false;
245
return false;
58
}
246
}
59
+
247
+
60
+/* Single-Width Floating-Point/Integer Type-Convert Instructions */
248
+/*
61
+GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check)
249
+ * Zvksh
62
+GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check)
250
+ */
63
+GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check)
251
+
64
+GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check)
252
+#define ZVKSH_EGS 8
65
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
253
+
66
index XXXXXXX..XXXXXXX 100644
254
+static inline bool vsm3_check(DisasContext *s, arg_rmrr *a)
67
--- a/target/riscv/vector_helper.c
255
+{
68
+++ b/target/riscv/vector_helper.c
256
+ int egw_bytes = ZVKSH_EGS << s->sew;
69
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
257
+ int mult = 1 << MAX(s->lmul, 0);
70
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh)
258
+ return s->cfg_ptr->ext_zvksh == true &&
71
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl)
259
+ require_rvv(s) &&
72
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq)
260
+ vext_check_isa_ill(s) &&
73
+
261
+ !is_overlapped(a->rd, mult, a->rs2, mult) &&
74
+/* Single-Width Floating-Point/Integer Type-Convert Instructions */
262
+ MAXSZ(s) >= egw_bytes &&
75
+/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
263
+ s->sew == MO_32;
76
+RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
264
+}
77
+RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
265
+
78
+RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
266
+static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a)
79
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh)
267
+{
80
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl)
268
+ return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
81
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq)
269
+}
82
+
270
+
83
+/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
271
+static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a)
84
+RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
272
+{
85
+RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
273
+ return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm);
86
+RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
274
+}
87
+GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh)
275
+
88
+GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl)
276
+GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS)
89
+GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq)
277
+GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS)
90
+
91
+/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
92
+RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
93
+RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
94
+RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
95
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh)
96
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl)
97
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq)
98
+
99
+/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
100
+RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
101
+RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
102
+RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
103
+GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh)
104
+GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl)
105
+GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq)
106
--
278
--
107
2.27.0
279
2.41.0
108
109
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This commit adds support for the Zvkg vector-crypto extension, which
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
consists of the following instructions:
5
Message-id: 20200623215920.2594-59-zhiwei_liu@c-sky.com
5
6
* vgmul.vv
7
* vghsh.vv
8
9
Translation functions are defined in
10
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
11
`target/riscv/vcrypto_helper.c`.
12
13
Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
14
[max.chou@sifive.com: Replaced vstart checking by TCG op]
15
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
16
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
17
Signed-off-by: Max Chou <max.chou@sifive.com>
18
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
19
[max.chou@sifive.com: Exposed x-zvkg property]
20
[max.chou@sifive.com: Replaced uint by int for cross win32 build]
21
Message-ID: <20230711165917.2629866-13-max.chou@sifive.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
22
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
23
---
8
target/riscv/helper.h | 17 ++++
24
target/riscv/cpu_cfg.h | 1 +
9
target/riscv/insn32.decode | 6 ++
25
target/riscv/helper.h | 3 +
10
target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++
26
target/riscv/insn32.decode | 4 ++
11
target/riscv/vector_helper.c | 114 ++++++++++++++++++++++++
27
target/riscv/cpu.c | 6 +-
12
4 files changed, 155 insertions(+)
28
target/riscv/vcrypto_helper.c | 72 ++++++++++++++++++++++++
13
29
target/riscv/insn_trans/trans_rvvk.c.inc | 30 ++++++++++
30
6 files changed, 114 insertions(+), 2 deletions(-)
31
32
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/riscv/cpu_cfg.h
35
+++ b/target/riscv/cpu_cfg.h
36
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
37
bool ext_zve64d;
38
bool ext_zvbb;
39
bool ext_zvbc;
40
+ bool ext_zvkg;
41
bool ext_zvkned;
42
bool ext_zvknha;
43
bool ext_zvknhb;
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
44
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
46
--- a/target/riscv/helper.h
17
+++ b/target/riscv/helper.h
47
+++ b/target/riscv/helper.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32)
48
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
19
DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32)
49
20
DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32)
50
DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32)
21
DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32)
51
DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
22
+
52
+
23
+DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32)
24
+DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32)
26
+DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32)
27
+DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32)
28
+DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32)
29
+DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32)
30
+DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32)
31
+DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32)
32
+DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32)
39
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
55
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
40
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
41
--- a/target/riscv/insn32.decode
57
--- a/target/riscv/insn32.decode
42
+++ b/target/riscv/insn32.decode
58
+++ b/target/riscv/insn32.decode
43
@@ -XXX,XX +XXX,XX @@ vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
59
@@ -XXX,XX +XXX,XX @@ vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
44
vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2
60
# *** Zvksh vector crypto extension ***
45
vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd
61
vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1
46
vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2
62
vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
47
+vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
63
+
48
+vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
64
+# *** Zvkg vector crypto extension ***
49
+vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
65
+vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1
50
+vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
66
+vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1
51
+vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
67
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
52
+vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
68
index XXXXXXX..XXXXXXX 100644
53
69
--- a/target/riscv/cpu.c
54
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
70
+++ b/target/riscv/cpu.c
55
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
71
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
56
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
72
ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
57
index XXXXXXX..XXXXXXX 100644
73
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
58
--- a/target/riscv/insn_trans/trans_rvv.inc.c
74
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
59
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
75
+ ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
60
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
76
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
61
}
77
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
62
return false;
78
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
79
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
80
* In principle Zve*x would also suffice here, were they supported
81
* in qemu
82
*/
83
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha ||
84
- cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
85
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
86
+ cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
87
error_setg(errp,
88
"Vector crypto extensions require V or Zve* extensions");
89
return;
90
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
91
/* Vector cryptography extensions */
92
DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
93
DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
94
+ DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false),
95
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
96
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
97
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
98
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/riscv/vcrypto_helper.c
101
+++ b/target/riscv/vcrypto_helper.c
102
@@ -XXX,XX +XXX,XX @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
103
vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
104
env->vstart = 0;
63
}
105
}
64
+
106
+
65
+/* Vector Slide Instructions */
107
+void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
66
+static bool slideup_check(DisasContext *s, arg_rmrr *a)
108
+ CPURISCVState *env, uint32_t desc)
67
+{
109
+{
68
+ return (vext_check_isa_ill(s) &&
110
+ uint64_t *vd = vd_vptr;
69
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
111
+ uint64_t *vs1 = vs1_vptr;
70
+ vext_check_reg(s, a->rd, false) &&
112
+ uint64_t *vs2 = vs2_vptr;
71
+ vext_check_reg(s, a->rs2, false) &&
113
+ uint32_t vta = vext_vta(desc);
72
+ (a->rd != a->rs2));
114
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
73
+}
115
+
74
+
116
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
75
+GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
117
+ uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]};
76
+GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
118
+ uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
77
+GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check)
119
+ uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]};
78
+
120
+ uint64_t Z[2] = {0, 0};
79
+GEN_OPIVX_TRANS(vslidedown_vx, opivx_check)
121
+
80
+GEN_OPIVX_TRANS(vslide1down_vx, opivx_check)
122
+ uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])};
81
+GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check)
123
+
82
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
124
+ for (int j = 0; j < 128; j++) {
83
index XXXXXXX..XXXXXXX 100644
125
+ if ((S[j / 64] >> (j % 64)) & 1) {
84
--- a/target/riscv/vector_helper.c
126
+ Z[0] ^= H[0];
85
+++ b/target/riscv/vector_helper.c
127
+ Z[1] ^= H[1];
86
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
128
+ }
87
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
129
+ bool reduce = ((H[1] >> 63) & 1);
88
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
130
+ H[1] = H[1] << 1 | H[0] >> 63;
89
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
131
+ H[0] = H[0] << 1;
132
+ if (reduce) {
133
+ H[0] ^= 0x87;
134
+ }
135
+ }
136
+
137
+ vd[i * 2 + 0] = brev8(Z[0]);
138
+ vd[i * 2 + 1] = brev8(Z[1]);
139
+ }
140
+ /* set tail elements to 1s */
141
+ vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
142
+ env->vstart = 0;
143
+}
144
+
145
+void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
146
+ uint32_t desc)
147
+{
148
+ uint64_t *vd = vd_vptr;
149
+ uint64_t *vs2 = vs2_vptr;
150
+ uint32_t vta = vext_vta(desc);
151
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
152
+
153
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
154
+ uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])};
155
+ uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
156
+ uint64_t Z[2] = {0, 0};
157
+
158
+ for (int j = 0; j < 128; j++) {
159
+ if ((Y[j / 64] >> (j % 64)) & 1) {
160
+ Z[0] ^= H[0];
161
+ Z[1] ^= H[1];
162
+ }
163
+ bool reduce = ((H[1] >> 63) & 1);
164
+ H[1] = H[1] << 1 | H[0] >> 63;
165
+ H[0] = H[0] << 1;
166
+ if (reduce) {
167
+ H[0] ^= 0x87;
168
+ }
169
+ }
170
+
171
+ vd[i * 2 + 0] = brev8(Z[0]);
172
+ vd[i * 2 + 1] = brev8(Z[1]);
173
+ }
174
+ /* set tail elements to 1s */
175
+ vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
176
+ env->vstart = 0;
177
+}
178
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
179
index XXXXXXX..XXXXXXX 100644
180
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
181
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
182
@@ -XXX,XX +XXX,XX @@ static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a)
183
184
GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS)
185
GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS)
90
+
186
+
91
+/*
187
+/*
92
+ *** Vector Permutation Instructions
188
+ * Zvkg
93
+ */
189
+ */
94
+
190
+
95
+/* Vector Slide Instructions */
191
+#define ZVKG_EGS 4
96
+#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \
192
+
97
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
193
+static bool vgmul_check(DisasContext *s, arg_rmr *a)
98
+ CPURISCVState *env, uint32_t desc) \
194
+{
99
+{ \
195
+ int egw_bytes = ZVKG_EGS << s->sew;
100
+ uint32_t mlen = vext_mlen(desc); \
196
+ return s->cfg_ptr->ext_zvkg == true &&
101
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
197
+ vext_check_isa_ill(s) &&
102
+ uint32_t vm = vext_vm(desc); \
198
+ require_rvv(s) &&
103
+ uint32_t vl = env->vl; \
199
+ MAXSZ(s) >= egw_bytes &&
104
+ target_ulong offset = s1, i; \
200
+ vext_check_ss(s, a->rd, a->rs2, a->vm) &&
105
+ \
201
+ s->sew == MO_32;
106
+ for (i = offset; i < vl; i++) { \
202
+}
107
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
203
+
108
+ continue; \
204
+GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS)
109
+ } \
205
+
110
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
206
+static bool vghsh_check(DisasContext *s, arg_rmrr *a)
111
+ } \
207
+{
112
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
208
+ int egw_bytes = ZVKG_EGS << s->sew;
113
+}
209
+ return s->cfg_ptr->ext_zvkg == true &&
114
+
210
+ opivv_check(s, a) &&
115
+/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
211
+ MAXSZ(s) >= egw_bytes &&
116
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb)
212
+ s->sew == MO_32;
117
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh)
213
+}
118
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl)
214
+
119
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq)
215
+GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS)
120
+
121
+#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \
122
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
123
+ CPURISCVState *env, uint32_t desc) \
124
+{ \
125
+ uint32_t mlen = vext_mlen(desc); \
126
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
127
+ uint32_t vm = vext_vm(desc); \
128
+ uint32_t vl = env->vl; \
129
+ target_ulong offset = s1, i; \
130
+ \
131
+ for (i = 0; i < vl; ++i) { \
132
+ target_ulong j = i + offset; \
133
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
134
+ continue; \
135
+ } \
136
+ *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \
137
+ } \
138
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
139
+}
140
+
141
+/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
142
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb)
143
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh)
144
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl)
145
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq)
146
+
147
+#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \
148
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
149
+ CPURISCVState *env, uint32_t desc) \
150
+{ \
151
+ uint32_t mlen = vext_mlen(desc); \
152
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
153
+ uint32_t vm = vext_vm(desc); \
154
+ uint32_t vl = env->vl; \
155
+ uint32_t i; \
156
+ \
157
+ for (i = 0; i < vl; i++) { \
158
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
159
+ continue; \
160
+ } \
161
+ if (i == 0) { \
162
+ *((ETYPE *)vd + H(i)) = s1; \
163
+ } else { \
164
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
165
+ } \
166
+ } \
167
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
168
+}
169
+
170
+/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
171
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb)
172
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh)
173
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl)
174
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq)
175
+
176
+#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \
177
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
178
+ CPURISCVState *env, uint32_t desc) \
179
+{ \
180
+ uint32_t mlen = vext_mlen(desc); \
181
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
182
+ uint32_t vm = vext_vm(desc); \
183
+ uint32_t vl = env->vl; \
184
+ uint32_t i; \
185
+ \
186
+ for (i = 0; i < vl; i++) { \
187
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
188
+ continue; \
189
+ } \
190
+ if (i == vl - 1) { \
191
+ *((ETYPE *)vd + H(i)) = s1; \
192
+ } else { \
193
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
194
+ } \
195
+ } \
196
+ CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
197
+}
198
+
199
+/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
200
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
201
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
202
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
203
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
204
--
216
--
205
2.27.0
217
2.41.0
206
207
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Max Chou <max.chou@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Allows sharing of sm4_subword between different targets.
4
5
Signed-off-by: Max Chou <max.chou@sifive.com>
6
Reviewed-by: Frank Chang <frank.chang@sifive.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Signed-off-by: Max Chou <max.chou@sifive.com>
6
Message-id: 20200623215920.2594-48-zhiwei_liu@c-sky.com
9
Message-ID: <20230711165917.2629866-14-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
11
---
9
target/riscv/helper.h | 10 +++++++
12
include/crypto/sm4.h | 8 ++++++++
10
target/riscv/insn32.decode | 4 +++
13
target/arm/tcg/crypto_helper.c | 10 ++--------
11
target/riscv/insn_trans/trans_rvv.inc.c | 5 ++++
14
2 files changed, 10 insertions(+), 8 deletions(-)
12
target/riscv/vector_helper.c | 39 +++++++++++++++++++++++++
13
4 files changed, 58 insertions(+)
14
15
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
--- a/include/crypto/sm4.h
18
+++ b/target/riscv/helper.h
19
+++ b/include/crypto/sm4.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
20
@@ -XXX,XX +XXX,XX @@
20
DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
21
21
DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
22
extern const uint8_t sm4_sbox[256];
22
DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
23
23
+
24
+static inline uint32_t sm4_subword(uint32_t word)
24
+DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
25
+{
25
+DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+ return sm4_sbox[word & 0xff] |
26
+DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
27
+ sm4_sbox[(word >> 8) & 0xff] << 8 |
27
+DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
28
+ sm4_sbox[(word >> 16) & 0xff] << 16 |
28
+DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+ sm4_sbox[(word >> 24) & 0xff] << 24;
29
+DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
33
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/riscv/insn32.decode
36
+++ b/target/riscv/insn32.decode
37
@@ -XXX,XX +XXX,XX @@ vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
38
vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
39
vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
40
vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
41
+# Vector ordered and unordered reduction sum
42
+vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm
43
+vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
44
+vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
45
46
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
47
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
48
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/riscv/insn_trans/trans_rvv.inc.c
51
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
52
@@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
53
/* Vector Widening Integer Reduction Instructions */
54
GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check)
55
GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check)
56
+
57
+/* Vector Single-Width Floating-Point Reduction Instructions */
58
+GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
59
+GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
60
+GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
61
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/riscv/vector_helper.c
64
+++ b/target/riscv/vector_helper.c
65
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq)
66
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh)
67
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl)
68
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq)
69
+
70
+/* Vector Single-Width Floating-Point Reduction Instructions */
71
+#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
72
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
73
+ void *vs2, CPURISCVState *env, \
74
+ uint32_t desc) \
75
+{ \
76
+ uint32_t mlen = vext_mlen(desc); \
77
+ uint32_t vm = vext_vm(desc); \
78
+ uint32_t vl = env->vl; \
79
+ uint32_t i; \
80
+ uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \
81
+ TD s1 = *((TD *)vs1 + HD(0)); \
82
+ \
83
+ for (i = 0; i < vl; i++) { \
84
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
85
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
86
+ continue; \
87
+ } \
88
+ s1 = OP(s1, (TD)s2, &env->fp_status); \
89
+ } \
90
+ *((TD *)vd + HD(0)) = s1; \
91
+ CLEAR_FN(vd, 1, sizeof(TD), tot); \
92
+}
30
+}
93
+
31
+
94
+/* Unordered sum */
32
#endif
95
+GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
33
diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c
96
+GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
34
index XXXXXXX..XXXXXXX 100644
97
+GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
35
--- a/target/arm/tcg/crypto_helper.c
98
+
36
+++ b/target/arm/tcg/crypto_helper.c
99
+/* Maximum value */
37
@@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
100
+GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh)
38
CR_ST_WORD(d, (i + 3) % 4) ^
101
+GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl)
39
CR_ST_WORD(n, i);
102
+GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq)
40
103
+
41
- t = sm4_sbox[t & 0xff] |
104
+/* Minimum value */
42
- sm4_sbox[(t >> 8) & 0xff] << 8 |
105
+GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh)
43
- sm4_sbox[(t >> 16) & 0xff] << 16 |
106
+GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl)
44
- sm4_sbox[(t >> 24) & 0xff] << 24;
107
+GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq)
45
+ t = sm4_subword(t);
46
47
CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
48
rol32(t, 24);
49
@@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
50
CR_ST_WORD(d, (i + 3) % 4) ^
51
CR_ST_WORD(m, i);
52
53
- t = sm4_sbox[t & 0xff] |
54
- sm4_sbox[(t >> 8) & 0xff] << 8 |
55
- sm4_sbox[(t >> 16) & 0xff] << 16 |
56
- sm4_sbox[(t >> 24) & 0xff] << 24;
57
+ t = sm4_subword(t);
58
59
CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
60
}
108
--
61
--
109
2.27.0
62
2.41.0
110
111
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Max Chou <max.chou@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Adds sm4_ck constant for use in sm4 cryptography across different targets.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Signed-off-by: Max Chou <max.chou@sifive.com>
6
Message-id: 20200623215920.2594-47-zhiwei_liu@c-sky.com
6
Reviewed-by: Frank Chang <frank.chang@sifive.com>
7
Signed-off-by: Max Chou <max.chou@sifive.com>
8
Message-ID: <20230711165917.2629866-15-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
9
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
10
---
9
target/riscv/helper.h | 7 +++++++
11
include/crypto/sm4.h | 1 +
10
target/riscv/insn32.decode | 2 ++
12
crypto/sm4.c | 10 ++++++++++
11
target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++
13
2 files changed, 11 insertions(+)
12
target/riscv/vector_helper.c | 11 +++++++++++
13
4 files changed, 24 insertions(+)
14
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
17
--- a/include/crypto/sm4.h
18
+++ b/target/riscv/helper.h
18
+++ b/include/crypto/sm4.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@
20
DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
20
#define QEMU_SM4_H
21
DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
21
22
DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
22
extern const uint8_t sm4_sbox[256];
23
+
23
+extern const uint32_t sm4_ck[32];
24
+DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
24
25
+DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
25
static inline uint32_t sm4_subword(uint32_t word)
26
+DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
26
{
27
+DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
27
diff --git a/crypto/sm4.c b/crypto/sm4.c
28
+DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
30
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
31
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
32
--- a/target/riscv/insn32.decode
29
--- a/crypto/sm4.c
33
+++ b/target/riscv/insn32.decode
30
+++ b/crypto/sm4.c
34
@@ -XXX,XX +XXX,XX @@ vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
31
@@ -XXX,XX +XXX,XX @@ uint8_t const sm4_sbox[] = {
35
vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
32
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
36
vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
33
};
37
vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
34
38
+vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
35
+uint32_t const sm4_ck[] = {
39
+vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
36
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
40
37
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
41
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
38
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
42
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
39
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
43
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
40
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
44
index XXXXXXX..XXXXXXX 100644
41
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
45
--- a/target/riscv/insn_trans/trans_rvv.inc.c
42
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
46
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
43
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
47
@@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredmin_vs, reduction_check)
44
+};
48
GEN_OPIVV_TRANS(vredand_vs, reduction_check)
49
GEN_OPIVV_TRANS(vredor_vs, reduction_check)
50
GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
51
+
52
+/* Vector Widening Integer Reduction Instructions */
53
+GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check)
54
+GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check)
55
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/riscv/vector_helper.c
58
+++ b/target/riscv/vector_helper.c
59
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb)
60
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh)
61
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl)
62
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq)
63
+
64
+/* Vector Widening Integer Reduction Instructions */
65
+/* Signed sum reduction into double-width accumulator */
66
+GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh)
67
+GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl)
68
+GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq)
69
+
70
+/* Unsigned sum reduction into double-width accumulator */
71
+GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh)
72
+GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl)
73
+GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq)
74
--
45
--
75
2.27.0
46
2.41.0
76
77
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Max Chou <max.chou@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This commit adds support for the Zvksed vector-crypto extension, which
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
consists of the following instructions:
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
6
Message-id: 20200623215920.2594-31-zhiwei_liu@c-sky.com
6
* vsm4k.vi
7
* vsm4r.[vv,vs]
8
9
Translation functions are defined in
10
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
11
`target/riscv/vcrypto_helper.c`.
12
13
Signed-off-by: Max Chou <max.chou@sifive.com>
14
Reviewed-by: Frank Chang <frank.chang@sifive.com>
15
[lawrence.hunter@codethink.co.uk: Moved SM4 functions from
16
crypto_helper.c to vcrypto_helper.c]
17
[nazar.kazakov@codethink.co.uk: Added alignment checks, refactored code to
18
use macros, and minor style changes]
19
Signed-off-by: Max Chou <max.chou@sifive.com>
20
Message-ID: <20230711165917.2629866-16-max.chou@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
21
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
22
---
9
target/riscv/helper.h | 16 ++++
23
target/riscv/cpu_cfg.h | 1 +
10
target/riscv/insn32.decode | 5 +
24
target/riscv/helper.h | 4 +
11
target/riscv/insn_trans/trans_rvv.inc.c | 118 ++++++++++++++++++++++++
25
target/riscv/insn32.decode | 5 +
12
target/riscv/vector_helper.c | 111 ++++++++++++++++++++++
26
target/riscv/cpu.c | 5 +-
13
4 files changed, 250 insertions(+)
27
target/riscv/vcrypto_helper.c | 127 +++++++++++++++++++++++
14
28
target/riscv/insn_trans/trans_rvvk.c.inc | 43 ++++++++
29
6 files changed, 184 insertions(+), 1 deletion(-)
30
31
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/riscv/cpu_cfg.h
34
+++ b/target/riscv/cpu_cfg.h
35
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
36
bool ext_zvkned;
37
bool ext_zvknha;
38
bool ext_zvknhb;
39
+ bool ext_zvksed;
40
bool ext_zvksh;
41
bool ext_zmmul;
42
bool ext_zvfbfmin;
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
43
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
45
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
46
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
20
DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32)
48
21
DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32)
49
DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32)
22
DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32)
50
DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32)
23
+
51
+
24
+DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
52
+DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32)
25
+DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
53
+DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
54
+DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32)
31
+DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32)
32
+DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32)
33
+DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
34
+DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
35
+DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
36
+DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
37
+DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
38
+DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
39
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
55
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
40
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
41
--- a/target/riscv/insn32.decode
57
--- a/target/riscv/insn32.decode
42
+++ b/target/riscv/insn32.decode
58
+++ b/target/riscv/insn32.decode
43
@@ -XXX,XX +XXX,XX @@ vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
59
@@ -XXX,XX +XXX,XX @@ vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
44
vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
60
# *** Zvkg vector crypto extension ***
45
vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
61
vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1
46
vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
62
vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1
47
+vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
63
+
48
+vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
64
+# *** Zvksed vector crypto extension ***
49
+vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
65
+vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1
50
+vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
66
+vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1
51
+vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
67
+vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1
52
68
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
53
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
69
index XXXXXXX..XXXXXXX 100644
54
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
70
--- a/target/riscv/cpu.c
55
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
71
+++ b/target/riscv/cpu.c
56
index XXXXXXX..XXXXXXX 100644
72
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
57
--- a/target/riscv/insn_trans/trans_rvv.inc.c
73
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
58
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
74
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
59
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_NARROW_TRANS(vnclipu_vx)
75
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
60
GEN_OPIVX_NARROW_TRANS(vnclip_vx)
76
+ ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed),
61
GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx)
77
ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
62
GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx)
78
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
63
+
79
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
64
+/*
80
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
65
+ *** Vector Floating-Point Arithmetic Instructions
81
* in qemu
66
+ */
82
*/
67
+/* Vector Single-Width Floating-Point Add/Subtract Instructions */
83
if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
68
+
84
- cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
69
+/*
85
+ cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) &&
70
+ * If the current SEW does not correspond to a supported IEEE floating-point
86
+ !cpu->cfg.ext_zve32f) {
71
+ * type, an illegal instruction exception is raised.
87
error_setg(errp,
72
+ */
88
"Vector crypto extensions require V or Zve* extensions");
73
+static bool opfvv_check(DisasContext *s, arg_rmrr *a)
89
return;
74
+{
90
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
75
+ return (vext_check_isa_ill(s) &&
91
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
76
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
92
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
77
+ vext_check_reg(s, a->rd, false) &&
93
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
78
+ vext_check_reg(s, a->rs2, false) &&
94
+ DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false),
79
+ vext_check_reg(s, a->rs1, false) &&
95
DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false),
80
+ (s->sew != 0));
96
81
+}
97
DEFINE_PROP_END_OF_LIST(),
82
+
98
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
83
+/* OPFVV without GVEC IR */
99
index XXXXXXX..XXXXXXX 100644
84
+#define GEN_OPFVV_TRANS(NAME, CHECK) \
100
--- a/target/riscv/vcrypto_helper.c
85
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
101
+++ b/target/riscv/vcrypto_helper.c
86
+{ \
87
+ if (CHECK(s, a)) { \
88
+ uint32_t data = 0; \
89
+ static gen_helper_gvec_4_ptr * const fns[3] = { \
90
+ gen_helper_##NAME##_h, \
91
+ gen_helper_##NAME##_w, \
92
+ gen_helper_##NAME##_d, \
93
+ }; \
94
+ TCGLabel *over = gen_new_label(); \
95
+ gen_set_rm(s, 7); \
96
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
97
+ \
98
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
99
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
100
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
101
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
102
+ vreg_ofs(s, a->rs1), \
103
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
104
+ s->vlen / 8, data, fns[s->sew - 1]); \
105
+ gen_set_label(over); \
106
+ return true; \
107
+ } \
108
+ return false; \
109
+}
110
+GEN_OPFVV_TRANS(vfadd_vv, opfvv_check)
111
+GEN_OPFVV_TRANS(vfsub_vv, opfvv_check)
112
+
113
+typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr,
114
+ TCGv_env, TCGv_i32);
115
+
116
+static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
117
+ uint32_t data, gen_helper_opfvf *fn, DisasContext *s)
118
+{
119
+ TCGv_ptr dest, src2, mask;
120
+ TCGv_i32 desc;
121
+
122
+ TCGLabel *over = gen_new_label();
123
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
124
+
125
+ dest = tcg_temp_new_ptr();
126
+ mask = tcg_temp_new_ptr();
127
+ src2 = tcg_temp_new_ptr();
128
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
129
+
130
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
131
+ tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
132
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
133
+
134
+ fn(dest, mask, cpu_fpr[rs1], src2, cpu_env, desc);
135
+
136
+ tcg_temp_free_ptr(dest);
137
+ tcg_temp_free_ptr(mask);
138
+ tcg_temp_free_ptr(src2);
139
+ tcg_temp_free_i32(desc);
140
+ gen_set_label(over);
141
+ return true;
142
+}
143
+
144
+static bool opfvf_check(DisasContext *s, arg_rmrr *a)
145
+{
146
+/*
147
+ * If the current SEW does not correspond to a supported IEEE floating-point
148
+ * type, an illegal instruction exception is raised
149
+ */
150
+ return (vext_check_isa_ill(s) &&
151
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
152
+ vext_check_reg(s, a->rd, false) &&
153
+ vext_check_reg(s, a->rs2, false) &&
154
+ (s->sew != 0));
155
+}
156
+
157
+/* OPFVF without GVEC IR */
158
+#define GEN_OPFVF_TRANS(NAME, CHECK) \
159
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
160
+{ \
161
+ if (CHECK(s, a)) { \
162
+ uint32_t data = 0; \
163
+ static gen_helper_opfvf *const fns[3] = { \
164
+ gen_helper_##NAME##_h, \
165
+ gen_helper_##NAME##_w, \
166
+ gen_helper_##NAME##_d, \
167
+ }; \
168
+ gen_set_rm(s, 7); \
169
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
170
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
171
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
172
+ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
173
+ fns[s->sew - 1], s); \
174
+ } \
175
+ return false; \
176
+}
177
+
178
+GEN_OPFVF_TRANS(vfadd_vf, opfvf_check)
179
+GEN_OPFVF_TRANS(vfsub_vf, opfvf_check)
180
+GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check)
181
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/target/riscv/vector_helper.c
184
+++ b/target/riscv/vector_helper.c
185
@@ -XXX,XX +XXX,XX @@
102
@@ -XXX,XX +XXX,XX @@
103
#include "cpu.h"
104
#include "crypto/aes.h"
105
#include "crypto/aes-round.h"
106
+#include "crypto/sm4.h"
186
#include "exec/memop.h"
107
#include "exec/memop.h"
187
#include "exec/exec-all.h"
108
#include "exec/exec-all.h"
188
#include "exec/helper-proto.h"
109
#include "exec/helper-proto.h"
189
+#include "fpu/softfloat.h"
110
@@ -XXX,XX +XXX,XX @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
190
#include "tcg/tcg-gvec-desc.h"
111
vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
191
#include "internals.h"
112
env->vstart = 0;
192
#include <math.h>
113
}
193
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
114
+
194
GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb)
115
+void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env,
195
GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh)
116
+ uint32_t desc)
196
GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl)
117
+{
118
+ const uint32_t egs = 4;
119
+ uint32_t rnd = uimm5 & 0x7;
120
+ uint32_t group_start = env->vstart / egs;
121
+ uint32_t group_end = env->vl / egs;
122
+ uint32_t esz = sizeof(uint32_t);
123
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
124
+
125
+ for (uint32_t i = group_start; i < group_end; ++i) {
126
+ uint32_t vstart = i * egs;
127
+ uint32_t vend = (i + 1) * egs;
128
+ uint32_t rk[4] = {0};
129
+ uint32_t tmp[8] = {0};
130
+
131
+ for (uint32_t j = vstart; j < vend; ++j) {
132
+ rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
133
+ }
134
+
135
+ for (uint32_t j = 0; j < egs; ++j) {
136
+ tmp[j] = rk[j];
137
+ }
138
+
139
+ for (uint32_t j = 0; j < egs; ++j) {
140
+ uint32_t b, s;
141
+ b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j];
142
+
143
+ s = sm4_subword(b);
144
+
145
+ tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23));
146
+ }
147
+
148
+ for (uint32_t j = vstart; j < vend; ++j) {
149
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
150
+ }
151
+ }
152
+
153
+ env->vstart = 0;
154
+ /* set tail elements to 1s */
155
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
156
+}
157
+
158
+static void do_sm4_round(uint32_t *rk, uint32_t *buf)
159
+{
160
+ const uint32_t egs = 4;
161
+ uint32_t s, b;
162
+
163
+ for (uint32_t j = egs; j < egs * 2; ++j) {
164
+ b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4];
165
+
166
+ s = sm4_subword(b);
167
+
168
+ buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^
169
+ rol32(s, 24));
170
+ }
171
+}
172
+
173
+void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
174
+{
175
+ const uint32_t egs = 4;
176
+ uint32_t group_start = env->vstart / egs;
177
+ uint32_t group_end = env->vl / egs;
178
+ uint32_t esz = sizeof(uint32_t);
179
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
180
+
181
+ for (uint32_t i = group_start; i < group_end; ++i) {
182
+ uint32_t vstart = i * egs;
183
+ uint32_t vend = (i + 1) * egs;
184
+ uint32_t rk[4] = {0};
185
+ uint32_t tmp[8] = {0};
186
+
187
+ for (uint32_t j = vstart; j < vend; ++j) {
188
+ rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
189
+ }
190
+
191
+ for (uint32_t j = vstart; j < vend; ++j) {
192
+ tmp[j - vstart] = *((uint32_t *)vd + H4(j));
193
+ }
194
+
195
+ do_sm4_round(rk, tmp);
196
+
197
+ for (uint32_t j = vstart; j < vend; ++j) {
198
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
199
+ }
200
+ }
201
+
202
+ env->vstart = 0;
203
+ /* set tail elements to 1s */
204
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
205
+}
206
+
207
+void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
208
+{
209
+ const uint32_t egs = 4;
210
+ uint32_t group_start = env->vstart / egs;
211
+ uint32_t group_end = env->vl / egs;
212
+ uint32_t esz = sizeof(uint32_t);
213
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
214
+
215
+ for (uint32_t i = group_start; i < group_end; ++i) {
216
+ uint32_t vstart = i * egs;
217
+ uint32_t vend = (i + 1) * egs;
218
+ uint32_t rk[4] = {0};
219
+ uint32_t tmp[8] = {0};
220
+
221
+ for (uint32_t j = 0; j < egs; ++j) {
222
+ rk[j] = *((uint32_t *)vs2 + H4(j));
223
+ }
224
+
225
+ for (uint32_t j = vstart; j < vend; ++j) {
226
+ tmp[j - vstart] = *((uint32_t *)vd + H4(j));
227
+ }
228
+
229
+ do_sm4_round(rk, tmp);
230
+
231
+ for (uint32_t j = vstart; j < vend; ++j) {
232
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
233
+ }
234
+ }
235
+
236
+ env->vstart = 0;
237
+ /* set tail elements to 1s */
238
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
239
+}
240
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
241
index XXXXXXX..XXXXXXX 100644
242
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
243
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
244
@@ -XXX,XX +XXX,XX @@ static bool vghsh_check(DisasContext *s, arg_rmrr *a)
245
}
246
247
GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS)
197
+
248
+
198
+/*
249
+/*
199
+ *** Vector Floating-Point Arithmetic Instructions
250
+ * Zvksed
200
+ */
251
+ */
201
+/* Vector Single-Width Floating-Point Add/Subtract Instructions */
252
+
202
+#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
253
+#define ZVKSED_EGS 4
203
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
254
+
204
+ CPURISCVState *env) \
255
+static bool zvksed_check(DisasContext *s)
205
+{ \
256
+{
206
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
257
+ int egw_bytes = ZVKSED_EGS << s->sew;
207
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
258
+ return s->cfg_ptr->ext_zvksed == true &&
208
+ *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
259
+ require_rvv(s) &&
209
+}
260
+ vext_check_isa_ill(s) &&
210
+
261
+ MAXSZ(s) >= egw_bytes &&
211
+#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \
262
+ s->sew == MO_32;
212
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
263
+}
213
+ void *vs2, CPURISCVState *env, \
264
+
214
+ uint32_t desc) \
265
+static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a)
215
+{ \
266
+{
216
+ uint32_t vlmax = vext_maxsz(desc) / ESZ; \
267
+ return zvksed_check(s) &&
217
+ uint32_t mlen = vext_mlen(desc); \
268
+ require_align(a->rd, s->lmul) &&
218
+ uint32_t vm = vext_vm(desc); \
269
+ require_align(a->rs2, s->lmul);
219
+ uint32_t vl = env->vl; \
270
+}
220
+ uint32_t i; \
271
+
221
+ \
272
+GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS)
222
+ for (i = 0; i < vl; i++) { \
273
+
223
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
274
+static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a)
224
+ continue; \
275
+{
225
+ } \
276
+ return zvksed_check(s) &&
226
+ do_##NAME(vd, vs1, vs2, i, env); \
277
+ require_align(a->rd, s->lmul) &&
227
+ } \
278
+ require_align(a->rs2, s->lmul);
228
+ CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
279
+}
229
+}
280
+
230
+
281
+GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS)
231
+RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
282
+
232
+RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
283
+static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a)
233
+RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
284
+{
234
+GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh)
285
+ return zvksed_check(s) &&
235
+GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl)
286
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
236
+GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq)
287
+ require_align(a->rd, s->lmul);
237
+
288
+}
238
+#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
289
+
239
+static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
290
+GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS)
240
+ CPURISCVState *env) \
241
+{ \
242
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
243
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
244
+}
245
+
246
+#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \
247
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
248
+ void *vs2, CPURISCVState *env, \
249
+ uint32_t desc) \
250
+{ \
251
+ uint32_t vlmax = vext_maxsz(desc) / ESZ; \
252
+ uint32_t mlen = vext_mlen(desc); \
253
+ uint32_t vm = vext_vm(desc); \
254
+ uint32_t vl = env->vl; \
255
+ uint32_t i; \
256
+ \
257
+ for (i = 0; i < vl; i++) { \
258
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
259
+ continue; \
260
+ } \
261
+ do_##NAME(vd, s1, vs2, i, env); \
262
+ } \
263
+ CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
264
+}
265
+
266
+RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
267
+RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
268
+RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
269
+GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh)
270
+GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl)
271
+GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq)
272
+
273
+RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
274
+RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
275
+RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
276
+GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh)
277
+GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl)
278
+GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq)
279
+RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
280
+RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
281
+RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
282
+GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh)
283
+GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl)
284
+GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq)
285
+
286
+static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
287
+{
288
+ return float16_sub(b, a, s);
289
+}
290
+
291
+static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
292
+{
293
+ return float32_sub(b, a, s);
294
+}
295
+
296
+static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
297
+{
298
+ return float64_sub(b, a, s);
299
+}
300
+
301
+RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
302
+RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
303
+RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
304
+GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh)
305
+GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl)
306
+GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq)
307
--
291
--
308
2.27.0
292
2.41.0
309
310
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Rob Bradford <rbradford@rivosinc.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
These are WARL fields - zero out the bits for unavailable counters and
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
special case the TM bit in mcountinhibit which is hardwired to zero.
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
This patch achieves this by modifying the value written so that any use
6
Message-id: 20200623215920.2594-46-zhiwei_liu@c-sky.com
6
of the field will see the correctly masked bits.
7
8
Tested by modifying OpenSBI to write max value to these CSRs and upon
9
subsequent read the appropriate number of bits for number of PMUs is
10
enabled and the TM bit is zero in mcountinhibit.
11
12
Signed-off-by: Rob Bradford <rbradford@rivosinc.com>
13
Acked-by: Alistair Francis <alistair.francis@wdc.com>
14
Reviewed-by: Atish Patra <atishp@rivosinc.com>
15
Message-ID: <20230802124906.24197-1-rbradford@rivosinc.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
16
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
17
---
9
target/riscv/helper.h | 33 +++++++++++
18
target/riscv/csr.c | 11 +++++++++--
10
target/riscv/insn32.decode | 8 +++
19
1 file changed, 9 insertions(+), 2 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++++
12
target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++
13
4 files changed, 133 insertions(+)
14
20
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
21
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
16
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
23
--- a/target/riscv/csr.c
18
+++ b/target/riscv/helper.h
24
+++ b/target/riscv/csr.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
25
@@ -XXX,XX +XXX,XX @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno,
20
DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
26
{
21
DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
27
int cidx;
22
DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
28
PMUCTRState *counter;
29
+ RISCVCPU *cpu = env_archcpu(env);
30
31
- env->mcountinhibit = val;
32
+ /* WARL register - disable unavailable counters; TM bit is always 0 */
33
+ env->mcountinhibit =
34
+ val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR);
35
36
/* Check if any other counter is also monitoring cycles/instructions */
37
for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) {
38
@@ -XXX,XX +XXX,XX @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno,
39
static RISCVException write_mcounteren(CPURISCVState *env, int csrno,
40
target_ulong val)
41
{
42
- env->mcounteren = val;
43
+ RISCVCPU *cpu = env_archcpu(env);
23
+
44
+
24
+DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
45
+ /* WARL register - disable unavailable counters */
25
+DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
46
+ env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM |
26
+DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
47
+ COUNTEREN_IR);
27
+DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
48
return RISCV_EXCP_NONE;
28
+DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
49
}
29
+DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
50
30
+DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
41
+DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
42
+DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
43
+DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
44
+DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
45
+DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
46
+DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
47
+DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
48
+DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
49
+DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
50
+DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
51
+DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
52
+DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
53
+DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
54
+DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
55
+DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
61
vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
62
vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
63
vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
64
+vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
65
+vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
66
+vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
67
+vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm
68
+vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm
69
+vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm
70
+vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm
71
+vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
72
73
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
74
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
75
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
79
@@ -XXX,XX +XXX,XX @@ GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v)
80
GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v)
81
GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v)
82
GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v)
83
+
84
+/*
85
+ *** Vector Reduction Operations
86
+ */
87
+/* Vector Single-Width Integer Reduction Instructions */
88
+static bool reduction_check(DisasContext *s, arg_rmrr *a)
89
+{
90
+ return vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false);
91
+}
92
+
93
+GEN_OPIVV_TRANS(vredsum_vs, reduction_check)
94
+GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check)
95
+GEN_OPIVV_TRANS(vredmax_vs, reduction_check)
96
+GEN_OPIVV_TRANS(vredminu_vs, reduction_check)
97
+GEN_OPIVV_TRANS(vredmin_vs, reduction_check)
98
+GEN_OPIVV_TRANS(vredand_vs, reduction_check)
99
+GEN_OPIVV_TRANS(vredor_vs, reduction_check)
100
+GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
101
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/target/riscv/vector_helper.c
104
+++ b/target/riscv/vector_helper.c
105
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
106
RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
107
GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh)
108
GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl)
109
+
110
+/*
111
+ *** Vector Reduction Operations
112
+ */
113
+/* Vector Single-Width Integer Reduction Instructions */
114
+#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
115
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
116
+ void *vs2, CPURISCVState *env, uint32_t desc) \
117
+{ \
118
+ uint32_t mlen = vext_mlen(desc); \
119
+ uint32_t vm = vext_vm(desc); \
120
+ uint32_t vl = env->vl; \
121
+ uint32_t i; \
122
+ uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \
123
+ TD s1 = *((TD *)vs1 + HD(0)); \
124
+ \
125
+ for (i = 0; i < vl; i++) { \
126
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
127
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
128
+ continue; \
129
+ } \
130
+ s1 = OP(s1, (TD)s2); \
131
+ } \
132
+ *((TD *)vd + HD(0)) = s1; \
133
+ CLEAR_FN(vd, 1, sizeof(TD), tot); \
134
+}
135
+
136
+/* vd[0] = sum(vs1[0], vs2[*]) */
137
+GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb)
138
+GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh)
139
+GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl)
140
+GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq)
141
+
142
+/* vd[0] = maxu(vs1[0], vs2[*]) */
143
+GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb)
144
+GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh)
145
+GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl)
146
+GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq)
147
+
148
+/* vd[0] = max(vs1[0], vs2[*]) */
149
+GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb)
150
+GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh)
151
+GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl)
152
+GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq)
153
+
154
+/* vd[0] = minu(vs1[0], vs2[*]) */
155
+GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb)
156
+GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh)
157
+GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl)
158
+GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq)
159
+
160
+/* vd[0] = min(vs1[0], vs2[*]) */
161
+GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb)
162
+GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh)
163
+GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl)
164
+GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq)
165
+
166
+/* vd[0] = and(vs1[0], vs2[*]) */
167
+GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb)
168
+GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh)
169
+GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl)
170
+GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq)
171
+
172
+/* vd[0] = or(vs1[0], vs2[*]) */
173
+GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb)
174
+GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh)
175
+GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl)
176
+GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq)
177
+
178
+/* vd[0] = xor(vs1[0], vs2[*]) */
179
+GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb)
180
+GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh)
181
+GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl)
182
+GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq)
183
--
51
--
184
2.27.0
52
2.41.0
185
186
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Jason Chien <jason.chien@sifive.com>
2
2
3
The vector extension is off by default. The only way to use the vector extension is
3
RVA23 Profiles states:
4
1. use cpu rv32 or rv64
4
The RVA23 profiles are intended to be used for 64-bit application
5
2. turn it on via the command line
5
processors that will run rich OS stacks from standard binary OS
6
"-cpu rv64,x-v=true,vlen=128,elen=64,vext_spec=v0.7.1".
6
distributions and with a substantial number of third-party binary user
7
applications that will be supported over a considerable length of time
8
in the field.
7
9
8
vlen is the vector register length, default value is 128 bit.
10
Chapter 4 of the unprivileged spec introduces the Zihintntl extension
9
elen is the max operator size in bits, default value is 64 bit.
11
and Zihintntl is a mandatory extension presented in RVA23 Profiles, whose
10
vext_spec is the vector specification version, default value is v0.7.1.
12
purpose is to enable application and operating system portability across
11
These properties can be specified with other values.
13
different implementations. Thus the DTS should contain the Zihintntl ISA
14
string in order to pass to software.
12
15
13
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
16
The unprivileged spec states:
17
Like any HINTs, these instructions may be freely ignored. Hence, although
18
they are described in terms of cache-based memory hierarchies, they do not
19
mandate the provision of caches.
20
21
These instructions are encoded with otherwise-unused opcodes, e.g. ADD x0, x0, x2,
22
which QEMU already supports, and QEMU does not emulate cache. Therefore
23
these instructions can be considered as a no-op, and we only need to add
24
a new property for the Zihintntl extension.
25
26
Reviewed-by: Frank Chang <frank.chang@sifive.com>
14
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
27
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
28
Signed-off-by: Jason Chien <jason.chien@sifive.com>
16
Message-id: 20200623215920.2594-62-zhiwei_liu@c-sky.com
29
Message-ID: <20230726074049.19505-2-jason.chien@sifive.com>
17
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
30
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
18
---
31
---
19
target/riscv/cpu.h | 4 +++-
32
target/riscv/cpu_cfg.h | 1 +
20
target/riscv/cpu.c | 43 +++++++++++++++++++++++++++++++++++++++++++
33
target/riscv/cpu.c | 2 ++
21
2 files changed, 46 insertions(+), 1 deletion(-)
34
2 files changed, 3 insertions(+)
22
35
23
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
36
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
24
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
25
--- a/target/riscv/cpu.h
38
--- a/target/riscv/cpu_cfg.h
26
+++ b/target/riscv/cpu.h
39
+++ b/target/riscv/cpu_cfg.h
27
@@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState;
40
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
28
41
bool ext_icbom;
29
#include "pmp.h"
42
bool ext_icboz;
30
43
bool ext_zicond;
31
-#define RV_VLEN_MAX 512
44
+ bool ext_zihintntl;
32
+#define RV_VLEN_MAX 256
45
bool ext_zihintpause;
33
46
bool ext_smstateen;
34
FIELD(VTYPE, VLMUL, 0, 2)
47
bool ext_sstc;
35
FIELD(VTYPE, VSEW, 2, 3)
36
@@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU {
37
bool ext_s;
38
bool ext_u;
39
bool ext_h;
40
+ bool ext_v;
41
bool ext_counters;
42
bool ext_ifencei;
43
bool ext_icsr;
44
45
char *priv_spec;
46
char *user_spec;
47
+ char *vext_spec;
48
uint16_t vlen;
49
uint16_t elen;
50
bool mmu;
51
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
48
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
52
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
53
--- a/target/riscv/cpu.c
50
--- a/target/riscv/cpu.c
54
+++ b/target/riscv/cpu.c
51
+++ b/target/riscv/cpu.c
55
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
52
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
56
if (cpu->cfg.ext_h) {
53
ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
57
target_misa |= RVH;
54
ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
58
}
55
ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
59
+ if (cpu->cfg.ext_v) {
56
+ ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl),
60
+ target_misa |= RVV;
57
ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
61
+ if (!is_power_of_2(cpu->cfg.vlen)) {
58
ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
62
+ error_setg(errp,
59
ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
63
+ "Vector extension VLEN must be power of 2");
60
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
64
+ return;
61
DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
65
+ }
66
+ if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) {
67
+ error_setg(errp,
68
+ "Vector extension implementation only supports VLEN "
69
+ "in the range [128, %d]", RV_VLEN_MAX);
70
+ return;
71
+ }
72
+ if (!is_power_of_2(cpu->cfg.elen)) {
73
+ error_setg(errp,
74
+ "Vector extension ELEN must be power of 2");
75
+ return;
76
+ }
77
+ if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) {
78
+ error_setg(errp,
79
+ "Vector extension implementation only supports ELEN "
80
+ "in the range [8, 64]");
81
+ return;
82
+ }
83
+ if (cpu->cfg.vext_spec) {
84
+ if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) {
85
+ vext_version = VEXT_VERSION_0_07_1;
86
+ } else {
87
+ error_setg(errp,
88
+ "Unsupported vector spec version '%s'",
89
+ cpu->cfg.vext_spec);
90
+ return;
91
+ }
92
+ } else {
93
+ qemu_log("vector verison is not specified, "
94
+ "use the default value v0.7.1\n");
95
+ }
96
+ set_vext_version(env, vext_version);
97
+ }
98
99
set_misa(env, RVXLEN | target_misa);
100
}
101
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
102
DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
103
/* This is experimental so mark with 'x-' */
104
DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
105
+ DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),
106
DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
107
DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
62
DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
108
DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
63
DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
109
DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
64
+ DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true),
110
+ DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
65
DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true),
111
+ DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
66
DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true),
112
+ DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
67
DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true),
113
DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
114
DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
115
DEFINE_PROP_END_OF_LIST(),
116
--
68
--
117
2.27.0
69
2.41.0
118
119
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Commit a47842d ("riscv: Add support for the Zfa extension") implemented the zfa extension.
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
However, it has some typos for fleq.d and fltq.d. Both of them misused the fltq.s
5
Message-id: 20200623215920.2594-61-zhiwei_liu@c-sky.com
5
helper function.
6
7
Fixes: a47842d ("riscv: Add support for the Zfa extension")
8
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
9
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
10
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
11
Message-ID: <20230728003906.768-1-zhiwei_liu@linux.alibaba.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
13
---
8
target/riscv/helper.h | 5 ++++
14
target/riscv/insn_trans/trans_rvzfa.c.inc | 4 ++--
9
target/riscv/insn32.decode | 1 +
15
1 file changed, 2 insertions(+), 2 deletions(-)
10
target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++
11
target/riscv/vector_helper.c | 26 ++++++++++++++++++++
12
4 files changed, 64 insertions(+)
13
16
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
17
diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
19
--- a/target/riscv/insn_trans/trans_rvzfa.c.inc
17
+++ b/target/riscv/helper.h
20
+++ b/target/riscv/insn_trans/trans_rvzfa.c.inc
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32)
21
@@ -XXX,XX +XXX,XX @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
19
DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32)
22
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
20
DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32)
23
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
21
DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32)
24
22
+
25
- gen_helper_fltq_s(dest, cpu_env, src1, src2);
23
+DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
26
+ gen_helper_fleq_d(dest, cpu_env, src1, src2);
24
+DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
27
gen_set_gpr(ctx, a->rd, dest);
25
+DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
27
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/riscv/insn32.decode
30
+++ b/target/riscv/insn32.decode
31
@@ -XXX,XX +XXX,XX @@ vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
32
vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
33
vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
34
vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
35
+vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
36
37
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
38
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
39
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/riscv/insn_trans/trans_rvv.inc.c
42
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
44
}
45
return true;
28
return true;
46
}
29
}
47
+
30
@@ -XXX,XX +XXX,XX @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
48
+/* Vector Compress Instruction */
31
TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
49
+static bool vcompress_vm_check(DisasContext *s, arg_r *a)
32
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
50
+{
33
51
+ return (vext_check_isa_ill(s) &&
34
- gen_helper_fltq_s(dest, cpu_env, src1, src2);
52
+ vext_check_reg(s, a->rd, false) &&
35
+ gen_helper_fltq_d(dest, cpu_env, src1, src2);
53
+ vext_check_reg(s, a->rs2, false) &&
36
gen_set_gpr(ctx, a->rd, dest);
54
+ vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) &&
37
return true;
55
+ (a->rd != a->rs2));
38
}
56
+}
57
+
58
+static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
59
+{
60
+ if (vcompress_vm_check(s, a)) {
61
+ uint32_t data = 0;
62
+ static gen_helper_gvec_4_ptr * const fns[4] = {
63
+ gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h,
64
+ gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d,
65
+ };
66
+ TCGLabel *over = gen_new_label();
67
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
68
+
69
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
70
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
71
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
72
+ vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
73
+ cpu_env, 0, s->vlen / 8, data, fns[s->sew]);
74
+ gen_set_label(over);
75
+ return true;
76
+ }
77
+ return false;
78
+}
79
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/target/riscv/vector_helper.c
82
+++ b/target/riscv/vector_helper.c
83
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
84
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
85
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
86
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
87
+
88
+/* Vector Compress Instruction */
89
+#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \
90
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
91
+ CPURISCVState *env, uint32_t desc) \
92
+{ \
93
+ uint32_t mlen = vext_mlen(desc); \
94
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
95
+ uint32_t vl = env->vl; \
96
+ uint32_t num = 0, i; \
97
+ \
98
+ for (i = 0; i < vl; i++) { \
99
+ if (!vext_elem_mask(vs1, mlen, i)) { \
100
+ continue; \
101
+ } \
102
+ *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
103
+ num++; \
104
+ } \
105
+ CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
106
+}
107
+
108
+/* Compress into vd elements of vs2 where vs1 is enabled */
109
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
110
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
111
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
112
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)
113
--
39
--
114
2.27.0
40
2.41.0
115
116
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Jason Chien <jason.chien@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
When writing the upper mtime, we should keep the original lower mtime
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
whose value is given by cpu_riscv_read_rtc() instead of
5
cpu_riscv_read_rtc_raw(). The same logic applies to writes to lower mtime.
6
7
Signed-off-by: Jason Chien <jason.chien@sifive.com>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-45-zhiwei_liu@c-sky.com
9
Message-ID: <20230728082502.26439-1-jason.chien@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
11
---
9
target/riscv/helper.h | 11 ++++++
12
hw/intc/riscv_aclint.c | 5 +++--
10
target/riscv/insn32.decode | 5 +++
13
1 file changed, 3 insertions(+), 2 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 39 ++++++++++++++++++++
13
4 files changed, 103 insertions(+)
14
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
17
--- a/hw/intc/riscv_aclint.c
18
+++ b/target/riscv/helper.h
18
+++ b/hw/intc/riscv_aclint.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
20
DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
20
return;
21
DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
21
} else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) {
22
DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
22
uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq);
23
+
23
+ uint64_t rtc = cpu_riscv_read_rtc(mtimer);
24
+DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
24
25
+DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
25
if (addr == mtimer->time_base) {
26
+DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
26
if (size == 4) {
27
+DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
27
/* time_lo for RV32/RV64 */
28
+DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
28
- mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r;
29
+DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
29
+ mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r;
30
+DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
30
} else {
31
+DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
31
/* time for RV64 */
32
+DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
32
mtimer->time_delta = value - rtc_r;
33
+DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
33
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
34
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
34
} else {
35
index XXXXXXX..XXXXXXX 100644
35
if (size == 4) {
36
--- a/target/riscv/insn32.decode
36
/* time_hi for RV32/RV64 */
37
+++ b/target/riscv/insn32.decode
37
- mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r;
38
@@ -XXX,XX +XXX,XX @@ vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
38
+ mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r;
39
vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
39
} else {
40
vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
40
qemu_log_mask(LOG_GUEST_ERROR,
41
vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
41
"aclint-mtimer: invalid time_hi write: %08x",
42
+vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
43
+vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
44
+vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
45
+vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
46
+vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
47
48
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
49
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
50
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/riscv/insn_trans/trans_rvv.inc.c
53
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
54
@@ -XXX,XX +XXX,XX @@ GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v)
55
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v)
56
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v)
57
GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v)
58
+
59
+/* Narrowing Floating-Point/Integer Type-Convert Instructions */
60
+
61
+/*
62
+ * If the current SEW does not correspond to a supported IEEE floating-point
63
+ * type, an illegal instruction exception is raised
64
+ */
65
+static bool opfv_narrow_check(DisasContext *s, arg_rmr *a)
66
+{
67
+ return (vext_check_isa_ill(s) &&
68
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
69
+ vext_check_reg(s, a->rd, false) &&
70
+ vext_check_reg(s, a->rs2, true) &&
71
+ vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
72
+ 2 << s->lmul) &&
73
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
74
+}
75
+
76
+#define GEN_OPFV_NARROW_TRANS(NAME) \
77
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
78
+{ \
79
+ if (opfv_narrow_check(s, a)) { \
80
+ uint32_t data = 0; \
81
+ static gen_helper_gvec_3_ptr * const fns[2] = { \
82
+ gen_helper_##NAME##_h, \
83
+ gen_helper_##NAME##_w, \
84
+ }; \
85
+ TCGLabel *over = gen_new_label(); \
86
+ gen_set_rm(s, 7); \
87
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
88
+ \
89
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
90
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
91
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
92
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
93
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
94
+ s->vlen / 8, data, fns[s->sew - 1]); \
95
+ gen_set_label(over); \
96
+ return true; \
97
+ } \
98
+ return false; \
99
+}
100
+
101
+GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v)
102
+GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v)
103
+GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v)
104
+GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v)
105
+GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v)
106
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/riscv/vector_helper.c
109
+++ b/target/riscv/vector_helper.c
110
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
111
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
112
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl)
113
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq)
114
+
115
+/* Narrowing Floating-Point/Integer Type-Convert Instructions */
116
+/* (TD, T2, TX2) */
117
+#define NOP_UU_H uint16_t, uint32_t, uint32_t
118
+#define NOP_UU_W uint32_t, uint64_t, uint64_t
119
+/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
120
+RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
121
+RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
122
+GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh)
123
+GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl)
124
+
125
+/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
126
+RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
127
+RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
128
+GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh)
129
+GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl)
130
+
131
+/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
132
+RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
133
+RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
134
+GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh)
135
+GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl)
136
+
137
+/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
138
+RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
139
+RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
140
+GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh)
141
+GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl)
142
+
143
+/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
144
+static uint16_t vfncvtffv16(uint32_t a, float_status *s)
145
+{
146
+ return float32_to_float16(a, true, s);
147
+}
148
+
149
+RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
150
+RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
151
+GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh)
152
+GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl)
153
--
42
--
154
2.27.0
43
2.41.0
155
156
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Jason Chien <jason.chien@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
The variables whose values are given by cpu_riscv_read_rtc() should be named
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
"rtc". The variables whose values are given by cpu_riscv_read_rtc_raw()
5
should be named "rtc_r".
6
7
Signed-off-by: Jason Chien <jason.chien@sifive.com>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-44-zhiwei_liu@c-sky.com
9
Message-ID: <20230728082502.26439-2-jason.chien@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
11
---
9
target/riscv/helper.h | 11 ++++++
12
hw/intc/riscv_aclint.c | 6 +++---
10
target/riscv/insn32.decode | 5 +++
13
1 file changed, 3 insertions(+), 3 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 42 ++++++++++++++++++++++
13
4 files changed, 106 insertions(+)
14
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
17
--- a/hw/intc/riscv_aclint.c
18
+++ b/target/riscv/helper.h
18
+++ b/hw/intc/riscv_aclint.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
20
DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
20
uint64_t next;
21
DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
21
uint64_t diff;
22
DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32)
22
23
+
23
- uint64_t rtc_r = cpu_riscv_read_rtc(mtimer);
24
+DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
24
+ uint64_t rtc = cpu_riscv_read_rtc(mtimer);
25
+DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
25
26
+DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
26
/* Compute the relative hartid w.r.t the socket */
27
+DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
27
hartid = hartid - mtimer->hartid_base;
28
+DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
28
29
+DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
29
mtimer->timecmp[hartid] = value;
30
+DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
30
- if (mtimer->timecmp[hartid] <= rtc_r) {
31
+DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
31
+ if (mtimer->timecmp[hartid] <= rtc) {
32
+DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
32
/*
33
+DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
33
* If we're setting an MTIMECMP value in the "past",
34
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
34
* immediately raise the timer interrupt
35
index XXXXXXX..XXXXXXX 100644
35
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
36
--- a/target/riscv/insn32.decode
36
37
+++ b/target/riscv/insn32.decode
37
/* otherwise, set up the future timer interrupt */
38
@@ -XXX,XX +XXX,XX @@ vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
38
qemu_irq_lower(mtimer->timer_irqs[hartid]);
39
vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
39
- diff = mtimer->timecmp[hartid] - rtc_r;
40
vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
40
+ diff = mtimer->timecmp[hartid] - rtc;
41
vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
41
/* back to ns (note args switched in muldiv64) */
42
+vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
42
uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
43
+vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
43
44
+vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
45
+vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
46
+vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
47
48
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
49
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
50
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/riscv/insn_trans/trans_rvv.inc.c
53
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
54
@@ -XXX,XX +XXX,XX @@ GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check)
55
GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check)
56
GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check)
57
GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check)
58
+
59
+/* Widening Floating-Point/Integer Type-Convert Instructions */
60
+
61
+/*
62
+ * If the current SEW does not correspond to a supported IEEE floating-point
63
+ * type, an illegal instruction exception is raised
64
+ */
65
+static bool opfv_widen_check(DisasContext *s, arg_rmr *a)
66
+{
67
+ return (vext_check_isa_ill(s) &&
68
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
69
+ vext_check_reg(s, a->rd, true) &&
70
+ vext_check_reg(s, a->rs2, false) &&
71
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
72
+ 1 << s->lmul) &&
73
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
74
+}
75
+
76
+#define GEN_OPFV_WIDEN_TRANS(NAME) \
77
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
78
+{ \
79
+ if (opfv_widen_check(s, a)) { \
80
+ uint32_t data = 0; \
81
+ static gen_helper_gvec_3_ptr * const fns[2] = { \
82
+ gen_helper_##NAME##_h, \
83
+ gen_helper_##NAME##_w, \
84
+ }; \
85
+ TCGLabel *over = gen_new_label(); \
86
+ gen_set_rm(s, 7); \
87
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
88
+ \
89
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
90
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
91
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
92
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
93
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
94
+ s->vlen / 8, data, fns[s->sew - 1]); \
95
+ gen_set_label(over); \
96
+ return true; \
97
+ } \
98
+ return false; \
99
+}
100
+
101
+GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v)
102
+GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v)
103
+GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v)
104
+GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v)
105
+GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v)
106
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/riscv/vector_helper.c
109
+++ b/target/riscv/vector_helper.c
110
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
111
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh)
112
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl)
113
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq)
114
+
115
+/* Widening Floating-Point/Integer Type-Convert Instructions */
116
+/* (TD, T2, TX2) */
117
+#define WOP_UU_H uint32_t, uint16_t, uint16_t
118
+#define WOP_UU_W uint64_t, uint32_t, uint32_t
119
+/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
120
+RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
121
+RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
122
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl)
123
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq)
124
+
125
+/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
126
+RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
127
+RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
128
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl)
129
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq)
130
+
131
+/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
132
+RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
133
+RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
134
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl)
135
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq)
136
+
137
+/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
138
+RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
139
+RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
140
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl)
141
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq)
142
+
143
+/*
144
+ * vfwcvt.f.f.v vd, vs2, vm #
145
+ * Convert single-width float to double-width float.
146
+ */
147
+static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
148
+{
149
+ return float16_to_float32(a, true, s);
150
+}
151
+
152
+RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
153
+RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
154
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl)
155
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq)
156
--
44
--
157
2.27.0
45
2.41.0
158
159
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
We should not use types dependent on the host arch for target_ucontext.
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
This bug was found when running rv32 applications.
5
6
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200623215920.2594-42-zhiwei_liu@c-sky.com
8
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20230811055438.1945-1-zhiwei_liu@linux.alibaba.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
12
---
9
target/riscv/helper.h | 4 +++
13
linux-user/riscv/signal.c | 4 ++--
10
target/riscv/insn32.decode | 2 ++
14
1 file changed, 2 insertions(+), 2 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 38 +++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 24 ++++++++++++++++
13
4 files changed, 68 insertions(+)
14
15
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
--- a/linux-user/riscv/signal.c
18
+++ b/target/riscv/helper.h
19
+++ b/linux-user/riscv/signal.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32)
20
@@ -XXX,XX +XXX,XX @@ struct target_sigcontext {
20
DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32)
21
}; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */
21
DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32)
22
22
DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32)
23
struct target_ucontext {
23
+
24
- unsigned long uc_flags;
24
+DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32)
25
- struct target_ucontext *uc_link;
25
+DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32)
26
+ abi_ulong uc_flags;
26
+DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32)
27
+ abi_ptr uc_link;
27
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
28
target_stack_t uc_stack;
28
index XXXXXXX..XXXXXXX 100644
29
target_sigset_t uc_sigmask;
29
--- a/target/riscv/insn32.decode
30
uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)];
30
+++ b/target/riscv/insn32.decode
31
@@ -XXX,XX +XXX,XX @@ vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
32
vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
33
vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
34
vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
35
+vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0
36
+vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2
37
38
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
39
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
40
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn_trans/trans_rvv.inc.c
43
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
44
@@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)
45
46
/* Vector Floating-Point Classify Instruction */
47
GEN_OPFV_TRANS(vfclass_v, opfv_check)
48
+
49
+/* Vector Floating-Point Merge Instruction */
50
+GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check)
51
+
52
+static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
53
+{
54
+ if (vext_check_isa_ill(s) &&
55
+ vext_check_reg(s, a->rd, false) &&
56
+ (s->sew != 0)) {
57
+
58
+ if (s->vl_eq_vlmax) {
59
+ tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
60
+ MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]);
61
+ } else {
62
+ TCGv_ptr dest;
63
+ TCGv_i32 desc;
64
+ uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
65
+ static gen_helper_vmv_vx * const fns[3] = {
66
+ gen_helper_vmv_v_x_h,
67
+ gen_helper_vmv_v_x_w,
68
+ gen_helper_vmv_v_x_d,
69
+ };
70
+ TCGLabel *over = gen_new_label();
71
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
72
+
73
+ dest = tcg_temp_new_ptr();
74
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
75
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
76
+ fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc);
77
+
78
+ tcg_temp_free_ptr(dest);
79
+ tcg_temp_free_i32(desc);
80
+ gen_set_label(over);
81
+ }
82
+ return true;
83
+ }
84
+ return false;
85
+}
86
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/target/riscv/vector_helper.c
89
+++ b/target/riscv/vector_helper.c
90
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
91
GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
92
GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
93
GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
94
+
95
+/* Vector Floating-Point Merge Instruction */
96
+#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \
97
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
98
+ CPURISCVState *env, uint32_t desc) \
99
+{ \
100
+ uint32_t mlen = vext_mlen(desc); \
101
+ uint32_t vm = vext_vm(desc); \
102
+ uint32_t vl = env->vl; \
103
+ uint32_t esz = sizeof(ETYPE); \
104
+ uint32_t vlmax = vext_maxsz(desc) / esz; \
105
+ uint32_t i; \
106
+ \
107
+ for (i = 0; i < vl; i++) { \
108
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
109
+ *((ETYPE *)vd + H(i)) \
110
+ = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \
111
+ } \
112
+ CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
113
+}
114
+
115
+GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh)
116
+GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl)
117
+GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq)
118
--
31
--
119
2.27.0
32
2.41.0
120
33
121
34
diff view generated by jsdifflib
1
From: Jessica Clarke <jrtc27@jrtc27.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
The source priorities can be used to order sources with respect to other
3
In this patch, we create the APLIC and IMSIC FDT helper functions and
4
sources, not just as a way to enable/disable them based off a threshold.
4
remove M mode AIA devices when using KVM acceleration.
5
We must therefore always claim the highest-priority source, rather than
6
the first source we find.
7
5
8
Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com>
6
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Jim Shu <jim.shu@sifive.com>
10
Message-Id: <20200618202343.20455-1-jrtc27@jrtc27.com>
8
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
9
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
10
Message-ID: <20230727102439.22554-2-yongxuan.wang@sifive.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
---
12
---
13
hw/riscv/sifive_plic.c | 17 ++++++++++++-----
13
hw/riscv/virt.c | 290 +++++++++++++++++++++++-------------------------
14
1 file changed, 12 insertions(+), 5 deletions(-)
14
1 file changed, 137 insertions(+), 153 deletions(-)
15
15
16
diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c
16
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/riscv/sifive_plic.c
18
--- a/hw/riscv/virt.c
19
+++ b/hw/riscv/sifive_plic.c
19
+++ b/hw/riscv/virt.c
20
@@ -XXX,XX +XXX,XX @@ static void sifive_plic_update(SiFivePLICState *plic)
20
@@ -XXX,XX +XXX,XX @@ static uint32_t imsic_num_bits(uint32_t count)
21
static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid)
21
return ret;
22
}
23
24
-static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
25
- uint32_t *phandle, uint32_t *intc_phandles,
26
- uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
27
+static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
28
+ uint32_t *intc_phandles, uint32_t msi_phandle,
29
+ bool m_mode, uint32_t imsic_guest_bits)
22
{
30
{
23
int i, j;
31
int cpu, socket;
24
+ uint32_t max_irq = 0;
32
char *imsic_name;
25
+ uint32_t max_prio = plic->target_priority[addrid];
33
MachineState *ms = MACHINE(s);
26
+
34
int socket_count = riscv_socket_count(ms);
27
for (i = 0; i < plic->bitfield_words; i++) {
35
- uint32_t imsic_max_hart_per_socket, imsic_guest_bits;
28
uint32_t pending_enabled_not_claimed =
36
+ uint32_t imsic_max_hart_per_socket;
29
(plic->pending[i] & ~plic->claimed[i]) &
37
uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
30
@@ -XXX,XX +XXX,XX @@ static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid)
38
31
int irq = (i << 5) + j;
39
- *msi_m_phandle = (*phandle)++;
32
uint32_t prio = plic->source_priority[irq];
40
- *msi_s_phandle = (*phandle)++;
33
int enabled = pending_enabled_not_claimed & (1 << j);
41
imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
34
- if (enabled && prio > plic->target_priority[addrid]) {
42
imsic_regs = g_new0(uint32_t, socket_count * 4);
35
- sifive_plic_set_pending(plic, irq, false);
43
36
- sifive_plic_set_claimed(plic, irq, true);
44
- /* M-level IMSIC node */
37
- return irq;
45
for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
38
+ if (enabled && prio > max_prio) {
46
imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
39
+ max_irq = irq;
47
- imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
40
+ max_prio = prio;
48
+ imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
41
}
49
}
50
- imsic_max_hart_per_socket = 0;
51
- for (socket = 0; socket < socket_count; socket++) {
52
- imsic_addr = memmap[VIRT_IMSIC_M].base +
53
- socket * VIRT_IMSIC_GROUP_MAX_SIZE;
54
- imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
55
- imsic_regs[socket * 4 + 0] = 0;
56
- imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
57
- imsic_regs[socket * 4 + 2] = 0;
58
- imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
59
- if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
60
- imsic_max_hart_per_socket = s->soc[socket].num_harts;
61
- }
62
- }
63
- imsic_name = g_strdup_printf("/soc/imsics@%lx",
64
- (unsigned long)memmap[VIRT_IMSIC_M].base);
65
- qemu_fdt_add_subnode(ms->fdt, imsic_name);
66
- qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
67
- "riscv,imsics");
68
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
69
- FDT_IMSIC_INT_CELLS);
70
- qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
71
- NULL, 0);
72
- qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
73
- NULL, 0);
74
- qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
75
- imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
76
- qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
77
- socket_count * sizeof(uint32_t) * 4);
78
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
79
- VIRT_IRQCHIP_NUM_MSIS);
80
- if (socket_count > 1) {
81
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
82
- imsic_num_bits(imsic_max_hart_per_socket));
83
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
84
- imsic_num_bits(socket_count));
85
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
86
- IMSIC_MMIO_GROUP_MIN_SHIFT);
87
- }
88
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle);
89
-
90
- g_free(imsic_name);
91
92
- /* S-level IMSIC node */
93
- for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
94
- imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
95
- imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
96
- }
97
- imsic_guest_bits = imsic_num_bits(s->aia_guests + 1);
98
imsic_max_hart_per_socket = 0;
99
for (socket = 0; socket < socket_count; socket++) {
100
- imsic_addr = memmap[VIRT_IMSIC_S].base +
101
- socket * VIRT_IMSIC_GROUP_MAX_SIZE;
102
+ imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
103
imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
104
s->soc[socket].num_harts;
105
imsic_regs[socket * 4 + 0] = 0;
106
@@ -XXX,XX +XXX,XX @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
107
imsic_max_hart_per_socket = s->soc[socket].num_harts;
42
}
108
}
43
}
109
}
44
- return 0;
110
- imsic_name = g_strdup_printf("/soc/imsics@%lx",
45
+
111
- (unsigned long)memmap[VIRT_IMSIC_S].base);
46
+ if (max_irq) {
112
+
47
+ sifive_plic_set_pending(plic, max_irq, false);
113
+ imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
48
+ sifive_plic_set_claimed(plic, max_irq, true);
114
qemu_fdt_add_subnode(ms->fdt, imsic_name);
115
- qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
116
- "riscv,imsics");
117
+ qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
118
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
119
- FDT_IMSIC_INT_CELLS);
120
- qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
121
- NULL, 0);
122
- qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
123
- NULL, 0);
124
+ FDT_IMSIC_INT_CELLS);
125
+ qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
126
+ qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
127
qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
128
- imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
129
+ imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
130
qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
131
- socket_count * sizeof(uint32_t) * 4);
132
+ socket_count * sizeof(uint32_t) * 4);
133
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
134
- VIRT_IRQCHIP_NUM_MSIS);
135
+ VIRT_IRQCHIP_NUM_MSIS);
136
+
137
if (imsic_guest_bits) {
138
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
139
- imsic_guest_bits);
140
+ imsic_guest_bits);
141
}
142
+
143
if (socket_count > 1) {
144
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
145
- imsic_num_bits(imsic_max_hart_per_socket));
146
+ imsic_num_bits(imsic_max_hart_per_socket));
147
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
148
- imsic_num_bits(socket_count));
149
+ imsic_num_bits(socket_count));
150
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
151
- IMSIC_MMIO_GROUP_MIN_SHIFT);
152
+ IMSIC_MMIO_GROUP_MIN_SHIFT);
153
}
154
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle);
155
- g_free(imsic_name);
156
+ qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
157
158
+ g_free(imsic_name);
159
g_free(imsic_regs);
160
g_free(imsic_cells);
161
}
162
163
-static void create_fdt_socket_aplic(RISCVVirtState *s,
164
- const MemMapEntry *memmap, int socket,
165
- uint32_t msi_m_phandle,
166
- uint32_t msi_s_phandle,
167
- uint32_t *phandle,
168
- uint32_t *intc_phandles,
169
- uint32_t *aplic_phandles)
170
+static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
171
+ uint32_t *phandle, uint32_t *intc_phandles,
172
+ uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
173
+{
174
+ *msi_m_phandle = (*phandle)++;
175
+ *msi_s_phandle = (*phandle)++;
176
+
177
+ if (!kvm_enabled()) {
178
+ /* M-level IMSIC node */
179
+ create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles,
180
+ *msi_m_phandle, true, 0);
49
+ }
181
+ }
50
+ return max_irq;
182
+
183
+ /* S-level IMSIC node */
184
+ create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles,
185
+ *msi_s_phandle, false,
186
+ imsic_num_bits(s->aia_guests + 1));
187
+
188
+}
189
+
190
+static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
191
+ unsigned long aplic_addr, uint32_t aplic_size,
192
+ uint32_t msi_phandle,
193
+ uint32_t *intc_phandles,
194
+ uint32_t aplic_phandle,
195
+ uint32_t aplic_child_phandle,
196
+ bool m_mode)
197
{
198
int cpu;
199
char *aplic_name;
200
uint32_t *aplic_cells;
201
- unsigned long aplic_addr;
202
MachineState *ms = MACHINE(s);
203
- uint32_t aplic_m_phandle, aplic_s_phandle;
204
205
- aplic_m_phandle = (*phandle)++;
206
- aplic_s_phandle = (*phandle)++;
207
aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
208
209
- /* M-level APLIC node */
210
for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
211
aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
212
- aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
213
+ aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
214
}
215
- aplic_addr = memmap[VIRT_APLIC_M].base +
216
- (memmap[VIRT_APLIC_M].size * socket);
217
+
218
aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
219
qemu_fdt_add_subnode(ms->fdt, aplic_name);
220
qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
221
qemu_fdt_setprop_cell(ms->fdt, aplic_name,
222
- "#interrupt-cells", FDT_APLIC_INT_CELLS);
223
+ "#interrupt-cells", FDT_APLIC_INT_CELLS);
224
qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
225
+
226
if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
227
qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
228
- aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
229
+ aplic_cells,
230
+ s->soc[socket].num_harts * sizeof(uint32_t) * 2);
231
} else {
232
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
233
- msi_m_phandle);
234
+ qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
235
}
236
+
237
qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
238
- 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size);
239
+ 0x0, aplic_addr, 0x0, aplic_size);
240
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
241
- VIRT_IRQCHIP_NUM_SOURCES);
242
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
243
- aplic_s_phandle);
244
- qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
245
- aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES);
246
+ VIRT_IRQCHIP_NUM_SOURCES);
247
+
248
+ if (aplic_child_phandle) {
249
+ qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
250
+ aplic_child_phandle);
251
+ qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
252
+ aplic_child_phandle, 0x1,
253
+ VIRT_IRQCHIP_NUM_SOURCES);
254
+ }
255
+
256
riscv_socket_fdt_write_id(ms, aplic_name, socket);
257
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle);
258
+ qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
259
+
260
g_free(aplic_name);
261
+ g_free(aplic_cells);
262
+}
263
264
- /* S-level APLIC node */
265
- for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
266
- aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
267
- aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
268
+static void create_fdt_socket_aplic(RISCVVirtState *s,
269
+ const MemMapEntry *memmap, int socket,
270
+ uint32_t msi_m_phandle,
271
+ uint32_t msi_s_phandle,
272
+ uint32_t *phandle,
273
+ uint32_t *intc_phandles,
274
+ uint32_t *aplic_phandles)
275
+{
276
+ char *aplic_name;
277
+ unsigned long aplic_addr;
278
+ MachineState *ms = MACHINE(s);
279
+ uint32_t aplic_m_phandle, aplic_s_phandle;
280
+
281
+ aplic_m_phandle = (*phandle)++;
282
+ aplic_s_phandle = (*phandle)++;
283
+
284
+ if (!kvm_enabled()) {
285
+ /* M-level APLIC node */
286
+ aplic_addr = memmap[VIRT_APLIC_M].base +
287
+ (memmap[VIRT_APLIC_M].size * socket);
288
+ create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
289
+ msi_m_phandle, intc_phandles,
290
+ aplic_m_phandle, aplic_s_phandle,
291
+ true);
292
}
293
+
294
+ /* S-level APLIC node */
295
aplic_addr = memmap[VIRT_APLIC_S].base +
296
(memmap[VIRT_APLIC_S].size * socket);
297
+ create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
298
+ msi_s_phandle, intc_phandles,
299
+ aplic_s_phandle, 0,
300
+ false);
301
+
302
aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
303
- qemu_fdt_add_subnode(ms->fdt, aplic_name);
304
- qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
305
- qemu_fdt_setprop_cell(ms->fdt, aplic_name,
306
- "#interrupt-cells", FDT_APLIC_INT_CELLS);
307
- qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
308
- if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
309
- qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
310
- aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
311
- } else {
312
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
313
- msi_s_phandle);
314
- }
315
- qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
316
- 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size);
317
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
318
- VIRT_IRQCHIP_NUM_SOURCES);
319
- riscv_socket_fdt_write_id(ms, aplic_name, socket);
320
- qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle);
321
322
if (!socket) {
323
platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
324
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
325
326
g_free(aplic_name);
327
328
- g_free(aplic_cells);
329
aplic_phandles[socket] = aplic_s_phandle;
51
}
330
}
52
331
53
static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size)
332
@@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
333
int i;
334
hwaddr addr;
335
uint32_t guest_bits;
336
- DeviceState *aplic_m;
337
- bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false;
338
+ DeviceState *aplic_s = NULL;
339
+ DeviceState *aplic_m = NULL;
340
+ bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
341
342
if (msimode) {
343
- /* Per-socket M-level IMSICs */
344
- addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
345
- for (i = 0; i < hart_count; i++) {
346
- riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
347
- base_hartid + i, true, 1,
348
- VIRT_IRQCHIP_NUM_MSIS);
349
+ if (!kvm_enabled()) {
350
+ /* Per-socket M-level IMSICs */
351
+ addr = memmap[VIRT_IMSIC_M].base +
352
+ socket * VIRT_IMSIC_GROUP_MAX_SIZE;
353
+ for (i = 0; i < hart_count; i++) {
354
+ riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
355
+ base_hartid + i, true, 1,
356
+ VIRT_IRQCHIP_NUM_MSIS);
357
+ }
358
}
359
360
/* Per-socket S-level IMSICs */
361
@@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
362
}
363
}
364
365
- /* Per-socket M-level APLIC */
366
- aplic_m = riscv_aplic_create(
367
- memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size,
368
- memmap[VIRT_APLIC_M].size,
369
- (msimode) ? 0 : base_hartid,
370
- (msimode) ? 0 : hart_count,
371
- VIRT_IRQCHIP_NUM_SOURCES,
372
- VIRT_IRQCHIP_NUM_PRIO_BITS,
373
- msimode, true, NULL);
374
-
375
- if (aplic_m) {
376
- /* Per-socket S-level APLIC */
377
- riscv_aplic_create(
378
- memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size,
379
- memmap[VIRT_APLIC_S].size,
380
- (msimode) ? 0 : base_hartid,
381
- (msimode) ? 0 : hart_count,
382
- VIRT_IRQCHIP_NUM_SOURCES,
383
- VIRT_IRQCHIP_NUM_PRIO_BITS,
384
- msimode, false, aplic_m);
385
+ if (!kvm_enabled()) {
386
+ /* Per-socket M-level APLIC */
387
+ aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base +
388
+ socket * memmap[VIRT_APLIC_M].size,
389
+ memmap[VIRT_APLIC_M].size,
390
+ (msimode) ? 0 : base_hartid,
391
+ (msimode) ? 0 : hart_count,
392
+ VIRT_IRQCHIP_NUM_SOURCES,
393
+ VIRT_IRQCHIP_NUM_PRIO_BITS,
394
+ msimode, true, NULL);
395
}
396
397
- return aplic_m;
398
+ /* Per-socket S-level APLIC */
399
+ aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base +
400
+ socket * memmap[VIRT_APLIC_S].size,
401
+ memmap[VIRT_APLIC_S].size,
402
+ (msimode) ? 0 : base_hartid,
403
+ (msimode) ? 0 : hart_count,
404
+ VIRT_IRQCHIP_NUM_SOURCES,
405
+ VIRT_IRQCHIP_NUM_PRIO_BITS,
406
+ msimode, false, aplic_m);
407
+
408
+ return kvm_enabled() ? aplic_s : aplic_m;
409
}
410
411
static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
54
--
412
--
55
2.27.0
413
2.41.0
56
57
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
Vector AMOs operate as if aq and rl bits were zero on each element
3
We check the in-kernel irqchip support when using KVM acceleration.
4
with regard to ordering relative to other instructions in the same hart.
5
Vector AMOs provide no ordering guarantee between element operations
6
in the same vector AMO instruction
7
4
8
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Jim Shu <jim.shu@sifive.com>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
11
Message-id: 20200623215920.2594-10-zhiwei_liu@c-sky.com
8
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
9
Message-ID: <20230727102439.22554-3-yongxuan.wang@sifive.com>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
---
11
---
14
target/riscv/helper.h | 29 +++++
12
target/riscv/kvm.c | 10 +++++++++-
15
target/riscv/internals.h | 1 +
13
1 file changed, 9 insertions(+), 1 deletion(-)
16
target/riscv/insn32-64.decode | 11 ++
17
target/riscv/insn32.decode | 13 +++
18
target/riscv/insn_trans/trans_rvv.inc.c | 138 ++++++++++++++++++++++
19
target/riscv/vector_helper.c | 147 ++++++++++++++++++++++++
20
6 files changed, 339 insertions(+)
21
14
22
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
23
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
24
--- a/target/riscv/helper.h
17
--- a/target/riscv/kvm.c
25
+++ b/target/riscv/helper.h
18
+++ b/target/riscv/kvm.c
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32)
19
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s)
27
DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32)
20
28
DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32)
21
int kvm_arch_irqchip_create(KVMState *s)
29
DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32)
22
{
30
+#ifdef TARGET_RISCV64
23
- return 0;
31
+DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32)
24
+ if (kvm_kernel_irqchip_split()) {
32
+DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32)
25
+ error_report("-machine kernel_irqchip=split is not supported on RISC-V.");
33
+DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32)
26
+ exit(1);
34
+DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32)
49
+#endif
50
+DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32)
56
+DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
57
+DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
58
+DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
59
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/riscv/internals.h
62
+++ b/target/riscv/internals.h
63
@@ -XXX,XX +XXX,XX @@ FIELD(VDATA, MLEN, 0, 8)
64
FIELD(VDATA, VM, 8, 1)
65
FIELD(VDATA, LMUL, 9, 2)
66
FIELD(VDATA, NF, 11, 4)
67
+FIELD(VDATA, WD, 11, 1)
68
#endif
69
diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/riscv/insn32-64.decode
72
+++ b/target/riscv/insn32-64.decode
73
@@ -XXX,XX +XXX,XX @@ amomax_d 10100 . . ..... ..... 011 ..... 0101111 @atom_st
74
amominu_d 11000 . . ..... ..... 011 ..... 0101111 @atom_st
75
amomaxu_d 11100 . . ..... ..... 011 ..... 0101111 @atom_st
76
77
+#*** Vector AMO operations (in addition to Zvamo) ***
78
+vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
79
+vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
80
+vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
81
+vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
82
+vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
83
+vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
84
+vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
85
+vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
86
+vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
87
+
88
# *** RV64F Standard Extension (in addition to RV32F) ***
89
fcvt_l_s 1100000 00010 ..... ... ..... 1010011 @r2_rm
90
fcvt_lu_s 1100000 00011 ..... ... ..... 1010011 @r2_rm
91
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/riscv/insn32.decode
94
+++ b/target/riscv/insn32.decode
95
@@ -XXX,XX +XXX,XX @@
96
&u imm rd
97
&shift shamt rs1 rd
98
&atomic aq rl rs2 rs1 rd
99
+&rwdvm vm wd rd rs1 rs2
100
&r2nfvm vm rd rs1 nf
101
&rnfvm vm rd rs1 rs2 nf
102
103
@@ -XXX,XX +XXX,XX @@
104
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
105
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
106
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
107
+@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
108
@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
109
110
@hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1
111
@@ -XXX,XX +XXX,XX @@ vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm
112
vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm
113
vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm
114
115
+#*** Vector AMO operations are encoded under the standard AMO major opcode ***
116
+vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
117
+vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
118
+vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
119
+vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
120
+vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
121
+vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
122
+vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
123
+vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
124
+vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
125
+
126
# *** new major opcode OP-V ***
127
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
128
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
129
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/target/riscv/insn_trans/trans_rvv.inc.c
132
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
133
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check)
134
GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check)
135
GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check)
136
GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check)
137
+
138
+/*
139
+ *** vector atomic operation
140
+ */
141
+typedef void gen_helper_amo(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
142
+ TCGv_env, TCGv_i32);
143
+
144
+static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
145
+ uint32_t data, gen_helper_amo *fn, DisasContext *s)
146
+{
147
+ TCGv_ptr dest, mask, index;
148
+ TCGv base;
149
+ TCGv_i32 desc;
150
+
151
+ TCGLabel *over = gen_new_label();
152
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
153
+
154
+ dest = tcg_temp_new_ptr();
155
+ mask = tcg_temp_new_ptr();
156
+ index = tcg_temp_new_ptr();
157
+ base = tcg_temp_new();
158
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
159
+
160
+ gen_get_gpr(base, rs1);
161
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
162
+ tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2));
163
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
164
+
165
+ fn(dest, mask, base, index, cpu_env, desc);
166
+
167
+ tcg_temp_free_ptr(dest);
168
+ tcg_temp_free_ptr(mask);
169
+ tcg_temp_free_ptr(index);
170
+ tcg_temp_free(base);
171
+ tcg_temp_free_i32(desc);
172
+ gen_set_label(over);
173
+ return true;
174
+}
175
+
176
+static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
177
+{
178
+ uint32_t data = 0;
179
+ gen_helper_amo *fn;
180
+ static gen_helper_amo *const fnsw[9] = {
181
+ /* no atomic operation */
182
+ gen_helper_vamoswapw_v_w,
183
+ gen_helper_vamoaddw_v_w,
184
+ gen_helper_vamoxorw_v_w,
185
+ gen_helper_vamoandw_v_w,
186
+ gen_helper_vamoorw_v_w,
187
+ gen_helper_vamominw_v_w,
188
+ gen_helper_vamomaxw_v_w,
189
+ gen_helper_vamominuw_v_w,
190
+ gen_helper_vamomaxuw_v_w
191
+ };
192
+#ifdef TARGET_RISCV64
193
+ static gen_helper_amo *const fnsd[18] = {
194
+ gen_helper_vamoswapw_v_d,
195
+ gen_helper_vamoaddw_v_d,
196
+ gen_helper_vamoxorw_v_d,
197
+ gen_helper_vamoandw_v_d,
198
+ gen_helper_vamoorw_v_d,
199
+ gen_helper_vamominw_v_d,
200
+ gen_helper_vamomaxw_v_d,
201
+ gen_helper_vamominuw_v_d,
202
+ gen_helper_vamomaxuw_v_d,
203
+ gen_helper_vamoswapd_v_d,
204
+ gen_helper_vamoaddd_v_d,
205
+ gen_helper_vamoxord_v_d,
206
+ gen_helper_vamoandd_v_d,
207
+ gen_helper_vamoord_v_d,
208
+ gen_helper_vamomind_v_d,
209
+ gen_helper_vamomaxd_v_d,
210
+ gen_helper_vamominud_v_d,
211
+ gen_helper_vamomaxud_v_d
212
+ };
213
+#endif
214
+
215
+ if (tb_cflags(s->base.tb) & CF_PARALLEL) {
216
+ gen_helper_exit_atomic(cpu_env);
217
+ s->base.is_jmp = DISAS_NORETURN;
218
+ return true;
219
+ } else {
220
+ if (s->sew == 3) {
221
+#ifdef TARGET_RISCV64
222
+ fn = fnsd[seq];
223
+#else
224
+ /* Check done in amo_check(). */
225
+ g_assert_not_reached();
226
+#endif
227
+ } else {
228
+ fn = fnsw[seq];
229
+ }
230
+ }
27
+ }
231
+
28
+
232
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
29
+ /*
233
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
30
+ * We can create the VAIA using the newer device control API.
234
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
31
+ */
235
+ data = FIELD_DP32(data, VDATA, WD, a->wd);
32
+ return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
236
+ return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
237
+}
238
+/*
239
+ * There are two rules checked here.
240
+ *
241
+ * 1. SEW must be at least as wide as the AMO memory element size.
242
+ *
243
+ * 2. If SEW is greater than XLEN, an illegal instruction exception is raised.
244
+ */
245
+static bool amo_check(DisasContext *s, arg_rwdvm* a)
246
+{
247
+ return (!s->vill && has_ext(s, RVA) &&
248
+ (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) &&
249
+ vext_check_reg(s, a->rd, false) &&
250
+ vext_check_reg(s, a->rs2, false) &&
251
+ ((1 << s->sew) <= sizeof(target_ulong)) &&
252
+ ((1 << s->sew) >= 4));
253
+}
254
+
255
+GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check)
256
+GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check)
257
+GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check)
258
+GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check)
259
+GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check)
260
+GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check)
261
+GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check)
262
+GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check)
263
+GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check)
264
+#ifdef TARGET_RISCV64
265
+GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check)
266
+GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check)
267
+GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check)
268
+GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check)
269
+GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check)
270
+GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check)
271
+GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check)
272
+GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check)
273
+GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check)
274
+#endif
275
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
276
index XXXXXXX..XXXXXXX 100644
277
--- a/target/riscv/vector_helper.c
278
+++ b/target/riscv/vector_helper.c
279
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_lmul(uint32_t desc)
280
return FIELD_EX32(simd_data(desc), VDATA, LMUL);
281
}
33
}
282
34
283
+static uint32_t vext_wd(uint32_t desc)
35
int kvm_arch_process_async_events(CPUState *cs)
284
+{
285
+ return (simd_data(desc) >> 11) & 0x1;
286
+}
287
+
288
/*
289
* Get vector group length in bytes. Its range is [64, 2048].
290
*
291
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
292
GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
293
GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
294
GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
295
+
296
+/*
297
+ *** Vector AMO Operations (Zvamo)
298
+ */
299
+typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
300
+ uint32_t wd, uint32_t idx, CPURISCVState *env,
301
+ uintptr_t retaddr);
302
+
303
+/* no atomic operation for vector atomic instructions */
304
+#define DO_SWAP(N, M) (M)
305
+#define DO_AND(N, M) (N & M)
306
+#define DO_XOR(N, M) (N ^ M)
307
+#define DO_OR(N, M) (N | M)
308
+#define DO_ADD(N, M) (N + M)
309
+
310
+#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
311
+static void \
312
+vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \
313
+ uint32_t wd, uint32_t idx, \
314
+ CPURISCVState *env, uintptr_t retaddr)\
315
+{ \
316
+ typedef int##ESZ##_t ETYPE; \
317
+ typedef int##MSZ##_t MTYPE; \
318
+ typedef uint##MSZ##_t UMTYPE __attribute__((unused)); \
319
+ ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \
320
+ MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \
321
+ \
322
+ cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \
323
+ if (wd) { \
324
+ *pe3 = a; \
325
+ } \
326
+}
327
+
328
+/* Signed min/max */
329
+#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
330
+#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
331
+
332
+/* Unsigned min/max */
333
+#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
334
+#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
335
+
336
+GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
337
+GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l)
338
+GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l)
339
+GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l)
340
+GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l)
341
+GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l)
342
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l)
343
+GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
344
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
345
+#ifdef TARGET_RISCV64
346
+GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
347
+GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
348
+GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l)
349
+GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q)
350
+GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l)
351
+GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q)
352
+GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l)
353
+GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q)
354
+GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l)
355
+GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q)
356
+GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l)
357
+GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q)
358
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l)
359
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q)
360
+GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
361
+GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
362
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
363
+GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
364
+#endif
365
+
366
+static inline void
367
+vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
368
+ void *vs2, CPURISCVState *env, uint32_t desc,
369
+ vext_get_index_addr get_index_addr,
370
+ vext_amo_noatomic_fn *noatomic_op,
371
+ clear_fn *clear_elem,
372
+ uint32_t esz, uint32_t msz, uintptr_t ra)
373
+{
374
+ uint32_t i;
375
+ target_long addr;
376
+ uint32_t wd = vext_wd(desc);
377
+ uint32_t vm = vext_vm(desc);
378
+ uint32_t mlen = vext_mlen(desc);
379
+ uint32_t vlmax = vext_maxsz(desc) / esz;
380
+
381
+ for (i = 0; i < env->vl; i++) {
382
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
383
+ continue;
384
+ }
385
+ probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
386
+ probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
387
+ }
388
+ for (i = 0; i < env->vl; i++) {
389
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
390
+ continue;
391
+ }
392
+ addr = get_index_addr(base, i, vs2);
393
+ noatomic_op(vs3, addr, wd, i, env, ra);
394
+ }
395
+ clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
396
+}
397
+
398
+#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \
399
+void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \
400
+ void *vs2, CPURISCVState *env, uint32_t desc) \
401
+{ \
402
+ vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \
403
+ INDEX_FN, vext_##NAME##_noatomic_op, \
404
+ CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
405
+ GETPC()); \
406
+}
407
+
408
+#ifdef TARGET_RISCV64
409
+GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq)
410
+GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq)
411
+GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq)
412
+GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq)
413
+GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq)
414
+GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq)
415
+GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq)
416
+GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq)
417
+GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq)
418
+GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq)
419
+GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq)
420
+GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq)
421
+GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq)
422
+GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq)
423
+GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
424
+GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
425
+GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
426
+GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
427
+#endif
428
+GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl)
429
+GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl)
430
+GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl)
431
+GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl)
432
+GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl)
433
+GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl)
434
+GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl)
435
+GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
436
+GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
437
--
36
--
438
2.27.0
37
2.41.0
439
440
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
We create a vAIA chip by using the KVM_DEV_TYPE_RISCV_AIA and then set up
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
the chip with the KVM_DEV_RISCV_AIA_GRP_* APIs.
5
Message-id: 20200623215920.2594-51-zhiwei_liu@c-sky.com
5
We also extend KVM accelerator to specify the KVM AIA mode. The "riscv-aia"
6
parameter is passed along with --accel in QEMU command-line.
7
1) "riscv-aia=emul": IMSIC is emulated by hypervisor
8
2) "riscv-aia=hwaccel": use hardware guest IMSIC
9
3) "riscv-aia=auto": use the hardware guest IMSICs whenever available
10
otherwise we fall back to software emulation.
11
12
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
13
Reviewed-by: Jim Shu <jim.shu@sifive.com>
14
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
15
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
16
Message-ID: <20230727102439.22554-4-yongxuan.wang@sifive.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
17
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
18
---
8
target/riscv/helper.h | 2 ++
19
target/riscv/kvm_riscv.h | 4 +
9
target/riscv/insn32.decode | 1 +
20
target/riscv/kvm.c | 186 +++++++++++++++++++++++++++++++++++++++
10
target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++
21
2 files changed, 190 insertions(+)
11
target/riscv/vector_helper.c | 20 ++++++++++++++++
12
4 files changed, 55 insertions(+)
13
22
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
23
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
15
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
25
--- a/target/riscv/kvm_riscv.h
17
+++ b/target/riscv/helper.h
26
+++ b/target/riscv/kvm_riscv.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32)
27
@@ -XXX,XX +XXX,XX @@
19
DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
28
void kvm_riscv_init_user_properties(Object *cpu_obj);
20
DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32)
29
void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
21
DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
30
void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
22
+
31
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
23
+DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32)
32
+ uint64_t aia_irq_num, uint64_t aia_msi_num,
24
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
33
+ uint64_t aplic_base, uint64_t imsic_base,
34
+ uint64_t guest_num);
35
36
#endif
37
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
25
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
26
--- a/target/riscv/insn32.decode
39
--- a/target/riscv/kvm.c
27
+++ b/target/riscv/insn32.decode
40
+++ b/target/riscv/kvm.c
28
@@ -XXX,XX +XXX,XX @@ vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
41
@@ -XXX,XX +XXX,XX @@
29
vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
42
#include "exec/address-spaces.h"
30
vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
43
#include "hw/boards.h"
31
vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
44
#include "hw/irq.h"
32
+vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
45
+#include "hw/intc/riscv_imsic.h"
33
46
#include "qemu/log.h"
34
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
47
#include "hw/loader.h"
35
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
48
#include "kvm_riscv.h"
36
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
49
@@ -XXX,XX +XXX,XX @@
37
index XXXXXXX..XXXXXXX 100644
50
#include "chardev/char-fe.h"
38
--- a/target/riscv/insn_trans/trans_rvv.inc.c
51
#include "migration/migration.h"
39
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
52
#include "sysemu/runstate.h"
40
@@ -XXX,XX +XXX,XX @@ GEN_MM_TRANS(vmor_mm)
53
+#include "hw/riscv/numa.h"
41
GEN_MM_TRANS(vmnor_mm)
54
42
GEN_MM_TRANS(vmornot_mm)
55
static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
43
GEN_MM_TRANS(vmxnor_mm)
56
uint64_t idx)
44
+
57
@@ -XXX,XX +XXX,XX @@ bool kvm_arch_cpu_check_are_resettable(void)
45
+/* Vector mask population count vmpopc */
58
return true;
46
+static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a)
59
}
47
+{
60
48
+ if (vext_check_isa_ill(s)) {
61
+static int aia_mode;
49
+ TCGv_ptr src2, mask;
62
+
50
+ TCGv dst;
63
+static const char *kvm_aia_mode_str(uint64_t mode)
51
+ TCGv_i32 desc;
64
+{
52
+ uint32_t data = 0;
65
+ switch (mode) {
53
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
66
+ case KVM_DEV_RISCV_AIA_MODE_EMUL:
54
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
67
+ return "emul";
55
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
68
+ case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
56
+
69
+ return "hwaccel";
57
+ mask = tcg_temp_new_ptr();
70
+ case KVM_DEV_RISCV_AIA_MODE_AUTO:
58
+ src2 = tcg_temp_new_ptr();
71
+ default:
59
+ dst = tcg_temp_new();
72
+ return "auto";
60
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
73
+ };
61
+
74
+}
62
+ tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
75
+
63
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
76
+static char *riscv_get_kvm_aia(Object *obj, Error **errp)
64
+
77
+{
65
+ gen_helper_vmpopc_m(dst, mask, src2, cpu_env, desc);
78
+ return g_strdup(kvm_aia_mode_str(aia_mode));
66
+ gen_set_gpr(a->rd, dst);
79
+}
67
+
80
+
68
+ tcg_temp_free_ptr(mask);
81
+static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp)
69
+ tcg_temp_free_ptr(src2);
82
+{
70
+ tcg_temp_free(dst);
83
+ if (!strcmp(val, "emul")) {
71
+ tcg_temp_free_i32(desc);
84
+ aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL;
72
+ return true;
85
+ } else if (!strcmp(val, "hwaccel")) {
73
+ }
86
+ aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL;
74
+ return false;
87
+ } else if (!strcmp(val, "auto")) {
75
+}
88
+ aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO;
76
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
89
+ } else {
77
index XXXXXXX..XXXXXXX 100644
90
+ error_setg(errp, "Invalid KVM AIA mode");
78
--- a/target/riscv/vector_helper.c
91
+ error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n");
79
+++ b/target/riscv/vector_helper.c
92
+ }
80
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
93
+}
81
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
94
+
82
GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
95
void kvm_arch_accel_class_init(ObjectClass *oc)
83
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
96
{
84
+
97
+ object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia,
85
+/* Vector mask population count vmpopc */
98
+ riscv_set_kvm_aia);
86
+target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
99
+ object_class_property_set_description(oc, "riscv-aia",
87
+ uint32_t desc)
100
+ "Set KVM AIA mode. Valid values are "
88
+{
101
+ "emul, hwaccel, and auto. Default "
89
+ target_ulong cnt = 0;
102
+ "is auto.");
90
+ uint32_t mlen = vext_mlen(desc);
103
+ object_property_set_default_str(object_class_property_find(oc, "riscv-aia"),
91
+ uint32_t vm = vext_vm(desc);
104
+ "auto");
92
+ uint32_t vl = env->vl;
105
+}
93
+ int i;
106
+
94
+
107
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
95
+ for (i = 0; i < vl; i++) {
108
+ uint64_t aia_irq_num, uint64_t aia_msi_num,
96
+ if (vm || vext_elem_mask(v0, mlen, i)) {
109
+ uint64_t aplic_base, uint64_t imsic_base,
97
+ if (vext_elem_mask(vs2, mlen, i)) {
110
+ uint64_t guest_num)
98
+ cnt++;
111
+{
112
+ int ret, i;
113
+ int aia_fd = -1;
114
+ uint64_t default_aia_mode;
115
+ uint64_t socket_count = riscv_socket_count(machine);
116
+ uint64_t max_hart_per_socket = 0;
117
+ uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr;
118
+ uint64_t socket_bits, hart_bits, guest_bits;
119
+
120
+ aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false);
121
+
122
+ if (aia_fd < 0) {
123
+ error_report("Unable to create in-kernel irqchip");
124
+ exit(1);
125
+ }
126
+
127
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
128
+ KVM_DEV_RISCV_AIA_CONFIG_MODE,
129
+ &default_aia_mode, false, NULL);
130
+ if (ret < 0) {
131
+ error_report("KVM AIA: failed to get current KVM AIA mode");
132
+ exit(1);
133
+ }
134
+ qemu_log("KVM AIA: default mode is %s\n",
135
+ kvm_aia_mode_str(default_aia_mode));
136
+
137
+ if (default_aia_mode != aia_mode) {
138
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
139
+ KVM_DEV_RISCV_AIA_CONFIG_MODE,
140
+ &aia_mode, true, NULL);
141
+ if (ret < 0)
142
+ warn_report("KVM AIA: failed to set KVM AIA mode");
143
+ else
144
+ qemu_log("KVM AIA: set current mode to %s\n",
145
+ kvm_aia_mode_str(aia_mode));
146
+ }
147
+
148
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
149
+ KVM_DEV_RISCV_AIA_CONFIG_SRCS,
150
+ &aia_irq_num, true, NULL);
151
+ if (ret < 0) {
152
+ error_report("KVM AIA: failed to set number of input irq lines");
153
+ exit(1);
154
+ }
155
+
156
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
157
+ KVM_DEV_RISCV_AIA_CONFIG_IDS,
158
+ &aia_msi_num, true, NULL);
159
+ if (ret < 0) {
160
+ error_report("KVM AIA: failed to set number of msi");
161
+ exit(1);
162
+ }
163
+
164
+ socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1;
165
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
166
+ KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS,
167
+ &socket_bits, true, NULL);
168
+ if (ret < 0) {
169
+ error_report("KVM AIA: failed to set group_bits");
170
+ exit(1);
171
+ }
172
+
173
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
174
+ KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT,
175
+ &group_shift, true, NULL);
176
+ if (ret < 0) {
177
+ error_report("KVM AIA: failed to set group_shift");
178
+ exit(1);
179
+ }
180
+
181
+ guest_bits = guest_num == 0 ? 0 :
182
+ find_last_bit(&guest_num, BITS_PER_LONG) + 1;
183
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
184
+ KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
185
+ &guest_bits, true, NULL);
186
+ if (ret < 0) {
187
+ error_report("KVM AIA: failed to set guest_bits");
188
+ exit(1);
189
+ }
190
+
191
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
192
+ KVM_DEV_RISCV_AIA_ADDR_APLIC,
193
+ &aplic_base, true, NULL);
194
+ if (ret < 0) {
195
+ error_report("KVM AIA: failed to set the base address of APLIC");
196
+ exit(1);
197
+ }
198
+
199
+ for (socket = 0; socket < socket_count; socket++) {
200
+ socket_imsic_base = imsic_base + socket * (1U << group_shift);
201
+ hart_count = riscv_socket_hart_count(machine, socket);
202
+ base_hart = riscv_socket_first_hartid(machine, socket);
203
+
204
+ if (max_hart_per_socket < hart_count) {
205
+ max_hart_per_socket = hart_count;
206
+ }
207
+
208
+ for (i = 0; i < hart_count; i++) {
209
+ imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
210
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
211
+ KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
212
+ &imsic_addr, true, NULL);
213
+ if (ret < 0) {
214
+ error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
215
+ exit(1);
99
+ }
216
+ }
100
+ }
217
+ }
101
+ }
218
+ }
102
+ return cnt;
219
+
103
+}
220
+ hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
221
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
222
+ KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
223
+ &hart_bits, true, NULL);
224
+ if (ret < 0) {
225
+ error_report("KVM AIA: failed to set hart_bits");
226
+ exit(1);
227
+ }
228
+
229
+ if (kvm_has_gsi_routing()) {
230
+ for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
231
+ /* KVM AIA only has one APLIC instance */
232
+ kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
233
+ }
234
+ kvm_gsi_routing_allowed = true;
235
+ kvm_irqchip_commit_routes(kvm_state);
236
+ }
237
+
238
+ ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
239
+ KVM_DEV_RISCV_AIA_CTRL_INIT,
240
+ NULL, true, NULL);
241
+ if (ret < 0) {
242
+ error_report("KVM AIA: initialized fail");
243
+ exit(1);
244
+ }
245
+
246
+ kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
247
}
104
--
248
--
105
2.27.0
249
2.41.0
106
107
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
The unit-stride fault-only-first load instructions are used to
3
KVM AIA can't emulate APLIC only. When "aia=aplic" parameter is passed,
4
vectorize loops with data-dependent exit conditions(while loops).
4
APLIC devices are emulated by QEMU. For "aia=aplic-imsic", remove the
5
These instructions execute as a regular load except that they
5
mmio operations of APLIC when using KVM AIA and send wired interrupt
6
will only take a trap on element 0.
6
signal via KVM_IRQ_LINE API.
7
After KVM AIA is enabled, MSI messages are delivered by the KVM_SIGNAL_MSI API
8
when the IMSICs receive mmio write requests.
7
9
8
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
10
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
11
Reviewed-by: Jim Shu <jim.shu@sifive.com>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
11
Message-id: 20200623215920.2594-9-zhiwei_liu@c-sky.com
13
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
14
Message-ID: <20230727102439.22554-5-yongxuan.wang@sifive.com>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
15
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
---
16
---
14
target/riscv/helper.h | 22 +++++
17
hw/intc/riscv_aplic.c | 56 ++++++++++++++++++++++++++++++-------------
15
target/riscv/insn32.decode | 7 ++
18
hw/intc/riscv_imsic.c | 25 +++++++++++++++----
16
target/riscv/insn_trans/trans_rvv.inc.c | 73 ++++++++++++++++
19
2 files changed, 61 insertions(+), 20 deletions(-)
17
target/riscv/vector_helper.c | 110 ++++++++++++++++++++++++
18
4 files changed, 212 insertions(+)
19
20
20
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
21
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
21
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
22
--- a/target/riscv/helper.h
23
--- a/hw/intc/riscv_aplic.c
23
+++ b/target/riscv/helper.h
24
+++ b/hw/intc/riscv_aplic.c
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32)
25
@@ -XXX,XX +XXX,XX @@
25
DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32)
26
#include "hw/irq.h"
26
DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32)
27
#include "target/riscv/cpu.h"
27
DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32)
28
#include "sysemu/sysemu.h"
28
+DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32)
29
+#include "sysemu/kvm.h"
29
+DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32)
30
#include "migration/vmstate.h"
30
+DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32)
31
31
+DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32)
32
#define APLIC_MAX_IDC (1UL << 14)
32
+DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32)
33
@@ -XXX,XX +XXX,XX @@
33
+DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32)
34
34
+DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32)
35
#define APLIC_IDC_CLAIMI 0x1c
35
+DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32)
36
36
+DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32)
37
+DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32)
38
+DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32)
39
+DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32)
40
+DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32)
41
+DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32)
42
+DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32)
43
+DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32)
44
+DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32)
45
+DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32)
46
+DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32)
47
+DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32)
48
+DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32)
49
+DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32)
50
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/riscv/insn32.decode
53
+++ b/target/riscv/insn32.decode
54
@@ -XXX,XX +XXX,XX @@ vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
55
vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
56
vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
57
vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
58
+vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
59
+vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
60
+vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
61
+vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
62
+vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
63
+vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
64
+vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
65
vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
66
vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
67
vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
68
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/riscv/insn_trans/trans_rvv.inc.c
71
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
72
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check)
73
GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check)
74
GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check)
75
GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check)
76
+
77
+/*
37
+/*
78
+ *** unit stride fault-only-first load
38
+ * KVM AIA only supports APLIC MSI; fall back to QEMU emulation if we want to use
39
+ * APLIC Wired.
79
+ */
40
+ */
80
+static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
41
+static bool is_kvm_aia(bool msimode)
81
+ gen_helper_ldst_us *fn, DisasContext *s)
82
+{
42
+{
83
+ TCGv_ptr dest, mask;
43
+ return kvm_irqchip_in_kernel() && msimode;
84
+ TCGv base;
85
+ TCGv_i32 desc;
86
+
87
+ TCGLabel *over = gen_new_label();
88
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
89
+
90
+ dest = tcg_temp_new_ptr();
91
+ mask = tcg_temp_new_ptr();
92
+ base = tcg_temp_new();
93
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
94
+
95
+ gen_get_gpr(base, rs1);
96
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
97
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
98
+
99
+ fn(dest, mask, base, cpu_env, desc);
100
+
101
+ tcg_temp_free_ptr(dest);
102
+ tcg_temp_free_ptr(mask);
103
+ tcg_temp_free(base);
104
+ tcg_temp_free_i32(desc);
105
+ gen_set_label(over);
106
+ return true;
107
+}
44
+}
108
+
45
+
109
+static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
46
static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic,
47
uint32_t word)
48
{
49
@@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc)
50
return topi;
51
}
52
53
+static void riscv_kvm_aplic_request(void *opaque, int irq, int level)
110
+{
54
+{
111
+ uint32_t data = 0;
55
+ kvm_set_irq(kvm_state, irq, !!level);
112
+ gen_helper_ldst_us *fn;
113
+ static gen_helper_ldst_us * const fns[7][4] = {
114
+ { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h,
115
+ gen_helper_vlbff_v_w, gen_helper_vlbff_v_d },
116
+ { NULL, gen_helper_vlhff_v_h,
117
+ gen_helper_vlhff_v_w, gen_helper_vlhff_v_d },
118
+ { NULL, NULL,
119
+ gen_helper_vlwff_v_w, gen_helper_vlwff_v_d },
120
+ { gen_helper_vleff_v_b, gen_helper_vleff_v_h,
121
+ gen_helper_vleff_v_w, gen_helper_vleff_v_d },
122
+ { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h,
123
+ gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d },
124
+ { NULL, gen_helper_vlhuff_v_h,
125
+ gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d },
126
+ { NULL, NULL,
127
+ gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d }
128
+ };
129
+
130
+ fn = fns[seq][s->sew];
131
+ if (fn == NULL) {
132
+ return false;
133
+ }
134
+
135
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
136
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
137
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
138
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
139
+ return ldff_trans(a->rd, a->rs1, data, fn, s);
140
+}
56
+}
141
+
57
+
142
+GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check)
58
static void riscv_aplic_request(void *opaque, int irq, int level)
143
+GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check)
59
{
144
+GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check)
60
bool update = false;
145
+GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check)
61
@@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
146
+GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check)
62
uint32_t i;
147
+GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check)
63
RISCVAPLICState *aplic = RISCV_APLIC(dev);
148
+GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check)
64
149
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
65
- aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
66
- aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
67
- aplic->state = g_new0(uint32_t, aplic->num_irqs);
68
- aplic->target = g_new0(uint32_t, aplic->num_irqs);
69
- if (!aplic->msimode) {
70
- for (i = 0; i < aplic->num_irqs; i++) {
71
- aplic->target[i] = 1;
72
+ if (!is_kvm_aia(aplic->msimode)) {
73
+ aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
74
+ aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
75
+ aplic->state = g_new0(uint32_t, aplic->num_irqs);
76
+ aplic->target = g_new0(uint32_t, aplic->num_irqs);
77
+ if (!aplic->msimode) {
78
+ for (i = 0; i < aplic->num_irqs; i++) {
79
+ aplic->target[i] = 1;
80
+ }
81
}
82
- }
83
- aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
84
- aplic->iforce = g_new0(uint32_t, aplic->num_harts);
85
- aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);
86
+ aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
87
+ aplic->iforce = g_new0(uint32_t, aplic->num_harts);
88
+ aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);
89
90
- memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic,
91
- TYPE_RISCV_APLIC, aplic->aperture_size);
92
- sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
93
+ memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops,
94
+ aplic, TYPE_RISCV_APLIC, aplic->aperture_size);
95
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
96
+ }
97
98
/*
99
* Only root APLICs have hardware IRQ lines. All non-root APLICs
100
* have IRQ lines delegated by their parent APLIC.
101
*/
102
if (!aplic->parent) {
103
- qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
104
+ if (is_kvm_aia(aplic->msimode)) {
105
+ qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs);
106
+ } else {
107
+ qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
108
+ }
109
}
110
111
/* Create output IRQ lines for non-MSI mode */
112
@@ -XXX,XX +XXX,XX @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
113
qdev_prop_set_bit(dev, "mmode", mmode);
114
115
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
116
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
117
+
118
+ if (!is_kvm_aia(msimode)) {
119
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
120
+ }
121
122
if (parent) {
123
riscv_aplic_add_child(parent, dev);
124
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
150
index XXXXXXX..XXXXXXX 100644
125
index XXXXXXX..XXXXXXX 100644
151
--- a/target/riscv/vector_helper.c
126
--- a/hw/intc/riscv_imsic.c
152
+++ b/target/riscv/vector_helper.c
127
+++ b/hw/intc/riscv_imsic.c
153
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b)
128
@@ -XXX,XX +XXX,XX @@
154
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
129
#include "target/riscv/cpu.h"
155
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
130
#include "target/riscv/cpu_bits.h"
156
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
131
#include "sysemu/sysemu.h"
132
+#include "sysemu/kvm.h"
133
#include "migration/vmstate.h"
134
135
#define IMSIC_MMIO_PAGE_LE 0x00
136
@@ -XXX,XX +XXX,XX @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value,
137
goto err;
138
}
139
140
+#if defined(CONFIG_KVM)
141
+ if (kvm_irqchip_in_kernel()) {
142
+ struct kvm_msi msi;
157
+
143
+
158
+/*
144
+ msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32);
159
+ *** unit-stride fault-only-first load instructions
145
+ msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32);
160
+ */
146
+ msi.data = le32_to_cpu(value);
161
+static inline void
162
+vext_ldff(void *vd, void *v0, target_ulong base,
163
+ CPURISCVState *env, uint32_t desc,
164
+ vext_ldst_elem_fn *ldst_elem,
165
+ clear_fn *clear_elem,
166
+ uint32_t esz, uint32_t msz, uintptr_t ra)
167
+{
168
+ void *host;
169
+ uint32_t i, k, vl = 0;
170
+ uint32_t mlen = vext_mlen(desc);
171
+ uint32_t nf = vext_nf(desc);
172
+ uint32_t vm = vext_vm(desc);
173
+ uint32_t vlmax = vext_maxsz(desc) / esz;
174
+ target_ulong addr, offset, remain;
175
+
147
+
176
+ /* probe every access */
148
+ kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
177
+ for (i = 0; i < env->vl; i++) {
149
+
178
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
179
+ continue;
180
+ }
181
+ addr = base + nf * i * msz;
182
+ if (i == 0) {
183
+ probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
184
+ } else {
185
+ /* if it triggers an exception, no need to check watchpoint */
186
+ remain = nf * msz;
187
+ while (remain > 0) {
188
+ offset = -(addr | TARGET_PAGE_MASK);
189
+ host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
190
+ cpu_mmu_index(env, false));
191
+ if (host) {
192
+#ifdef CONFIG_USER_ONLY
193
+ if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
194
+ vl = i;
195
+ goto ProbeSuccess;
196
+ }
197
+#else
198
+ probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
199
+#endif
200
+ } else {
201
+ vl = i;
202
+ goto ProbeSuccess;
203
+ }
204
+ if (remain <= offset) {
205
+ break;
206
+ }
207
+ remain -= offset;
208
+ addr += offset;
209
+ }
210
+ }
211
+ }
212
+ProbeSuccess:
213
+ /* load bytes from guest memory */
214
+ if (vl != 0) {
215
+ env->vl = vl;
216
+ }
217
+ for (i = 0; i < env->vl; i++) {
218
+ k = 0;
219
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
220
+ continue;
221
+ }
222
+ while (k < nf) {
223
+ target_ulong addr = base + (i * nf + k) * msz;
224
+ ldst_elem(env, addr, i + k * vlmax, vd, ra);
225
+ k++;
226
+ }
227
+ }
228
+ /* clear tail elements */
229
+ if (vl != 0) {
230
+ return;
150
+ return;
231
+ }
151
+ }
232
+ for (k = 0; k < nf; k++) {
152
+#endif
233
+ clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
153
+
154
/* Writes only supported for MSI little-endian registers */
155
page = addr >> IMSIC_MMIO_PAGE_SHIFT;
156
if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) {
157
@@ -XXX,XX +XXX,XX @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
158
CPUState *cpu = cpu_by_arch_id(imsic->hartid);
159
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
160
161
- imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
162
- imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
163
- imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
164
- imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
165
+ if (!kvm_irqchip_in_kernel()) {
166
+ imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
167
+ imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
168
+ imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
169
+ imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
234
+ }
170
+ }
235
+}
171
236
+
172
memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops,
237
+#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
173
imsic, TYPE_RISCV_IMSIC,
238
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
239
+ CPURISCVState *env, uint32_t desc) \
240
+{ \
241
+ vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \
242
+ sizeof(ETYPE), sizeof(MTYPE), GETPC()); \
243
+}
244
+
245
+GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb)
246
+GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh)
247
+GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl)
248
+GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq)
249
+GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh)
250
+GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl)
251
+GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq)
252
+GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl)
253
+GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq)
254
+GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb)
255
+GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh)
256
+GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl)
257
+GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq)
258
+GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb)
259
+GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh)
260
+GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl)
261
+GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq)
262
+GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
263
+GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
264
+GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
265
+GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
266
+GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
267
--
174
--
268
2.27.0
175
2.41.0
269
270
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Select KVM AIA when the host kernel has in-kernel AIA chip support.
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
Since KVM AIA only has one APLIC instance, we map the QEMU APLIC
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
devices to KVM APLIC.
6
Message-id: 20200623215920.2594-27-zhiwei_liu@c-sky.com
6
7
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
8
Reviewed-by: Jim Shu <jim.shu@sifive.com>
9
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
10
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
11
Message-ID: <20230727102439.22554-6-yongxuan.wang@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
13
---
9
target/riscv/helper.h | 9 ++
14
hw/riscv/virt.c | 94 +++++++++++++++++++++++++++++++++----------------
10
target/riscv/insn32.decode | 2 +
15
1 file changed, 63 insertions(+), 31 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 4 +
12
target/riscv/vector_helper.c | 107 ++++++++++++++++++++++++
13
4 files changed, 122 insertions(+)
14
16
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
17
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
19
--- a/hw/riscv/virt.c
18
+++ b/target/riscv/helper.h
20
+++ b/hw/riscv/virt.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
21
@@ -XXX,XX +XXX,XX @@
20
DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
22
#include "hw/riscv/virt.h"
21
DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
23
#include "hw/riscv/boot.h"
22
DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
24
#include "hw/riscv/numa.h"
23
+
25
+#include "kvm_riscv.h"
24
+DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
26
#include "hw/intc/riscv_aclint.h"
25
+DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
27
#include "hw/intc/riscv_aplic.h"
26
+DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
28
#include "hw/intc/riscv_imsic.h"
27
+DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
29
@@ -XXX,XX +XXX,XX @@
28
+DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
30
#error "Can't accommodate all IMSIC groups in address space"
29
+DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
31
#endif
30
+DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
32
31
+DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
33
+/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */
32
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
34
+static bool virt_use_kvm_aia(RISCVVirtState *s)
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/riscv/insn32.decode
35
+++ b/target/riscv/insn32.decode
36
@@ -XXX,XX +XXX,XX @@ vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
37
vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
38
vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
39
vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
40
+vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
41
+vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
42
43
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
44
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
45
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/riscv/insn_trans/trans_rvv.inc.c
48
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
49
@@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vasub_vv, opivv_check)
50
GEN_OPIVX_TRANS(vaadd_vx, opivx_check)
51
GEN_OPIVX_TRANS(vasub_vx, opivx_check)
52
GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
53
+
54
+/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
55
+GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
56
+GEN_OPIVX_TRANS(vsmul_vx, opivx_check)
57
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/riscv/vector_helper.c
60
+++ b/target/riscv/vector_helper.c
61
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb)
62
GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh)
63
GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl)
64
GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq)
65
+
66
+/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
67
+static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
68
+{
35
+{
69
+ uint8_t round;
36
+ return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
70
+ int16_t res;
71
+
72
+ res = (int16_t)a * (int16_t)b;
73
+ round = get_round(vxrm, res, 7);
74
+ res = (res >> 7) + round;
75
+
76
+ if (res > INT8_MAX) {
77
+ env->vxsat = 0x1;
78
+ return INT8_MAX;
79
+ } else if (res < INT8_MIN) {
80
+ env->vxsat = 0x1;
81
+ return INT8_MIN;
82
+ } else {
83
+ return res;
84
+ }
85
+}
37
+}
86
+
38
+
87
+static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
39
static const MemMapEntry virt_memmap[] = {
88
+{
40
[VIRT_DEBUG] = { 0x0, 0x100 },
89
+ uint8_t round;
41
[VIRT_MROM] = { 0x1000, 0xf000 },
90
+ int32_t res;
42
@@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
43
uint32_t *intc_phandles,
44
uint32_t aplic_phandle,
45
uint32_t aplic_child_phandle,
46
- bool m_mode)
47
+ bool m_mode, int num_harts)
48
{
49
int cpu;
50
char *aplic_name;
51
uint32_t *aplic_cells;
52
MachineState *ms = MACHINE(s);
53
54
- aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
55
+ aplic_cells = g_new0(uint32_t, num_harts * 2);
56
57
- for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
58
+ for (cpu = 0; cpu < num_harts; cpu++) {
59
aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
60
aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
61
}
62
@@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
63
64
if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
65
qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
66
- aplic_cells,
67
- s->soc[socket].num_harts * sizeof(uint32_t) * 2);
68
+ aplic_cells, num_harts * sizeof(uint32_t) * 2);
69
} else {
70
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
71
}
72
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
73
uint32_t msi_s_phandle,
74
uint32_t *phandle,
75
uint32_t *intc_phandles,
76
- uint32_t *aplic_phandles)
77
+ uint32_t *aplic_phandles,
78
+ int num_harts)
79
{
80
char *aplic_name;
81
unsigned long aplic_addr;
82
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
83
create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
84
msi_m_phandle, intc_phandles,
85
aplic_m_phandle, aplic_s_phandle,
86
- true);
87
+ true, num_harts);
88
}
89
90
/* S-level APLIC node */
91
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
92
create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
93
msi_s_phandle, intc_phandles,
94
aplic_s_phandle, 0,
95
- false);
96
+ false, num_harts);
97
98
aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
99
100
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
101
*msi_pcie_phandle = msi_s_phandle;
102
}
103
104
- phandle_pos = ms->smp.cpus;
105
- for (socket = (socket_count - 1); socket >= 0; socket--) {
106
- phandle_pos -= s->soc[socket].num_harts;
107
-
108
- if (s->aia_type == VIRT_AIA_TYPE_NONE) {
109
- create_fdt_socket_plic(s, memmap, socket, phandle,
110
- &intc_phandles[phandle_pos], xplic_phandles);
111
- } else {
112
- create_fdt_socket_aplic(s, memmap, socket,
113
- msi_m_phandle, msi_s_phandle, phandle,
114
- &intc_phandles[phandle_pos], xplic_phandles);
115
+ /* KVM AIA only has one APLIC instance */
116
+ if (virt_use_kvm_aia(s)) {
117
+ create_fdt_socket_aplic(s, memmap, 0,
118
+ msi_m_phandle, msi_s_phandle, phandle,
119
+ &intc_phandles[0], xplic_phandles,
120
+ ms->smp.cpus);
121
+ } else {
122
+ phandle_pos = ms->smp.cpus;
123
+ for (socket = (socket_count - 1); socket >= 0; socket--) {
124
+ phandle_pos -= s->soc[socket].num_harts;
91
+
125
+
92
+ res = (int32_t)a * (int32_t)b;
126
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
93
+ round = get_round(vxrm, res, 15);
127
+ create_fdt_socket_plic(s, memmap, socket, phandle,
94
+ res = (res >> 15) + round;
128
+ &intc_phandles[phandle_pos],
95
+
129
+ xplic_phandles);
96
+ if (res > INT16_MAX) {
130
+ } else {
97
+ env->vxsat = 0x1;
131
+ create_fdt_socket_aplic(s, memmap, socket,
98
+ return INT16_MAX;
132
+ msi_m_phandle, msi_s_phandle, phandle,
99
+ } else if (res < INT16_MIN) {
133
+ &intc_phandles[phandle_pos],
100
+ env->vxsat = 0x1;
134
+ xplic_phandles,
101
+ return INT16_MIN;
135
+ s->soc[socket].num_harts);
136
+ }
137
}
138
}
139
140
g_free(intc_phandles);
141
142
- for (socket = 0; socket < socket_count; socket++) {
143
- if (socket == 0) {
144
- *irq_mmio_phandle = xplic_phandles[socket];
145
- *irq_virtio_phandle = xplic_phandles[socket];
146
- *irq_pcie_phandle = xplic_phandles[socket];
147
- }
148
- if (socket == 1) {
149
- *irq_virtio_phandle = xplic_phandles[socket];
150
- *irq_pcie_phandle = xplic_phandles[socket];
151
- }
152
- if (socket == 2) {
153
- *irq_pcie_phandle = xplic_phandles[socket];
154
+ if (virt_use_kvm_aia(s)) {
155
+ *irq_mmio_phandle = xplic_phandles[0];
156
+ *irq_virtio_phandle = xplic_phandles[0];
157
+ *irq_pcie_phandle = xplic_phandles[0];
102
+ } else {
158
+ } else {
103
+ return res;
159
+ for (socket = 0; socket < socket_count; socket++) {
104
+ }
160
+ if (socket == 0) {
105
+}
161
+ *irq_mmio_phandle = xplic_phandles[socket];
106
+
162
+ *irq_virtio_phandle = xplic_phandles[socket];
107
+static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
163
+ *irq_pcie_phandle = xplic_phandles[socket];
108
+{
164
+ }
109
+ uint8_t round;
165
+ if (socket == 1) {
110
+ int64_t res;
166
+ *irq_virtio_phandle = xplic_phandles[socket];
111
+
167
+ *irq_pcie_phandle = xplic_phandles[socket];
112
+ res = (int64_t)a * (int64_t)b;
168
+ }
113
+ round = get_round(vxrm, res, 31);
169
+ if (socket == 2) {
114
+ res = (res >> 31) + round;
170
+ *irq_pcie_phandle = xplic_phandles[socket];
115
+
171
+ }
116
+ if (res > INT32_MAX) {
172
}
117
+ env->vxsat = 0x1;
173
}
118
+ return INT32_MAX;
174
119
+ } else if (res < INT32_MIN) {
175
@@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine)
120
+ env->vxsat = 0x1;
176
}
121
+ return INT32_MIN;
177
}
122
+ } else {
178
123
+ return res;
179
+ if (virt_use_kvm_aia(s)) {
124
+ }
180
+ kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
125
+}
181
+ VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
126
+
182
+ memmap[VIRT_APLIC_S].base,
127
+static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
183
+ memmap[VIRT_IMSIC_S].base,
128
+{
184
+ s->aia_guests);
129
+ uint8_t round;
130
+ uint64_t hi_64, lo_64;
131
+ int64_t res;
132
+
133
+ if (a == INT64_MIN && b == INT64_MIN) {
134
+ env->vxsat = 1;
135
+ return INT64_MAX;
136
+ }
185
+ }
137
+
186
+
138
+ muls64(&lo_64, &hi_64, a, b);
187
if (riscv_is_32bit(&s->soc[0])) {
139
+ round = get_round(vxrm, lo_64, 63);
188
#if HOST_LONG_BITS == 64
140
+ /*
189
/* limit RAM size in a 32-bit system */
141
+ * Cannot overflow, as there are always
142
+ * 2 sign bits after multiply.
143
+ */
144
+ res = (hi_64 << 1) | (lo_64 >> 63);
145
+ if (round) {
146
+ if (res == INT64_MAX) {
147
+ env->vxsat = 1;
148
+ } else {
149
+ res += 1;
150
+ }
151
+ }
152
+ return res;
153
+}
154
+
155
+RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
156
+RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
157
+RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
158
+RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
159
+GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb)
160
+GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh)
161
+GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl)
162
+GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq)
163
+
164
+RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
165
+RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
166
+RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
167
+RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
168
+GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb)
169
+GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh)
170
+GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl)
171
+GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq)
172
--
190
--
173
2.27.0
191
2.41.0
174
175
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Conor Dooley <conor.dooley@microchip.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
On a dtb dumped from the virt machine, dt-validate complains:
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
soc: pmu: {'riscv,event-to-mhpmcounters': [[1, 1, 524281], [2, 2, 524284], [65561, 65561, 524280], [65563, 65563, 524280], [65569, 65569, 524280]], 'compatible': ['riscv,pmu']} should not be valid under {'type': 'object'}
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
from schema $id: http://devicetree.org/schemas/simple-bus.yaml#
6
Message-id: 20200623215920.2594-40-zhiwei_liu@c-sky.com
6
That's pretty cryptic, but running the dtb back through dtc produces
7
something a lot more reasonable:
8
Warning (simple_bus_reg): /soc/pmu: missing or empty reg/ranges property
9
10
Moving the riscv,pmu node out of the soc bus solves the problem.
11
12
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
13
Acked-by: Alistair Francis <alistair.francis@wdc.com>
14
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
15
Message-ID: <20230727-groom-decline-2c57ce42841c@spud>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
16
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
17
---
9
target/riscv/helper.h | 37 +++++
18
hw/riscv/virt.c | 2 +-
10
target/riscv/insn32.decode | 12 ++
19
1 file changed, 1 insertion(+), 1 deletion(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 35 +++++
12
target/riscv/vector_helper.c | 174 ++++++++++++++++++++++++
13
4 files changed, 258 insertions(+)
14
20
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
21
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
16
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
23
--- a/hw/riscv/virt.c
18
+++ b/target/riscv/helper.h
24
+++ b/hw/riscv/virt.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32)
25
@@ -XXX,XX +XXX,XX @@ static void create_fdt_pmu(RISCVVirtState *s)
20
DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32)
26
MachineState *ms = MACHINE(s);
21
DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32)
27
RISCVCPU hart = s->soc[0].harts[0];
22
DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32)
28
23
+
29
- pmu_name = g_strdup_printf("/soc/pmu");
24
+DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+ pmu_name = g_strdup_printf("/pmu");
25
+DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
qemu_fdt_add_subnode(ms->fdt, pmu_name);
26
+DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
27
+DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
33
riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name);
28
+DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32)
37
+DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32)
38
+DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32)
39
+DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32)
40
+DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, i64, ptr, env, i32)
41
+DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32)
42
+DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32)
43
+DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32)
44
+DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32)
45
+DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32)
46
+DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32)
47
+DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32)
48
+DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32)
49
+DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32)
50
+DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32)
51
+DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32)
52
+DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32)
53
+DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32)
54
+DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
55
+DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
56
+DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
57
+DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32)
58
+DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32)
59
+DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32)
60
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/riscv/insn32.decode
63
+++ b/target/riscv/insn32.decode
64
@@ -XXX,XX +XXX,XX @@ vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
65
vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
66
vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
67
vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
68
+vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
69
+vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
70
+vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
71
+vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm
72
+vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm
73
+vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm
74
+vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
75
+vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
76
+vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
77
+vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
78
+vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
79
+vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
80
81
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
82
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
83
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/target/riscv/insn_trans/trans_rvv.inc.c
86
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
87
@@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check)
88
GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check)
89
GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check)
90
GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)
91
+
92
+/* Vector Floating-Point Compare Instructions */
93
+static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a)
94
+{
95
+ return (vext_check_isa_ill(s) &&
96
+ vext_check_reg(s, a->rs2, false) &&
97
+ vext_check_reg(s, a->rs1, false) &&
98
+ (s->sew != 0) &&
99
+ ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
100
+ vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) ||
101
+ (s->lmul == 0)));
102
+}
103
+
104
+GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check)
105
+GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check)
106
+GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check)
107
+GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check)
108
+GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check)
109
+
110
+static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a)
111
+{
112
+ return (vext_check_isa_ill(s) &&
113
+ vext_check_reg(s, a->rs2, false) &&
114
+ (s->sew != 0) &&
115
+ (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) ||
116
+ (s->lmul == 0)));
117
+}
118
+
119
+GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check)
120
+GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check)
121
+GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check)
122
+GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
123
+GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
124
+GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)
125
+GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)
126
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/target/riscv/vector_helper.c
129
+++ b/target/riscv/vector_helper.c
130
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
131
GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh)
132
GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl)
133
GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq)
134
+
135
+/* Vector Floating-Point Compare Instructions */
136
+#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
137
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
138
+ CPURISCVState *env, uint32_t desc) \
139
+{ \
140
+ uint32_t mlen = vext_mlen(desc); \
141
+ uint32_t vm = vext_vm(desc); \
142
+ uint32_t vl = env->vl; \
143
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
144
+ uint32_t i; \
145
+ \
146
+ for (i = 0; i < vl; i++) { \
147
+ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
148
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
149
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
150
+ continue; \
151
+ } \
152
+ vext_set_elem_mask(vd, mlen, i, \
153
+ DO_OP(s2, s1, &env->fp_status)); \
154
+ } \
155
+ for (; i < vlmax; i++) { \
156
+ vext_set_elem_mask(vd, mlen, i, 0); \
157
+ } \
158
+}
159
+
160
+static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s)
161
+{
162
+ FloatRelation compare = float16_compare_quiet(a, b, s);
163
+ return compare == float_relation_equal;
164
+}
165
+
166
+GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
167
+GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
168
+GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
169
+
170
+#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
171
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
172
+ CPURISCVState *env, uint32_t desc) \
173
+{ \
174
+ uint32_t mlen = vext_mlen(desc); \
175
+ uint32_t vm = vext_vm(desc); \
176
+ uint32_t vl = env->vl; \
177
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
178
+ uint32_t i; \
179
+ \
180
+ for (i = 0; i < vl; i++) { \
181
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
182
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
183
+ continue; \
184
+ } \
185
+ vext_set_elem_mask(vd, mlen, i, \
186
+ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
187
+ } \
188
+ for (; i < vlmax; i++) { \
189
+ vext_set_elem_mask(vd, mlen, i, 0); \
190
+ } \
191
+}
192
+
193
+GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
194
+GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
195
+GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
196
+
197
+static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
198
+{
199
+ FloatRelation compare = float16_compare_quiet(a, b, s);
200
+ return compare != float_relation_equal;
201
+}
202
+
203
+static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
204
+{
205
+ FloatRelation compare = float32_compare_quiet(a, b, s);
206
+ return compare != float_relation_equal;
207
+}
208
+
209
+static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
210
+{
211
+ FloatRelation compare = float64_compare_quiet(a, b, s);
212
+ return compare != float_relation_equal;
213
+}
214
+
215
+GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
216
+GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
217
+GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
218
+GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
219
+GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
220
+GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
221
+
222
+static bool float16_lt(uint16_t a, uint16_t b, float_status *s)
223
+{
224
+ FloatRelation compare = float16_compare(a, b, s);
225
+ return compare == float_relation_less;
226
+}
227
+
228
+GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
229
+GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
230
+GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
231
+GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
232
+GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
233
+GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
234
+
235
+static bool float16_le(uint16_t a, uint16_t b, float_status *s)
236
+{
237
+ FloatRelation compare = float16_compare(a, b, s);
238
+ return compare == float_relation_less ||
239
+ compare == float_relation_equal;
240
+}
241
+
242
+GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
243
+GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
244
+GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
245
+GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
246
+GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
247
+GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
248
+
249
+static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
250
+{
251
+ FloatRelation compare = float16_compare(a, b, s);
252
+ return compare == float_relation_greater;
253
+}
254
+
255
+static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
256
+{
257
+ FloatRelation compare = float32_compare(a, b, s);
258
+ return compare == float_relation_greater;
259
+}
260
+
261
+static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
262
+{
263
+ FloatRelation compare = float64_compare(a, b, s);
264
+ return compare == float_relation_greater;
265
+}
266
+
267
+GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
268
+GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
269
+GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
270
+
271
+static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
272
+{
273
+ FloatRelation compare = float16_compare(a, b, s);
274
+ return compare == float_relation_greater ||
275
+ compare == float_relation_equal;
276
+}
277
+
278
+static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
279
+{
280
+ FloatRelation compare = float32_compare(a, b, s);
281
+ return compare == float_relation_greater ||
282
+ compare == float_relation_equal;
283
+}
284
+
285
+static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
286
+{
287
+ FloatRelation compare = float64_compare(a, b, s);
288
+ return compare == float_relation_greater ||
289
+ compare == float_relation_equal;
290
+}
291
+
292
+GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
293
+GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
294
+GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
295
+
296
+static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s)
297
+{
298
+ FloatRelation compare = float16_compare_quiet(a, b, s);
299
+ return compare == float_relation_unordered;
300
+}
301
+
302
+GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
303
+GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
304
+GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
305
+GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
306
+GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
307
+GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
308
--
34
--
309
2.27.0
35
2.41.0
310
311
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Weiwei Li <liweiwei@iscas.ac.cn>
2
2
3
The v0.7.1 specification does not define vector status within mstatus.
3
The Svadu specification updated the name of the *envcfg bit from
4
A future revision will define the privileged portion of the vector status.
4
HADE to ADUE.
5
5
6
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
6
Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
9
Message-id: 20200623215920.2594-4-zhiwei_liu@c-sky.com
9
Message-ID: <20230816141916.66898-1-liweiwei@iscas.ac.cn>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
---
11
---
12
target/riscv/cpu_bits.h | 15 +++++++++
12
target/riscv/cpu_bits.h | 8 ++++----
13
target/riscv/csr.c | 75 ++++++++++++++++++++++++++++++++++++++++-
13
target/riscv/cpu.c | 4 ++--
14
2 files changed, 89 insertions(+), 1 deletion(-)
14
target/riscv/cpu_helper.c | 6 +++---
15
target/riscv/csr.c | 12 ++++++------
16
4 files changed, 15 insertions(+), 15 deletions(-)
15
17
16
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
18
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
17
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
18
--- a/target/riscv/cpu_bits.h
20
--- a/target/riscv/cpu_bits.h
19
+++ b/target/riscv/cpu_bits.h
21
+++ b/target/riscv/cpu_bits.h
20
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ typedef enum RISCVException {
21
#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
23
#define MENVCFG_CBIE (3UL << 4)
22
#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
24
#define MENVCFG_CBCFE BIT(6)
23
25
#define MENVCFG_CBZE BIT(7)
24
+/* Vector Fixed-Point round model */
26
-#define MENVCFG_HADE (1ULL << 61)
25
+#define FSR_VXRM_SHIFT 9
27
+#define MENVCFG_ADUE (1ULL << 61)
26
+#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
28
#define MENVCFG_PBMTE (1ULL << 62)
27
+
29
#define MENVCFG_STCE (1ULL << 63)
28
+/* Vector Fixed-Point saturation flag */
30
29
+#define FSR_VXSAT_SHIFT 8
31
/* For RV32 */
30
+#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
32
-#define MENVCFGH_HADE BIT(29)
31
+
33
+#define MENVCFGH_ADUE BIT(29)
32
/* Control and Status Registers */
34
#define MENVCFGH_PBMTE BIT(30)
33
35
#define MENVCFGH_STCE BIT(31)
34
/* User Trap Setup */
36
35
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@ typedef enum RISCVException {
36
#define CSR_FRM 0x002
38
#define HENVCFG_CBIE MENVCFG_CBIE
37
#define CSR_FCSR 0x003
39
#define HENVCFG_CBCFE MENVCFG_CBCFE
38
40
#define HENVCFG_CBZE MENVCFG_CBZE
39
+/* User Vector CSRs */
41
-#define HENVCFG_HADE MENVCFG_HADE
40
+#define CSR_VSTART 0x008
42
+#define HENVCFG_ADUE MENVCFG_ADUE
41
+#define CSR_VXSAT 0x009
43
#define HENVCFG_PBMTE MENVCFG_PBMTE
42
+#define CSR_VXRM 0x00a
44
#define HENVCFG_STCE MENVCFG_STCE
43
+#define CSR_VL 0xc20
45
44
+#define CSR_VTYPE 0xc21
46
/* For RV32 */
45
+
47
-#define HENVCFGH_HADE MENVCFGH_HADE
46
/* User Timers and Counters */
48
+#define HENVCFGH_ADUE MENVCFGH_ADUE
47
#define CSR_CYCLE 0xc00
49
#define HENVCFGH_PBMTE MENVCFGH_PBMTE
48
#define CSR_TIME 0xc01
50
#define HENVCFGH_STCE MENVCFGH_STCE
51
52
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/riscv/cpu.c
55
+++ b/target/riscv/cpu.c
56
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj)
57
env->two_stage_lookup = false;
58
59
env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) |
60
- (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0);
61
+ (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0);
62
env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) |
63
- (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0);
64
+ (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0);
65
66
/* Initialized default priorities of local interrupts. */
67
for (i = 0; i < ARRAY_SIZE(env->miprio); i++) {
68
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/riscv/cpu_helper.c
71
+++ b/target/riscv/cpu_helper.c
72
@@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
73
}
74
75
bool pbmte = env->menvcfg & MENVCFG_PBMTE;
76
- bool hade = env->menvcfg & MENVCFG_HADE;
77
+ bool adue = env->menvcfg & MENVCFG_ADUE;
78
79
if (first_stage && two_stage && env->virt_enabled) {
80
pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE);
81
- hade = hade && (env->henvcfg & HENVCFG_HADE);
82
+ adue = adue && (env->henvcfg & HENVCFG_ADUE);
83
}
84
85
int ptshift = (levels - 1) * ptidxbits;
86
@@ -XXX,XX +XXX,XX @@ restart:
87
88
/* Page table updates need to be atomic with MTTCG enabled */
89
if (updated_pte != pte && !is_debug) {
90
- if (!hade) {
91
+ if (!adue) {
92
return TRANSLATE_FAIL;
93
}
94
49
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
95
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
50
index XXXXXXX..XXXXXXX 100644
96
index XXXXXXX..XXXXXXX 100644
51
--- a/target/riscv/csr.c
97
--- a/target/riscv/csr.c
52
+++ b/target/riscv/csr.c
98
+++ b/target/riscv/csr.c
53
@@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops)
99
@@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno,
54
static int fs(CPURISCVState *env, int csrno)
100
if (riscv_cpu_mxl(env) == MXL_RV64) {
101
mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
102
(cfg->ext_sstc ? MENVCFG_STCE : 0) |
103
- (cfg->ext_svadu ? MENVCFG_HADE : 0);
104
+ (cfg->ext_svadu ? MENVCFG_ADUE : 0);
105
}
106
env->menvcfg = (env->menvcfg & ~mask) | (val & mask);
107
108
@@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno,
109
const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
110
uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
111
(cfg->ext_sstc ? MENVCFG_STCE : 0) |
112
- (cfg->ext_svadu ? MENVCFG_HADE : 0);
113
+ (cfg->ext_svadu ? MENVCFG_ADUE : 0);
114
uint64_t valh = (uint64_t)val << 32;
115
116
env->menvcfg = (env->menvcfg & ~mask) | (valh & mask);
117
@@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
118
* henvcfg.stce is read_only 0 when menvcfg.stce = 0
119
* henvcfg.hade is read_only 0 when menvcfg.hade = 0
120
*/
121
- *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
122
+ *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
123
env->menvcfg);
124
return RISCV_EXCP_NONE;
125
}
126
@@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno,
127
}
128
129
if (riscv_cpu_mxl(env) == MXL_RV64) {
130
- mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE);
131
+ mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE);
132
}
133
134
env->henvcfg = (env->henvcfg & ~mask) | (val & mask);
135
@@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno,
136
return ret;
137
}
138
139
- *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
140
+ *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
141
env->menvcfg)) >> 32;
142
return RISCV_EXCP_NONE;
143
}
144
@@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno,
145
target_ulong val)
55
{
146
{
56
#if !defined(CONFIG_USER_ONLY)
147
uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE |
57
+ /* loose check condition for fcsr in vector extension */
148
- HENVCFG_HADE);
58
+ if ((csrno == CSR_FCSR) && (env->misa & RVV)) {
149
+ HENVCFG_ADUE);
59
+ return 0;
150
uint64_t valh = (uint64_t)val << 32;
60
+ }
151
RISCVException ret;
61
if (!env->debugger && !riscv_cpu_fp_enabled(env)) {
152
62
return -1;
63
}
64
@@ -XXX,XX +XXX,XX @@ static int fs(CPURISCVState *env, int csrno)
65
return 0;
66
}
67
68
+static int vs(CPURISCVState *env, int csrno)
69
+{
70
+ if (env->misa & RVV) {
71
+ return 0;
72
+ }
73
+ return -1;
74
+}
75
+
76
static int ctr(CPURISCVState *env, int csrno)
77
{
78
#if !defined(CONFIG_USER_ONLY)
79
@@ -XXX,XX +XXX,XX @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
80
#endif
81
*val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
82
| (env->frm << FSR_RD_SHIFT);
83
+ if (vs(env, csrno) >= 0) {
84
+ *val |= (env->vxrm << FSR_VXRM_SHIFT)
85
+ | (env->vxsat << FSR_VXSAT_SHIFT);
86
+ }
87
return 0;
88
}
89
90
@@ -XXX,XX +XXX,XX @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
91
env->mstatus |= MSTATUS_FS;
92
#endif
93
env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
94
+ if (vs(env, csrno) >= 0) {
95
+ env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
96
+ env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
97
+ }
98
riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
99
return 0;
100
}
101
102
+static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
103
+{
104
+ *val = env->vtype;
105
+ return 0;
106
+}
107
+
108
+static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
109
+{
110
+ *val = env->vl;
111
+ return 0;
112
+}
113
+
114
+static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
115
+{
116
+ *val = env->vxrm;
117
+ return 0;
118
+}
119
+
120
+static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
121
+{
122
+ env->vxrm = val;
123
+ return 0;
124
+}
125
+
126
+static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
127
+{
128
+ *val = env->vxsat;
129
+ return 0;
130
+}
131
+
132
+static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
133
+{
134
+ env->vxsat = val;
135
+ return 0;
136
+}
137
+
138
+static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
139
+{
140
+ *val = env->vstart;
141
+ return 0;
142
+}
143
+
144
+static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
145
+{
146
+ env->vstart = val;
147
+ return 0;
148
+}
149
+
150
/* User Timers and Counters */
151
static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
152
{
153
@@ -XXX,XX +XXX,XX @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
154
[CSR_FFLAGS] = { fs, read_fflags, write_fflags },
155
[CSR_FRM] = { fs, read_frm, write_frm },
156
[CSR_FCSR] = { fs, read_fcsr, write_fcsr },
157
-
158
+ /* Vector CSRs */
159
+ [CSR_VSTART] = { vs, read_vstart, write_vstart },
160
+ [CSR_VXSAT] = { vs, read_vxsat, write_vxsat },
161
+ [CSR_VXRM] = { vs, read_vxrm, write_vxrm },
162
+ [CSR_VL] = { vs, read_vl },
163
+ [CSR_VTYPE] = { vs, read_vtype },
164
/* User Timers and Counters */
165
[CSR_CYCLE] = { ctr, read_instret },
166
[CSR_INSTRET] = { ctr, read_instret },
167
--
153
--
168
2.27.0
154
2.41.0
169
170
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
In the same emulated RISC-V host, the 'host' KVM CPU takes 4 times
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
longer to boot than the 'rv64' KVM CPU.
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
6
Message-id: 20200623215920.2594-25-zhiwei_liu@c-sky.com
6
The reason is an unintended behavior of riscv_cpu_satp_mode_finalize()
7
when satp_mode.supported = 0, i.e. when cpu_init() does not set
8
satp_mode_max_supported(). satp_mode_max_from_map(map) does:
9
10
31 - __builtin_clz(map)
11
12
This means that, if satp_mode.supported = 0, satp_mode_supported_max
13
will be '31 - 32'. But this is C, so satp_mode_supported_max will gladly
14
set it to UINT_MAX (4294967295). After that, if the user didn't set a
15
satp_mode, set_satp_mode_default_map(cpu) will make
16
17
cfg.satp_mode.map = cfg.satp_mode.supported
18
19
So satp_mode.map = 0. And then satp_mode_map_max will be set to
20
satp_mode_max_from_map(cpu->cfg.satp_mode.map), i.e. also UINT_MAX. The
21
guard "satp_mode_map_max > satp_mode_supported_max" doesn't protect us
22
here since both are UINT_MAX.
23
24
And finally we have 2 loops:
25
26
for (int i = satp_mode_map_max - 1; i >= 0; --i) {
27
28
Which are, in fact, 2 loops from UINT_MAX -1 to -1. This is where the
29
extra delay when booting the 'host' CPU is coming from.
30
31
Commit 43d1de32f8 already set a precedent for satp_mode.supported = 0
32
in a different manner. We're doing the same here. If supported == 0,
33
interpret as 'the CPU wants the OS to handle satp mode alone' and skip
34
satp_mode_finalize().
35
36
We'll also put a guard in satp_mode_max_from_map() to assert out if map
37
is 0 since the function is not ready to deal with it.
38
39
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
40
Fixes: 6f23aaeb9b ("riscv: Allow user to set the satp mode")
41
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
42
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
43
Message-ID: <20230817152903.694926-1-dbarboza@ventanamicro.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
44
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
45
---
9
target/riscv/helper.h | 33 ++
46
target/riscv/cpu.c | 23 ++++++++++++++++++++---
10
target/riscv/insn32.decode | 10 +
47
1 file changed, 20 insertions(+), 3 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 16 +
12
target/riscv/vector_helper.c | 385 ++++++++++++++++++++++++
13
4 files changed, 444 insertions(+)
14
48
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
49
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
16
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
51
--- a/target/riscv/cpu.c
18
+++ b/target/riscv/helper.h
52
+++ b/target/riscv/cpu.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32)
53
@@ -XXX,XX +XXX,XX @@ static uint8_t satp_mode_from_str(const char *satp_mode_str)
20
DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32)
54
21
DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32)
55
uint8_t satp_mode_max_from_map(uint32_t map)
22
DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32)
56
{
57
+ /*
58
+ * 'map = 0' will make us return (31 - 32), which C will
59
+ * happily overflow to UINT_MAX. There's no good result to
60
+ * return if 'map = 0' (e.g. returning 0 will be ambiguous
61
+ * with the result for 'map = 1').
62
+ *
63
+ * Assert out if map = 0. Callers will have to deal with
64
+ * it outside of this function.
65
+ */
66
+ g_assert(map > 0);
23
+
67
+
24
+DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
68
/* map here has at least one bit set, so no problem with clz */
25
+DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
69
return 31 - __builtin_clz(map);
26
+DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
70
}
27
+DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
71
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
28
+DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
72
static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
29
+DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
73
{
30
+DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
74
bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32;
31
+DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
75
- uint8_t satp_mode_map_max;
32
+DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
76
- uint8_t satp_mode_supported_max =
33
+DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
77
- satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
34
+DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
78
+ uint8_t satp_mode_map_max, satp_mode_supported_max;
35
+DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2
61
vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0
62
vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0
63
vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0
64
+vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm
65
+vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm
66
+vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm
67
+vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm
68
+vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm
69
+vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm
70
+vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
71
+vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
72
+vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
73
+vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
74
75
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
76
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
77
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/riscv/insn_trans/trans_rvv.inc.c
80
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
81
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
82
GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check)
83
GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check)
84
GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check)
85
+
79
+
86
+/*
80
+ /* The CPU wants the OS to decide which satp mode to use */
87
+ *** Vector Fixed-Point Arithmetic Instructions
81
+ if (cpu->cfg.satp_mode.supported == 0) {
88
+ */
82
+ return;
89
+
90
+/* Vector Single-Width Saturating Add and Subtract */
91
+GEN_OPIVV_TRANS(vsaddu_vv, opivv_check)
92
+GEN_OPIVV_TRANS(vsadd_vv, opivv_check)
93
+GEN_OPIVV_TRANS(vssubu_vv, opivv_check)
94
+GEN_OPIVV_TRANS(vssub_vv, opivv_check)
95
+GEN_OPIVX_TRANS(vsaddu_vx, opivx_check)
96
+GEN_OPIVX_TRANS(vsadd_vx, opivx_check)
97
+GEN_OPIVX_TRANS(vssubu_vx, opivx_check)
98
+GEN_OPIVX_TRANS(vssub_vx, opivx_check)
99
+GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check)
100
+GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check)
101
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/target/riscv/vector_helper.c
104
+++ b/target/riscv/vector_helper.c
105
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb)
106
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
107
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
108
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
109
+
110
+/*
111
+ *** Vector Fixed-Point Arithmetic Instructions
112
+ */
113
+
114
+/* Vector Single-Width Saturating Add and Subtract */
115
+
116
+/*
117
+ * As fixed point instructions probably have round mode and saturation,
118
+ * define common macros for fixed point here.
119
+ */
120
+typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
121
+ CPURISCVState *env, int vxrm);
122
+
123
+#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
124
+static inline void \
125
+do_##NAME(void *vd, void *vs1, void *vs2, int i, \
126
+ CPURISCVState *env, int vxrm) \
127
+{ \
128
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
129
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
130
+ *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
131
+}
132
+
133
+static inline void
134
+vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
135
+ CPURISCVState *env,
136
+ uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
137
+ opivv2_rm_fn *fn)
138
+{
139
+ for (uint32_t i = 0; i < vl; i++) {
140
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
141
+ continue;
142
+ }
143
+ fn(vd, vs1, vs2, i, env, vxrm);
144
+ }
145
+}
146
+
147
+static inline void
148
+vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
149
+ CPURISCVState *env,
150
+ uint32_t desc, uint32_t esz, uint32_t dsz,
151
+ opivv2_rm_fn *fn, clear_fn *clearfn)
152
+{
153
+ uint32_t vlmax = vext_maxsz(desc) / esz;
154
+ uint32_t mlen = vext_mlen(desc);
155
+ uint32_t vm = vext_vm(desc);
156
+ uint32_t vl = env->vl;
157
+
158
+ switch (env->vxrm) {
159
+ case 0: /* rnu */
160
+ vext_vv_rm_1(vd, v0, vs1, vs2,
161
+ env, vl, vm, mlen, 0, fn);
162
+ break;
163
+ case 1: /* rne */
164
+ vext_vv_rm_1(vd, v0, vs1, vs2,
165
+ env, vl, vm, mlen, 1, fn);
166
+ break;
167
+ case 2: /* rdn */
168
+ vext_vv_rm_1(vd, v0, vs1, vs2,
169
+ env, vl, vm, mlen, 2, fn);
170
+ break;
171
+ default: /* rod */
172
+ vext_vv_rm_1(vd, v0, vs1, vs2,
173
+ env, vl, vm, mlen, 3, fn);
174
+ break;
175
+ }
83
+ }
176
+
84
+
177
+ clearfn(vd, vl, vl * dsz, vlmax * dsz);
85
+ satp_mode_supported_max =
178
+}
86
+ satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
179
+
87
180
+/* generate helpers for fixed point instructions with OPIVV format */
88
if (cpu->cfg.satp_mode.map == 0) {
181
+#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \
89
if (cpu->cfg.satp_mode.init == 0) {
182
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
183
+ CPURISCVState *env, uint32_t desc) \
184
+{ \
185
+ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
186
+ do_##NAME, CLEAR_FN); \
187
+}
188
+
189
+static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
190
+{
191
+ uint8_t res = a + b;
192
+ if (res < a) {
193
+ res = UINT8_MAX;
194
+ env->vxsat = 0x1;
195
+ }
196
+ return res;
197
+}
198
+
199
+static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
200
+ uint16_t b)
201
+{
202
+ uint16_t res = a + b;
203
+ if (res < a) {
204
+ res = UINT16_MAX;
205
+ env->vxsat = 0x1;
206
+ }
207
+ return res;
208
+}
209
+
210
+static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
211
+ uint32_t b)
212
+{
213
+ uint32_t res = a + b;
214
+ if (res < a) {
215
+ res = UINT32_MAX;
216
+ env->vxsat = 0x1;
217
+ }
218
+ return res;
219
+}
220
+
221
+static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
222
+ uint64_t b)
223
+{
224
+ uint64_t res = a + b;
225
+ if (res < a) {
226
+ res = UINT64_MAX;
227
+ env->vxsat = 0x1;
228
+ }
229
+ return res;
230
+}
231
+
232
+RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
233
+RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
234
+RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
235
+RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
236
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb)
237
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh)
238
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl)
239
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq)
240
+
241
+typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
242
+ CPURISCVState *env, int vxrm);
243
+
244
+#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
245
+static inline void \
246
+do_##NAME(void *vd, target_long s1, void *vs2, int i, \
247
+ CPURISCVState *env, int vxrm) \
248
+{ \
249
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
250
+ *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
251
+}
252
+
253
+static inline void
254
+vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
255
+ CPURISCVState *env,
256
+ uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
257
+ opivx2_rm_fn *fn)
258
+{
259
+ for (uint32_t i = 0; i < vl; i++) {
260
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
261
+ continue;
262
+ }
263
+ fn(vd, s1, vs2, i, env, vxrm);
264
+ }
265
+}
266
+
267
+static inline void
268
+vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
269
+ CPURISCVState *env,
270
+ uint32_t desc, uint32_t esz, uint32_t dsz,
271
+ opivx2_rm_fn *fn, clear_fn *clearfn)
272
+{
273
+ uint32_t vlmax = vext_maxsz(desc) / esz;
274
+ uint32_t mlen = vext_mlen(desc);
275
+ uint32_t vm = vext_vm(desc);
276
+ uint32_t vl = env->vl;
277
+
278
+ switch (env->vxrm) {
279
+ case 0: /* rnu */
280
+ vext_vx_rm_1(vd, v0, s1, vs2,
281
+ env, vl, vm, mlen, 0, fn);
282
+ break;
283
+ case 1: /* rne */
284
+ vext_vx_rm_1(vd, v0, s1, vs2,
285
+ env, vl, vm, mlen, 1, fn);
286
+ break;
287
+ case 2: /* rdn */
288
+ vext_vx_rm_1(vd, v0, s1, vs2,
289
+ env, vl, vm, mlen, 2, fn);
290
+ break;
291
+ default: /* rod */
292
+ vext_vx_rm_1(vd, v0, s1, vs2,
293
+ env, vl, vm, mlen, 3, fn);
294
+ break;
295
+ }
296
+
297
+ clearfn(vd, vl, vl * dsz, vlmax * dsz);
298
+}
299
+
300
+/* generate helpers for fixed point instructions with OPIVX format */
301
+#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \
302
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
303
+ void *vs2, CPURISCVState *env, uint32_t desc) \
304
+{ \
305
+ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
306
+ do_##NAME, CLEAR_FN); \
307
+}
308
+
309
+RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
310
+RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
311
+RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
312
+RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
313
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb)
314
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh)
315
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl)
316
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq)
317
+
318
+static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
319
+{
320
+ int8_t res = a + b;
321
+ if ((res ^ a) & (res ^ b) & INT8_MIN) {
322
+ res = a > 0 ? INT8_MAX : INT8_MIN;
323
+ env->vxsat = 0x1;
324
+ }
325
+ return res;
326
+}
327
+
328
+static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
329
+{
330
+ int16_t res = a + b;
331
+ if ((res ^ a) & (res ^ b) & INT16_MIN) {
332
+ res = a > 0 ? INT16_MAX : INT16_MIN;
333
+ env->vxsat = 0x1;
334
+ }
335
+ return res;
336
+}
337
+
338
+static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
339
+{
340
+ int32_t res = a + b;
341
+ if ((res ^ a) & (res ^ b) & INT32_MIN) {
342
+ res = a > 0 ? INT32_MAX : INT32_MIN;
343
+ env->vxsat = 0x1;
344
+ }
345
+ return res;
346
+}
347
+
348
+static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
349
+{
350
+ int64_t res = a + b;
351
+ if ((res ^ a) & (res ^ b) & INT64_MIN) {
352
+ res = a > 0 ? INT64_MAX : INT64_MIN;
353
+ env->vxsat = 0x1;
354
+ }
355
+ return res;
356
+}
357
+
358
+RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
359
+RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
360
+RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
361
+RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
362
+GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb)
363
+GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh)
364
+GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl)
365
+GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq)
366
+
367
+RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
368
+RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
369
+RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
370
+RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
371
+GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb)
372
+GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh)
373
+GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl)
374
+GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq)
375
+
376
+static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
377
+{
378
+ uint8_t res = a - b;
379
+ if (res > a) {
380
+ res = 0;
381
+ env->vxsat = 0x1;
382
+ }
383
+ return res;
384
+}
385
+
386
+static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
387
+ uint16_t b)
388
+{
389
+ uint16_t res = a - b;
390
+ if (res > a) {
391
+ res = 0;
392
+ env->vxsat = 0x1;
393
+ }
394
+ return res;
395
+}
396
+
397
+static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
398
+ uint32_t b)
399
+{
400
+ uint32_t res = a - b;
401
+ if (res > a) {
402
+ res = 0;
403
+ env->vxsat = 0x1;
404
+ }
405
+ return res;
406
+}
407
+
408
+static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
409
+ uint64_t b)
410
+{
411
+ uint64_t res = a - b;
412
+ if (res > a) {
413
+ res = 0;
414
+ env->vxsat = 0x1;
415
+ }
416
+ return res;
417
+}
418
+
419
+RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
420
+RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
421
+RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
422
+RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
423
+GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb)
424
+GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh)
425
+GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl)
426
+GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq)
427
+
428
+RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
429
+RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
430
+RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
431
+RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
432
+GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb)
433
+GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh)
434
+GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl)
435
+GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq)
436
+
437
+static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
438
+{
439
+ int8_t res = a - b;
440
+ if ((res ^ a) & (a ^ b) & INT8_MIN) {
441
+ res = a > 0 ? INT8_MAX : INT8_MIN;
442
+ env->vxsat = 0x1;
443
+ }
444
+ return res;
445
+}
446
+
447
+static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
448
+{
449
+ int16_t res = a - b;
450
+ if ((res ^ a) & (a ^ b) & INT16_MIN) {
451
+ res = a > 0 ? INT16_MAX : INT16_MIN;
452
+ env->vxsat = 0x1;
453
+ }
454
+ return res;
455
+}
456
+
457
+static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
458
+{
459
+ int32_t res = a - b;
460
+ if ((res ^ a) & (a ^ b) & INT32_MIN) {
461
+ res = a > 0 ? INT32_MAX : INT32_MIN;
462
+ env->vxsat = 0x1;
463
+ }
464
+ return res;
465
+}
466
+
467
+static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
468
+{
469
+ int64_t res = a - b;
470
+ if ((res ^ a) & (a ^ b) & INT64_MIN) {
471
+ res = a > 0 ? INT64_MAX : INT64_MIN;
472
+ env->vxsat = 0x1;
473
+ }
474
+ return res;
475
+}
476
+
477
+RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
478
+RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
479
+RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
480
+RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
481
+GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb)
482
+GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh)
483
+GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl)
484
+GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq)
485
+
486
+RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
487
+RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
488
+RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
489
+RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
490
+GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb)
491
+GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh)
492
+GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl)
493
+GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq)
494
--
90
--
495
2.27.0
91
2.41.0
496
497
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Vineet Gupta <vineetg@rivosinc.com>
2
2
3
vlen is the vector register length in bits.
3
zicond is now codegen supported in both llvm and gcc.
4
elen is the max element size in bits.
5
vext_spec is the vector specification version, default value is v0.7.1.
6
4
7
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
This change allows seamless enabling/testing of zicond in downstream
6
projects. e.g. currently riscv-gnu-toolchain parses elf attributes
7
to create a cmdline for qemu but falls short of enabling it because of
8
the "x-" prefix.
9
10
Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
11
Message-ID: <20230808181715.436395-1-vineetg@rivosinc.com>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
12
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20200623215920.2594-3-zhiwei_liu@c-sky.com
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
---
14
---
13
target/riscv/cpu.h | 5 +++++
15
target/riscv/cpu.c | 2 +-
14
target/riscv/cpu.c | 7 +++++++
16
1 file changed, 1 insertion(+), 1 deletion(-)
15
2 files changed, 12 insertions(+)
16
17
17
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/riscv/cpu.h
20
+++ b/target/riscv/cpu.h
21
@@ -XXX,XX +XXX,XX @@ enum {
22
#define PRIV_VERSION_1_10_0 0x00011000
23
#define PRIV_VERSION_1_11_0 0x00011100
24
25
+#define VEXT_VERSION_0_07_1 0x00000701
26
+
27
#define TRANSLATE_PMP_FAIL 2
28
#define TRANSLATE_FAIL 1
29
#define TRANSLATE_SUCCESS 0
30
@@ -XXX,XX +XXX,XX @@ struct CPURISCVState {
31
target_ulong guest_phys_fault_addr;
32
33
target_ulong priv_ver;
34
+ target_ulong vext_ver;
35
target_ulong misa;
36
target_ulong misa_mask;
37
38
@@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU {
39
40
char *priv_spec;
41
char *user_spec;
42
+ uint16_t vlen;
43
+ uint16_t elen;
44
bool mmu;
45
bool pmp;
46
} cfg;
47
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
18
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
48
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
49
--- a/target/riscv/cpu.c
20
--- a/target/riscv/cpu.c
50
+++ b/target/riscv/cpu.c
21
+++ b/target/riscv/cpu.c
51
@@ -XXX,XX +XXX,XX @@ static void set_priv_version(CPURISCVState *env, int priv_ver)
22
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
52
env->priv_ver = priv_ver;
23
DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false),
53
}
24
DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
54
25
DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
55
+static void set_vext_version(CPURISCVState *env, int vext_ver)
26
+ DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false),
56
+{
27
57
+ env->vext_ver = vext_ver;
28
/* Vendor-specific custom extensions */
58
+}
29
DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
59
+
30
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
60
static void set_feature(CPURISCVState *env, int feature)
31
DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
61
{
32
62
env->features |= (1ULL << feature);
33
/* These are experimental so mark with 'x-' */
63
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
34
- DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
64
CPURISCVState *env = &cpu->env;
35
65
RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
36
/* ePMP 0.9.3 */
66
int priv_version = PRIV_VERSION_1_11_0;
37
DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
67
+ int vext_version = VEXT_VERSION_0_07_1;
68
target_ulong target_misa = 0;
69
Error *local_err = NULL;
70
71
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
72
}
73
74
set_priv_version(env, priv_version);
75
+ set_vext_version(env, vext_version);
76
77
if (cpu->cfg.mmu) {
78
set_feature(env, RISCV_FEATURE_MMU);
79
--
38
--
80
2.27.0
39
2.41.0
81
82
diff view generated by jsdifflib
1
From: Jessica Clarke <jrtc27@jrtc27.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Claiming an interrupt and changing the source priority both potentially
3
A build with --enable-debug and without KVM will fail as follows:
4
affect whether an interrupt is pending, thus we must re-compute xEIP.
5
Note that we don't put the sifive_plic_update inside sifive_plic_claim
6
so that the logging of a claim (and the resulting IRQ) happens before
7
the state update, making the causal effect clear, and that we drop the
8
explicit call to sifive_plic_print_state when claiming since
9
sifive_plic_update already does that automatically at the end for us.
10
4
11
This can result not only in spurious interrupt storms if you fail to
5
/usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_riscv_virt.c.o: in function `virt_machine_init':
12
complete an IRQ before enabling interrupts (and no other actions occur
6
./qemu/build/../hw/riscv/virt.c:1465: undefined reference to `kvm_riscv_aia_create'
13
that result in a call to sifive_plic_update), but also more importantly
14
lost interrupts if a disabled interrupt is pending and then becomes
15
enabled.
16
7
17
Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com>
8
This happens because the code block with "if virt_use_kvm_aia(s)" isn't
18
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
being ignored by the debug build, resulting in an undefined reference to
19
Message-id: 20200618210649.22451-1-jrtc27@jrtc27.com
10
a KVM only function.
20
Message-Id: <20200618210649.22451-1-jrtc27@jrtc27.com>
11
12
Adding a 'kvm_enabled()' conditional together with virt_use_kvm_aia() will
13
make the compiler crop the kvm_riscv_aia_create() call entirely from a
14
non-KVM build. Note that adding the 'kvm_enabled()' conditional inside
15
virt_use_kvm_aia() won't fix the build because this function would need
16
to be inlined multiple times to make the compiler zero out the entire
17
block.
18
19
While we're at it, use kvm_enabled() in all instances where
20
virt_use_kvm_aia() is checked to allow the compiler to elide these other
21
kvm-only instances as well.
22
23
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
24
Fixes: dbdb99948e ("target/riscv: select KVM AIA in riscv virt machine")
25
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
26
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
27
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
28
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
29
Message-ID: <20230830133503.711138-2-dbarboza@ventanamicro.com>
21
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
30
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
22
---
31
---
23
hw/riscv/sifive_plic.c | 3 ++-
32
hw/riscv/virt.c | 6 +++---
24
1 file changed, 2 insertions(+), 1 deletion(-)
33
1 file changed, 3 insertions(+), 3 deletions(-)
25
34
26
diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c
35
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
27
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/riscv/sifive_plic.c
37
--- a/hw/riscv/virt.c
29
+++ b/hw/riscv/sifive_plic.c
38
+++ b/hw/riscv/virt.c
30
@@ -XXX,XX +XXX,XX @@ static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size)
39
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
31
plic->addr_config[addrid].hartid,
40
}
32
mode_to_char(plic->addr_config[addrid].mode),
41
33
value);
42
/* KVM AIA only has one APLIC instance */
34
- sifive_plic_print_state(plic);
43
- if (virt_use_kvm_aia(s)) {
35
}
44
+ if (kvm_enabled() && virt_use_kvm_aia(s)) {
36
+ sifive_plic_update(plic);
45
create_fdt_socket_aplic(s, memmap, 0,
37
return value;
46
msi_m_phandle, msi_s_phandle, phandle,
47
&intc_phandles[0], xplic_phandles,
48
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
49
50
g_free(intc_phandles);
51
52
- if (virt_use_kvm_aia(s)) {
53
+ if (kvm_enabled() && virt_use_kvm_aia(s)) {
54
*irq_mmio_phandle = xplic_phandles[0];
55
*irq_virtio_phandle = xplic_phandles[0];
56
*irq_pcie_phandle = xplic_phandles[0];
57
@@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine)
38
}
58
}
39
}
59
}
40
@@ -XXX,XX +XXX,XX @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value,
60
41
qemu_log("plic: write priority: irq=%d priority=%d\n",
61
- if (virt_use_kvm_aia(s)) {
42
irq, plic->source_priority[irq]);
62
+ if (kvm_enabled() && virt_use_kvm_aia(s)) {
43
}
63
kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
44
+ sifive_plic_update(plic);
64
VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
45
return;
65
memmap[VIRT_APLIC_S].base,
46
} else if (addr >= plic->pending_base && /* 1 bit per source */
47
addr < plic->pending_base + (plic->num_sources >> 3))
48
--
66
--
49
2.27.0
67
2.41.0
50
68
51
69
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Commit 6df0b37e2ab breaks a --enable-debug build in a non-KVM
4
environment with the following error:
5
6
/usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_intc_riscv_aplic.c.o: in function `riscv_kvm_aplic_request':
7
./qemu/build/../hw/intc/riscv_aplic.c:486: undefined reference to `kvm_set_irq'
8
collect2: error: ld returned 1 exit status
9
10
This happens because the debug build will poke into the
11
'if (is_kvm_aia(aplic->msimode))' block and fail to find a reference to
12
the KVM only function riscv_kvm_aplic_request().
13
14
There are multiple solutions to fix this. We'll go with the same
15
solution from the previous patch, i.e. add a kvm_enabled() conditional
16
to filter out the block. But there's a catch: riscv_kvm_aplic_request()
17
is a local function that would end up being unused if the compiler crops
18
the block, and this won't work. Quoting Richard Henderson's explanation
19
in [1]:
20
21
"(...) the compiler won't eliminate entire unused functions with -O0"
22
23
We'll solve it by moving riscv_kvm_aplic_request() to kvm.c and add its
24
declaration in kvm_riscv.h, where all other KVM specific public
25
functions are already declared. Other archs handle KVM specific code in
26
this manner and we expect to do the same from now on.
27
28
[1] https://lore.kernel.org/qemu-riscv/d2f1ad02-eb03-138f-9d08-db676deeed05@linaro.org/
29
30
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
31
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
32
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
33
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
34
Message-ID: <20230830133503.711138-3-dbarboza@ventanamicro.com>
6
Message-id: 20200623215920.2594-39-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
35
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
36
---
9
target/riscv/helper.h | 19 ++++++
37
target/riscv/kvm_riscv.h | 1 +
10
target/riscv/insn32.decode | 6 ++
38
hw/intc/riscv_aplic.c | 8 ++------
11
target/riscv/insn_trans/trans_rvv.inc.c | 8 +++
39
target/riscv/kvm.c | 5 +++++
12
target/riscv/vector_helper.c | 85 +++++++++++++++++++++++++
40
3 files changed, 8 insertions(+), 6 deletions(-)
13
4 files changed, 118 insertions(+)
14
41
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
42
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
16
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
44
--- a/target/riscv/kvm_riscv.h
18
+++ b/target/riscv/helper.h
45
+++ b/target/riscv/kvm_riscv.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32)
46
@@ -XXX,XX +XXX,XX @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
20
DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32)
47
uint64_t aia_irq_num, uint64_t aia_msi_num,
21
DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32)
48
uint64_t aplic_base, uint64_t imsic_base,
22
DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32)
49
uint64_t guest_num);
23
+
50
+void riscv_kvm_aplic_request(void *opaque, int irq, int level);
24
+DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
51
25
+DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
52
#endif
26
+DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
53
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
27
+DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32)
34
+DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32)
35
+DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32)
36
+DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32)
37
+DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32)
38
+DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32)
39
+DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32)
40
+DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32)
41
+DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32)
42
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
43
index XXXXXXX..XXXXXXX 100644
54
index XXXXXXX..XXXXXXX 100644
44
--- a/target/riscv/insn32.decode
55
--- a/hw/intc/riscv_aplic.c
45
+++ b/target/riscv/insn32.decode
56
+++ b/hw/intc/riscv_aplic.c
46
@@ -XXX,XX +XXX,XX @@ vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
57
@@ -XXX,XX +XXX,XX @@
47
vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
58
#include "target/riscv/cpu.h"
48
vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
59
#include "sysemu/sysemu.h"
49
vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
60
#include "sysemu/kvm.h"
50
+vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm
61
+#include "kvm_riscv.h"
51
+vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm
62
#include "migration/vmstate.h"
52
+vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
63
53
+vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
64
#define APLIC_MAX_IDC (1UL << 14)
54
+vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
65
@@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc)
55
+vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
66
return topi;
56
67
}
57
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
68
58
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
69
-static void riscv_kvm_aplic_request(void *opaque, int irq, int level)
59
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
70
-{
71
- kvm_set_irq(kvm_state, irq, !!level);
72
-}
73
-
74
static void riscv_aplic_request(void *opaque, int irq, int level)
75
{
76
bool update = false;
77
@@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
78
* have IRQ lines delegated by their parent APLIC.
79
*/
80
if (!aplic->parent) {
81
- if (is_kvm_aia(aplic->msimode)) {
82
+ if (kvm_enabled() && is_kvm_aia(aplic->msimode)) {
83
qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs);
84
} else {
85
qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
86
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
60
index XXXXXXX..XXXXXXX 100644
87
index XXXXXXX..XXXXXXX 100644
61
--- a/target/riscv/insn_trans/trans_rvv.inc.c
88
--- a/target/riscv/kvm.c
62
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
89
+++ b/target/riscv/kvm.c
63
@@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
90
@@ -XXX,XX +XXX,XX @@
64
GEN_OPFVV_TRANS(vfmax_vv, opfvv_check)
91
#include "sysemu/runstate.h"
65
GEN_OPFVF_TRANS(vfmin_vf, opfvf_check)
92
#include "hw/riscv/numa.h"
66
GEN_OPFVF_TRANS(vfmax_vf, opfvf_check)
93
67
+
94
+void riscv_kvm_aplic_request(void *opaque, int irq, int level)
68
+/* Vector Floating-Point Sign-Injection Instructions */
69
+GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check)
70
+GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check)
71
+GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check)
72
+GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check)
73
+GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check)
74
+GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)
75
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/vector_helper.c
78
+++ b/target/riscv/vector_helper.c
79
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
80
GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh)
81
GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl)
82
GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq)
83
+
84
+/* Vector Floating-Point Sign-Injection Instructions */
85
+static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
86
+{
95
+{
87
+ return deposit64(b, 0, 15, a);
96
+ kvm_set_irq(kvm_state, irq, !!level);
88
+}
97
+}
89
+
98
+
90
+static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
99
static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
91
+{
100
uint64_t idx)
92
+ return deposit64(b, 0, 31, a);
101
{
93
+}
94
+
95
+static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
96
+{
97
+ return deposit64(b, 0, 63, a);
98
+}
99
+
100
+RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
101
+RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
102
+RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
103
+GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh)
104
+GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl)
105
+GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq)
106
+RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
107
+RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
108
+RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
109
+GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh)
110
+GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl)
111
+GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq)
112
+
113
+static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
114
+{
115
+ return deposit64(~b, 0, 15, a);
116
+}
117
+
118
+static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
119
+{
120
+ return deposit64(~b, 0, 31, a);
121
+}
122
+
123
+static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
124
+{
125
+ return deposit64(~b, 0, 63, a);
126
+}
127
+
128
+RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
129
+RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
130
+RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
131
+GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh)
132
+GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl)
133
+GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq)
134
+RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
135
+RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
136
+RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
137
+GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh)
138
+GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl)
139
+GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq)
140
+
141
+static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
142
+{
143
+ return deposit64(b ^ a, 0, 15, a);
144
+}
145
+
146
+static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
147
+{
148
+ return deposit64(b ^ a, 0, 31, a);
149
+}
150
+
151
+static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
152
+{
153
+ return deposit64(b ^ a, 0, 63, a);
154
+}
155
+
156
+RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
157
+RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
158
+RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
159
+GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh)
160
+GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl)
161
+GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq)
162
+RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
163
+RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
164
+RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
165
+GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh)
166
+GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl)
167
+GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq)
168
--
102
--
169
2.27.0
103
2.41.0
170
104
171
105
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Robbin Ehn <rehn@rivosinc.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
This patch adds the new extensions in
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
linux 6.5 to the hwprobe syscall.
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
6
Message-id: 20200623215920.2594-37-zhiwei_liu@c-sky.com
6
And fixes the RVC check to OR with the correct value.
7
The previous variable contained 0, so the old code
8
happened to work.
9
10
Signed-off-by: Robbin Ehn <rehn@rivosinc.com>
11
Acked-by: Richard Henderson <richard.henderson@linaro.org>
12
Acked-by: Alistair Francis <alistair.francis@wdc.com>
13
Message-ID: <bc82203b72d7efb30f1b4a8f9eb3d94699799dc8.camel@rivosinc.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
14
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
15
---
9
target/riscv/helper.h | 4 +++
16
linux-user/syscall.c | 14 +++++++++++++-
10
target/riscv/insn32.decode | 3 ++
17
1 file changed, 13 insertions(+), 1 deletion(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 43 +++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 43 +++++++++++++++++++++++++
13
4 files changed, 93 insertions(+)
14
18
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
19
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
21
--- a/linux-user/syscall.c
18
+++ b/target/riscv/helper.h
22
+++ b/linux-user/syscall.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
23
@@ -XXX,XX +XXX,XX @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count)
20
DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
24
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
21
DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
25
#define RISCV_HWPROBE_IMA_FD (1 << 0)
22
DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
26
#define RISCV_HWPROBE_IMA_C (1 << 1)
23
+
27
+#define RISCV_HWPROBE_IMA_V (1 << 2)
24
+DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32)
28
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
25
+DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32)
29
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
26
+DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32)
30
+#define RISCV_HWPROBE_EXT_ZBS (1 << 5)
27
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
31
28
index XXXXXXX..XXXXXXX 100644
32
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
29
--- a/target/riscv/insn32.decode
33
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
30
+++ b/target/riscv/insn32.decode
34
@@ -XXX,XX +XXX,XX @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env,
31
@@ -XXX,XX +XXX,XX @@
35
riscv_has_ext(env, RVD) ?
32
&shift shamt rs1 rd
36
RISCV_HWPROBE_IMA_FD : 0;
33
&atomic aq rl rs2 rs1 rd
37
value |= riscv_has_ext(env, RVC) ?
34
&rmrr vm rd rs1 rs2
38
- RISCV_HWPROBE_IMA_C : pair->value;
35
+&rmr vm rd rs2
39
+ RISCV_HWPROBE_IMA_C : 0;
36
&rwdvm vm wd rd rs1 rs2
40
+ value |= riscv_has_ext(env, RVV) ?
37
&r2nfvm vm rd rs1 nf
41
+ RISCV_HWPROBE_IMA_V : 0;
38
&rnfvm vm rd rs1 rs2 nf
42
+ value |= cfg->ext_zba ?
39
@@ -XXX,XX +XXX,XX @@
43
+ RISCV_HWPROBE_EXT_ZBA : 0;
40
@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
44
+ value |= cfg->ext_zbb ?
41
@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
45
+ RISCV_HWPROBE_EXT_ZBB : 0;
42
@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
46
+ value |= cfg->ext_zbs ?
43
+@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
47
+ RISCV_HWPROBE_EXT_ZBS : 0;
44
@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
48
__put_user(value, &pair->value);
45
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
49
break;
46
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
50
case RISCV_HWPROBE_KEY_CPUPERF_0:
47
@@ -XXX,XX +XXX,XX @@ vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
48
vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
49
vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
50
vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
51
+vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
52
53
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
54
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
55
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/riscv/insn_trans/trans_rvv.inc.c
58
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
59
@@ -XXX,XX +XXX,XX @@ GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
60
GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
61
GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
62
GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
63
+
64
+/* Vector Floating-Point Square-Root Instruction */
65
+
66
+/*
67
+ * If the current SEW does not correspond to a supported IEEE floating-point
68
+ * type, an illegal instruction exception is raised
69
+ */
70
+static bool opfv_check(DisasContext *s, arg_rmr *a)
71
+{
72
+ return (vext_check_isa_ill(s) &&
73
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
74
+ vext_check_reg(s, a->rd, false) &&
75
+ vext_check_reg(s, a->rs2, false) &&
76
+ (s->sew != 0));
77
+}
78
+
79
+#define GEN_OPFV_TRANS(NAME, CHECK) \
80
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
81
+{ \
82
+ if (CHECK(s, a)) { \
83
+ uint32_t data = 0; \
84
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
85
+ gen_helper_##NAME##_h, \
86
+ gen_helper_##NAME##_w, \
87
+ gen_helper_##NAME##_d, \
88
+ }; \
89
+ TCGLabel *over = gen_new_label(); \
90
+ gen_set_rm(s, 7); \
91
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
92
+ \
93
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
94
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
95
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
96
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
97
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
98
+ s->vlen / 8, data, fns[s->sew - 1]); \
99
+ gen_set_label(over); \
100
+ return true; \
101
+ } \
102
+ return false; \
103
+}
104
+
105
+GEN_OPFV_TRANS(vfsqrt_v, opfv_check)
106
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/riscv/vector_helper.c
109
+++ b/target/riscv/vector_helper.c
110
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
111
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
112
GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl)
113
GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq)
114
+
115
+/* Vector Floating-Point Square-Root Instruction */
116
+/* (TD, T2, TX2) */
117
+#define OP_UU_H uint16_t, uint16_t, uint16_t
118
+#define OP_UU_W uint32_t, uint32_t, uint32_t
119
+#define OP_UU_D uint64_t, uint64_t, uint64_t
120
+
121
+#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
122
+static void do_##NAME(void *vd, void *vs2, int i, \
123
+ CPURISCVState *env) \
124
+{ \
125
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
126
+ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
127
+}
128
+
129
+#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \
130
+void HELPER(NAME)(void *vd, void *v0, void *vs2, \
131
+ CPURISCVState *env, uint32_t desc) \
132
+{ \
133
+ uint32_t vlmax = vext_maxsz(desc) / ESZ; \
134
+ uint32_t mlen = vext_mlen(desc); \
135
+ uint32_t vm = vext_vm(desc); \
136
+ uint32_t vl = env->vl; \
137
+ uint32_t i; \
138
+ \
139
+ if (vl == 0) { \
140
+ return; \
141
+ } \
142
+ for (i = 0; i < vl; i++) { \
143
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
144
+ continue; \
145
+ } \
146
+ do_##NAME(vd, vs2, i, env); \
147
+ } \
148
+ CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
149
+}
150
+
151
+RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
152
+RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
153
+RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
154
+GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh)
155
+GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl)
156
+GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq)
157
--
51
--
158
2.27.0
52
2.41.0
159
160
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Ard Biesheuvel <ardb@kernel.org>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to
4
implement the first half of the key schedule derivation. This does not
5
actually involve shifting rows, so clone the same value into all four
6
columns of the AES vector to counter that operation.
7
8
Cc: Richard Henderson <richard.henderson@linaro.org>
9
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Cc: Palmer Dabbelt <palmer@dabbelt.com>
11
Cc: Alistair Francis <alistair.francis@wdc.com>
12
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
13
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
15
Message-ID: <20230831154118.138727-1-ardb@kernel.org>
6
Message-id: 20200623215920.2594-38-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
16
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
17
---
9
target/riscv/helper.h | 13 ++++++++++++
18
target/riscv/crypto_helper.c | 17 +++++------------
10
target/riscv/insn32.decode | 4 ++++
19
1 file changed, 5 insertions(+), 12 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 6 ++++++
12
target/riscv/vector_helper.c | 27 +++++++++++++++++++++++++
13
4 files changed, 50 insertions(+)
14
20
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
21
diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
16
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
23
--- a/target/riscv/crypto_helper.c
18
+++ b/target/riscv/helper.h
24
+++ b/target/riscv/crypto_helper.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
25
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum)
20
DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32)
26
21
DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32)
27
uint8_t enc_rnum = rnum;
22
DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32)
28
uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF;
23
+
29
- uint8_t rcon_ = 0;
24
+DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
- target_ulong result;
25
+DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+ AESState t, rc = {};
26
+DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
27
+DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
33
if (enc_rnum != 0xA) {
28
+DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
34
temp = ror32(temp, 8); /* Rotate right by 8 */
29
+DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
35
- rcon_ = round_consts[enc_rnum];
30
+DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32)
36
+ rc.w[0] = rc.w[1] = round_consts[enc_rnum];
31
+DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32)
37
}
32
+DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32)
38
33
+DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32)
39
- temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
34
+DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32)
40
- ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
35
+DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32)
41
- ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
36
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
42
- ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0);
37
index XXXXXXX..XXXXXXX 100644
43
+ t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp;
38
--- a/target/riscv/insn32.decode
44
+ aesenc_SB_SR_AK(&t, &t, &rc, false);
39
+++ b/target/riscv/insn32.decode
45
40
@@ -XXX,XX +XXX,XX @@ vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
46
- temp ^= rcon_;
41
vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
47
-
42
vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
48
- result = ((uint64_t)temp << 32) | temp;
43
vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
49
-
44
+vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
50
- return result;
45
+vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
51
+ return t.d[0];
46
+vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
47
+vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm
48
49
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
50
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
51
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/riscv/insn_trans/trans_rvv.inc.c
54
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
55
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
56
}
52
}
57
53
58
GEN_OPFV_TRANS(vfsqrt_v, opfv_check)
54
target_ulong HELPER(aes64im)(target_ulong rs1)
59
+
60
+/* Vector Floating-Point MIN/MAX Instructions */
61
+GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
62
+GEN_OPFVV_TRANS(vfmax_vv, opfvv_check)
63
+GEN_OPFVF_TRANS(vfmin_vf, opfvf_check)
64
+GEN_OPFVF_TRANS(vfmax_vf, opfvf_check)
65
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/target/riscv/vector_helper.c
68
+++ b/target/riscv/vector_helper.c
69
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
70
GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh)
71
GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl)
72
GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq)
73
+
74
+/* Vector Floating-Point MIN/MAX Instructions */
75
+RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
76
+RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
77
+RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
78
+GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh)
79
+GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl)
80
+GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq)
81
+RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
82
+RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
83
+RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
84
+GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh)
85
+GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl)
86
+GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq)
87
+
88
+RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
89
+RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
90
+RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
91
+GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh)
92
+GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl)
93
+GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq)
94
+RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
95
+RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
96
+RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
97
+GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh)
98
+GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl)
99
+GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq)
100
--
55
--
101
2.27.0
56
2.41.0
102
57
103
58
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
riscv_trigger_init() had been called on reset events that can happen
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
several times for a CPU and it allocated timers for itrigger. If old
5
Message-id: 20200623215920.2594-53-zhiwei_liu@c-sky.com
5
timers were present, they were simply overwritten by the new timers,
6
resulting in a memory leak.
7
8
Divide riscv_trigger_init() into two functions, namely
9
riscv_trigger_realize() and riscv_trigger_reset_hold(), and call them at
10
the appropriate times. The timer allocation will happen only once for a
11
CPU in riscv_trigger_realize().
12
13
Fixes: 5a4ae64cac ("target/riscv: Add itrigger support when icount is enabled")
14
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
17
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
18
Message-ID: <20230818034059.9146-1-akihiko.odaki@daynix.com>
6
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
19
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
7
---
20
---
8
target/riscv/helper.h | 4 ++
21
target/riscv/debug.h | 3 ++-
9
target/riscv/insn32.decode | 3 ++
22
target/riscv/cpu.c | 8 +++++++-
10
target/riscv/insn_trans/trans_rvv.inc.c | 28 +++++++++++
23
target/riscv/debug.c | 15 ++++++++++++---
11
target/riscv/vector_helper.c | 63 +++++++++++++++++++++++++
24
3 files changed, 21 insertions(+), 5 deletions(-)
12
4 files changed, 98 insertions(+)
13
25
14
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
26
diff --git a/target/riscv/debug.h b/target/riscv/debug.h
15
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
16
--- a/target/riscv/helper.h
28
--- a/target/riscv/debug.h
17
+++ b/target/riscv/helper.h
29
+++ b/target/riscv/debug.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
30
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_debug_excp_handler(CPUState *cs);
19
DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32)
31
bool riscv_cpu_debug_check_breakpoint(CPUState *cs);
20
32
bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
21
DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32)
33
34
-void riscv_trigger_init(CPURISCVState *env);
35
+void riscv_trigger_realize(CPURISCVState *env);
36
+void riscv_trigger_reset_hold(CPURISCVState *env);
37
38
bool riscv_itrigger_enabled(CPURISCVState *env);
39
void riscv_itrigger_update_priv(CPURISCVState *env);
40
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/cpu.c
43
+++ b/target/riscv/cpu.c
44
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj)
45
46
#ifndef CONFIG_USER_ONLY
47
if (cpu->cfg.debug) {
48
- riscv_trigger_init(env);
49
+ riscv_trigger_reset_hold(env);
50
}
51
52
if (kvm_enabled()) {
53
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
54
55
riscv_cpu_register_gdb_regs_for_features(cs);
56
57
+#ifndef CONFIG_USER_ONLY
58
+ if (cpu->cfg.debug) {
59
+ riscv_trigger_realize(&cpu->env);
60
+ }
61
+#endif
22
+
62
+
23
+DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32)
63
qemu_init_vcpu(cs);
24
+DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32)
64
cpu_reset(cs);
25
+DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32)
65
26
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
66
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
27
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
28
--- a/target/riscv/insn32.decode
68
--- a/target/riscv/debug.c
29
+++ b/target/riscv/insn32.decode
69
+++ b/target/riscv/debug.c
30
@@ -XXX,XX +XXX,XX @@ vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
70
@@ -XXX,XX +XXX,XX @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
31
vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
32
vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
33
vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
34
+vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
35
+vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
36
+vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
37
38
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
39
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
40
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn_trans/trans_rvv.inc.c
43
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
44
@@ -XXX,XX +XXX,XX @@ static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a)
45
}
46
return false;
71
return false;
47
}
72
}
73
74
-void riscv_trigger_init(CPURISCVState *env)
75
+void riscv_trigger_realize(CPURISCVState *env)
76
+{
77
+ int i;
48
+
78
+
49
+/* vmsbf.m set-before-first mask bit */
79
+ for (i = 0; i < RV_MAX_TRIGGERS; i++) {
50
+/* vmsif.m set-include-first mask bit */
80
+ env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
51
+/* vmsof.m set-only-first mask bit */
81
+ riscv_itrigger_timer_cb, env);
52
+#define GEN_M_TRANS(NAME) \
53
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
54
+{ \
55
+ if (vext_check_isa_ill(s)) { \
56
+ uint32_t data = 0; \
57
+ gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \
58
+ TCGLabel *over = gen_new_label(); \
59
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
60
+ \
61
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
62
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
63
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
64
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
65
+ vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \
66
+ cpu_env, 0, s->vlen / 8, data, fn); \
67
+ gen_set_label(over); \
68
+ return true; \
69
+ } \
70
+ return false; \
71
+}
72
+
73
+GEN_M_TRANS(vmsbf_m)
74
+GEN_M_TRANS(vmsif_m)
75
+GEN_M_TRANS(vmsof_m)
76
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/target/riscv/vector_helper.c
79
+++ b/target/riscv/vector_helper.c
80
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
81
}
82
return -1LL;
83
}
84
+
85
+enum set_mask_type {
86
+ ONLY_FIRST = 1,
87
+ INCLUDE_FIRST,
88
+ BEFORE_FIRST,
89
+};
90
+
91
+static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
92
+ uint32_t desc, enum set_mask_type type)
93
+{
94
+ uint32_t mlen = vext_mlen(desc);
95
+ uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;
96
+ uint32_t vm = vext_vm(desc);
97
+ uint32_t vl = env->vl;
98
+ int i;
99
+ bool first_mask_bit = false;
100
+
101
+ for (i = 0; i < vl; i++) {
102
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
103
+ continue;
104
+ }
105
+ /* write a zero to all following active elements */
106
+ if (first_mask_bit) {
107
+ vext_set_elem_mask(vd, mlen, i, 0);
108
+ continue;
109
+ }
110
+ if (vext_elem_mask(vs2, mlen, i)) {
111
+ first_mask_bit = true;
112
+ if (type == BEFORE_FIRST) {
113
+ vext_set_elem_mask(vd, mlen, i, 0);
114
+ } else {
115
+ vext_set_elem_mask(vd, mlen, i, 1);
116
+ }
117
+ } else {
118
+ if (type == ONLY_FIRST) {
119
+ vext_set_elem_mask(vd, mlen, i, 0);
120
+ } else {
121
+ vext_set_elem_mask(vd, mlen, i, 1);
122
+ }
123
+ }
124
+ }
125
+ for (; i < vlmax; i++) {
126
+ vext_set_elem_mask(vd, mlen, i, 0);
127
+ }
82
+ }
128
+}
83
+}
129
+
84
+
130
+void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
85
+void riscv_trigger_reset_hold(CPURISCVState *env)
131
+ uint32_t desc)
86
{
132
+{
87
target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0);
133
+ vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
88
int i;
134
+}
89
@@ -XXX,XX +XXX,XX @@ void riscv_trigger_init(CPURISCVState *env)
135
+
90
env->tdata3[i] = 0;
136
+void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
91
env->cpu_breakpoint[i] = NULL;
137
+ uint32_t desc)
92
env->cpu_watchpoint[i] = NULL;
138
+{
93
- env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
139
+ vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
94
- riscv_itrigger_timer_cb, env);
140
+}
95
+ timer_del(env->itrigger_timer[i]);
141
+
96
}
142
+void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
97
}
143
+ uint32_t desc)
144
+{
145
+ vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
146
+}
147
--
98
--
148
2.27.0
99
2.41.0
149
100
150
101
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Leon Schuermann <leons@opentitan.org>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
When the rule-lock bypass (RLB) bit is set in the mseccfg CSR, the PMP
4
configuration lock bits must not apply. While this behavior is
5
implemented for the pmpcfgX CSRs, this bit is not respected for
6
changes to the pmpaddrX CSRs. This patch ensures that pmpaddrX CSR
7
writes work even on locked regions when the global rule-lock bypass is
8
enabled.
9
10
Signed-off-by: Leon Schuermann <leons@opentitan.org>
11
Reviewed-by: Mayuresh Chitale <mchitale@ventanamicro.com>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
12
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-ID: <20230829215046.1430463-1-leon@is.currently.online>
6
Message-id: 20200623215920.2594-26-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
14
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
15
---
9
target/riscv/helper.h | 17 ++++
16
target/riscv/pmp.c | 4 ++++
10
target/riscv/insn32.decode | 5 ++
17
1 file changed, 4 insertions(+)
11
target/riscv/insn_trans/trans_rvv.inc.c | 7 ++
12
target/riscv/vector_helper.c | 100 ++++++++++++++++++++++++
13
4 files changed, 129 insertions(+)
14
18
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
19
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
21
--- a/target/riscv/pmp.c
18
+++ b/target/riscv/helper.h
22
+++ b/target/riscv/pmp.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
23
@@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_get_a_field(uint8_t cfg)
20
DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
24
*/
21
DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
25
static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index)
22
DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
26
{
23
+
27
+ /* mseccfg.RLB is set */
24
+DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+ if (MSECCFG_RLB_ISSET(env)) {
25
+DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
40
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn32.decode
43
+++ b/target/riscv/insn32.decode
44
@@ -XXX,XX +XXX,XX @@ vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
45
vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
46
vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
47
vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
48
+vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
49
+vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
50
+vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
51
+vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
52
+vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
53
54
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
55
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
56
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn_trans/trans_rvv.inc.c
59
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
60
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssubu_vx, opivx_check)
61
GEN_OPIVX_TRANS(vssub_vx, opivx_check)
62
GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check)
63
GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check)
64
+
65
+/* Vector Single-Width Averaging Add and Subtract */
66
+GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
67
+GEN_OPIVV_TRANS(vasub_vv, opivv_check)
68
+GEN_OPIVX_TRANS(vaadd_vx, opivx_check)
69
+GEN_OPIVX_TRANS(vasub_vx, opivx_check)
70
+GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
71
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/riscv/vector_helper.c
74
+++ b/target/riscv/vector_helper.c
75
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb)
76
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh)
77
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl)
78
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq)
79
+
80
+/* Vector Single-Width Averaging Add and Subtract */
81
+static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
82
+{
83
+ uint8_t d = extract64(v, shift, 1);
84
+ uint8_t d1;
85
+ uint64_t D1, D2;
86
+
87
+ if (shift == 0 || shift > 64) {
88
+ return 0;
29
+ return 0;
89
+ }
30
+ }
90
+
31
91
+ d1 = extract64(v, shift - 1, 1);
32
if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) {
92
+ D1 = extract64(v, 0, shift);
33
return 1;
93
+ if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
94
+ return d1;
95
+ } else if (vxrm == 1) { /* round-to-nearest-even */
96
+ if (shift > 1) {
97
+ D2 = extract64(v, 0, shift - 1);
98
+ return d1 & ((D2 != 0) | d);
99
+ } else {
100
+ return d1 & d;
101
+ }
102
+ } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
103
+ return !d & (D1 != 0);
104
+ }
105
+ return 0; /* round-down (truncate) */
106
+}
107
+
108
+static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
109
+{
110
+ int64_t res = (int64_t)a + b;
111
+ uint8_t round = get_round(vxrm, res, 1);
112
+
113
+ return (res >> 1) + round;
114
+}
115
+
116
+static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
117
+{
118
+ int64_t res = a + b;
119
+ uint8_t round = get_round(vxrm, res, 1);
120
+ int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
121
+
122
+ /* With signed overflow, bit 64 is inverse of bit 63. */
123
+ return ((res >> 1) ^ over) + round;
124
+}
125
+
126
+RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
127
+RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
128
+RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
129
+RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
130
+GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb)
131
+GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh)
132
+GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl)
133
+GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq)
134
+
135
+RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
136
+RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
137
+RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
138
+RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
139
+GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb)
140
+GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh)
141
+GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl)
142
+GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq)
143
+
144
+static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
145
+{
146
+ int64_t res = (int64_t)a - b;
147
+ uint8_t round = get_round(vxrm, res, 1);
148
+
149
+ return (res >> 1) + round;
150
+}
151
+
152
+static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
153
+{
154
+ int64_t res = (int64_t)a - b;
155
+ uint8_t round = get_round(vxrm, res, 1);
156
+ int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
157
+
158
+ /* With signed overflow, bit 64 is inverse of bit 63. */
159
+ return ((res >> 1) ^ over) + round;
160
+}
161
+
162
+RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
163
+RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
164
+RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
165
+RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
166
+GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb)
167
+GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh)
168
+GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl)
169
+GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq)
170
+
171
+RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
172
+RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
173
+RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
174
+RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
175
+GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb)
176
+GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh)
177
+GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl)
178
+GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq)
179
--
34
--
180
2.27.0
35
2.41.0
181
182
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Tommy Wu <tommy.wu@sifive.com>
2
2
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
3
According to the new spec, when vsiselect has a reserved value, attempts
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
from M-mode or HS-mode to access vsireg, or from VS-mode to access
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
sireg, should preferably raise an illegal instruction exception.
6
Message-id: 20200623215920.2594-36-zhiwei_liu@c-sky.com
6
7
Signed-off-by: Tommy Wu <tommy.wu@sifive.com>
8
Reviewed-by: Frank Chang <frank.chang@sifive.com>
9
Message-ID: <20230816061647.600672-1-tommy.wu@sifive.com>
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
11
---
9
target/riscv/helper.h | 17 +++++
12
target/riscv/csr.c | 7 +++++--
10
target/riscv/insn32.decode | 8 +++
13
1 file changed, 5 insertions(+), 2 deletions(-)
11
target/riscv/insn_trans/trans_rvv.inc.c | 10 +++
12
target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++
13
4 files changed, 126 insertions(+)
14
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
15
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
17
--- a/target/riscv/csr.c
18
+++ b/target/riscv/helper.h
18
+++ b/target/riscv/csr.c
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
19
@@ -XXX,XX +XXX,XX @@ static int rmw_iprio(target_ulong xlen,
20
DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
20
static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
21
DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
21
target_ulong new_val, target_ulong wr_mask)
22
DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
22
{
23
+
23
- bool virt;
24
+DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
24
+ bool virt, isel_reserved;
25
+DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
25
uint8_t *iprio;
26
+DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
int ret = -EINVAL;
27
+DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
target_ulong priv, isel, vgein;
28
+DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
@@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
29
+DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
30
+DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
/* Decode register details from CSR number */
31
+DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
virt = false;
32
+DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
32
+ isel_reserved = false;
33
+DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
33
switch (csrno) {
34
+DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
34
case CSR_MIREG:
35
+DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
35
iprio = env->miprio;
36
+DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
36
@@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
37
+DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
37
riscv_cpu_mxl_bits(env)),
38
+DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
38
val, new_val, wr_mask);
39
+DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
39
}
40
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
40
+ } else {
41
index XXXXXXX..XXXXXXX 100644
41
+ isel_reserved = true;
42
--- a/target/riscv/insn32.decode
42
}
43
+++ b/target/riscv/insn32.decode
43
44
@@ -XXX,XX +XXX,XX @@ vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
44
done:
45
vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
45
if (ret) {
46
vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
46
- return (env->virt_enabled && virt) ?
47
vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
47
+ return (env->virt_enabled && virt && !isel_reserved) ?
48
+vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm
48
RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
49
+vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm
49
}
50
+vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm
50
return RISCV_EXCP_NONE;
51
+vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm
52
+vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
53
+vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
54
+vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
55
+vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
56
57
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
58
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
59
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/riscv/insn_trans/trans_rvv.inc.c
62
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
63
@@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check)
64
GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check)
65
GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
66
GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)
67
+
68
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
69
+GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
70
+GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
71
+GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
72
+GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
73
+GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
74
+GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
75
+GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
76
+GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
77
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/riscv/vector_helper.c
80
+++ b/target/riscv/vector_helper.c
81
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
82
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh)
83
GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl)
84
GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq)
85
+
86
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
87
+static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
88
+{
89
+ return float32_muladd(float16_to_float32(a, true, s),
90
+ float16_to_float32(b, true, s), d, 0, s);
91
+}
92
+
93
+static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
94
+{
95
+ return float64_muladd(float32_to_float64(a, s),
96
+ float32_to_float64(b, s), d, 0, s);
97
+}
98
+
99
+RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
100
+RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
101
+GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl)
102
+GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq)
103
+RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
104
+RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
105
+GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl)
106
+GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq)
107
+
108
+static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
109
+{
110
+ return float32_muladd(float16_to_float32(a, true, s),
111
+ float16_to_float32(b, true, s), d,
112
+ float_muladd_negate_c | float_muladd_negate_product, s);
113
+}
114
+
115
+static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
116
+{
117
+ return float64_muladd(float32_to_float64(a, s),
118
+ float32_to_float64(b, s), d,
119
+ float_muladd_negate_c | float_muladd_negate_product, s);
120
+}
121
+
122
+RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
123
+RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
124
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl)
125
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq)
126
+RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
127
+RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
128
+GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl)
129
+GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq)
130
+
131
+static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
132
+{
133
+ return float32_muladd(float16_to_float32(a, true, s),
134
+ float16_to_float32(b, true, s), d,
135
+ float_muladd_negate_c, s);
136
+}
137
+
138
+static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
139
+{
140
+ return float64_muladd(float32_to_float64(a, s),
141
+ float32_to_float64(b, s), d,
142
+ float_muladd_negate_c, s);
143
+}
144
+
145
+RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
146
+RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
147
+GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl)
148
+GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq)
149
+RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
150
+RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
151
+GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl)
152
+GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq)
153
+
154
+static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
155
+{
156
+ return float32_muladd(float16_to_float32(a, true, s),
157
+ float16_to_float32(b, true, s), d,
158
+ float_muladd_negate_product, s);
159
+}
160
+
161
+static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
162
+{
163
+ return float64_muladd(float32_to_float64(a, s),
164
+ float32_to_float64(b, s), d,
165
+ float_muladd_negate_product, s);
166
+}
167
+
168
+RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
169
+RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
170
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl)
171
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq)
172
+RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
173
+RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
174
+GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl)
175
+GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq)
176
--
51
--
177
2.27.0
52
2.41.0
178
179
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
From: Nikita Shubin <n.shubin@yadro.com>
2
2
3
The 32 vector registers will be viewed as a continuous memory block.
3
As per ISA:
4
It avoids the conversion between element index and (regno, offset).
5
Thus elements can be directly accessed by offset from the first vector
6
base address.
7
4
8
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
"For CSRRWI, if rd=x0, then the instruction shall not read the CSR and
9
Acked-by: Alistair Francis <alistair.francis@wdc.com>
6
shall not cause any of the side effects that might occur on a CSR read."
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
11
Message-id: 20200623215920.2594-2-zhiwei_liu@c-sky.com
8
trans_csrrwi() and trans_csrrw() call do_csrw() if rd=x0, do_csrw() calls
9
riscv_csrrw_do64(), via helper_csrw() passing NULL as *ret_value.
10
11
Signed-off-by: Nikita Shubin <n.shubin@yadro.com>
12
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
13
Message-ID: <20230808090914.17634-1-nikita.shubin@maquefel.me>
12
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
14
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
13
---
15
---
14
target/riscv/cpu.h | 12 ++++++++++++
16
target/riscv/csr.c | 24 +++++++++++++++---------
15
target/riscv/translate.c | 3 ++-
17
1 file changed, 15 insertions(+), 9 deletions(-)
16
2 files changed, 14 insertions(+), 1 deletion(-)
17
18
18
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
19
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
19
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
20
--- a/target/riscv/cpu.h
21
--- a/target/riscv/csr.c
21
+++ b/target/riscv/cpu.h
22
+++ b/target/riscv/csr.c
22
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno,
23
#define RVA RV('A')
24
target_ulong write_mask)
24
#define RVF RV('F')
25
{
25
#define RVD RV('D')
26
RISCVException ret;
26
+#define RVV RV('V')
27
- target_ulong old_value;
27
#define RVC RV('C')
28
+ target_ulong old_value = 0;
28
#define RVS RV('S')
29
29
#define RVU RV('U')
30
/* execute combined read/write operation if it exists */
30
@@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState;
31
if (csr_ops[csrno].op) {
31
32
return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask);
32
#include "pmp.h"
33
34
+#define RV_VLEN_MAX 512
35
+
36
struct CPURISCVState {
37
target_ulong gpr[32];
38
uint64_t fpr[32]; /* assume both F and D extensions */
39
+
40
+ /* vector coprocessor state. */
41
+ uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
42
+ target_ulong vxrm;
43
+ target_ulong vxsat;
44
+ target_ulong vl;
45
+ target_ulong vstart;
46
+ target_ulong vtype;
47
+
48
target_ulong pc;
49
target_ulong load_res;
50
target_ulong load_val;
51
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/riscv/translate.c
54
+++ b/target/riscv/translate.c
55
@@ -XXX,XX +XXX,XX @@
56
#include "instmap.h"
57
58
/* global register indices */
59
-static TCGv cpu_gpr[32], cpu_pc;
60
+static TCGv cpu_gpr[32], cpu_pc, cpu_vl;
61
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
62
static TCGv load_res;
63
static TCGv load_val;
64
@@ -XXX,XX +XXX,XX @@ void riscv_translate_init(void)
65
}
33
}
66
34
67
cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, pc), "pc");
35
- /* if no accessor exists then return failure */
68
+ cpu_vl = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vl), "vl");
36
- if (!csr_ops[csrno].read) {
69
load_res = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_res),
37
- return RISCV_EXCP_ILLEGAL_INST;
70
"load_res");
38
- }
71
load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
39
- /* read old value */
40
- ret = csr_ops[csrno].read(env, csrno, &old_value);
41
- if (ret != RISCV_EXCP_NONE) {
42
- return ret;
43
+ /*
44
+ * ret_value == NULL means that rd=x0 and we're coming from helper_csrw()
45
+ * and we must not trigger the side effects caused by CSR reads.
46
+ */
47
+ if (ret_value) {
48
+ /* if no accessor exists then return failure */
49
+ if (!csr_ops[csrno].read) {
50
+ return RISCV_EXCP_ILLEGAL_INST;
51
+ }
52
+ /* read old value */
53
+ ret = csr_ops[csrno].read(env, csrno, &old_value);
54
+ if (ret != RISCV_EXCP_NONE) {
55
+ return ret;
56
+ }
57
}
58
59
/* write value if writable and write mask set, otherwise drop writes */
72
--
60
--
73
2.27.0
61
2.41.0
74
75
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
The internals.h keeps things that are not relevant to the actual architecture,
4
only to the implementation, separate.
5
6
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200623215920.2594-6-zhiwei_liu@c-sky.com
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
---
12
target/riscv/internals.h | 24 ++++++++++++++++++++++++
13
1 file changed, 24 insertions(+)
14
create mode 100644 target/riscv/internals.h
15
16
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
17
new file mode 100644
18
index XXXXXXX..XXXXXXX
19
--- /dev/null
20
+++ b/target/riscv/internals.h
21
@@ -XXX,XX +XXX,XX @@
22
+/*
23
+ * QEMU RISC-V CPU -- internal functions and types
24
+ *
25
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
26
+ *
27
+ * This program is free software; you can redistribute it and/or modify it
28
+ * under the terms and conditions of the GNU General Public License,
29
+ * version 2 or later, as published by the Free Software Foundation.
30
+ *
31
+ * This program is distributed in the hope it will be useful, but WITHOUT
32
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
33
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
34
+ * more details.
35
+ *
36
+ * You should have received a copy of the GNU General Public License along with
37
+ * this program. If not, see <http://www.gnu.org/licenses/>.
38
+ */
39
+
40
+#ifndef RISCV_CPU_INTERNALS_H
41
+#define RISCV_CPU_INTERNALS_H
42
+
43
+#include "hw/registerfields.h"
44
+
45
+#endif
46
--
47
2.27.0
48
49
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Vector indexed operations add the contents of each element of the
4
vector offset operand specified by vs2 to the base effective address
5
to give the effective address of each element.
6
7
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20200623215920.2594-8-zhiwei_liu@c-sky.com
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
12
---
13
target/riscv/helper.h | 35 +++++++
14
target/riscv/insn32.decode | 13 +++
15
target/riscv/insn_trans/trans_rvv.inc.c | 129 ++++++++++++++++++++++++
16
target/riscv/vector_helper.c | 116 +++++++++++++++++++++
17
4 files changed, 293 insertions(+)
18
19
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/riscv/helper.h
22
+++ b/target/riscv/helper.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32)
24
DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32)
25
DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32)
26
DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32)
27
+DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32)
28
+DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32)
29
+DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32)
30
+DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32)
31
+DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32)
32
+DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vlxe_v_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32)
56
+DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
57
+DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
58
+DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32)
59
+DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32)
60
+DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32)
61
+DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32)
62
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/riscv/insn32.decode
65
+++ b/target/riscv/insn32.decode
66
@@ -XXX,XX +XXX,XX @@ vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
67
vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
68
vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
69
70
+vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
71
+vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
72
+vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
73
+vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
74
+vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
75
+vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
76
+vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
77
+# Vector ordered-indexed and unordered-indexed store insns.
78
+vsxb_v ... -11 . ..... ..... 000 ..... 0100111 @r_nfvm
79
+vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm
80
+vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm
81
+vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm
82
+
83
# *** new major opcode OP-V ***
84
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
85
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
86
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/target/riscv/insn_trans/trans_rvv.inc.c
89
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
90
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check)
91
GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check)
92
GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check)
93
GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check)
94
+
95
+/*
96
+ *** index load and store
97
+ */
98
+typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
99
+ TCGv_ptr, TCGv_env, TCGv_i32);
100
+
101
+static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
102
+ uint32_t data, gen_helper_ldst_index *fn,
103
+ DisasContext *s)
104
+{
105
+ TCGv_ptr dest, mask, index;
106
+ TCGv base;
107
+ TCGv_i32 desc;
108
+
109
+ TCGLabel *over = gen_new_label();
110
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
111
+
112
+ dest = tcg_temp_new_ptr();
113
+ mask = tcg_temp_new_ptr();
114
+ index = tcg_temp_new_ptr();
115
+ base = tcg_temp_new();
116
+ desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
117
+
118
+ gen_get_gpr(base, rs1);
119
+ tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
120
+ tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2));
121
+ tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
122
+
123
+ fn(dest, mask, base, index, cpu_env, desc);
124
+
125
+ tcg_temp_free_ptr(dest);
126
+ tcg_temp_free_ptr(mask);
127
+ tcg_temp_free_ptr(index);
128
+ tcg_temp_free(base);
129
+ tcg_temp_free_i32(desc);
130
+ gen_set_label(over);
131
+ return true;
132
+}
133
+
134
+static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
135
+{
136
+ uint32_t data = 0;
137
+ gen_helper_ldst_index *fn;
138
+ static gen_helper_ldst_index * const fns[7][4] = {
139
+ { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h,
140
+ gen_helper_vlxb_v_w, gen_helper_vlxb_v_d },
141
+ { NULL, gen_helper_vlxh_v_h,
142
+ gen_helper_vlxh_v_w, gen_helper_vlxh_v_d },
143
+ { NULL, NULL,
144
+ gen_helper_vlxw_v_w, gen_helper_vlxw_v_d },
145
+ { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h,
146
+ gen_helper_vlxe_v_w, gen_helper_vlxe_v_d },
147
+ { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h,
148
+ gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d },
149
+ { NULL, gen_helper_vlxhu_v_h,
150
+ gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d },
151
+ { NULL, NULL,
152
+ gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d },
153
+ };
154
+
155
+ fn = fns[seq][s->sew];
156
+ if (fn == NULL) {
157
+ return false;
158
+ }
159
+
160
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
161
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
162
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
163
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
164
+ return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
165
+}
166
+
167
+static bool ld_index_check(DisasContext *s, arg_rnfvm* a)
168
+{
169
+ return (vext_check_isa_ill(s) &&
170
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
171
+ vext_check_reg(s, a->rd, false) &&
172
+ vext_check_reg(s, a->rs2, false) &&
173
+ vext_check_nf(s, a->nf));
174
+}
175
+
176
+GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check)
177
+GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check)
178
+GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check)
179
+GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check)
180
+GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check)
181
+GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check)
182
+GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check)
183
+
184
+static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
185
+{
186
+ uint32_t data = 0;
187
+ gen_helper_ldst_index *fn;
188
+ static gen_helper_ldst_index * const fns[4][4] = {
189
+ { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h,
190
+ gen_helper_vsxb_v_w, gen_helper_vsxb_v_d },
191
+ { NULL, gen_helper_vsxh_v_h,
192
+ gen_helper_vsxh_v_w, gen_helper_vsxh_v_d },
193
+ { NULL, NULL,
194
+ gen_helper_vsxw_v_w, gen_helper_vsxw_v_d },
195
+ { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h,
196
+ gen_helper_vsxe_v_w, gen_helper_vsxe_v_d }
197
+ };
198
+
199
+ fn = fns[seq][s->sew];
200
+ if (fn == NULL) {
201
+ return false;
202
+ }
203
+
204
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
205
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
206
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
207
+ data = FIELD_DP32(data, VDATA, NF, a->nf);
208
+ return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
209
+}
210
+
211
+static bool st_index_check(DisasContext *s, arg_rnfvm* a)
212
+{
213
+ return (vext_check_isa_ill(s) &&
214
+ vext_check_reg(s, a->rd, false) &&
215
+ vext_check_reg(s, a->rs2, false) &&
216
+ vext_check_nf(s, a->nf));
217
+}
218
+
219
+GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check)
220
+GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check)
221
+GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check)
222
+GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check)
223
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/target/riscv/vector_helper.c
226
+++ b/target/riscv/vector_helper.c
227
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b)
228
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
229
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
230
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
231
+
232
+/*
233
+ *** index: access vector elements at addresses formed from a base plus a
+ *** per-element offset read from the index vector vs2
234
+ */
235
+typedef target_ulong vext_get_index_addr(target_ulong base,
236
+ uint32_t idx, void *vs2);
237
+/* Define NAME(): return base plus the ETYPE value stored at slot H(idx) of vs2 */
238
+#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
239
+static target_ulong NAME(target_ulong base, \
240
+ uint32_t idx, void *vs2) \
241
+{ \
242
+ return (base + *((ETYPE *)vs2 + H(idx))); \
243
+}
244
+/* one accessor per index width; the offsets are read as signed integers */
245
+GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1)
246
+GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
247
+GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
248
+GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
249
+/*
+ * Common body of the indexed load and store helpers.
+ *
+ * For every element i below env->vl that is active (vm set, or the mask
+ * bit of v0 for i set) the address get_index_addr(base, i, vs2) is used
+ * for nf consecutive fields spaced msz bytes apart; field k is moved by
+ * ldst_elem() to/from element i + k * vlmax of vd. esz is the register
+ * element size used to derive vlmax; clear_elem may be NULL, in which
+ * case the tail is left untouched.
+ */
250
+static inline void
251
+vext_ldst_index(void *vd, void *v0, target_ulong base,
252
+ void *vs2, CPURISCVState *env, uint32_t desc,
253
+ vext_get_index_addr get_index_addr,
254
+ vext_ldst_elem_fn *ldst_elem,
255
+ clear_fn *clear_elem,
256
+ uint32_t esz, uint32_t msz, uintptr_t ra,
257
+ MMUAccessType access_type)
258
+{
259
+ uint32_t i, k;
260
+ uint32_t nf = vext_nf(desc);
261
+ uint32_t vm = vext_vm(desc);
262
+ uint32_t mlen = vext_mlen(desc);
263
+ uint32_t vlmax = vext_maxsz(desc) / esz;
264
+
265
+ /* probe the nf * msz bytes of every active element before moving any data */
266
+ for (i = 0; i < env->vl; i++) {
267
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
268
+ continue;
269
+ }
270
+ probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
271
+ access_type);
272
+ }
273
+ /* transfer each active element, field by field, through ldst_elem (load or store) */
274
+ for (i = 0; i < env->vl; i++) {
275
+ k = 0;
276
+ if (!vm && !vext_elem_mask(v0, mlen, i)) {
277
+ continue;
278
+ }
279
+ while (k < nf) {
280
+ abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
281
+ ldst_elem(env, addr, i + k * vlmax, vd, ra);
282
+ k++;
283
+ }
284
+ }
285
+ /* clear tail elements of each field; skipped when clear_elem is NULL */
286
+ if (clear_elem) {
287
+ for (k = 0; k < nf; k++) {
288
+ clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
289
+ }
290
+ }
291
+}
292
+/*
+ * Define an indexed-load helper: forwards to vext_ldst_index() with
+ * esz = sizeof(ETYPE), msz = sizeof(MTYPE), the given tail-clear function
+ * and MMU_DATA_LOAD.
+ */
293
+#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
294
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
295
+ void *vs2, CPURISCVState *env, uint32_t desc) \
296
+{ \
297
+ vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
298
+ LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
299
+ GETPC(), MMU_DATA_LOAD); \
300
+}
301
+/* the index accessor (idx_*) and clear function follow the width of ETYPE */
302
+GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb)
303
+GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh)
304
+GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, clearl)
305
+GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq)
306
+GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh)
307
+GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl)
308
+GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq)
309
+GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl)
310
+GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq)
311
+GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb)
312
+GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh)
313
+GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl)
314
+GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq)
315
+GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb)
316
+GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh)
317
+GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl)
318
+GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq)
319
+GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
320
+GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
321
+GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
322
+GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
323
+GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)
324
+/*
+ * Define an indexed-store helper: forwards to vext_ldst_index() with
+ * esz = sizeof(ETYPE), msz = sizeof(MTYPE), no tail-clear function (NULL)
+ * and MMU_DATA_STORE.
+ */
325
+#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
326
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
327
+ void *vs2, CPURISCVState *env, uint32_t desc) \
328
+{ \
329
+ vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
330
+ STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
331
+ GETPC(), MMU_DATA_STORE); \
332
+}
333
+/* these names match the entries of the fns[][] table in st_index_op() */
334
+GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b)
335
+GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h)
336
+GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w)
337
+GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d)
338
+GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
339
+GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
340
+GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
341
+GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
342
+GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
343
+GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b)
344
+GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
345
+GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
346
+GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
347
--
348
2.27.0
349
350
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-14-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 25 ++++++++++++
10
target/riscv/insn32.decode | 9 +++++
11
target/riscv/insn_trans/trans_rvv.inc.c | 11 ++++++
12
target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++
13
4 files changed, 96 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/riscv/insn32.decode
51
+++ b/target/riscv/insn32.decode
52
@@ -XXX,XX +XXX,XX @@ vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1
53
vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1
54
vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1
55
vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1
56
+vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
57
+vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
58
+vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
59
+vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm
60
+vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm
61
+vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm
62
+vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm
63
+vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm
64
+vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm
65
66
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
67
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
68
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/riscv/insn_trans/trans_rvv.inc.c
71
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
72
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
73
74
GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check)
75
GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check)
76
+
77
+/* Vector Bitwise Logical Instructions */
78
+GEN_OPIVV_GVEC_TRANS(vand_vv, and)
79
+GEN_OPIVV_GVEC_TRANS(vor_vv, or)
80
+GEN_OPIVV_GVEC_TRANS(vxor_vv, xor)
81
+GEN_OPIVX_GVEC_TRANS(vand_vx, ands)
82
+GEN_OPIVX_GVEC_TRANS(vor_vx, ors)
83
+GEN_OPIVX_GVEC_TRANS(vxor_vx, xors)
84
+GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi)
85
+GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori)
86
+GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori)
87
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/riscv/vector_helper.c
90
+++ b/target/riscv/vector_helper.c
91
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
92
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
93
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
94
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
95
+
96
+/* Vector Bitwise Logical Instructions */
97
+RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
98
+RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
99
+RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
100
+RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
101
+RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
102
+RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
103
+RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
104
+RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
105
+RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
106
+RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
107
+RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
108
+RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
109
+GEN_VEXT_VV(vand_vv_b, 1, 1, clearb)
110
+GEN_VEXT_VV(vand_vv_h, 2, 2, clearh)
111
+GEN_VEXT_VV(vand_vv_w, 4, 4, clearl)
112
+GEN_VEXT_VV(vand_vv_d, 8, 8, clearq)
113
+GEN_VEXT_VV(vor_vv_b, 1, 1, clearb)
114
+GEN_VEXT_VV(vor_vv_h, 2, 2, clearh)
115
+GEN_VEXT_VV(vor_vv_w, 4, 4, clearl)
116
+GEN_VEXT_VV(vor_vv_d, 8, 8, clearq)
117
+GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb)
118
+GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh)
119
+GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl)
120
+GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq)
121
+
122
+RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
123
+RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
124
+RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
125
+RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
126
+RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
127
+RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
128
+RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
129
+RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
130
+RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
131
+RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
132
+RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
133
+RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
134
+GEN_VEXT_VX(vand_vx_b, 1, 1, clearb)
135
+GEN_VEXT_VX(vand_vx_h, 2, 2, clearh)
136
+GEN_VEXT_VX(vand_vx_w, 4, 4, clearl)
137
+GEN_VEXT_VX(vand_vx_d, 8, 8, clearq)
138
+GEN_VEXT_VX(vor_vx_b, 1, 1, clearb)
139
+GEN_VEXT_VX(vor_vx_h, 2, 2, clearh)
140
+GEN_VEXT_VX(vor_vx_w, 4, 4, clearl)
141
+GEN_VEXT_VX(vor_vx_d, 8, 8, clearq)
142
+GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb)
143
+GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh)
144
+GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl)
145
+GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq)
146
--
147
2.27.0
148
149
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-16-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 13 ++++
10
target/riscv/insn32.decode | 6 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 90 +++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 14 ++++
13
4 files changed, 123 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vnsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32)
31
+DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32)
32
+DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
36
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/riscv/insn32.decode
39
+++ b/target/riscv/insn32.decode
40
@@ -XXX,XX +XXX,XX @@ vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
41
vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
42
vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
43
vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
44
+vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
45
+vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
46
+vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
47
+vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
48
+vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
49
+vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
50
51
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
52
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
53
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/riscv/insn_trans/trans_rvv.inc.c
56
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
57
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars)
58
GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli)
59
GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri)
60
GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari)
61
+
62
+/*
+ * Vector Narrowing Integer Right Shift Instructions
+ *
+ * opivv_narrow_check(): legality test for the vector-vector forms. On top
+ * of the ISA, mask-overlap and register checks, rd (1 << lmul registers)
+ * must pass vext_check_overlap_group() against rs2 (2 << lmul registers),
+ * and both s->lmul and s->sew must be below 3.
+ * NOTE(review): the `true` passed for rs2 presumably marks it as the
+ * double-width source - confirm against vext_check_reg().
+ */
63
+static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a)
64
+{
65
+ return (vext_check_isa_ill(s) &&
66
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
67
+ vext_check_reg(s, a->rd, false) &&
68
+ vext_check_reg(s, a->rs2, true) &&
69
+ vext_check_reg(s, a->rs1, false) &&
70
+ vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
71
+ 2 << s->lmul) &&
72
+ (s->lmul < 0x3) && (s->sew < 0x3));
73
+}
74
+
75
+/*
+ * OPIVV with NARROW
+ *
+ * Defines trans_NAME(): when opivv_narrow_check() passes, emits a branch
+ * that skips the operation if cpu_vl is 0, packs mlen/vm/lmul into the
+ * descriptor data and calls tcg_gen_gvec_4_ptr() with the _b/_h/_w helper
+ * chosen by s->sew; returns false when the check fails. Only three
+ * helpers exist, matching the s->sew < 3 condition of the check.
+ */
76
+#define GEN_OPIVV_NARROW_TRANS(NAME) \
77
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
78
+{ \
79
+ if (opivv_narrow_check(s, a)) { \
80
+ uint32_t data = 0; \
81
+ static gen_helper_gvec_4_ptr * const fns[3] = { \
82
+ gen_helper_##NAME##_b, \
83
+ gen_helper_##NAME##_h, \
84
+ gen_helper_##NAME##_w, \
85
+ }; \
86
+ TCGLabel *over = gen_new_label(); \
87
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
88
+ \
89
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
90
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
91
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
92
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
93
+ vreg_ofs(s, a->rs1), \
94
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
95
+ s->vlen / 8, data, fns[s->sew]); \
96
+ gen_set_label(over); \
97
+ return true; \
98
+ } \
99
+ return false; \
100
+}
101
+GEN_OPIVV_NARROW_TRANS(vnsra_vv)
102
+GEN_OPIVV_NARROW_TRANS(vnsrl_vv)
103
+/*
+ * Legality test shared by the vector-scalar and vector-immediate narrowing
+ * shifts: the same conditions as opivv_narrow_check() except that a->rs1
+ * is not checked as a vector register.
+ */
104
+static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a)
105
+{
106
+ return (vext_check_isa_ill(s) &&
107
+ vext_check_overlap_mask(s, a->rd, a->vm, false) &&
108
+ vext_check_reg(s, a->rd, false) &&
109
+ vext_check_reg(s, a->rs2, true) &&
110
+ vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
111
+ 2 << s->lmul) &&
112
+ (s->lmul < 0x3) && (s->sew < 0x3));
113
+}
114
+
115
+/*
+ * OPIVX with NARROW
+ *
+ * Defines trans_NAME(): when opivx_narrow_check() passes, dispatches to
+ * opivx_trans() with the _b/_h/_w helper selected by s->sew; otherwise
+ * returns false.
+ */
116
+#define GEN_OPIVX_NARROW_TRANS(NAME) \
117
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
118
+{ \
119
+ if (opivx_narrow_check(s, a)) { \
120
+ static gen_helper_opivx * const fns[3] = { \
121
+ gen_helper_##NAME##_b, \
122
+ gen_helper_##NAME##_h, \
123
+ gen_helper_##NAME##_w, \
124
+ }; \
125
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
126
+ } \
127
+ return false; \
128
+}
129
+
130
+GEN_OPIVX_NARROW_TRANS(vnsra_vx)
131
+GEN_OPIVX_NARROW_TRANS(vnsrl_vx)
132
+
133
+/*
+ * OPIVI with NARROW
+ *
+ * Defines trans_NAME() for the immediate form: reuses the helpers of the
+ * OPIVX instruction and the opivx_narrow_check() test, and hands ZX to
+ * opivi_trans() as its last argument.
+ * NOTE(review): ZX presumably selects zero-extension of the immediate -
+ * confirm against opivi_trans().
+ */
134
+#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \
135
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
136
+{ \
137
+ if (opivx_narrow_check(s, a)) { \
138
+ static gen_helper_opivx * const fns[3] = { \
139
+ gen_helper_##OPIVX##_b, \
140
+ gen_helper_##OPIVX##_h, \
141
+ gen_helper_##OPIVX##_w, \
142
+ }; \
143
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \
144
+ fns[s->sew], s, ZX); \
145
+ } \
146
+ return false; \
147
+}
148
+
149
+GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx)
150
+GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx)
151
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/riscv/vector_helper.c
154
+++ b/target/riscv/vector_helper.c
155
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
156
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
157
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
158
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
159
+
160
+/* Vector Narrowing Integer Right Shift Instructions */
161
+GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
162
+GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
163
+GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
164
+GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
165
+GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
166
+GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
167
+GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
168
+GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
169
+GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
170
+GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
171
+GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
172
+GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
173
--
174
2.27.0
175
176
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-17-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 57 +++++++++++
10
target/riscv/insn32.decode | 20 ++++
11
target/riscv/insn_trans/trans_rvv.inc.c | 46 +++++++++
12
target/riscv/vector_helper.c | 123 ++++++++++++++++++++++++
13
4 files changed, 246 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
41
+DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
42
+DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
43
+DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
44
+DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
45
+DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
46
+DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
47
+DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
48
+DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
+DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
57
+DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
58
+DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
59
+DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
60
+DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32)
61
+DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32)
62
+DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32)
63
+DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32)
64
+DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
65
+DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
66
+DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
67
+DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
68
+DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32)
69
+DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32)
70
+DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32)
71
+DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32)
72
+DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
73
+DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
74
+DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
75
+DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
76
+DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32)
77
+DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32)
78
+DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32)
79
+DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32)
80
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/riscv/insn32.decode
83
+++ b/target/riscv/insn32.decode
84
@@ -XXX,XX +XXX,XX @@ vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
85
vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
86
vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
87
vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
88
+vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
89
+vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
90
+vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
91
+vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm
92
+vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm
93
+vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm
94
+vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm
95
+vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm
96
+vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm
97
+vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm
98
+vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm
99
+vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm
100
+vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm
101
+vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm
102
+vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm
103
+vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm
104
+vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
105
+vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
106
+vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
107
+vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
108
109
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
110
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
111
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/target/riscv/insn_trans/trans_rvv.inc.c
114
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
115
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
116
117
GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx)
118
GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx)
119
+
120
+/* Vector Integer Comparison Instructions */
121
+/*
122
+ * For all comparison instructions, an illegal instruction exception is raised
123
+ * if the destination vector register overlaps a source vector register group
124
+ * and LMUL > 1.
+ *
+ * opivv_cmp_check() therefore accepts the instruction when rd, treated as a
+ * single register, passes vext_check_overlap_group() against both the rs1
+ * and rs2 groups (1 << s->lmul registers each), or when s->lmul is 0.
125
+ */
126
+static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a)
127
+{
128
+ return (vext_check_isa_ill(s) &&
129
+ vext_check_reg(s, a->rs2, false) &&
130
+ vext_check_reg(s, a->rs1, false) &&
131
+ ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
132
+ vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) ||
133
+ (s->lmul == 0)));
134
+}
135
+GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check)
136
+GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check)
137
+GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check)
138
+GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check)
139
+GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check)
140
+GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check)
141
+/*
+ * Legality test for vector-scalar and vector-immediate comparisons: rs2
+ * must be a valid register group, and rd (one register) must pass
+ * vext_check_overlap_group() against the rs2 group unless s->lmul is 0.
+ */
142
+static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a)
143
+{
144
+ return (vext_check_isa_ill(s) &&
145
+ vext_check_reg(s, a->rs2, false) &&
146
+ (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) ||
147
+ (s->lmul == 0)));
148
+}
149
+/* vector-scalar comparison translators */
150
+GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check)
151
+GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check)
152
+GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check)
153
+GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check)
154
+GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check)
155
+GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check)
156
+GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check)
157
+GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check)
158
+/* immediate forms reuse the _vx helpers; NOTE(review): 2nd argument is 1 only for the unsigned compares - confirm its meaning in GEN_OPIVI_TRANS */
159
+GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check)
160
+GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check)
161
+GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check)
162
+GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check)
163
+GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check)
164
+GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check)
165
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/target/riscv/vector_helper.c
168
+++ b/target/riscv/vector_helper.c
169
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
170
GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
171
GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
172
GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
173
+
174
+/* Vector Integer Comparison Instructions */
175
+#define DO_MSEQ(N, M) (N == M)
176
+#define DO_MSNE(N, M) (N != M)
177
+#define DO_MSLT(N, M) (N < M)
178
+#define DO_MSLE(N, M) (N <= M)
179
+#define DO_MSGT(N, M) (N > M)
180
+
181
+#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
182
+void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
183
+ CPURISCVState *env, uint32_t desc) \
184
+{ \
185
+ uint32_t mlen = vext_mlen(desc); \
186
+ uint32_t vm = vext_vm(desc); \
187
+ uint32_t vl = env->vl; \
188
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
189
+ uint32_t i; \
190
+ \
191
+ for (i = 0; i < vl; i++) { \
192
+ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
193
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
194
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
195
+ continue; \
196
+ } \
197
+ vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \
198
+ } \
199
+ for (; i < vlmax; i++) { \
200
+ vext_set_elem_mask(vd, mlen, i, 0); \
201
+ } \
202
+}
203
+
204
+GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
205
+GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
206
+GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
207
+GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
208
+
209
+GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
210
+GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
211
+GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
212
+GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
213
+
214
+GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
215
+GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
216
+GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
217
+GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
218
+
219
+GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
220
+GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
221
+GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
222
+GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
223
+
224
+GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
225
+GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
226
+GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
227
+GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
228
+
229
+GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
230
+GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
231
+GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
232
+GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
233
+
234
+#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
235
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
236
+ CPURISCVState *env, uint32_t desc) \
237
+{ \
238
+ uint32_t mlen = vext_mlen(desc); \
239
+ uint32_t vm = vext_vm(desc); \
240
+ uint32_t vl = env->vl; \
241
+ uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
242
+ uint32_t i; \
243
+ \
244
+ for (i = 0; i < vl; i++) { \
245
+ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
246
+ if (!vm && !vext_elem_mask(v0, mlen, i)) { \
247
+ continue; \
248
+ } \
249
+ vext_set_elem_mask(vd, mlen, i, \
250
+ DO_OP(s2, (ETYPE)(target_long)s1)); \
251
+ } \
252
+ for (; i < vlmax; i++) { \
253
+ vext_set_elem_mask(vd, mlen, i, 0); \
254
+ } \
255
+}
256
+
257
+GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
258
+GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
259
+GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
260
+GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
261
+
262
+GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
263
+GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
264
+GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
265
+GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
266
+
267
+GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
268
+GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
269
+GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
270
+GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
271
+
272
+GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
273
+GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
274
+GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
275
+GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
276
+
277
+GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
278
+GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
279
+GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
280
+GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
281
+
282
+GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
283
+GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
284
+GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
285
+GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
286
+
287
+GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
288
+GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
289
+GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
290
+GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
291
+
292
+GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
293
+GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
294
+GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
295
+GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
296
--
297
2.27.0
298
299
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-18-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 33 ++++++++++++
10
target/riscv/insn32.decode | 8 +++
11
target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++
12
target/riscv/vector_helper.c | 71 +++++++++++++++++++++++++
13
4 files changed, 122 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm
61
vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm
62
vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm
63
vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm
64
+vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm
65
+vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm
66
+vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm
67
+vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm
68
+vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
69
+vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
70
+vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
71
+vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
72
73
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
74
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
75
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
79
@@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check)
80
GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check)
81
GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check)
82
GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check)
83
+
84
+/* Vector Integer Min/Max Instructions */
85
+GEN_OPIVV_GVEC_TRANS(vminu_vv, umin)
86
+GEN_OPIVV_GVEC_TRANS(vmin_vv, smin)
87
+GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax)
88
+GEN_OPIVV_GVEC_TRANS(vmax_vv, smax)
89
+GEN_OPIVX_TRANS(vminu_vx, opivx_check)
90
+GEN_OPIVX_TRANS(vmin_vx, opivx_check)
91
+GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
92
+GEN_OPIVX_TRANS(vmax_vx, opivx_check)
93
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/riscv/vector_helper.c
96
+++ b/target/riscv/vector_helper.c
97
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
98
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
99
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
100
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
101
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
102
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
103
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
104
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
105
106
/* operation of two vector elements */
107
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
108
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
109
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
110
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
111
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
112
+
113
+/* Vector Integer Min/Max Instructions */
114
+RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
115
+RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
116
+RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
117
+RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
118
+RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
119
+RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
120
+RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
121
+RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
122
+RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
123
+RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
124
+RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
125
+RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
126
+RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
127
+RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
128
+RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
129
+RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
130
+GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb)
131
+GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh)
132
+GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl)
133
+GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq)
134
+GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb)
135
+GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh)
136
+GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl)
137
+GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq)
138
+GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb)
139
+GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh)
140
+GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl)
141
+GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq)
142
+GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb)
143
+GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh)
144
+GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl)
145
+GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq)
146
+
147
+RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
148
+RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
149
+RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
150
+RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
151
+RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
152
+RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
153
+RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
154
+RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
155
+RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
156
+RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
157
+RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
158
+RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
159
+RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
160
+RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
161
+RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
162
+RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
163
+GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb)
164
+GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh)
165
+GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl)
166
+GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq)
167
+GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb)
168
+GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh)
169
+GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl)
170
+GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq)
171
+GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb)
172
+GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh)
173
+GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl)
174
+GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq)
175
+GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb)
176
+GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh)
177
+GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl)
178
+GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq)
179
--
180
2.27.0
181
182
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20200623215920.2594-19-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 33 +++++
10
target/riscv/insn32.decode | 8 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 10 ++
12
target/riscv/vector_helper.c | 163 ++++++++++++++++++++++++
13
4 files changed, 214 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm
61
vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm
62
vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm
63
vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm
64
+vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm
65
+vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm
66
+vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm
67
+vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm
68
+vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
69
+vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
70
+vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
71
+vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
72
73
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
74
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
75
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
79
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check)
80
GEN_OPIVX_TRANS(vmin_vx, opivx_check)
81
GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
82
GEN_OPIVX_TRANS(vmax_vx, opivx_check)
83
+
84
+/* Vector Single-Width Integer Multiply Instructions */
85
+GEN_OPIVV_GVEC_TRANS(vmul_vv, mul)
86
+GEN_OPIVV_TRANS(vmulh_vv, opivv_check)
87
+GEN_OPIVV_TRANS(vmulhu_vv, opivv_check)
88
+GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check)
89
+GEN_OPIVX_GVEC_TRANS(vmul_vx, muls)
90
+GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
91
+GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
92
+GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)
93
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/riscv/vector_helper.c
96
+++ b/target/riscv/vector_helper.c
97
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
98
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
99
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
100
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
101
+#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
102
+#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
103
+#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
104
+#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
105
106
/* operation of two vector elements */
107
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
108
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb)
109
GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh)
110
GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl)
111
GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq)
112
+
113
+/* Vector Single-Width Integer Multiply Instructions */
114
+#define DO_MUL(N, M) (N * M)
115
+RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
116
+RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
117
+RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
118
+RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
119
+GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb)
120
+GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh)
121
+GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl)
122
+GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq)
123
+
124
+static int8_t do_mulh_b(int8_t s2, int8_t s1)
125
+{
126
+ return (int16_t)s2 * (int16_t)s1 >> 8;
127
+}
128
+
129
+static int16_t do_mulh_h(int16_t s2, int16_t s1)
130
+{
131
+ return (int32_t)s2 * (int32_t)s1 >> 16;
132
+}
133
+
134
+static int32_t do_mulh_w(int32_t s2, int32_t s1)
135
+{
136
+ return (int64_t)s2 * (int64_t)s1 >> 32;
137
+}
138
+
139
+static int64_t do_mulh_d(int64_t s2, int64_t s1)
140
+{
141
+ uint64_t hi_64, lo_64;
142
+
143
+ muls64(&lo_64, &hi_64, s1, s2);
144
+ return hi_64;
145
+}
146
+
147
+static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
148
+{
149
+ return (uint16_t)s2 * (uint16_t)s1 >> 8;
150
+}
151
+
152
+static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
153
+{
154
+ return (uint32_t)s2 * (uint32_t)s1 >> 16;
155
+}
156
+
157
+static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
158
+{
159
+ return (uint64_t)s2 * (uint64_t)s1 >> 32;
160
+}
161
+
162
+static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
163
+{
164
+ uint64_t hi_64, lo_64;
165
+
166
+ mulu64(&lo_64, &hi_64, s2, s1);
167
+ return hi_64;
168
+}
169
+
170
+static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
171
+{
172
+ return (int16_t)s2 * (uint16_t)s1 >> 8;
173
+}
174
+
175
+static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
176
+{
177
+ return (int32_t)s2 * (uint32_t)s1 >> 16;
178
+}
179
+
180
+static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
181
+{
182
+ return (int64_t)s2 * (uint64_t)s1 >> 32;
183
+}
184
+
185
+/*
186
+ * Let A = signed operand,
187
+ * B = unsigned operand
188
+ * P = mulu64(A, B), unsigned product of B and A's 64-bit pattern
189
+ *
190
+ * LET X = 2 ** 64 - A (A as unsigned 64-bit), 2's complement of A
191
+ * SP = signed product
192
+ * THEN
193
+ * IF A < 0
194
+ * SP = -X * B
195
+ * = -(2 ** 64 - A) * B
196
+ * = A * B - 2 ** 64 * B
197
+ * = P - 2 ** 64 * B
198
+ * ELSE
199
+ * SP = P
200
+ * THEN
201
+ * HI_P -= (A < 0 ? B : 0)
202
+ */
203
+
204
+static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
205
+{
206
+ uint64_t hi_64, lo_64;
207
+
208
+ mulu64(&lo_64, &hi_64, s2, s1);
209
+
210
+ hi_64 -= s2 < 0 ? s1 : 0;
211
+ return hi_64;
212
+}
213
+
214
+RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
215
+RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
216
+RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
217
+RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
218
+RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
219
+RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
220
+RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
221
+RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
222
+RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
223
+RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
224
+RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
225
+RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
226
+GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb)
227
+GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh)
228
+GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl)
229
+GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq)
230
+GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb)
231
+GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh)
232
+GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl)
233
+GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq)
234
+GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb)
235
+GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh)
236
+GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl)
237
+GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq)
238
+
239
+RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
240
+RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
241
+RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
242
+RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
243
+RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
244
+RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
245
+RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
246
+RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
247
+RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
248
+RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
249
+RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
250
+RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
251
+RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
252
+RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
253
+RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
254
+RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
255
+GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb)
256
+GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh)
257
+GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl)
258
+GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq)
259
+GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb)
260
+GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh)
261
+GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl)
262
+GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq)
263
+GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb)
264
+GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh)
265
+GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl)
266
+GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq)
267
+GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb)
268
+GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh)
269
+GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl)
270
+GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq)
271
--
272
2.27.0
273
274
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-20-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 33 +++++++++++
10
target/riscv/insn32.decode | 8 +++
11
target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++
12
target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++
13
4 files changed, 125 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm
61
vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm
62
vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm
63
vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm
64
+vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm
65
+vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm
66
+vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm
67
+vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm
68
+vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
69
+vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
70
+vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
71
+vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
72
73
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
74
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
75
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
79
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vmul_vx, muls)
80
GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
81
GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
82
GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)
83
+
84
+/* Vector Integer Divide Instructions */
85
+GEN_OPIVV_TRANS(vdivu_vv, opivv_check)
86
+GEN_OPIVV_TRANS(vdiv_vv, opivv_check)
87
+GEN_OPIVV_TRANS(vremu_vv, opivv_check)
88
+GEN_OPIVV_TRANS(vrem_vv, opivv_check)
89
+GEN_OPIVX_TRANS(vdivu_vx, opivx_check)
90
+GEN_OPIVX_TRANS(vdiv_vx, opivx_check)
91
+GEN_OPIVX_TRANS(vremu_vx, opivx_check)
92
+GEN_OPIVX_TRANS(vrem_vx, opivx_check)
93
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/riscv/vector_helper.c
96
+++ b/target/riscv/vector_helper.c
97
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb)
98
GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh)
99
GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl)
100
GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq)
101
+
102
+/* Vector Integer Divide Instructions */
103
+#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
104
+#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
105
+#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
106
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
107
+#define DO_REM(N, M) (unlikely(M == 0) ? N :\
108
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
109
+
110
+RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
111
+RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
112
+RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
113
+RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
114
+RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
115
+RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
116
+RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
117
+RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
118
+RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
119
+RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
120
+RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
121
+RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
122
+RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
123
+RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
124
+RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
125
+RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
126
+GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb)
127
+GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh)
128
+GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl)
129
+GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq)
130
+GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb)
131
+GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh)
132
+GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl)
133
+GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq)
134
+GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb)
135
+GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh)
136
+GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl)
137
+GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq)
138
+GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb)
139
+GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh)
140
+GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl)
141
+GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq)
142
+
143
+RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
144
+RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
145
+RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
146
+RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
147
+RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
148
+RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
149
+RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
150
+RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
151
+RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
152
+RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
153
+RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
154
+RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
155
+RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
156
+RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
157
+RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
158
+RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
159
+GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb)
160
+GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh)
161
+GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl)
162
+GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq)
163
+GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb)
164
+GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh)
165
+GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl)
166
+GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq)
167
+GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb)
168
+GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh)
169
+GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl)
170
+GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq)
171
+GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb)
172
+GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh)
173
+GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl)
174
+GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq)
175
--
176
2.27.0
177
178
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-21-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 19 +++++++++
10
target/riscv/insn32.decode | 6 +++
11
target/riscv/insn_trans/trans_rvv.inc.c | 8 ++++
12
target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++
13
4 files changed, 84 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
42
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/riscv/insn32.decode
45
+++ b/target/riscv/insn32.decode
46
@@ -XXX,XX +XXX,XX @@ vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm
47
vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm
48
vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm
49
vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm
50
+vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm
51
+vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm
52
+vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
53
+vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
54
+vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
55
+vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
56
57
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
58
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
59
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/riscv/insn_trans/trans_rvv.inc.c
62
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
63
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vdivu_vx, opivx_check)
64
GEN_OPIVX_TRANS(vdiv_vx, opivx_check)
65
GEN_OPIVX_TRANS(vremu_vx, opivx_check)
66
GEN_OPIVX_TRANS(vrem_vx, opivx_check)
67
+
68
+/* Vector Widening Integer Multiply Instructions */
69
+GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
70
+GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
71
+GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
72
+GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
73
+GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
74
+GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
75
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/vector_helper.c
78
+++ b/target/riscv/vector_helper.c
79
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
80
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
81
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
82
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
83
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
84
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
85
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
86
+#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
87
+#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
88
+#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
89
+#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
90
+#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
91
+#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
92
+#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
93
+#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
94
+#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
95
96
/* operation of two vector elements */
97
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
98
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb)
99
GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh)
100
GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl)
101
GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq)
102
+
103
+/* Vector Widening Integer Multiply Instructions */
104
+RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
105
+RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
106
+RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
107
+RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
108
+RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
109
+RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
110
+RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
111
+RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
112
+RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
113
+GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh)
114
+GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl)
115
+GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq)
116
+GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh)
117
+GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl)
118
+GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq)
119
+GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh)
120
+GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl)
121
+GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq)
122
+
123
+RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
124
+RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
125
+RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
126
+RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
127
+RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
128
+RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
129
+RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
130
+RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
131
+RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
132
+GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh)
133
+GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl)
134
+GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq)
135
+GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh)
136
+GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl)
137
+GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq)
138
+GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh)
139
+GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl)
140
+GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq)
141
--
142
2.27.0
143
144
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-22-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 33 ++++++++++
10
target/riscv/insn32.decode | 8 +++
11
target/riscv/insn_trans/trans_rvv.inc.c | 10 +++
12
target/riscv/vector_helper.c | 88 +++++++++++++++++++++++++
13
4 files changed, 139 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32)
45
+DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32)
46
+DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32)
47
+DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32)
48
+DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
49
+DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
50
+DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
51
+DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
52
+DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
53
+DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
54
+DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
55
+DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
56
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/riscv/insn32.decode
59
+++ b/target/riscv/insn32.decode
60
@@ -XXX,XX +XXX,XX @@ vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm
61
vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm
62
vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm
63
vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm
64
+vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm
65
+vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm
66
+vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm
67
+vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm
68
+vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
69
+vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
70
+vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
71
+vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
72
73
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
74
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
75
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/riscv/insn_trans/trans_rvv.inc.c
78
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
79
@@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
80
GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
81
GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
82
GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
83
+
84
+/* Vector Single-Width Integer Multiply-Add Instructions */
85
+GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
86
+GEN_OPIVV_TRANS(vnmsac_vv, opivv_check)
87
+GEN_OPIVV_TRANS(vmadd_vv, opivv_check)
88
+GEN_OPIVV_TRANS(vnmsub_vv, opivv_check)
89
+GEN_OPIVX_TRANS(vmacc_vx, opivx_check)
90
+GEN_OPIVX_TRANS(vnmsac_vx, opivx_check)
91
+GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
92
+GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
93
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/riscv/vector_helper.c
96
+++ b/target/riscv/vector_helper.c
97
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq)
98
GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh)
99
GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl)
100
GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq)
101
+
102
+/* Vector Single-Width Integer Multiply-Add Instructions */
103
+#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
104
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
105
+{ \
106
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
107
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
108
+ TD d = *((TD *)vd + HD(i)); \
109
+ *((TD *)vd + HD(i)) = OP(s2, s1, d); \
110
+}
111
+
112
+#define DO_MACC(N, M, D) (M * N + D)
113
+#define DO_NMSAC(N, M, D) (-(M * N) + D)
114
+#define DO_MADD(N, M, D) (M * D + N)
115
+#define DO_NMSUB(N, M, D) (-(M * D) + N)
116
+RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
117
+RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
118
+RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
119
+RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
120
+RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
121
+RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
122
+RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
123
+RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
124
+RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
125
+RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
126
+RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
127
+RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
128
+RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
129
+RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
130
+RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
131
+RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
132
+GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb)
133
+GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh)
134
+GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl)
135
+GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq)
136
+GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb)
137
+GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh)
138
+GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl)
139
+GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq)
140
+GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb)
141
+GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh)
142
+GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl)
143
+GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq)
144
+GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb)
145
+GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh)
146
+GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl)
147
+GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq)
148
+
149
+#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
150
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
151
+{ \
152
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
153
+ TD d = *((TD *)vd + HD(i)); \
154
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
155
+}
156
+
157
+RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
158
+RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
159
+RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
160
+RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
161
+RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
162
+RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
163
+RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
164
+RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
165
+RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
166
+RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
167
+RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
168
+RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
169
+RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
170
+RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
171
+RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
172
+RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
173
+GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb)
174
+GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh)
175
+GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl)
176
+GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq)
177
+GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb)
178
+GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh)
179
+GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl)
180
+GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq)
181
+GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb)
182
+GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh)
183
+GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl)
184
+GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq)
185
+GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb)
186
+GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh)
187
+GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl)
188
+GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq)
189
--
190
2.27.0
191
192
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-23-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 22 ++++++++++++
10
target/riscv/insn32.decode | 7 ++++
11
target/riscv/insn_trans/trans_rvv.inc.c | 9 +++++
12
target/riscv/vector_helper.c | 45 +++++++++++++++++++++++++
13
4 files changed, 83 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
45
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/riscv/insn32.decode
48
+++ b/target/riscv/insn32.decode
49
@@ -XXX,XX +XXX,XX @@ vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm
50
vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm
51
vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm
52
vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm
53
+vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
54
+vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
55
+vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
56
+vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
57
+vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
58
+vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
59
+vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
60
61
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
62
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
63
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/riscv/insn_trans/trans_rvv.inc.c
66
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
67
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vmacc_vx, opivx_check)
68
GEN_OPIVX_TRANS(vnmsac_vx, opivx_check)
69
GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
70
GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
71
+
72
+/* Vector Widening Integer Multiply-Add Instructions */
73
+GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
74
+GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
75
+GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
76
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
77
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
78
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
79
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
80
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/riscv/vector_helper.c
83
+++ b/target/riscv/vector_helper.c
84
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb)
85
GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh)
86
GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl)
87
GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq)
88
+
89
+/* Vector Widening Integer Multiply-Add Instructions */
90
+RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
91
+RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
92
+RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
93
+RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
94
+RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
95
+RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
96
+RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
97
+RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
98
+RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
99
+GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh)
100
+GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl)
101
+GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq)
102
+GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh)
103
+GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl)
104
+GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq)
105
+GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh)
106
+GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl)
107
+GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq)
108
+
109
+RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
110
+RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
111
+RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
112
+RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
113
+RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
114
+RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
115
+RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
116
+RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
117
+RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
118
+RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
119
+RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
120
+RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
121
+GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh)
122
+GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl)
123
+GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq)
124
+GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh)
125
+GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl)
126
+GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq)
127
+GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh)
128
+GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl)
129
+GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
130
+GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
131
+GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
132
+GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
133
--
134
2.27.0
135
136
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-28-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 22 +++
10
target/riscv/insn32.decode | 7 +
11
target/riscv/insn_trans/trans_rvv.inc.c | 9 ++
12
target/riscv/vector_helper.c | 205 ++++++++++++++++++++++++
13
4 files changed, 243 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
40
+DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
41
+DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
42
+DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
43
+DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
44
+DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
45
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
46
index XXXXXXX..XXXXXXX 100644
47
--- a/target/riscv/insn32.decode
48
+++ b/target/riscv/insn32.decode
49
@@ -XXX,XX +XXX,XX @@ vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
50
vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
51
vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
52
vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
53
+vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
54
+vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
55
+vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
56
+vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
57
+vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
58
+vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
59
+vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
60
61
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
62
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
63
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/riscv/insn_trans/trans_rvv.inc.c
66
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
67
@@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
68
/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
69
GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
70
GEN_OPIVX_TRANS(vsmul_vx, opivx_check)
71
+
72
+/* Vector Widening Saturating Scaled Multiply-Add */
73
+GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check)
74
+GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check)
75
+GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check)
76
+GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx)
77
+GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx)
78
+GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx)
79
+GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx)
80
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/target/riscv/vector_helper.c
83
+++ b/target/riscv/vector_helper.c
84
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb)
85
GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh)
86
GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl)
87
GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq)
88
+
89
+/* Vector Widening Saturating Scaled Multiply-Add */
90
+static inline uint16_t
91
+vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
92
+ uint16_t c)
93
+{
94
+ uint8_t round;
95
+ uint16_t res = (uint16_t)a * b;
96
+
97
+ round = get_round(vxrm, res, 4);
98
+ res = (res >> 4) + round;
99
+ return saddu16(env, vxrm, c, res);
100
+}
101
+
102
+static inline uint32_t
103
+vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
104
+ uint32_t c)
105
+{
106
+ uint8_t round;
107
+ uint32_t res = (uint32_t)a * b;
108
+
109
+ round = get_round(vxrm, res, 8);
110
+ res = (res >> 8) + round;
111
+ return saddu32(env, vxrm, c, res);
112
+}
113
+
114
+static inline uint64_t
115
+vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
116
+ uint64_t c)
117
+{
118
+ uint8_t round;
119
+ uint64_t res = (uint64_t)a * b;
120
+
121
+ round = get_round(vxrm, res, 16);
122
+ res = (res >> 16) + round;
123
+ return saddu64(env, vxrm, c, res);
124
+}
125
+
126
+#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
127
+static inline void \
128
+do_##NAME(void *vd, void *vs1, void *vs2, int i, \
129
+ CPURISCVState *env, int vxrm) \
130
+{ \
131
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
132
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
133
+ TD d = *((TD *)vd + HD(i)); \
134
+ *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \
135
+}
136
+
137
+RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
138
+RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
139
+RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
140
+GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh)
141
+GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl)
142
+GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq)
143
+
144
+#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
145
+static inline void \
146
+do_##NAME(void *vd, target_long s1, void *vs2, int i, \
147
+ CPURISCVState *env, int vxrm) \
148
+{ \
149
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
150
+ TD d = *((TD *)vd + HD(i)); \
151
+ *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \
152
+}
153
+
154
+RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
155
+RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
156
+RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
157
+GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh)
158
+GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl)
159
+GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq)
160
+
161
+static inline int16_t
162
+vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
163
+{
164
+ uint8_t round;
165
+ int16_t res = (int16_t)a * b;
166
+
167
+ round = get_round(vxrm, res, 4);
168
+ res = (res >> 4) + round;
169
+ return sadd16(env, vxrm, c, res);
170
+}
171
+
172
+static inline int32_t
173
+vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
174
+{
175
+ uint8_t round;
176
+ int32_t res = (int32_t)a * b;
177
+
178
+ round = get_round(vxrm, res, 8);
179
+ res = (res >> 8) + round;
180
+ return sadd32(env, vxrm, c, res);
181
+
182
+}
183
+
184
+static inline int64_t
185
+vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
186
+{
187
+ uint8_t round;
188
+ int64_t res = (int64_t)a * b;
189
+
190
+ round = get_round(vxrm, res, 16);
191
+ res = (res >> 16) + round;
192
+ return sadd64(env, vxrm, c, res);
193
+}
194
+
195
+RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
196
+RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
197
+RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
198
+GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh)
199
+GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl)
200
+GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq)
201
+RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
202
+RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
203
+RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
204
+GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh)
205
+GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl)
206
+GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq)
207
+
208
+static inline int16_t
209
+vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
210
+{
211
+ uint8_t round;
212
+ int16_t res = a * (int16_t)b;
213
+
214
+ round = get_round(vxrm, res, 4);
215
+ res = (res >> 4) + round;
216
+ return ssub16(env, vxrm, c, res);
217
+}
218
+
219
+static inline int32_t
220
+vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c)
221
+{
222
+ uint8_t round;
223
+ int32_t res = a * (int32_t)b;
224
+
225
+ round = get_round(vxrm, res, 8);
226
+ res = (res >> 8) + round;
227
+ return ssub32(env, vxrm, c, res);
228
+}
229
+
230
+static inline int64_t
231
+vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
232
+{
233
+ uint8_t round;
234
+ int64_t res = a * (int64_t)b;
235
+
236
+ round = get_round(vxrm, res, 16);
237
+ res = (res >> 16) + round;
238
+ return ssub64(env, vxrm, c, res);
239
+}
240
+
241
+RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
242
+RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
243
+RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
244
+GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh)
245
+GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl)
246
+GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq)
247
+RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
248
+RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
249
+RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
250
+GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh)
251
+GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl)
252
+GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq)
253
+
254
+static inline int16_t
255
+vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
256
+{
257
+ uint8_t round;
258
+ int16_t res = (int16_t)a * b;
259
+
260
+ round = get_round(vxrm, res, 4);
261
+ res = (res >> 4) + round;
262
+ return ssub16(env, vxrm, c, res);
263
+}
264
+
265
+static inline int32_t
266
+vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
267
+{
268
+ uint8_t round;
269
+ int32_t res = (int32_t)a * b;
270
+
271
+ round = get_round(vxrm, res, 8);
272
+ res = (res >> 8) + round;
273
+ return ssub32(env, vxrm, c, res);
274
+}
275
+
276
+static inline int64_t
277
+vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
278
+{
279
+ uint8_t round;
280
+ int64_t res = (int64_t)a * b;
281
+
282
+ round = get_round(vxrm, res, 16);
283
+ res = (res >> 16) + round;
284
+ return ssub64(env, vxrm, c, res);
285
+}
286
+
287
+RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
288
+RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
289
+RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
290
+GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh)
291
+GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl)
292
+GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)
293
--
294
2.27.0
295
296
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-29-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 17 ++++
10
target/riscv/insn32.decode | 6 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 8 ++
12
target/riscv/vector_helper.c | 117 ++++++++++++++++++++++++
13
4 files changed, 148 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32)
36
+DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
37
+DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
38
+DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
39
+DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
40
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn32.decode
43
+++ b/target/riscv/insn32.decode
44
@@ -XXX,XX +XXX,XX @@ vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
45
vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
46
vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
47
vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
48
+vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
49
+vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
50
+vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
51
+vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
52
+vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
53
+vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
54
55
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
56
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
57
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/riscv/insn_trans/trans_rvv.inc.c
60
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
61
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx)
62
GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx)
63
GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx)
64
GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx)
65
+
66
+/* Vector Single-Width Scaling Shift Instructions */
67
+GEN_OPIVV_TRANS(vssrl_vv, opivv_check)
68
+GEN_OPIVV_TRANS(vssra_vv, opivv_check)
69
+GEN_OPIVX_TRANS(vssrl_vx, opivx_check)
70
+GEN_OPIVX_TRANS(vssra_vx, opivx_check)
71
+GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check)
72
+GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check)
73
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/target/riscv/vector_helper.c
76
+++ b/target/riscv/vector_helper.c
77
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
78
GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh)
79
GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl)
80
GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)
81
+
82
+/* Vector Single-Width Scaling Shift Instructions */
83
+static inline uint8_t
84
+vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
85
+{
86
+ uint8_t round, shift = b & 0x7;
87
+ uint8_t res;
88
+
89
+ round = get_round(vxrm, a, shift);
90
+ res = (a >> shift) + round;
91
+ return res;
92
+}
93
+static inline uint16_t
94
+vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
95
+{
96
+ uint8_t round, shift = b & 0xf;
97
+ uint16_t res;
98
+
99
+ round = get_round(vxrm, a, shift);
100
+ res = (a >> shift) + round;
101
+ return res;
102
+}
103
+static inline uint32_t
104
+vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
105
+{
106
+ uint8_t round, shift = b & 0x1f;
107
+ uint32_t res;
108
+
109
+ round = get_round(vxrm, a, shift);
110
+ res = (a >> shift) + round;
111
+ return res;
112
+}
113
+static inline uint64_t
114
+vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
115
+{
116
+ uint8_t round, shift = b & 0x3f;
117
+ uint64_t res;
118
+
119
+ round = get_round(vxrm, a, shift);
120
+ res = (a >> shift) + round;
121
+ return res;
122
+}
123
+RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
124
+RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
125
+RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
126
+RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
127
+GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb)
128
+GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh)
129
+GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl)
130
+GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq)
131
+
132
+RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
133
+RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
134
+RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
135
+RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
136
+GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb)
137
+GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh)
138
+GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl)
139
+GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq)
140
+
141
+static inline int8_t
142
+vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
143
+{
144
+ uint8_t round, shift = b & 0x7;
145
+ int8_t res;
146
+
147
+ round = get_round(vxrm, a, shift);
148
+ res = (a >> shift) + round;
149
+ return res;
150
+}
151
+static inline int16_t
152
+vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
153
+{
154
+ uint8_t round, shift = b & 0xf;
155
+ int16_t res;
156
+
157
+ round = get_round(vxrm, a, shift);
158
+ res = (a >> shift) + round;
159
+ return res;
160
+}
161
+static inline int32_t
162
+vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
163
+{
164
+ uint8_t round, shift = b & 0x1f;
165
+ int32_t res;
166
+
167
+ round = get_round(vxrm, a, shift);
168
+ res = (a >> shift) + round;
169
+ return res;
170
+}
171
+static inline int64_t
172
+vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
173
+{
174
+ uint8_t round, shift = b & 0x3f;
175
+ int64_t res;
176
+
177
+ round = get_round(vxrm, a, shift);
178
+ res = (a >> shift) + round;
179
+ return res;
180
+}
181
+RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
182
+RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
183
+RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
184
+RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
185
+GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb)
186
+GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh)
187
+GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl)
188
+GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq)
189
+
190
+RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
191
+RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
192
+RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
193
+RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
194
+GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb)
195
+GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh)
196
+GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl)
197
+GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq)
198
--
199
2.27.0
200
201
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-30-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 13 +++
10
target/riscv/insn32.decode | 6 +
11
target/riscv/insn_trans/trans_rvv.inc.c | 8 ++
12
target/riscv/vector_helper.c | 141 ++++++++++++++++++++++++
13
4 files changed, 168 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
20
DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
21
DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
22
DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
31
+DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
32
+DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
33
+DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32)
34
+DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32)
35
+DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32)
36
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/riscv/insn32.decode
39
+++ b/target/riscv/insn32.decode
40
@@ -XXX,XX +XXX,XX @@ vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
41
vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
42
vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
43
vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
44
+vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
45
+vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
46
+vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
47
+vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
48
+vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
49
+vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
50
51
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
52
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
53
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/riscv/insn_trans/trans_rvv.inc.c
56
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
57
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssrl_vx, opivx_check)
58
GEN_OPIVX_TRANS(vssra_vx, opivx_check)
59
GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check)
60
GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check)
61
+
62
+/* Vector Narrowing Fixed-Point Clip Instructions */
63
+GEN_OPIVV_NARROW_TRANS(vnclipu_vv)
64
+GEN_OPIVV_NARROW_TRANS(vnclip_vv)
65
+GEN_OPIVX_NARROW_TRANS(vnclipu_vx)
66
+GEN_OPIVX_NARROW_TRANS(vnclip_vx)
67
+GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx)
68
+GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx)
69
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/riscv/vector_helper.c
72
+++ b/target/riscv/vector_helper.c
73
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
74
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
75
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
76
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
77
+#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
78
+#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
79
+#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
80
+#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
81
+#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
82
+#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
83
84
/* operation of two vector elements */
85
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
86
@@ -XXX,XX +XXX,XX @@ vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
87
res = (a >> shift) + round;
88
return res;
89
}
90
+
91
RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
92
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
93
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
94
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb)
95
GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh)
96
GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl)
97
GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq)
98
+
99
+/* Vector Narrowing Fixed-Point Clip Instructions */
100
+static inline int8_t
101
+vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
102
+{
103
+ uint8_t round, shift = b & 0xf;
104
+ int16_t res;
105
+
106
+ round = get_round(vxrm, a, shift);
107
+ res = (a >> shift) + round;
108
+ if (res > INT8_MAX) {
109
+ env->vxsat = 0x1;
110
+ return INT8_MAX;
111
+ } else if (res < INT8_MIN) {
112
+ env->vxsat = 0x1;
113
+ return INT8_MIN;
114
+ } else {
115
+ return res;
116
+ }
117
+}
118
+
119
+static inline int16_t
120
+vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
121
+{
122
+ uint8_t round, shift = b & 0x1f;
123
+ int32_t res;
124
+
125
+ round = get_round(vxrm, a, shift);
126
+ res = (a >> shift) + round;
127
+ if (res > INT16_MAX) {
128
+ env->vxsat = 0x1;
129
+ return INT16_MAX;
130
+ } else if (res < INT16_MIN) {
131
+ env->vxsat = 0x1;
132
+ return INT16_MIN;
133
+ } else {
134
+ return res;
135
+ }
136
+}
137
+
138
+static inline int32_t
139
+vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
140
+{
141
+ uint8_t round, shift = b & 0x3f;
142
+ int64_t res;
143
+
144
+ round = get_round(vxrm, a, shift);
145
+ res = (a >> shift) + round;
146
+ if (res > INT32_MAX) {
147
+ env->vxsat = 0x1;
148
+ return INT32_MAX;
149
+ } else if (res < INT32_MIN) {
150
+ env->vxsat = 0x1;
151
+ return INT32_MIN;
152
+ } else {
153
+ return res;
154
+ }
155
+}
156
+
157
+RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
158
+RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
159
+RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
160
+GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb)
161
+GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh)
162
+GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl)
163
+
164
+RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
165
+RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
166
+RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
167
+GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb)
168
+GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh)
169
+GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl)
170
+
171
+static inline uint8_t
172
+vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
173
+{
174
+ uint8_t round, shift = b & 0xf;
175
+ uint16_t res;
176
+
177
+ round = get_round(vxrm, a, shift);
178
+ res = (a >> shift) + round;
179
+ if (res > UINT8_MAX) {
180
+ env->vxsat = 0x1;
181
+ return UINT8_MAX;
182
+ } else {
183
+ return res;
184
+ }
185
+}
186
+
187
+static inline uint16_t
188
+vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
189
+{
190
+ uint8_t round, shift = b & 0x1f;
191
+ uint32_t res;
192
+
193
+ round = get_round(vxrm, a, shift);
194
+ res = (a >> shift) + round;
195
+ if (res > UINT16_MAX) {
196
+ env->vxsat = 0x1;
197
+ return UINT16_MAX;
198
+ } else {
199
+ return res;
200
+ }
201
+}
202
+
203
+static inline uint32_t
204
+vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
205
+{
206
+ uint8_t round, shift = b & 0x3f;
207
+ int64_t res;
208
+
209
+ round = get_round(vxrm, a, shift);
210
+ res = (a >> shift) + round;
211
+ if (res > UINT32_MAX) {
212
+ env->vxsat = 0x1;
213
+ return UINT32_MAX;
214
+ } else {
215
+ return res;
216
+ }
217
+}
218
+
219
+RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
220
+RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
221
+RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
222
+GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb)
223
+GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh)
224
+GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl)
225
+
226
+RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
227
+RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
228
+RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
229
+GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb)
230
+GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh)
231
+GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl)
232
--
233
2.27.0
234
235
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-32-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 17 +++
10
target/riscv/insn32.decode | 8 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 149 ++++++++++++++++++++++++
12
target/riscv/vector_helper.c | 83 +++++++++++++
13
4 files changed, 257 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
20
DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
21
DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
22
DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32)
33
+DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32)
34
+DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
35
+DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
36
+DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32)
37
+DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32)
38
+DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32)
39
+DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32)
40
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/riscv/insn32.decode
43
+++ b/target/riscv/insn32.decode
44
@@ -XXX,XX +XXX,XX @@ vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
45
vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
46
vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
47
vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
48
+vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
49
+vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
50
+vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
51
+vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
52
+vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
53
+vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
54
+vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
55
+vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
56
57
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
58
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
59
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/riscv/insn_trans/trans_rvv.inc.c
62
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
64
GEN_OPFVF_TRANS(vfadd_vf, opfvf_check)
65
GEN_OPFVF_TRANS(vfsub_vf, opfvf_check)
66
GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check)
67
+
68
+/* Vector Widening Floating-Point Add/Subtract Instructions */
69
+static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
70
+{
71
+ return (vext_check_isa_ill(s) &&
72
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
73
+ vext_check_reg(s, a->rd, true) &&
74
+ vext_check_reg(s, a->rs2, false) &&
75
+ vext_check_reg(s, a->rs1, false) &&
76
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
77
+ 1 << s->lmul) &&
78
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
79
+ 1 << s->lmul) &&
80
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
81
+}
82
+
83
+/* OPFVV with WIDEN */
84
+#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \
85
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
86
+{ \
87
+ if (CHECK(s, a)) { \
88
+ uint32_t data = 0; \
89
+ static gen_helper_gvec_4_ptr * const fns[2] = { \
90
+ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
91
+ }; \
92
+ TCGLabel *over = gen_new_label(); \
93
+ gen_set_rm(s, 7); \
94
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
95
+ \
96
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
97
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
98
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
99
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
100
+ vreg_ofs(s, a->rs1), \
101
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
102
+ s->vlen / 8, data, fns[s->sew - 1]); \
103
+ gen_set_label(over); \
104
+ return true; \
105
+ } \
106
+ return false; \
107
+}
108
+
109
+GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check)
110
+GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check)
111
+
112
+static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
113
+{
114
+ return (vext_check_isa_ill(s) &&
115
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
116
+ vext_check_reg(s, a->rd, true) &&
117
+ vext_check_reg(s, a->rs2, false) &&
118
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
119
+ 1 << s->lmul) &&
120
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
121
+}
122
+
123
+/* OPFVF with WIDEN */
124
+#define GEN_OPFVF_WIDEN_TRANS(NAME) \
125
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
126
+{ \
127
+ if (opfvf_widen_check(s, a)) { \
128
+ uint32_t data = 0; \
129
+ static gen_helper_opfvf *const fns[2] = { \
130
+ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
131
+ }; \
132
+ gen_set_rm(s, 7); \
133
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
134
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
135
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
136
+ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
137
+ fns[s->sew - 1], s); \
138
+ } \
139
+ return false; \
140
+}
141
+
142
+GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
143
+GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
144
+
145
+static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
146
+{
147
+ return (vext_check_isa_ill(s) &&
148
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
149
+ vext_check_reg(s, a->rd, true) &&
150
+ vext_check_reg(s, a->rs2, true) &&
151
+ vext_check_reg(s, a->rs1, false) &&
152
+ vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
153
+ 1 << s->lmul) &&
154
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
155
+}
156
+
157
+/* WIDEN OPFVV with WIDEN */
158
+#define GEN_OPFWV_WIDEN_TRANS(NAME) \
159
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
160
+{ \
161
+ if (opfwv_widen_check(s, a)) { \
162
+ uint32_t data = 0; \
163
+ static gen_helper_gvec_4_ptr * const fns[2] = { \
164
+ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
165
+ }; \
166
+ TCGLabel *over = gen_new_label(); \
167
+ gen_set_rm(s, 7); \
168
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
169
+ \
170
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
171
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
172
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
173
+ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
174
+ vreg_ofs(s, a->rs1), \
175
+ vreg_ofs(s, a->rs2), cpu_env, 0, \
176
+ s->vlen / 8, data, fns[s->sew - 1]); \
177
+ gen_set_label(over); \
178
+ return true; \
179
+ } \
180
+ return false; \
181
+}
182
+
183
+GEN_OPFWV_WIDEN_TRANS(vfwadd_wv)
184
+GEN_OPFWV_WIDEN_TRANS(vfwsub_wv)
185
+
186
+static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a)
187
+{
188
+ return (vext_check_isa_ill(s) &&
189
+ vext_check_overlap_mask(s, a->rd, a->vm, true) &&
190
+ vext_check_reg(s, a->rd, true) &&
191
+ vext_check_reg(s, a->rs2, true) &&
192
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
193
+}
194
+
195
+/* WIDEN OPFVF with WIDEN */
196
+#define GEN_OPFWF_WIDEN_TRANS(NAME) \
197
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
198
+{ \
199
+ if (opfwf_widen_check(s, a)) { \
200
+ uint32_t data = 0; \
201
+ static gen_helper_opfvf *const fns[2] = { \
202
+ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
203
+ }; \
204
+ gen_set_rm(s, 7); \
205
+ data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
206
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
207
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
208
+ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
209
+ fns[s->sew - 1], s); \
210
+ } \
211
+ return false; \
212
+}
213
+
214
+GEN_OPFWF_WIDEN_TRANS(vfwadd_wf)
215
+GEN_OPFWF_WIDEN_TRANS(vfwsub_wf)
216
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
217
index XXXXXXX..XXXXXXX 100644
218
--- a/target/riscv/vector_helper.c
219
+++ b/target/riscv/vector_helper.c
220
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
221
GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh)
222
GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl)
223
GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq)
224
+
225
+/* Vector Widening Floating-Point Add/Subtract Instructions */
226
+static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
227
+{
228
+ return float32_add(float16_to_float32(a, true, s),
229
+ float16_to_float32(b, true, s), s);
230
+}
231
+
232
+static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
233
+{
234
+ return float64_add(float32_to_float64(a, s),
235
+ float32_to_float64(b, s), s);
236
+
237
+}
238
+
239
+RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
240
+RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
241
+GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl)
242
+GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq)
243
+RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
244
+RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
245
+GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl)
246
+GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq)
247
+
248
+static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
249
+{
250
+ return float32_sub(float16_to_float32(a, true, s),
251
+ float16_to_float32(b, true, s), s);
252
+}
253
+
254
+static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
255
+{
256
+ return float64_sub(float32_to_float64(a, s),
257
+ float32_to_float64(b, s), s);
258
+
259
+}
260
+
261
+RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
262
+RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
263
+GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl)
264
+GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq)
265
+RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
266
+RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
267
+GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl)
268
+GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq)
269
+
270
+static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
271
+{
272
+ return float32_add(a, float16_to_float32(b, true, s), s);
273
+}
274
+
275
+static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
276
+{
277
+ return float64_add(a, float32_to_float64(b, s), s);
278
+}
279
+
280
+RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
281
+RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
282
+GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl)
283
+GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq)
284
+RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
285
+RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
286
+GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl)
287
+GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq)
288
+
289
+static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
290
+{
291
+ return float32_sub(a, float16_to_float32(b, true, s), s);
292
+}
293
+
294
+static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
295
+{
296
+ return float64_sub(a, float32_to_float64(b, s), s);
297
+}
298
+
299
+RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
300
+RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
301
+GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl)
302
+GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq)
303
+RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
304
+RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
305
+GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl)
306
+GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq)
307
--
308
2.27.0
309
310
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-33-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 16 ++++++++
10
target/riscv/insn32.decode | 5 +++
11
target/riscv/insn_trans/trans_rvv.inc.c | 7 ++++
12
target/riscv/vector_helper.c | 49 +++++++++++++++++++++++++
13
4 files changed, 77 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32)
20
DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32)
21
DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32)
22
DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32)
31
+DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32)
32
+DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32)
33
+DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32)
34
+DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32)
35
+DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32)
36
+DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32)
37
+DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32)
38
+DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32)
39
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/riscv/insn32.decode
42
+++ b/target/riscv/insn32.decode
43
@@ -XXX,XX +XXX,XX @@ vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
44
vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
45
vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
46
vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
47
+vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
48
+vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
49
+vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
50
+vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
51
+vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
52
53
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
54
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
55
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/riscv/insn_trans/trans_rvv.inc.c
58
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
59
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
60
61
GEN_OPFWF_WIDEN_TRANS(vfwadd_wf)
62
GEN_OPFWF_WIDEN_TRANS(vfwsub_wf)
63
+
64
+/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
65
+GEN_OPFVV_TRANS(vfmul_vv, opfvv_check)
66
+GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check)
67
+GEN_OPFVF_TRANS(vfmul_vf, opfvf_check)
68
+GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check)
69
+GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check)
70
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/riscv/vector_helper.c
73
+++ b/target/riscv/vector_helper.c
74
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
75
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
76
GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl)
77
GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq)
78
+
79
+/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
80
+RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
81
+RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
82
+RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
83
+GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh)
84
+GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl)
85
+GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq)
86
+RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
87
+RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
88
+RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
89
+GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh)
90
+GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl)
91
+GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq)
92
+
93
+RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
94
+RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
95
+RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
96
+GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh)
97
+GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl)
98
+GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq)
99
+RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
100
+RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
101
+RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
102
+GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh)
103
+GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl)
104
+GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq)
105
+
106
+static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
107
+{
108
+ return float16_div(b, a, s);
109
+}
110
+
111
+static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
112
+{
113
+ return float32_div(b, a, s);
114
+}
115
+
116
+static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
117
+{
118
+ return float64_div(b, a, s);
119
+}
120
+
121
+RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
122
+RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
123
+RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
124
+GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh)
125
+GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl)
126
+GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq)
127
--
128
2.27.0
129
130
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-34-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 5 +++++
10
target/riscv/insn32.decode | 2 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++
12
target/riscv/vector_helper.c | 22 ++++++++++++++++++++++
13
4 files changed, 33 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32)
20
DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32)
21
DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32)
22
DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32)
27
+DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32)
28
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/riscv/insn32.decode
31
+++ b/target/riscv/insn32.decode
32
@@ -XXX,XX +XXX,XX @@ vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
33
vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
34
vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
35
vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
36
+vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
37
+vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
38
39
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
40
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
41
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/riscv/insn_trans/trans_rvv.inc.c
44
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
45
@@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check)
46
GEN_OPFVF_TRANS(vfmul_vf, opfvf_check)
47
GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check)
48
GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check)
49
+
50
+/* Vector Widening Floating-Point Multiply */
51
+GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
52
+GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
53
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/target/riscv/vector_helper.c
56
+++ b/target/riscv/vector_helper.c
57
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
58
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh)
59
GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl)
60
GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq)
61
+
62
+/* Vector Widening Floating-Point Multiply */
63
+static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
64
+{
65
+ return float32_mul(float16_to_float32(a, true, s),
66
+ float16_to_float32(b, true, s), s);
67
+}
68
+
69
+static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
70
+{
71
+ return float64_mul(float32_to_float64(a, s),
72
+ float32_to_float64(b, s), s);
73
+
74
+}
75
+RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
76
+RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
77
+GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl)
78
+GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq)
79
+RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
80
+RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
81
+GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl)
82
+GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq)
83
--
84
2.27.0
85
86
diff view generated by jsdifflib
Deleted patch
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
2
1
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Message-id: 20200623215920.2594-35-zhiwei_liu@c-sky.com
7
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
8
---
9
target/riscv/helper.h | 49 +++++
10
target/riscv/insn32.decode | 16 ++
11
target/riscv/insn_trans/trans_rvv.inc.c | 18 ++
12
target/riscv/vector_helper.c | 251 ++++++++++++++++++++++++
13
4 files changed, 334 insertions(+)
14
15
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/helper.h
18
+++ b/target/riscv/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
20
DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
21
DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32)
22
DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32)
23
+
24
+DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
25
+DEF_HELPER_6(vfmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
26
+DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
27
+DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
28
+DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
29
+DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
30
+DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
31
+DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
32
+DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
33
+DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
34
+DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
35
+DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
36
+DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
37
+DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
38
+DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
39
+DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
40
+DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
41
+DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
42
+DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
43
+DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
44
+DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
45
+DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
46
+DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
47
+DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
48
+DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
49
+DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
50
+DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32)
51
+DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
52
+DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
53
+DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32)
54
+DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
55
+DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
56
+DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32)
57
+DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
58
+DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
59
+DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32)
60
+DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32)
61
+DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32)
62
+DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32)
63
+DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32)
64
+DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32)
65
+DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32)
66
+DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
67
+DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
68
+DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
69
+DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
70
+DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
71
+DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
72
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
73
index XXXXXXX..XXXXXXX 100644
74
--- a/target/riscv/insn32.decode
75
+++ b/target/riscv/insn32.decode
76
@@ -XXX,XX +XXX,XX @@ vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
77
vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
78
vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
79
vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
80
+vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
81
+vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
82
+vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
83
+vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
84
+vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
85
+vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
86
+vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
87
+vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
88
+vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
89
+vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
90
+vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
91
+vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
92
+vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
93
+vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
94
+vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
95
+vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
96
97
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
98
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
99
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/riscv/insn_trans/trans_rvv.inc.c
102
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
103
@@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check)
104
/* Vector Widening Floating-Point Multiply */
105
GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
106
GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
107
+
108
+/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
109
+GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
110
+GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check)
111
+GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check)
112
+GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check)
113
+GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check)
114
+GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check)
115
+GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check)
116
+GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check)
117
+GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check)
118
+GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check)
119
+GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check)
120
+GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check)
121
+GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check)
122
+GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check)
123
+GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
124
+GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)
125
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/riscv/vector_helper.c
128
+++ b/target/riscv/vector_helper.c
129
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
130
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
131
GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl)
132
GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq)
133
+
134
+/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
135
+#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
136
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
137
+ CPURISCVState *env) \
138
+{ \
139
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
140
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
141
+ TD d = *((TD *)vd + HD(i)); \
142
+ *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
143
+}
144
+
145
+static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
146
+{
147
+ return float16_muladd(a, b, d, 0, s);
148
+}
149
+
150
+static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
151
+{
152
+ return float32_muladd(a, b, d, 0, s);
153
+}
154
+
155
+static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
156
+{
157
+ return float64_muladd(a, b, d, 0, s);
158
+}
159
+
160
+RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
161
+RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
162
+RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
163
+GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh)
164
+GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl)
165
+GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq)
166
+
167
+#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
168
+static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
169
+ CPURISCVState *env) \
170
+{ \
171
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
172
+ TD d = *((TD *)vd + HD(i)); \
173
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
174
+}
175
+
176
+RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
177
+RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
178
+RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
179
+GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh)
180
+GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl)
181
+GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq)
182
+
183
+static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
184
+{
185
+ return float16_muladd(a, b, d,
186
+ float_muladd_negate_c | float_muladd_negate_product, s);
187
+}
188
+
189
+static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
190
+{
191
+ return float32_muladd(a, b, d,
192
+ float_muladd_negate_c | float_muladd_negate_product, s);
193
+}
194
+
195
+static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
196
+{
197
+ return float64_muladd(a, b, d,
198
+ float_muladd_negate_c | float_muladd_negate_product, s);
199
+}
200
+
201
+RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
202
+RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
203
+RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
204
+GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh)
205
+GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl)
206
+GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq)
207
+RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
208
+RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
209
+RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
210
+GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh)
211
+GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl)
212
+GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq)
213
+
214
+static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
215
+{
216
+ return float16_muladd(a, b, d, float_muladd_negate_c, s);
217
+}
218
+
219
+static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
220
+{
221
+ return float32_muladd(a, b, d, float_muladd_negate_c, s);
222
+}
223
+
224
+static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
225
+{
226
+ return float64_muladd(a, b, d, float_muladd_negate_c, s);
227
+}
228
+
229
+RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
230
+RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
231
+RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
232
+GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh)
233
+GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl)
234
+GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq)
235
+RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
236
+RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
237
+RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
238
+GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh)
239
+GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl)
240
+GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq)
241
+
242
+static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
243
+{
244
+ return float16_muladd(a, b, d, float_muladd_negate_product, s);
245
+}
246
+
247
+static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
248
+{
249
+ return float32_muladd(a, b, d, float_muladd_negate_product, s);
250
+}
251
+
252
+static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
253
+{
254
+ return float64_muladd(a, b, d, float_muladd_negate_product, s);
255
+}
256
+
257
+RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
258
+RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
259
+RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
260
+GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh)
261
+GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl)
262
+GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq)
263
+RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
264
+RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
265
+RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
266
+GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh)
267
+GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl)
268
+GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq)
269
+
270
+static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
271
+{
272
+ return float16_muladd(d, b, a, 0, s);
273
+}
274
+
275
+static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
276
+{
277
+ return float32_muladd(d, b, a, 0, s);
278
+}
279
+
280
+static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
281
+{
282
+ return float64_muladd(d, b, a, 0, s);
283
+}
284
+
285
+RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
286
+RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
287
+RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
288
+GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh)
289
+GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl)
290
+GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq)
291
+RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
292
+RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
293
+RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
294
+GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh)
295
+GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl)
296
+GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq)
297
+
298
+static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
299
+{
300
+ return float16_muladd(d, b, a,
301
+ float_muladd_negate_c | float_muladd_negate_product, s);
302
+}
303
+
304
+static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
305
+{
306
+ return float32_muladd(d, b, a,
307
+ float_muladd_negate_c | float_muladd_negate_product, s);
308
+}
309
+
310
+static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
311
+{
312
+ return float64_muladd(d, b, a,
313
+ float_muladd_negate_c | float_muladd_negate_product, s);
314
+}
315
+
316
+RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
317
+RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
318
+RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
319
+GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh)
320
+GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl)
321
+GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq)
322
+RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
323
+RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
324
+RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
325
+GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh)
326
+GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl)
327
+GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq)
328
+
329
+static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
330
+{
331
+ return float16_muladd(d, b, a, float_muladd_negate_c, s);
332
+}
333
+
334
+static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
335
+{
336
+ return float32_muladd(d, b, a, float_muladd_negate_c, s);
337
+}
338
+
339
+static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
340
+{
341
+ return float64_muladd(d, b, a, float_muladd_negate_c, s);
342
+}
343
+
344
+RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
345
+RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
346
+RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
347
+GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh)
348
+GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl)
349
+GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq)
350
+RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
351
+RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
352
+RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
353
+GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh)
354
+GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl)
355
+GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq)
356
+
357
+static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
358
+{
359
+ return float16_muladd(d, b, a, float_muladd_negate_product, s);
360
+}
361
+
362
+static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
363
+{
364
+ return float32_muladd(d, b, a, float_muladd_negate_product, s);
365
+}
366
+
367
+static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
368
+{
369
+ return float64_muladd(d, b, a, float_muladd_negate_product, s);
370
+}
371
+
372
+RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
373
+RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
374
+RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
375
+GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh)
376
+GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl)
377
+GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq)
378
+RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
379
+RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
380
+RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
381
+GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh)
382
+GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl)
383
+GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq)
384
--
385
2.27.0
386
387
diff view generated by jsdifflib